Source code for PubChemFingerprints

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
This file provides internal functions to calculate pubchem fingerprints
If you have any questions, please feel free to contact us.
E-mail: biomed@csu.edu.cn

@File name: PubChemFingerprints
@author: Jie Dong and Zhijiang Yao
"""

from rdkit import Chem
from rdkit import DataStructs
# these are SMARTS patterns corresponding to the PubChem fingerprints
# https://astro.temple.edu/~tua87106/list_fingerprints.pdf
# ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt

smartsPatts = {
1:('[H]', 3),# 1-115
2:('[H]', 7),
3:('[H]', 15),
4:('[H]', 31),
5:('[Li]', 0),
6:('[Li]', 1),
7:('[B]', 0),
8:('[B]', 1),
9:('[B]', 3),
10:('[C]', 1),
11:('[C]', 3),
12:('[C]', 7),
13:('[C]', 15),
14:('[C]', 31),
15:('[N]', 0),
16:('[N]', 1),
17:('[N]', 3),
18:('[N]', 7),
19:('[O]', 0),
20:('[O]', 1),
21:('[O]', 3),
22:('[O]', 7),
23:('[O]', 15),
24:('[F]', 0),
25:('[F]', 1),
26:('[F]', 3),
27:('[Na]', 0),
28:('[Na]', 1),
29:('[Si]', 0),
30:('[Si]', 1),
31:('[P]', 0),
32:('[P]', 1),
33:('[P]', 3),
34:('[S]', 0),
35:('[S]', 1),
36:('[S]', 3),
37:('[S]', 7),
38:('[Cl]', 0),
39:('[Cl]', 1),
40:('[Cl]', 3),
41:('[Cl]', 7),
42:('[K]', 0),
43:('[K]', 1),
44:('[Br]', 0),
45:('[Br]', 1),
46:('[Br]', 3),
47:('[I]', 0),
48:('[I]', 1),
49:('[I]', 3),
50:('[Be]', 0),
51:('[Mg]', 0),
52:('[Al]', 0),
53:('[Ca]', 0),
54:('[Sc]', 0),
55:('[Ti]', 0),
56:('[V]', 0),
57:('[Cr]', 0),
58:('[Mn]', 0),
59:('[Fe]', 0),
60:('[CO]', 0),
61:('[Ni]', 0),
62:('[Cu]', 0),
63:('[Zn]', 0),
64:('[Ga]', 0),
65:('[Ge]', 0),
66:('[As]', 0),
67:('[Se]', 0),
68:('[Kr]', 0),
69:('[Rb]', 0),
70:('[Sr]', 0),
71:('[Y]', 0),
72:('[Zr]', 0),
73:('[Nb]', 0),
74:('[Mo]', 0),
75:('[Ru]', 0),
76:('[Rh]', 0),
77:('[Pd]', 0),
78:('[Ag]', 0),
79:('[Cd]', 0),
80:('[In]', 0),
81:('[Sn]', 0),
82:('[Sb]', 0),
83:('[Te]', 0),
84:('[Xe]', 0),
85:('[Cs]', 0),
86:('[Ba]', 0),
87:('[Lu]', 0),
88:('[Hf]', 0),
89:('[Ta]', 0),
90:('[W]', 0),
91:('[Re]', 0),
92:('[Os]', 0),
93:('[Ir]', 0),
94:('[Pt]', 0),
95:('[Au]', 0),
96:('[Hg]', 0),
97:('[Tl]', 0),
98:('[Pb]', 0),
99:('[Bi]', 0),
100:('[La]', 0),
101:('[Ce]', 0),
102:('[Pr]', 0),
103:('[Nd]', 0),
104:('[Pm]', 0),
105:('[Sm]', 0),
106:('[Eu]', 0),
107:('[Gd]', 0),
108:('[Tb]', 0),
109:('[Dy]', 0),
110:('[Ho]', 0),
111:('[Er]', 0),
112:('[Tm]', 0),
113:('[Yb]', 0),
114:('[Tc]', 0),
115:('[U]', 0),
116:('[Li&!H0]', 0),#264-881
117:('[Li]~[Li]', 0),
118:('[Li]~[#5]', 0),
119:('[Li]~[#6]', 0),
120:('[Li]~[#8]', 0),
121:('[Li]~[F]', 0),
122:('[Li]~[#15]', 0),
123:('[Li]~[#16]', 0),
124:('[Li]~[Cl]', 0),
125:('[#5&!H0]', 0),
126:('[#5]~[#5]', 0),
127:('[#5]~[#6]', 0),
128:('[#5]~[#7]', 0),
129:('[#5]~[#8]', 0),
130:('[#5]~[F]', 0),
131:('[#5]~[#14]', 0),
132:('[#5]~[#15]', 0),
133:('[#5]~[#16]', 0),
134:('[#5]~[Cl]', 0),
135:('[#5]~[Br]', 0),
136:('[#6&!H0]', 0),
137:('[#6]~[#6]', 0),
138:('[#6]~[#7]', 0),
139:('[#6]~[#8]', 0),
140:('[#6]~[F]', 0),
141:('[#6]~[Na]', 0),
142:('[#6]~[Mg]', 0),
143:('[#6]~[Al]', 0),
144:('[#6]~[#14]', 0),
145:('[#6]~[#15]', 0),
146:('[#6]~[#16]', 0),
147:('[#6]~[Cl]', 0),
148:('[#6]~[#33]', 0),
149:('[#6]~[#34]', 0),
150:('[#6]~[Br]', 0),
151:('[#6]~[I]', 0),
152:('[#7&!H0]', 0),
153:('[#7]~[#7]', 0),
154:('[#7]~[#8]', 0),
155:('[#7]~[F]', 0),
156:('[#7]~[#14]', 0),
157:('[#7]~[#15]', 0),
158:('[#7]~[#16]', 0),
159:('[#7]~[Cl]', 0),
160:('[#7]~[Br]', 0),
161:('[#8&!H0]', 0),
162:('[#8]~[#8]', 0),
163:('[#8]~[Mg]', 0),
164:('[#8]~[Na]', 0),
165:('[#8]~[Al]', 0),
166:('[#8]~[#14]', 0),
167:('[#8]~[#15]', 0),
168:('[#8]~[K]', 0),
169:('[F]~[#15]', 0),
170:('[F]~[#16]', 0),
171:('[Al&!H0]', 0),
172:('[Al]~[Cl]', 0),
173:('[#14&!H0]', 0),
174:('[#14]~[#14]', 0),
175:('[#14]~[Cl]', 0),
176:('[#15&!H0]', 0),
177:('[#15]~[#15]', 0),
178:('[#33&!H0]', 0),
179:('[#33]~[#33]', 0),
180:('[#6](~Br)(~[#6])', 0),
181:('[#6](~Br)(~[#6])(~[#6])', 0),
182:('[#6&!H0]~[Br]', 0),
183:('[#6](~[Br])(:[c])', 0),
184:('[#6](~[Br])(:[n])', 0),
185:('[#6](~[#6])(~[#6])', 0),
186:('[#6](~[#6])(~[#6])(~[#6])', 0),
187:('[#6](~[#6])(~[#6])(~[#6])(~[#6])', 0),
188:('[#6H1](~[#6])(~[#6])(~[#6])', 0),
189:('[#6](~[#6])(~[#6])(~[#6])(~[#7])', 0),
190:('[#6](~[#6])(~[#6])(~[#6])(~[#8])', 0),
191:('[#6H1](~[#6])(~[#6])(~[#7])', 0),
192:('[#6H1](~[#6])(~[#6])(~[#8])', 0),
193:('[#6](~[#6])(~[#6])(~[#7])', 0),
194:('[#6](~[#6])(~[#6])(~[#8])', 0),
195:('[#6](~[#6])(~[Cl])', 0),
196:('[#6&!H0](~[#6])(~[Cl])', 0),
197:('[#6H,#6H2,#6H3,#6H4]~[#6]', 0),
198:('[#6&!H0](~[#6])(~[#7])', 0),
199:('[#6&!H0](~[#6])(~[#8])', 0),
200:('[#6H1](~[#6])(~[#8])(~[#8])', 0),
201:('[#6&!H0](~[#6])(~[#15])', 0),
202:('[#6&!H0](~[#6])(~[#16])', 0),
203:('[#6](~[#6])(~[I])', 0),
204:('[#6](~[#6])(~[#7])', 0),
205:('[#6](~[#6])(~[#8])', 0),
206:('[#6](~[#6])(~[#16])', 0),
207:('[#6](~[#6])(~[#14])', 0),
208:('[#6](~[#6])(:c)', 0),
209:('[#6](~[#6])(:c)(:c)', 0),
210:('[#6](~[#6])(:c)(:n)', 0),
211:('[#6](~[#6])(:n)', 0),
212:('[#6](~[#6])(:n)(:n)', 0),
213:('[#6](~[Cl])(~[Cl])', 0),
214:('[#6&!H0](~[Cl])', 0),
215:('[#6](~[Cl])(:c)', 0),
216:('[#6](~[F])(~[F])', 0),
217:('[#6](~[F])(:c)', 0),
218:('[#6&!H0](~[#7])', 0),
219:('[#6&!H0](~[#8])', 0),
220:('[#6&!H0](~[#8])(~[#8])', 0),
221:('[#6&!H0](~[#16])', 0),
222:('[#6&!H0](~[#14])', 0),
223:('[#6&!H0]:c', 0),
224:('[#6&!H0](:c)(:c)', 0),
225:('[#6&!H0](:c)(:n)', 0),
226:('[#6&!H0](:n)', 0),
227:('[#6H3]', 0),
228:('[#6](~[#7])(~[#7])', 0),
229:('[#6](~[#7])(:c)', 0),
230:('[#6](~[#7])(:c)(:c)', 0),
231:('[#6](~[#7])(:c)(:n)', 0),
232:('[#6](~[#7])(:n)', 0),
233:('[#6](~[#8])(~[#8])', 0),
234:('[#6](~[#8])(:c)', 0),
235:('[#6](~[#8])(:c)(:c)', 0),
236:('[#6](~[#16])(:c)', 0),
237:('[#6](:c)(:c)', 0),
238:('[#6](:c)(:c)(:c)', 0),
239:('[#6](:c)(:c)(:n)', 0),
240:('[#6](:c)(:n)', 0),
241:('[#6](:c)(:n)(:n)', 0),
242:('[#6](:n)(:n)', 0),
243:('[#7](~[#6])(~[#6])', 0),
244:('[#7](~[#6])(~[#6])(~[#6])', 0),
245:('[#7&!H0](~[#6])(~[#6])', 0),
246:('[#7&!H0](~[#6])', 0),
247:('[#7&!H0](~[#6])(~[#7])', 0),
248:('[#7](~[#6])(~[#8])', 0),
249:('[#7](~[#6])(:c)', 0),
250:('[#7](~[#6])(:c)(:c)', 0),
251:('[#7&!H0](~[#7])', 0),
252:('[#7&!H0](:c)', 0),
253:('[#7&!H0](:c)(:c)', 0),
254:('[#7](~[#8])(~[#8])', 0),
255:('[#7](~[#8])(:o)', 0),
256:('[#7](:c)(:c)', 0),
257:('[#7](:c)(:c)(:c)', 0),
258:('[#8](~[#6])(~[#6])', 0),
259:('[#8&!H0](~[#6])', 0),
260:('[#8](~[#6])(~[#15])', 0),
261:('[#8&!H0](~[#16])', 0),
262:('[#8](:c)(:c)', 0),
263:('[#15](~[#6])(~[#6])', 0),
264:('[#15](~[#8])(~[#8])', 0),
265:('[#16](~[#6])(~[#6])', 0),
266:('[#16&!H0](~[#6])', 0),
267:('[#16](~[#6])(~[#8])', 0),
268:('[#14](~[#6])(~[#6])', 0),
269:('[#6]=,:[#6]', 0),
270:('[#6]#[#6]', 0),
271:('[#6]=,:[#7]', 0),
272:('[#6]#[#7]', 0),
273:('[#6]=,:[#8]', 0),
274:('[#6]=,:[#16]', 0),
275:('[#7]=,:[#7]', 0),
276:('[#7]=,:[#8]', 0),
277:('[#7]=,:[#15]', 0),
278:('[#15]=,:[#8]', 0),
279:('[#15]=,:[#15]', 0),
280:('[#6](#[#6])(-,:[#6])', 0),
281:('[#6&!H0](#[#6])', 0),
282:('[#6](#[#7])(-,:[#6])', 0),
283:('[#6](-,:[#6])(-,:[#6])(=,:[#6])', 0),
284:('[#6](-,:[#6])(-,:[#6])(=,:[#7])', 0),
285:('[#6](-,:[#6])(-,:[#6])(=,:[#8])', 0),
286:('[#6](-,:[#6])([Cl])(=,:[#8])', 0),
287:('[#6&!H0](-,:[#6])(=,:[#6])', 0),
288:('[#6&!H0](-,:[#6])(=,:[#7])', 0),
289:('[#6&!H0](-,:[#6])(=,:[#8])', 0),
290:('[#6](-,:[#6])(-,:[#7])(=,:[#6])', 0),
291:('[#6](-,:[#6])(-,:[#7])(=,:[#7])', 0),
292:('[#6](-,:[#6])(-,:[#7])(=,:[#8])', 0),
293:('[#6](-,:[#6])(-,:[#8])(=,:[#8])', 0),
294:('[#6](-,:[#6])(=,:[#6])', 0),
295:('[#6](-,:[#6])(=,:[#7])', 0),
296:('[#6](-,:[#6])(=,:[#8])', 0),
297:('[#6]([Cl])(=,:[#8])', 0),
298:('[#6&!H0](-,:[#7])(=,:[#6])', 0),
299:('[#6&!H0](=,:[#6])', 0),
300:('[#6&!H0](=,:[#7])', 0),
301:('[#6&!H0](=,:[#8])', 0),
302:('[#6](-,:[#7])(=,:[#6])', 0),
303:('[#6](-,:[#7])(=,:[#7])', 0),
304:('[#6](-,:[#7])(=,:[#8])', 0),
305:('[#6](-,:[#8])(=,:[#8])', 0),
306:('[#7](-,:[#6])(=,:[#6])', 0),
307:('[#7](-,:[#6])(=,:[#8])', 0),
308:('[#7](-,:[#8])(=,:[#8])', 0),
309:('[#15](-,:[#8])(=,:[#8])', 0),
310:('[#16](-,:[#6])(=,:[#8])', 0),
311:('[#16](-,:[#8])(=,:[#8])', 0),
312:('[#16](=,:[#8])(=,:[#8])', 0),
313:('[#6]-,:[#6]-,:[#6]#[#6]', 0),
314:('[#8]-,:[#6]-,:[#6]=,:[#7]', 0),
315:('[#8]-,:[#6]-,:[#6]=,:[#8]', 0),
316:('[#7]:[#6]-,:[#16&!H0]', 0),
317:('[#7]-,:[#6]-,:[#6]=,:[#6]', 0),
318:('[#8]=,:[#16]-,:[#6]-,:[#6]', 0),
319:('[#7]#[#6]-,:[#6]=,:[#6]', 0),
320:('[#6]=,:[#7]-,:[#7]-,:[#6]', 0),
321:('[#8]=,:[#16]-,:[#6]-,:[#7]', 0),
322:('[#16]-,:[#16]-,:[#6]:[#6]', 0),
323:('[#6]:[#6]-,:[#6]=,:[#6]', 0),
324:('[#16]:[#6]:[#6]:[#6]', 0),
325:('[#6]:[#7]:[#6]-,:[#6]', 0),
326:('[#16]-,:[#6]:[#7]:[#6]', 0),
327:('[#16]:[#6]:[#6]:[#7]', 0),
328:('[#16]-,:[#6]=,:[#7]-,:[#6]', 0),
329:('[#6]-,:[#8]-,:[#6]=,:[#6]', 0),
330:('[#7]-,:[#7]-,:[#6]:[#6]', 0),
331:('[#16]-,:[#6]=,:[#7&!H0]', 0),
332:('[#16]-,:[#6]-,:[#16]-,:[#6]', 0),
333:('[#6]:[#16]:[#6]-,:[#6]', 0),
334:('[#8]-,:[#16]-,:[#6]:[#6]', 0),
335:('[#6]:[#7]-,:[#6]:[#6]', 0),
336:('[#7]-,:[#16]-,:[#6]:[#6]', 0),
337:('[#7]-,:[#6]:[#7]:[#6]', 0),
338:('[#7]:[#6]:[#6]:[#7]', 0),
339:('[#7]-,:[#6]:[#7]:[#7]', 0),
340:('[#7]-,:[#6]=,:[#7]-,:[#6]', 0),
341:('[#7]-,:[#6]=,:[#7&!H0]', 0),
342:('[#7]-,:[#6]-,:[#16]-,:[#6]', 0),
343:('[#6]-,:[#6]-,:[#6]=,:[#6]', 0),
344:('[#6]-,:[#7]:[#6&!H0]', 0),
345:('[#7]-,:[#6]:[#8]:[#6]', 0),
346:('[#8]=,:[#6]-,:[#6]:[#6]', 0),
347:('[#8]=,:[#6]-,:[#6]:[#7]', 0),
348:('[#6]-,:[#7]-,:[#6]:[#6]', 0),
349:('[#7]:[#7]-,:[#6&!H0]', 0),
350:('[#8]-,:[#6]:[#6]:[#7]', 0),
351:('[#8]-,:[#6]=,:[#6]-,:[#6]', 0),
352:('[#7]-,:[#6]:[#6]:[#7]', 0),
353:('[#6]-,:[#16]-,:[#6]:[#6]', 0),
354:('[Cl]-,:[#6]:[#6]-,:[#6]', 0),
355:('[#7]-,:[#6]=,:[#6&!H0]', 0),
356:('[Cl]-,:[#6]:[#6&!H0]', 0),
357:('[#7]:[#6]:[#7]-,:[#6]', 0),
358:('[Cl]-,:[#6]:[#6]-,:[#8]', 0),
359:('[#6]-,:[#6]:[#7]:[#6]', 0),
360:('[#6]-,:[#6]-,:[#16]-,:[#6]', 0),
361:('[#16]=,:[#6]-,:[#7]-,:[#6]', 0),
362:('[Br]-,:[#6]:[#6]-,:[#6]', 0),
363:('[#7&!H0]-,:[#7&!H0]', 0),
364:('[#16]=,:[#6]-,:[#7&!H0]', 0),
365:('[#6]-,:[#33]-[#8&!H0]', 0),
366:('[#16]:[#6]:[#6&!H0]', 0),
367:('[#8]-,:[#7]-,:[#6]-,:[#6]', 0),
368:('[#7]-,:[#7]-,:[#6]-,:[#6]', 0),
369:('[#6H,#6H2,#6H3]=,:[#6H,#6H2,#6H3]', 0),
370:('[#7]-,:[#7]-,:[#6]-,:[#7]', 0),
371:('[#8]=,:[#6]-,:[#7]-,:[#7]', 0),
372:('[#7]=,:[#6]-,:[#7]-,:[#6]', 0),
373:('[#6]=,:[#6]-,:[#6]:[#6]', 0),
374:('[#6]:[#7]-,:[#6&!H0]', 0),
375:('[#6]-,:[#7]-,:[#7&!H0]', 0),
376:('[#7]:[#6]:[#6]-,:[#6]', 0),
377:('[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
378:('[#33]-,:[#6]:[#6&!H0]', 0),
379:('[Cl]-,:[#6]:[#6]-,:[Cl]', 0),
380:('[#6]:[#6]:[#7&!H0]', 0),
381:('[#7&!H0]-,:[#6&!H0]', 0),
382:('[Cl]-,:[#6]-,:[#6]-,:[Cl]', 0),
383:('[#7]:[#6]-,:[#6]:[#6]', 0),
384:('[#16]-,:[#6]:[#6]-,:[#6]', 0),
385:('[#16]-,:[#6]:[#6&!H0]', 0),
386:('[#16]-,:[#6]:[#6]-,:[#7]', 0),
387:('[#16]-,:[#6]:[#6]-,:[#8]', 0),
388:('[#8]=,:[#6]-,:[#6]-,:[#6]', 0),
389:('[#8]=,:[#6]-,:[#6]-,:[#7]', 0),
390:('[#8]=,:[#6]-,:[#6]-,:[#8]', 0),
391:('[#7]=,:[#6]-,:[#6]-,:[#6]', 0),
392:('[#7]=,:[#6]-,:[#6&!H0]', 0),
393:('[#6]-,:[#7]-,:[#6&!H0]', 0),
394:('[#8]-,:[#6]:[#6]-,:[#6]', 0),
395:('[#8]-,:[#6]:[#6&!H0]', 0),
396:('[#8]-,:[#6]:[#6]-,:[#7]', 0),
397:('[#8]-,:[#6]:[#6]-,:[#8]', 0),
398:('[#7]-,:[#6]:[#6]-,:[#6]', 0),
399:('[#7]-,:[#6]:[#6&!H0]', 0),
400:('[#7]-,:[#6]:[#6]-,:[#7]', 0),
401:('[#8]-,:[#6]-,:[#6]:[#6]', 0),
402:('[#7]-,:[#6]-,:[#6]:[#6]', 0),
403:('[Cl]-,:[#6]-,:[#6]-,:[#6]', 0),
404:('[Cl]-,:[#6]-,:[#6]-,:[#8]', 0),
405:('[#6]:[#6]-,:[#6]:[#6]', 0),
406:('[#8]=,:[#6]-,:[#6]=,:[#6]', 0),
407:('[Br]-,:[#6]-,:[#6]-,:[#6]', 0),
408:('[#7]=,:[#6]-,:[#6]=,:[#6]', 0),
409:('[#6]=,:[#6]-,:[#6]-,:[#6]', 0),
410:('[#7]:[#6]-,:[#8&!H0]', 0),
411:('[#8]=,:[#7]-,:c:c', 0),
412:('[#8]-,:[#6]-,:[#7&!H0]', 0),
413:('[#7]-,:[#6]-,:[#7]-,:[#6]', 0),
414:('[Cl]-,:[#6]-,:[#6]=,:[#8]', 0),
415:('[Br]-,:[#6]-,:[#6]=,:[#8]', 0),
416:('[#8]-,:[#6]-,:[#8]-,:[#6]', 0),
417:('[#6]=,:[#6]-,:[#6]=,:[#6]', 0),
418:('[#6]:[#6]-,:[#8]-,:[#6]', 0),
419:('[#8]-,:[#6]-,:[#6]-,:[#7]', 0),
420:('[#8]-,:[#6]-,:[#6]-,:[#8]', 0),
421:('N#[#6]-,:[#6]-,:[#6]', 0),
422:('[#7]-,:[#6]-,:[#6]-,:[#7]', 0),
423:('[#6]:[#6]-,:[#6]-,:[#6]', 0),
424:('[#6&!H0]-,:[#8&!H0]', 0),
425:('n:c:n:c', 0),
426:('[#8]-,:[#6]-,:[#6]=,:[#6]', 0),
427:('[#8]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
428:('[#8]-,:[#6]-,:[#6]:[#6]-,:[#8]', 0),
429:('[#7]=,:[#6]-,:[#6]:[#6&!H0]', 0),
430:('c:c-,:[#7]-,:c:c', 0),
431:('[#6]-,:[#6]:[#6]-,:c:c', 0),
432:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
433:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
434:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
435:('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
436:('[Cl]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
437:('c:c-,:[#6]=,:[#6]-,:[#6]', 0),
438:('[#6]-,:[#6]:[#6]-,:[#7]-,:[#6]', 0),
439:('[#6]-,:[#16]-,:[#6]-,:[#6]-,:[#6]', 0),
440:('[#7]-,:[#6]:[#6]-,:[#8&!H0]', 0),
441:('[#8]=,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
442:('[#6]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
443:('[#6]-,:[#6]:[#6]-,:[#8&!H0]', 0),
444:('[Cl]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
445:('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
446:('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
447:('[#6]-,:[#8]-,:[#6]-,:[#6]=,:[#6]', 0),
448:('c:c-,:[#6]-,:[#6]-,:[#6]', 0),
449:('[#7]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
450:('[#8]=,:[#6]-,:[#6]-,:c:c', 0),
451:('[Cl]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
452:('[#6H,#6H2,#6H3]-,:[#6]=,:[#6H,#6H2,#6H3]', 0),
453:('[#7]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
454:('[#7]-,:[#6]:[#6]:[#6]-,:[#7]', 0),
455:('[#8]=,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
456:('[#6]-,:c:c:[#6]-,:[#6]', 0),
457:('[#6]-,:[#8]-,:[#6]-,:[#6]:c', 0),
458:('[#8]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
459:('[#8]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
460:('[#7]-,:[#6]-,:[#6]-,:[#6]:c', 0),
461:('[#6]-,:[#6]-,:[#6]-,:[#6]:c', 0),
462:('[Cl]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
463:('[#6]-,:[#8]-,:[#6]-,:[#8]-,:[#6]', 0),
464:('[#7]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
465:('[#7]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
466:('[#6]-,:[#7]-,:[#6]-,:[#6]-,:[#6]', 0),
467:('[#6]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
468:('[#7]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
469:('c:c:n:n:c', 0),
470:('[#6]-,:[#6]-,:[#6]-,:[#8&!H0]', 0),
471:('c:[#6]-,:[#6]-,:[#6]:c', 0),
472:('[#8]-,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
473:('c:c-,:[#8]-,:[#6]-,:[#6]', 0),
474:('[#7]-,:[#6]:c:c:n', 0),
475:('[#8]=,:[#6]-,:[#8]-,:[#6]:c', 0),
476:('[#8]=,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
477:('[#8]=,:[#6]-,:[#6]:[#6]-,:[#7]', 0),
478:('[#8]=,:[#6]-,:[#6]:[#6]-,:[#8]', 0),
479:('[#6]-,:[#8]-,:[#6]:[#6]-,:[#6]', 0),
480:('[#8]=,:[#33]-,:[#6]:c:c', 0),
481:('[#6]-,:[#7]-,:[#6]-,:[#6]:c', 0),
482:('[#16]-,:[#6]:c:c-,:[#7]', 0),
483:('[#8]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
484:('[#8]-,:[#6]:[#6]-,:[#8&!H0]', 0),
485:('[#6]-,:[#6]-,:[#8]-,:[#6]:c', 0),
486:('[#7]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
487:('[#6]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
488:('[#7]-,:[#7]-,:[#6]-,:[#7&!H0]', 0),
489:('[#6]-,:[#7]-,:[#6]-,:[#7]-,:[#6]', 0),
490:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
491:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
492:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
493:('[#6]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
494:('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#6]', 0),
495:('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
496:('[#6&!H0]-,:[#6]-,:[#7&!H0]', 0),
497:('[#6]-,:[#6]=,:[#7]-,:[#7]-,:[#6]', 0),
498:('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
499:('[#8]=,:[#6]-,:[#7]-,:[#6&!H0]', 0),
500:('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#7]', 0),
501:('[#8]=,:[#7]-,:[#6]:[#6]-,:[#7]', 0),
502:('[#8]=,:[#7]-,:c:c-,:[#8]', 0),
503:('[#8]=,:[#6]-,:[#7]-,:[#6]=,:[#8]', 0),
504:('[#8]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
505:('[#8]-,:[#6]:[#6]:[#6]-,:[#7]', 0),
506:('[#8]-,:[#6]:[#6]:[#6]-,:[#8]', 0),
507:('[#7]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
508:('[#8]-,:[#6]-,:[#6]-,:[#6]:c', 0),
509:('[#6]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
510:('[#6]-,:[#7]-,:[#6]:[#6]-,:[#6]', 0),
511:('[#6]-,:[#6]-,:[#16]-,:[#6]-,:[#6]', 0),
512:('[#8]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
513:('[#6]-,:[#6]=,:[#6]-,:[#6]-,:[#6]', 0),
514:('[#8]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
515:('[#8]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
516:('[#8]-,:[#6]-,:[#6]-,:[#8&!H0]', 0),
517:('[#6]-,:[#6]=,:[#6]-,:[#6]=,:[#6]', 0),
518:('[#7]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
519:('[#6]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
520:('[#6]=,:[#6]-,:[#6]-,:[#8&!H0]', 0),
521:('[#6]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
522:('[Cl]-,:[#6]:[#6]-,:[#6]=,:[#8]', 0),
523:('[Br]-,:[#6]:c:c-,:[#6]', 0),
524:('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
525:('[#8]=,:[#6]-,:[#6]=,:[#6&!H0]', 0),
526:('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#7]', 0),
527:('[#7]-,:[#6]-,:[#7]-,:[#6]:c', 0),
528:('[Br]-,:[#6]-,:[#6]-,:[#6]:c', 0),
529:('[#7]#[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
530:('[#6]-,:[#6]=,:[#6]-,:[#6]:c', 0),
531:('[#6]-,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
532:('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
533:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
534:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
535:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
536:('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
537:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
538:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
539:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
540:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
541:('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
542:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
543:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
544:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
545:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
546:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
547:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
548:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
549:('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
550:('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]', 0),
551:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
552:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]', 0),
553:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
554:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#8])-,:[#6]', 0),
555:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
556:('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#7])-,:[#6]', 0),
557:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
558:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#8])-,:[#6]', 0),
559:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](=,:[#8])-,:[#6]', 0),
560:('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#7])-,:[#6]', 0),
561:('[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]', 0),
562:('[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]-,:[#6]', 0),
563:('[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]', 0),
564:('[#6]-,:[#6](-,:[#6])(-,:[#6])-,:[#6]-,:[#6]', 0),
565:('[#6]-,:[#6](-,:[#6])-,:[#6](-,:[#6])-,:[#6]', 0),
566:('[#6]c1ccc([#6])cc1', 0),
567:('[#6]c1ccc([#8])cc1', 0),
568:('[#6]c1ccc([#16])cc1', 0),
569:('[#6]c1ccc([#7])cc1', 0),
570:('[#6]c1ccc(Cl)cc1', 0),
571:('[#6]c1ccc(Br)cc1', 0),
572:('[#8]c1ccc([#8])cc1', 0),
573:('[#8]c1ccc([#16])cc1', 0),
574:('[#8]c1ccc([#7])cc1', 0),
575:('[#8]c1ccc(Cl)cc1', 0),
576:('[#8]c1ccc(Br)cc1', 0),
577:('[#16]c1ccc([#16])cc1', 0),
578:('[#16]c1ccc([#7])cc1', 0),
579:('[#16]c1ccc(Cl)cc1', 0),
580:('[#16]c1ccc(Br)cc1', 0),
581:('[#7]c1ccc([#7])cc1', 0),
582:('[#7]c1ccc(Cl)cc1', 0),
583:('[#7]c1ccc(Br)cc1', 0),
584:('Clc1ccc(Cl)cc1', 0),
585:('Clc1ccc(Br)cc1', 0),
586:('Brc1ccc(Br)cc1', 0),
587:('[#6]c1cc([#6])ccc1', 0),
588:('[#6]c1cc([#8])ccc1', 0),
589:('[#6]c1cc([#16])ccc1', 0),
590:('[#6]c1cc([#7])ccc1', 0),
591:('[#6]c1cc(Cl)ccc1', 0),
592:('[#6]c1cc(Br)ccc1', 0),
593:('[#8]c1cc([#8])ccc1', 0),
594:('[#8]c1cc([#16])ccc1', 0),
595:('[#8]c1cc([#7])ccc1', 0),
596:('[#8]c1cc(Cl)ccc1', 0),
597:('[#8]c1cc(Br)ccc1', 0),
598:('[#16]c1cc([#16])ccc1', 0),
599:('[#16]c1cc([#7])ccc1', 0),
600:('[#16]c1cc(Cl)ccc1', 0),
601:('[#16]c1cc(Br)ccc1', 0),
602:('[#7]c1cc([#7])ccc1', 0),
603:('[#7]c1cc(Cl)ccc1', 0),
604:('[#7]c1cc(Br)ccc1', 0),
605:('Clc1cc(Cl)ccc1', 0),
606:('Clc1cc(Br)ccc1', 0),
607:('Brc1cc(Br)ccc1', 0),
608:('[#6]c1c([#6])cccc1', 0),
609:('[#6]c1c([#8])cccc1', 0),
610:('[#6]c1c([#16])cccc1', 0),
611:('[#6]c1c([#7])cccc1', 0),
612:('[#6]c1c(Cl)cccc1', 0),
613:('[#6]c1c(Br)cccc1', 0),
614:('[#8]c1c([#8])cccc1', 0),
615:('[#8]c1c([#16])cccc1', 0),
616:('[#8]c1c([#7])cccc1', 0),
617:('[#8]c1c(Cl)cccc1', 0),
618:('[#8]c1c(Br)cccc1', 0),
619:('[#16]c1c([#16])cccc1', 0),
620:('[#16]c1c([#7])cccc1', 0),
621:('[#16]c1c(Cl)cccc1', 0),
622:('[#16]c1c(Br)cccc1', 0),
623:('[#7]c1c([#7])cccc1', 0),
624:('[#7]c1c(Cl)cccc1', 0),
625:('[#7]c1c(Br)cccc1', 0),
626:('Clc1c(Cl)cccc1', 0),
627:('Clc1c(Br)cccc1', 0),
628:('Brc1c(Br)cccc1', 0),
629:('[#6][#6]1[#6][#6][#6]([#6])[#6][#6]1', 0),
630:('[#6][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0),
631:('[#6][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
632:('[#6][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
633:('[#6][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
634:('[#6][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
635:('[#8][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0),
636:('[#8][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
637:('[#8][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
638:('[#8][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
639:('[#8][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
640:('[#16][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
641:('[#16][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
642:('[#16][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
643:('[#16][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
644:('[#7][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
645:('[#7][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
646:('[#7][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
647:('Cl[#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
648:('Cl[#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
649:('Br[#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
650:('[#6][#6]1[#6][#6]([#6])[#6][#6][#6]1', 0),
651:('[#6][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0),
652:('[#6][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
653:('[#6][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
654:('[#6][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
655:('[#6][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
656:('[#8][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0),
657:('[#8][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
658:('[#8][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
659:('[#8][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
660:('[#8][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
661:('[#16][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
662:('[#16][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
663:('[#16][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
664:('[#16][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
665:('[#7][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
666:('[#7][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
667:('[#7][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
668:('Cl[#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
669:('Cl[#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
670:('Br[#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
671:('[#6][#6]1[#6]([#6])[#6][#6][#6][#6]1', 0),
672:('[#6][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0),
673:('[#6][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
674:('[#6][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
675:('[#6][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
676:('[#6][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
677:('[#8][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0),
678:('[#8][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
679:('[#8][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
680:('[#8][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
681:('[#8][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
682:('[#16][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
683:('[#16][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
684:('[#16][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
685:('[#16][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
686:('[#7][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
687:('[#7][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
688:('[#7][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
689:('Cl[#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
690:('Cl[#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
691:('Br[#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
692:('[#6][#6]1[#6][#6]([#6])[#6][#6]1', 0),
693:('[#6][#6]1[#6][#6]([#8])[#6][#6]1', 0),
694:('[#6][#6]1[#6][#6]([#16])[#6][#6]1', 0),
695:('[#6][#6]1[#6][#6]([#7])[#6][#6]1', 0),
696:('[#6][#6]1[#6][#6](Cl)[#6][#6]1', 0),
697:('[#6][#6]1[#6][#6](Br)[#6][#6]1', 0),
698:('[#8][#6]1[#6][#6]([#8])[#6][#6]1', 0),
699:('[#8][#6]1[#6][#6]([#16])[#6][#6]1', 0),
700:('[#8][#6]1[#6][#6]([#7])[#6][#6]1', 0),
701:('[#8][#6]1[#6][#6](Cl)[#6][#6]1', 0),
702:('[#8][#6]1[#6][#6](Br)[#6][#6]1', 0),
703:('[#16][#6]1[#6][#6]([#16])[#6][#6]1', 0),
704:('[#16][#6]1[#6][#6]([#7])[#6][#6]1', 0),
705:('[#16][#6]1[#6][#6](Cl)[#6][#6]1', 0),
706:('[#16][#6]1[#6][#6](Br)[#6][#6]1', 0),
707:('[#7][#6]1[#6][#6]([#7])[#6][#6]1', 0),
708:('[#7][#6]1[#6][#6](Cl)[#6][#6]1', 0),
709:('[#7][#6]1[#6][#6](Br)[#6][#6]1', 0),
710:('Cl[#6]1[#6][#6](Cl)[#6][#6]1', 0),
711:('Cl[#6]1[#6][#6](Br)[#6][#6]1', 0),
712:('Br[#6]1[#6][#6](Br)[#6][#6]1', 0),
713:('[#6][#6]1[#6]([#6])[#6][#6][#6]1', 0),
714:('[#6][#6]1[#6]([#8])[#6][#6][#6]1', 0),
715:('[#6][#6]1[#6]([#16])[#6][#6][#6]1', 0),
716:('[#6][#6]1[#6]([#7])[#6][#6][#6]1', 0),
717:('[#6][#6]1[#6](Cl)[#6][#6][#6]1', 0),
718:('[#6][#6]1[#6](Br)[#6][#6][#6]1', 0),
719:('[#8][#6]1[#6]([#8])[#6][#6][#6]1', 0),
720:('[#8][#6]1[#6]([#16])[#6][#6][#6]1', 0),
721:('[#8][#6]1[#6]([#7])[#6][#6][#6]1', 0),
722:('[#8][#6]1[#6](Cl)[#6][#6][#6]1', 0),
723:('[#8][#6]1[#6](Br)[#6][#6][#6]1', 0),
724:('[#16][#6]1[#6]([#16])[#6][#6][#6]1', 0),
725:('[#16][#6]1[#6]([#7])[#6][#6][#6]1', 0),
726:('[#16][#6]1[#6](Cl)[#6][#6][#6]1', 0),
727:('[#16][#6]1[#6](Br)[#6][#6][#6]1', 0),
728:('[#7][#6]1[#6]([#7])[#6][#6][#6]1', 0),
729:('[#7][#6]1[#6](Cl)[#6][#6]1', 0),
730:('[#7][#6]1[#6](Br)[#6][#6][#6]1', 0),
731:('Cl[#6]1[#6](Cl)[#6][#6][#6]1', 0),
732:('Cl[#6]1[#6](Br)[#6][#6][#6]1', 0),
733:('Br[#6]1[#6](Br)[#6][#6][#6]1', 0)}

PubchemKeys = None


def InitKeys(keyList, keyDict):
    """
    *Internal Use Only*

    Compile the SMARTS string of every key in ``keyDict`` and store the
    compiled pattern in ``keyList``; meant to be run once.

    - keyList: list with one slot per key, filled in place; the slot for
      key ``k`` is ``keyList[k - 1]`` and receives ``(pattern, count)``

    - keyDict: mapping of key number -> ``(SMARTS string, count)``

    Keys whose SMARTS string is ``'?'`` are skipped.  When a SMARTS string
    cannot be compiled a message is printed and the slot is left untouched.
    """
    assert len(keyList) == len(keyDict.keys()), 'length mismatch'
    for number, (smarts, threshold) in keyDict.items():
        if smarts == '?':
            # placeholder entry: nothing to compile
            continue
        query = Chem.MolFromSmarts(smarts)
        if query:
            keyList[number - 1] = query, threshold
        else:
            print('SMARTS parser error for key #%d: %s' % (number, smarts))
def calcPubChemFingerPart1(mol, **kwargs):
    """
    Calculate the SMARTS-based part of the PubChem fingerprint, i.e. the
    keys listed in ``smartsPatts`` (annotated there as 1-115 and 264-881).

    **Arguments**

      - mol: the molecule to be fingerprinted

      - ctor (optional keyword): callable that builds the result vector
        from its length; defaults to ``DataStructs.SparseBitVect``

      - any other keyword arguments are ignored

    **Returns**

      the vector built by ``ctor``, one position longer than the number of
      keys.  Position ``k`` belongs to key ``k`` of ``smartsPatts``;
      position 0 is never written.  A key with count 0 stores the result of
      a plain substructure test; a key with a non-zero count is set to 1
      only when the pattern matches more than ``count`` times.
    """
    global PubchemKeys
    if PubchemKeys is None:
        # compile the SMARTS table lazily, the first time it is needed
        PubchemKeys = [(None, 0)] * len(smartsPatts.keys())
        InitKeys(PubchemKeys, smartsPatts)
    make_vector = kwargs.get('ctor', DataStructs.SparseBitVect)
    fingerprint = make_vector(len(PubchemKeys) + 1)
    for position, (query, threshold) in enumerate(PubchemKeys, 1):
        if query is None:
            # pattern was skipped or failed to compile
            continue
        if threshold == 0:
            fingerprint[position] = mol.HasSubstructMatch(query)
        elif len(mol.GetSubstructMatches(query)) > threshold:
            fingerprint[position] = 1
    return fingerprint
def func_1(mol, bits):
    """
    *Internal Use Only*

    Calculate PubChem Fingerprints (116-263): ring-count bits for rings of
    any kind.

    - mol: molecule; its rings are read from ``GetRingInfo().AtomRings()``

    - bits: indexable container that is updated in place (bits are only
      ever set to 1, never cleared)

    Returns ``(ringSize, bits)`` where ``ringSize`` lists the size of every
    ring found, in the order reported by the molecule.
    """
    ringSize = [len(atom_ring) for atom_ring in mol.GetRingInfo().AtomRings()]

    # number of rings per size; only sizes 3..10 have fingerprint bits
    per_size = dict.fromkeys(range(3, 11), 0)
    for size in ringSize:
        if size in per_size:
            per_size[size] += 1

    # (ring size, index of the ">= 1 ring" bit, how many count levels exist);
    # the bit for each further level sits 7 positions after the previous one
    layout = (
        (3, 0, 2), (4, 14, 2), (5, 28, 5), (6, 63, 5),
        (7, 98, 2), (8, 112, 2), (9, 126, 1), (10, 133, 1),
    )
    for size, first_bit, levels in layout:
        for level in range(min(per_size[size], levels)):
            bits[first_bit + 7 * level] = 1
    return ringSize, bits
def func_2(mol, bits):
    """
    *Internal Use Only*

    saturated or aromatic carbon-only ring

    A ring is counted when every ring bond is SINGLE (saturated) or every
    ring bond is AROMATIC, *and* every atom on its bonds is carbon.

    - mol: molecule; rings are read from ``GetRingInfo().BondRings()`` and
      bonds are looked up with ``GetBondWithIdx``

    - bits: indexable container that is updated in place (bits are only
      ever set to 1, never cleared)

    Returns ``(ringSize, bits)`` where ``ringSize`` lists the size of every
    counted ring.
    """
    counts = dict.fromkeys(range(3, 11), 0)  # only sizes 3..10 have bits
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bondIdx) for bondIdx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        saturated = all(kind == 'SINGLE' for kind in kinds)
        aromatic = all(kind == 'AROMATIC' for kind in kinds)
        carbon_only = all(
            bond.GetBeginAtom().GetAtomicNum() == 6
            and bond.GetEndAtom().GetAtomicNum() == 6
            for bond in bonds
        )
        # The carbon-only requirement applies to saturated rings as well as
        # aromatic ones; otherwise e.g. a saturated N-heterocycle would be
        # reported as a carbon-only ring.
        if (saturated or aromatic) and carbon_only:
            size = len(ring)
            ringSize.append(size)
            if size in counts:
                counts[size] += 1

    # (ring size, index of the ">= 1 ring" bit, how many count levels exist);
    # the bit for each further level sits 7 positions after the previous one
    layout = (
        (3, 1, 2), (4, 15, 2), (5, 29, 5), (6, 64, 5),
        (7, 99, 2), (8, 113, 2), (9, 127, 1), (10, 134, 1),
    )
    for size, first_bit, levels in layout:
        for level in range(min(counts[size], levels)):
            bits[first_bit + 7 * level] = 1
    return ringSize, bits
def func_3(mol, bits):
    """
    *Internal Use Only*

    saturated or aromatic nitrogen-containing

    A ring is counted when every ring bond is SINGLE (saturated) or every
    ring bond is AROMATIC, *and* at least one atom on its bonds is nitrogen.

    - mol: molecule; rings are read from ``GetRingInfo().BondRings()`` and
      bonds are looked up with ``GetBondWithIdx``

    - bits: indexable container that is updated in place (bits are only
      ever set to 1, never cleared)

    Returns ``(ringSize, bits)`` where ``ringSize`` lists the size of every
    counted ring.
    """
    counts = dict.fromkeys(range(3, 11), 0)  # only sizes 3..10 have bits
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bondIdx) for bondIdx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        saturated = all(kind == 'SINGLE' for kind in kinds)
        aromatic = all(kind == 'AROMATIC' for kind in kinds)
        has_nitrogen = any(
            bond.GetBeginAtom().GetAtomicNum() == 7
            or bond.GetEndAtom().GetAtomicNum() == 7
            for bond in bonds
        )
        # The nitrogen requirement applies to saturated rings as well as
        # aromatic ones; otherwise every saturated carbocycle would be
        # reported as a nitrogen-containing ring.
        if (saturated or aromatic) and has_nitrogen:
            size = len(ring)
            ringSize.append(size)
            if size in counts:
                counts[size] += 1

    # (ring size, index of the ">= 1 ring" bit, how many count levels exist);
    # the bit for each further level sits 7 positions after the previous one
    layout = (
        (3, 2, 2), (4, 16, 2), (5, 30, 5), (6, 65, 5),
        (7, 100, 2), (8, 114, 2), (9, 128, 1), (10, 135, 1),
    )
    for size, first_bit, levels in layout:
        for level in range(min(counts[size], levels)):
            bits[first_bit + 7 * level] = 1
    return ringSize, bits
def func_4(mol, bits):
    """ *Internal Use Only*
    saturated or aromatic heteroatom-containing

    A ring is counted when at least one atom on its bonds is neither
    hydrogen nor carbon AND its bonds are either all 'SINGLE' (saturated)
    or all 'AROMATIC'.

    :param mol: molecule exposing ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx(idx)`` (an RDKit molecule in this file)
    :param bits: mutable list of 0/1 flags; updated in place (highest index
        written is 136)
    :return: ``(ringSize, bits)`` where ``ringSize`` lists the size of every
        counted ring (including sizes outside 3..10, which set no bit)
    """
    ring_sizes = []
    counts = dict.fromkeys(range(3, 11), 0)
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        saturated = all(kind == 'SINGLE' for kind in kinds)
        aromatic = all(kind == 'AROMATIC' for kind in kinds)
        # The heteroatom requirement applies to the saturated case as well
        # as the aromatic one; a saturated carbocycle must not be counted.
        has_heteroatom = any(
            bond.GetBeginAtom().GetAtomicNum() not in (1, 6)
            or bond.GetEndAtom().GetAtomicNum() not in (1, 6)
            for bond in bonds)
        if (saturated or aromatic) and has_heteroatom:
            ring_sizes.append(len(ring))
            if len(ring) in counts:
                counts[len(ring)] += 1

    # (ring size, index of the ">= 1 ring" bit, highest threshold encoded).
    # Successive thresholds of one ring size are 7 positions apart.
    layout = ((3, 3, 2), (4, 17, 2), (5, 31, 5), (6, 66, 5),
              (7, 101, 2), (8, 115, 2), (9, 129, 1), (10, 136, 1))
    for size, first_bit, max_threshold in layout:
        for threshold in range(1, max_threshold + 1):
            if counts[size] >= threshold:
                bits[first_bit + 7 * (threshold - 1)] = 1
    return ring_sizes, bits
def func_5(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic carbon-only

    A ring is counted when it has at least one bond that is not 'SINGLE',
    has no 'AROMATIC' bond at all, and every atom on its bonds is carbon.

    :param mol: molecule exposing ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx(idx)`` (an RDKit molecule in this file)
    :param bits: mutable list of 0/1 flags; updated in place (highest index
        written is 137)
    :return: ``(ringSize, bits)`` where ``ringSize`` lists the size of every
        counted ring (including sizes outside 3..10, which set no bit)
    """
    ring_sizes = []
    counts = dict.fromkeys(range(3, 11), 0)
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        unsaturated = any(kind != 'SINGLE' for kind in kinds)
        # NOTE(review): one aromatic bond (e.g. shared with a fused aromatic
        # ring) is enough to skip the ring here -- confirm this is intended.
        non_aromatic = 'AROMATIC' not in kinds
        carbon_only = all(
            bond.GetBeginAtom().GetAtomicNum() == 6
            and bond.GetEndAtom().GetAtomicNum() == 6
            for bond in bonds)
        if unsaturated and non_aromatic and carbon_only:
            ring_sizes.append(len(ring))
            if len(ring) in counts:
                counts[len(ring)] += 1

    # (ring size, index of the ">= 1 ring" bit, highest threshold encoded).
    # Successive thresholds of one ring size are 7 positions apart.
    layout = ((3, 4, 2), (4, 18, 2), (5, 32, 5), (6, 67, 5),
              (7, 102, 2), (8, 116, 2), (9, 130, 1), (10, 137, 1))
    for size, first_bit, max_threshold in layout:
        for threshold in range(1, max_threshold + 1):
            if counts[size] >= threshold:
                bits[first_bit + 7 * (threshold - 1)] = 1
    return ring_sizes, bits
def func_6(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic nitrogen-containing

    A ring is counted when it has at least one bond that is not 'SINGLE',
    has no 'AROMATIC' bond at all, and at least one atom on its bonds is
    nitrogen.

    :param mol: molecule exposing ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx(idx)`` (an RDKit molecule in this file)
    :param bits: mutable list of 0/1 flags; updated in place (highest index
        written is 138)
    :return: ``(ringSize, bits)`` where ``ringSize`` lists the size of every
        counted ring (including sizes outside 3..10, which set no bit)
    """
    ring_sizes = []
    counts = dict.fromkeys(range(3, 11), 0)
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        unsaturated = any(kind != 'SINGLE' for kind in kinds)
        # NOTE(review): one aromatic bond (e.g. shared with a fused aromatic
        # ring) is enough to skip the ring here -- confirm this is intended.
        non_aromatic = 'AROMATIC' not in kinds
        has_nitrogen = any(
            bond.GetBeginAtom().GetAtomicNum() == 7
            or bond.GetEndAtom().GetAtomicNum() == 7
            for bond in bonds)
        if unsaturated and non_aromatic and has_nitrogen:
            ring_sizes.append(len(ring))
            if len(ring) in counts:
                counts[len(ring)] += 1

    # (ring size, index of the ">= 1 ring" bit, highest threshold encoded).
    # Successive thresholds of one ring size are 7 positions apart.
    layout = ((3, 5, 2), (4, 19, 2), (5, 33, 5), (6, 68, 5),
              (7, 103, 2), (8, 117, 2), (9, 131, 1), (10, 138, 1))
    for size, first_bit, max_threshold in layout:
        for threshold in range(1, max_threshold + 1):
            if counts[size] >= threshold:
                bits[first_bit + 7 * (threshold - 1)] = 1
    return ring_sizes, bits
def func_7(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic heteroatom-containing

    A ring is counted when it has at least one bond that is not 'SINGLE',
    has no 'AROMATIC' bond at all, and at least one atom on its bonds is
    neither hydrogen nor carbon.

    :param mol: molecule exposing ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx(idx)`` (an RDKit molecule in this file)
    :param bits: mutable list of 0/1 flags; updated in place (highest index
        written is 139)
    :return: ``(ringSize, bits)`` where ``ringSize`` lists the size of every
        counted ring (including sizes outside 3..10, which set no bit)
    """
    ring_sizes = []
    counts = dict.fromkeys(range(3, 11), 0)
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        unsaturated = any(kind != 'SINGLE' for kind in kinds)
        # NOTE(review): one aromatic bond (e.g. shared with a fused aromatic
        # ring) is enough to skip the ring here -- confirm this is intended.
        non_aromatic = 'AROMATIC' not in kinds
        has_heteroatom = any(
            bond.GetBeginAtom().GetAtomicNum() not in (1, 6)
            or bond.GetEndAtom().GetAtomicNum() not in (1, 6)
            for bond in bonds)
        if unsaturated and non_aromatic and has_heteroatom:
            ring_sizes.append(len(ring))
            if len(ring) in counts:
                counts[len(ring)] += 1

    # (ring size, index of the ">= 1 ring" bit, highest threshold encoded).
    # Successive thresholds of one ring size are 7 positions apart.
    layout = ((3, 6, 2), (4, 20, 2), (5, 34, 5), (6, 69, 5),
              (7, 104, 2), (8, 118, 2), (9, 132, 1), (10, 139, 1))
    for size, first_bit, max_threshold in layout:
        for threshold in range(1, max_threshold + 1):
            if counts[size] >= threshold:
                bits[first_bit + 7 * (threshold - 1)] = 1
    return ring_sizes, bits
def func_8(mol, bits):
    """ *Internal Use Only*
    aromatic rings or hetero-aromatic rings

    Count the rings whose bonds are all 'AROMATIC', and among those the
    rings having at least one atom that is neither hydrogen nor carbon.
    Even indices 140..146 flag ">= 1..4 aromatic rings"; odd indices
    141..147 flag ">= 1..4 hetero-aromatic rings".

    :param mol: molecule exposing ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx(idx)`` (an RDKit molecule in this file)
    :param bits: mutable list of 0/1 flags; updated in place
    :return: the same ``bits`` list
    """
    aromatic_rings = 0
    hetero_aromatic_rings = 0
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        if any(bond.GetBondType().name != 'AROMATIC' for bond in bonds):
            continue
        aromatic_rings += 1
        if any(bond.GetBeginAtom().GetAtomicNum() not in (1, 6)
               or bond.GetEndAtom().GetAtomicNum() not in (1, 6)
               for bond in bonds):
            hetero_aromatic_rings += 1

    # Each threshold is decided by its own counter: a molecule with three
    # aromatic rings of which one is hetero-aromatic must still report
    # ">= 1 hetero-aromatic ring".
    for threshold in range(1, 5):
        slot = 140 + 2 * (threshold - 1)
        if aromatic_rings >= threshold:
            bits[slot] = 1
        if hetero_aromatic_rings >= threshold:
            bits[slot + 1] = 1
    return bits
def calcPubChemFingerPart2(mol):# 116-263
    """ *Internal Use Only*
    Calculate PubChem Fingerprints (116-263)

    Starts from a list of 148 zeros and passes it through func_1 .. func_8
    in order, each of which switches on its own positions.

    :param mol: molecule object handed unchanged to every func_N
    :return: list of 148 ints (0 or 1)
    """
    flags = [0] * 148
    # func_1 .. func_7 return a pair; the bit list is its second item.
    for ring_counter in (func_1, func_2, func_3, func_4,
                         func_5, func_6, func_7):
        flags = ring_counter(mol, flags)[1]
    # func_8 returns the bit list directly.
    return func_8(mol, flags)
def calcPubChemFingerAll(mol):
    """*Internal Use Only*
    Calculate PubChem Fingerprints

    Merges the two partial fingerprints into one list of 881 ints:
    positions 0..114 come from characters 1..115 of the part-1 bit string,
    positions 115..262 from the part-2 list, and positions 263 onwards from
    characters 116..733 of the part-1 bit string.

    :param mol: molecule object handed to calcPubChemFingerPart1 and
        calcPubChemFingerPart2
    :return: list of 881 ints (0 or 1)
    """
    part2_start = 115
    part2_width = 148
    fingerprint = [0] * 881

    part1_chars = list(calcPubChemFingerPart1(mol).ToBitString())
    # Character 0 of the part-1 string is skipped by both slices.
    for position, char in enumerate(part1_chars[1:116]):
        if char == '1':
            fingerprint[position] = 1
    for position, char in enumerate(part1_chars[116:734]):
        if char == '1':
            fingerprint[position + part2_start + part2_width] = 1

    for position, flag in enumerate(calcPubChemFingerPart2(mol)):
        if flag == 1:
            fingerprint[position + part2_start] = 1
    return fingerprint
# ------------------------------------
# Demo: compute and print the full fingerprint of one example molecule.
if __name__ == '__main__':
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print('-'*10+'START'+'-'*10)
    SMILES = 'C1=NC2NC3=CNCC3=CC2CC1'
    mol = Chem.MolFromSmiles(SMILES)
    mol2 = Chem.AddHs(mol)
    result = calcPubChemFingerAll(mol2)
    print('Molecule: %s'%SMILES)
    print('-'*25)
    print('Results: %s'%result)
    print('-'*10+'END'+'-'*10)