# -*- coding: utf-8 -*-
# Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
# All rights reserved.
# This file is part of the PyBioMed.
# The contents are covered by the terms of the BSD license
# which is included in the file license.txt, found at the root
# of the PyBioMed source tree.
"""
This file provides internal functions to calculate PubChem fingerprints.
If you have any questions, please feel free to contact us.
E-mail: biomed@csu.edu.cn
@File name: PubChemFingerprints
@author: Jie Dong and Zhijiang Yao
"""
from rdkit import Chem
from rdkit import DataStructs
# SMARTS patterns corresponding to the SMARTS-expressible PubChem fingerprint keys.
# https://astro.temple.edu/~tua87106/list_fingerprints.pdf
# ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
#
# Layout: key -> (SMARTS, count).
#   * count == 0: the key is on when the pattern matches at least once.
#   * count  > 0: the key is on when the pattern matches MORE than `count`
#     times (so ('[H]', 3) means "at least 4 matches").
# Keys are 1-based and contiguous (1..733); calcPubChemFingerPart1 stores
# key k at bit index k of the vector it returns.
smartsPatts = {
    # ---- keys 1-115: element counts (PubChem bits 1-115) ----
    1: ('[H]', 3),
    2: ('[H]', 7),
    3: ('[H]', 15),
    4: ('[H]', 31),
    5: ('[Li]', 0),
    6: ('[Li]', 1),
    7: ('[B]', 0),
    8: ('[B]', 1),
    9: ('[B]', 3),
    10: ('[C]', 1),
    11: ('[C]', 3),
    12: ('[C]', 7),
    13: ('[C]', 15),
    14: ('[C]', 31),
    15: ('[N]', 0),
    16: ('[N]', 1),
    17: ('[N]', 3),
    18: ('[N]', 7),
    19: ('[O]', 0),
    20: ('[O]', 1),
    21: ('[O]', 3),
    22: ('[O]', 7),
    23: ('[O]', 15),
    24: ('[F]', 0),
    25: ('[F]', 1),
    26: ('[F]', 3),
    27: ('[Na]', 0),
    28: ('[Na]', 1),
    29: ('[Si]', 0),
    30: ('[Si]', 1),
    31: ('[P]', 0),
    32: ('[P]', 1),
    33: ('[P]', 3),
    34: ('[S]', 0),
    35: ('[S]', 1),
    36: ('[S]', 3),
    37: ('[S]', 7),
    38: ('[Cl]', 0),
    39: ('[Cl]', 1),
    40: ('[Cl]', 3),
    41: ('[Cl]', 7),
    42: ('[K]', 0),
    43: ('[K]', 1),
    44: ('[Br]', 0),
    45: ('[Br]', 1),
    46: ('[Br]', 3),
    47: ('[I]', 0),
    48: ('[I]', 1),
    49: ('[I]', 3),
    50: ('[Be]', 0),
    51: ('[Mg]', 0),
    52: ('[Al]', 0),
    53: ('[Ca]', 0),
    54: ('[Sc]', 0),
    55: ('[Ti]', 0),
    56: ('[V]', 0),
    57: ('[Cr]', 0),
    58: ('[Mn]', 0),
    59: ('[Fe]', 0),
    # Cobalt. Was '[CO]', which is a carbon-AND-oxygen atom expression and
    # therefore can never match a cobalt atom.
    60: ('[Co]', 0),
    61: ('[Ni]', 0),
    62: ('[Cu]', 0),
    63: ('[Zn]', 0),
    64: ('[Ga]', 0),
    65: ('[Ge]', 0),
    66: ('[As]', 0),
    67: ('[Se]', 0),
    68: ('[Kr]', 0),
    69: ('[Rb]', 0),
    70: ('[Sr]', 0),
    71: ('[Y]', 0),
    72: ('[Zr]', 0),
    73: ('[Nb]', 0),
    74: ('[Mo]', 0),
    75: ('[Ru]', 0),
    76: ('[Rh]', 0),
    77: ('[Pd]', 0),
    78: ('[Ag]', 0),
    79: ('[Cd]', 0),
    80: ('[In]', 0),
    81: ('[Sn]', 0),
    82: ('[Sb]', 0),
    83: ('[Te]', 0),
    84: ('[Xe]', 0),
    85: ('[Cs]', 0),
    86: ('[Ba]', 0),
    87: ('[Lu]', 0),
    88: ('[Hf]', 0),
    89: ('[Ta]', 0),
    90: ('[W]', 0),
    91: ('[Re]', 0),
    92: ('[Os]', 0),
    93: ('[Ir]', 0),
    94: ('[Pt]', 0),
    95: ('[Au]', 0),
    96: ('[Hg]', 0),
    97: ('[Tl]', 0),
    98: ('[Pb]', 0),
    99: ('[Bi]', 0),
    100: ('[La]', 0),
    101: ('[Ce]', 0),
    102: ('[Pr]', 0),
    103: ('[Nd]', 0),
    104: ('[Pm]', 0),
    105: ('[Sm]', 0),
    106: ('[Eu]', 0),
    107: ('[Gd]', 0),
    108: ('[Tb]', 0),
    109: ('[Dy]', 0),
    110: ('[Ho]', 0),
    111: ('[Er]', 0),
    112: ('[Tm]', 0),
    113: ('[Yb]', 0),
    114: ('[Tc]', 0),
    115: ('[U]', 0),
    # ---- keys 116-733: substructure patterns (PubChem bits 264-881) ----
    116: ('[Li&!H0]', 0),
    117: ('[Li]~[Li]', 0),
    118: ('[Li]~[#5]', 0),
    119: ('[Li]~[#6]', 0),
    120: ('[Li]~[#8]', 0),
    121: ('[Li]~[F]', 0),
    122: ('[Li]~[#15]', 0),
    123: ('[Li]~[#16]', 0),
    124: ('[Li]~[Cl]', 0),
    125: ('[#5&!H0]', 0),
    126: ('[#5]~[#5]', 0),
    127: ('[#5]~[#6]', 0),
    128: ('[#5]~[#7]', 0),
    129: ('[#5]~[#8]', 0),
    130: ('[#5]~[F]', 0),
    131: ('[#5]~[#14]', 0),
    132: ('[#5]~[#15]', 0),
    133: ('[#5]~[#16]', 0),
    134: ('[#5]~[Cl]', 0),
    135: ('[#5]~[Br]', 0),
    136: ('[#6&!H0]', 0),
    137: ('[#6]~[#6]', 0),
    138: ('[#6]~[#7]', 0),
    139: ('[#6]~[#8]', 0),
    140: ('[#6]~[F]', 0),
    141: ('[#6]~[Na]', 0),
    142: ('[#6]~[Mg]', 0),
    143: ('[#6]~[Al]', 0),
    144: ('[#6]~[#14]', 0),
    145: ('[#6]~[#15]', 0),
    146: ('[#6]~[#16]', 0),
    147: ('[#6]~[Cl]', 0),
    148: ('[#6]~[#33]', 0),
    149: ('[#6]~[#34]', 0),
    150: ('[#6]~[Br]', 0),
    151: ('[#6]~[I]', 0),
    152: ('[#7&!H0]', 0),
    153: ('[#7]~[#7]', 0),
    154: ('[#7]~[#8]', 0),
    155: ('[#7]~[F]', 0),
    156: ('[#7]~[#14]', 0),
    157: ('[#7]~[#15]', 0),
    158: ('[#7]~[#16]', 0),
    159: ('[#7]~[Cl]', 0),
    160: ('[#7]~[Br]', 0),
    161: ('[#8&!H0]', 0),
    162: ('[#8]~[#8]', 0),
    163: ('[#8]~[Mg]', 0),
    164: ('[#8]~[Na]', 0),
    165: ('[#8]~[Al]', 0),
    166: ('[#8]~[#14]', 0),
    167: ('[#8]~[#15]', 0),
    168: ('[#8]~[K]', 0),
    169: ('[F]~[#15]', 0),
    170: ('[F]~[#16]', 0),
    171: ('[Al&!H0]', 0),
    172: ('[Al]~[Cl]', 0),
    173: ('[#14&!H0]', 0),
    174: ('[#14]~[#14]', 0),
    175: ('[#14]~[Cl]', 0),
    176: ('[#15&!H0]', 0),
    177: ('[#15]~[#15]', 0),
    178: ('[#33&!H0]', 0),
    179: ('[#33]~[#33]', 0),
    180: ('[#6](~Br)(~[#6])', 0),
    181: ('[#6](~Br)(~[#6])(~[#6])', 0),
    182: ('[#6&!H0]~[Br]', 0),
    183: ('[#6](~[Br])(:[c])', 0),
    184: ('[#6](~[Br])(:[n])', 0),
    185: ('[#6](~[#6])(~[#6])', 0),
    186: ('[#6](~[#6])(~[#6])(~[#6])', 0),
    187: ('[#6](~[#6])(~[#6])(~[#6])(~[#6])', 0),
    188: ('[#6H1](~[#6])(~[#6])(~[#6])', 0),
    189: ('[#6](~[#6])(~[#6])(~[#6])(~[#7])', 0),
    190: ('[#6](~[#6])(~[#6])(~[#6])(~[#8])', 0),
    191: ('[#6H1](~[#6])(~[#6])(~[#7])', 0),
    192: ('[#6H1](~[#6])(~[#6])(~[#8])', 0),
    193: ('[#6](~[#6])(~[#6])(~[#7])', 0),
    194: ('[#6](~[#6])(~[#6])(~[#8])', 0),
    195: ('[#6](~[#6])(~[Cl])', 0),
    196: ('[#6&!H0](~[#6])(~[Cl])', 0),
    197: ('[#6H,#6H2,#6H3,#6H4]~[#6]', 0),
    198: ('[#6&!H0](~[#6])(~[#7])', 0),
    199: ('[#6&!H0](~[#6])(~[#8])', 0),
    200: ('[#6H1](~[#6])(~[#8])(~[#8])', 0),
    201: ('[#6&!H0](~[#6])(~[#15])', 0),
    202: ('[#6&!H0](~[#6])(~[#16])', 0),
    203: ('[#6](~[#6])(~[I])', 0),
    204: ('[#6](~[#6])(~[#7])', 0),
    205: ('[#6](~[#6])(~[#8])', 0),
    206: ('[#6](~[#6])(~[#16])', 0),
    207: ('[#6](~[#6])(~[#14])', 0),
    208: ('[#6](~[#6])(:c)', 0),
    209: ('[#6](~[#6])(:c)(:c)', 0),
    210: ('[#6](~[#6])(:c)(:n)', 0),
    211: ('[#6](~[#6])(:n)', 0),
    212: ('[#6](~[#6])(:n)(:n)', 0),
    213: ('[#6](~[Cl])(~[Cl])', 0),
    214: ('[#6&!H0](~[Cl])', 0),
    215: ('[#6](~[Cl])(:c)', 0),
    216: ('[#6](~[F])(~[F])', 0),
    217: ('[#6](~[F])(:c)', 0),
    218: ('[#6&!H0](~[#7])', 0),
    219: ('[#6&!H0](~[#8])', 0),
    220: ('[#6&!H0](~[#8])(~[#8])', 0),
    221: ('[#6&!H0](~[#16])', 0),
    222: ('[#6&!H0](~[#14])', 0),
    223: ('[#6&!H0]:c', 0),
    224: ('[#6&!H0](:c)(:c)', 0),
    225: ('[#6&!H0](:c)(:n)', 0),
    226: ('[#6&!H0](:n)', 0),
    227: ('[#6H3]', 0),
    228: ('[#6](~[#7])(~[#7])', 0),
    229: ('[#6](~[#7])(:c)', 0),
    230: ('[#6](~[#7])(:c)(:c)', 0),
    231: ('[#6](~[#7])(:c)(:n)', 0),
    232: ('[#6](~[#7])(:n)', 0),
    233: ('[#6](~[#8])(~[#8])', 0),
    234: ('[#6](~[#8])(:c)', 0),
    235: ('[#6](~[#8])(:c)(:c)', 0),
    236: ('[#6](~[#16])(:c)', 0),
    237: ('[#6](:c)(:c)', 0),
    238: ('[#6](:c)(:c)(:c)', 0),
    239: ('[#6](:c)(:c)(:n)', 0),
    240: ('[#6](:c)(:n)', 0),
    241: ('[#6](:c)(:n)(:n)', 0),
    242: ('[#6](:n)(:n)', 0),
    243: ('[#7](~[#6])(~[#6])', 0),
    244: ('[#7](~[#6])(~[#6])(~[#6])', 0),
    245: ('[#7&!H0](~[#6])(~[#6])', 0),
    246: ('[#7&!H0](~[#6])', 0),
    247: ('[#7&!H0](~[#6])(~[#7])', 0),
    248: ('[#7](~[#6])(~[#8])', 0),
    249: ('[#7](~[#6])(:c)', 0),
    250: ('[#7](~[#6])(:c)(:c)', 0),
    251: ('[#7&!H0](~[#7])', 0),
    252: ('[#7&!H0](:c)', 0),
    253: ('[#7&!H0](:c)(:c)', 0),
    254: ('[#7](~[#8])(~[#8])', 0),
    255: ('[#7](~[#8])(:o)', 0),
    256: ('[#7](:c)(:c)', 0),
    257: ('[#7](:c)(:c)(:c)', 0),
    258: ('[#8](~[#6])(~[#6])', 0),
    259: ('[#8&!H0](~[#6])', 0),
    260: ('[#8](~[#6])(~[#15])', 0),
    261: ('[#8&!H0](~[#16])', 0),
    262: ('[#8](:c)(:c)', 0),
    263: ('[#15](~[#6])(~[#6])', 0),
    264: ('[#15](~[#8])(~[#8])', 0),
    265: ('[#16](~[#6])(~[#6])', 0),
    266: ('[#16&!H0](~[#6])', 0),
    267: ('[#16](~[#6])(~[#8])', 0),
    268: ('[#14](~[#6])(~[#6])', 0),
    269: ('[#6]=,:[#6]', 0),
    270: ('[#6]#[#6]', 0),
    271: ('[#6]=,:[#7]', 0),
    272: ('[#6]#[#7]', 0),
    273: ('[#6]=,:[#8]', 0),
    274: ('[#6]=,:[#16]', 0),
    275: ('[#7]=,:[#7]', 0),
    276: ('[#7]=,:[#8]', 0),
    277: ('[#7]=,:[#15]', 0),
    278: ('[#15]=,:[#8]', 0),
    279: ('[#15]=,:[#15]', 0),
    280: ('[#6](#[#6])(-,:[#6])', 0),
    281: ('[#6&!H0](#[#6])', 0),
    282: ('[#6](#[#7])(-,:[#6])', 0),
    283: ('[#6](-,:[#6])(-,:[#6])(=,:[#6])', 0),
    284: ('[#6](-,:[#6])(-,:[#6])(=,:[#7])', 0),
    285: ('[#6](-,:[#6])(-,:[#6])(=,:[#8])', 0),
    286: ('[#6](-,:[#6])([Cl])(=,:[#8])', 0),
    287: ('[#6&!H0](-,:[#6])(=,:[#6])', 0),
    288: ('[#6&!H0](-,:[#6])(=,:[#7])', 0),
    289: ('[#6&!H0](-,:[#6])(=,:[#8])', 0),
    290: ('[#6](-,:[#6])(-,:[#7])(=,:[#6])', 0),
    291: ('[#6](-,:[#6])(-,:[#7])(=,:[#7])', 0),
    292: ('[#6](-,:[#6])(-,:[#7])(=,:[#8])', 0),
    293: ('[#6](-,:[#6])(-,:[#8])(=,:[#8])', 0),
    294: ('[#6](-,:[#6])(=,:[#6])', 0),
    295: ('[#6](-,:[#6])(=,:[#7])', 0),
    296: ('[#6](-,:[#6])(=,:[#8])', 0),
    297: ('[#6]([Cl])(=,:[#8])', 0),
    298: ('[#6&!H0](-,:[#7])(=,:[#6])', 0),
    299: ('[#6&!H0](=,:[#6])', 0),
    300: ('[#6&!H0](=,:[#7])', 0),
    301: ('[#6&!H0](=,:[#8])', 0),
    302: ('[#6](-,:[#7])(=,:[#6])', 0),
    303: ('[#6](-,:[#7])(=,:[#7])', 0),
    304: ('[#6](-,:[#7])(=,:[#8])', 0),
    305: ('[#6](-,:[#8])(=,:[#8])', 0),
    306: ('[#7](-,:[#6])(=,:[#6])', 0),
    307: ('[#7](-,:[#6])(=,:[#8])', 0),
    308: ('[#7](-,:[#8])(=,:[#8])', 0),
    309: ('[#15](-,:[#8])(=,:[#8])', 0),
    310: ('[#16](-,:[#6])(=,:[#8])', 0),
    311: ('[#16](-,:[#8])(=,:[#8])', 0),
    312: ('[#16](=,:[#8])(=,:[#8])', 0),
    313: ('[#6]-,:[#6]-,:[#6]#[#6]', 0),
    314: ('[#8]-,:[#6]-,:[#6]=,:[#7]', 0),
    315: ('[#8]-,:[#6]-,:[#6]=,:[#8]', 0),
    316: ('[#7]:[#6]-,:[#16&!H0]', 0),
    317: ('[#7]-,:[#6]-,:[#6]=,:[#6]', 0),
    318: ('[#8]=,:[#16]-,:[#6]-,:[#6]', 0),
    319: ('[#7]#[#6]-,:[#6]=,:[#6]', 0),
    320: ('[#6]=,:[#7]-,:[#7]-,:[#6]', 0),
    321: ('[#8]=,:[#16]-,:[#6]-,:[#7]', 0),
    322: ('[#16]-,:[#16]-,:[#6]:[#6]', 0),
    323: ('[#6]:[#6]-,:[#6]=,:[#6]', 0),
    324: ('[#16]:[#6]:[#6]:[#6]', 0),
    325: ('[#6]:[#7]:[#6]-,:[#6]', 0),
    326: ('[#16]-,:[#6]:[#7]:[#6]', 0),
    327: ('[#16]:[#6]:[#6]:[#7]', 0),
    328: ('[#16]-,:[#6]=,:[#7]-,:[#6]', 0),
    329: ('[#6]-,:[#8]-,:[#6]=,:[#6]', 0),
    330: ('[#7]-,:[#7]-,:[#6]:[#6]', 0),
    331: ('[#16]-,:[#6]=,:[#7&!H0]', 0),
    332: ('[#16]-,:[#6]-,:[#16]-,:[#6]', 0),
    333: ('[#6]:[#16]:[#6]-,:[#6]', 0),
    334: ('[#8]-,:[#16]-,:[#6]:[#6]', 0),
    335: ('[#6]:[#7]-,:[#6]:[#6]', 0),
    336: ('[#7]-,:[#16]-,:[#6]:[#6]', 0),
    337: ('[#7]-,:[#6]:[#7]:[#6]', 0),
    338: ('[#7]:[#6]:[#6]:[#7]', 0),
    339: ('[#7]-,:[#6]:[#7]:[#7]', 0),
    340: ('[#7]-,:[#6]=,:[#7]-,:[#6]', 0),
    341: ('[#7]-,:[#6]=,:[#7&!H0]', 0),
    342: ('[#7]-,:[#6]-,:[#16]-,:[#6]', 0),
    343: ('[#6]-,:[#6]-,:[#6]=,:[#6]', 0),
    344: ('[#6]-,:[#7]:[#6&!H0]', 0),
    345: ('[#7]-,:[#6]:[#8]:[#6]', 0),
    346: ('[#8]=,:[#6]-,:[#6]:[#6]', 0),
    347: ('[#8]=,:[#6]-,:[#6]:[#7]', 0),
    348: ('[#6]-,:[#7]-,:[#6]:[#6]', 0),
    349: ('[#7]:[#7]-,:[#6&!H0]', 0),
    350: ('[#8]-,:[#6]:[#6]:[#7]', 0),
    351: ('[#8]-,:[#6]=,:[#6]-,:[#6]', 0),
    352: ('[#7]-,:[#6]:[#6]:[#7]', 0),
    353: ('[#6]-,:[#16]-,:[#6]:[#6]', 0),
    354: ('[Cl]-,:[#6]:[#6]-,:[#6]', 0),
    355: ('[#7]-,:[#6]=,:[#6&!H0]', 0),
    356: ('[Cl]-,:[#6]:[#6&!H0]', 0),
    357: ('[#7]:[#6]:[#7]-,:[#6]', 0),
    358: ('[Cl]-,:[#6]:[#6]-,:[#8]', 0),
    359: ('[#6]-,:[#6]:[#7]:[#6]', 0),
    360: ('[#6]-,:[#6]-,:[#16]-,:[#6]', 0),
    361: ('[#16]=,:[#6]-,:[#7]-,:[#6]', 0),
    362: ('[Br]-,:[#6]:[#6]-,:[#6]', 0),
    363: ('[#7&!H0]-,:[#7&!H0]', 0),
    364: ('[#16]=,:[#6]-,:[#7&!H0]', 0),
    365: ('[#6]-,:[#33]-[#8&!H0]', 0),
    366: ('[#16]:[#6]:[#6&!H0]', 0),
    367: ('[#8]-,:[#7]-,:[#6]-,:[#6]', 0),
    368: ('[#7]-,:[#7]-,:[#6]-,:[#6]', 0),
    369: ('[#6H,#6H2,#6H3]=,:[#6H,#6H2,#6H3]', 0),
    370: ('[#7]-,:[#7]-,:[#6]-,:[#7]', 0),
    371: ('[#8]=,:[#6]-,:[#7]-,:[#7]', 0),
    372: ('[#7]=,:[#6]-,:[#7]-,:[#6]', 0),
    373: ('[#6]=,:[#6]-,:[#6]:[#6]', 0),
    374: ('[#6]:[#7]-,:[#6&!H0]', 0),
    375: ('[#6]-,:[#7]-,:[#7&!H0]', 0),
    376: ('[#7]:[#6]:[#6]-,:[#6]', 0),
    377: ('[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    378: ('[#33]-,:[#6]:[#6&!H0]', 0),
    379: ('[Cl]-,:[#6]:[#6]-,:[Cl]', 0),
    380: ('[#6]:[#6]:[#7&!H0]', 0),
    381: ('[#7&!H0]-,:[#6&!H0]', 0),
    382: ('[Cl]-,:[#6]-,:[#6]-,:[Cl]', 0),
    383: ('[#7]:[#6]-,:[#6]:[#6]', 0),
    384: ('[#16]-,:[#6]:[#6]-,:[#6]', 0),
    385: ('[#16]-,:[#6]:[#6&!H0]', 0),
    386: ('[#16]-,:[#6]:[#6]-,:[#7]', 0),
    387: ('[#16]-,:[#6]:[#6]-,:[#8]', 0),
    388: ('[#8]=,:[#6]-,:[#6]-,:[#6]', 0),
    389: ('[#8]=,:[#6]-,:[#6]-,:[#7]', 0),
    390: ('[#8]=,:[#6]-,:[#6]-,:[#8]', 0),
    391: ('[#7]=,:[#6]-,:[#6]-,:[#6]', 0),
    392: ('[#7]=,:[#6]-,:[#6&!H0]', 0),
    393: ('[#6]-,:[#7]-,:[#6&!H0]', 0),
    394: ('[#8]-,:[#6]:[#6]-,:[#6]', 0),
    395: ('[#8]-,:[#6]:[#6&!H0]', 0),
    396: ('[#8]-,:[#6]:[#6]-,:[#7]', 0),
    397: ('[#8]-,:[#6]:[#6]-,:[#8]', 0),
    398: ('[#7]-,:[#6]:[#6]-,:[#6]', 0),
    399: ('[#7]-,:[#6]:[#6&!H0]', 0),
    400: ('[#7]-,:[#6]:[#6]-,:[#7]', 0),
    401: ('[#8]-,:[#6]-,:[#6]:[#6]', 0),
    402: ('[#7]-,:[#6]-,:[#6]:[#6]', 0),
    403: ('[Cl]-,:[#6]-,:[#6]-,:[#6]', 0),
    404: ('[Cl]-,:[#6]-,:[#6]-,:[#8]', 0),
    405: ('[#6]:[#6]-,:[#6]:[#6]', 0),
    406: ('[#8]=,:[#6]-,:[#6]=,:[#6]', 0),
    407: ('[Br]-,:[#6]-,:[#6]-,:[#6]', 0),
    408: ('[#7]=,:[#6]-,:[#6]=,:[#6]', 0),
    409: ('[#6]=,:[#6]-,:[#6]-,:[#6]', 0),
    410: ('[#7]:[#6]-,:[#8&!H0]', 0),
    411: ('[#8]=,:[#7]-,:c:c', 0),
    412: ('[#8]-,:[#6]-,:[#7&!H0]', 0),
    413: ('[#7]-,:[#6]-,:[#7]-,:[#6]', 0),
    414: ('[Cl]-,:[#6]-,:[#6]=,:[#8]', 0),
    415: ('[Br]-,:[#6]-,:[#6]=,:[#8]', 0),
    416: ('[#8]-,:[#6]-,:[#8]-,:[#6]', 0),
    417: ('[#6]=,:[#6]-,:[#6]=,:[#6]', 0),
    418: ('[#6]:[#6]-,:[#8]-,:[#6]', 0),
    419: ('[#8]-,:[#6]-,:[#6]-,:[#7]', 0),
    420: ('[#8]-,:[#6]-,:[#6]-,:[#8]', 0),
    421: ('N#[#6]-,:[#6]-,:[#6]', 0),
    422: ('[#7]-,:[#6]-,:[#6]-,:[#7]', 0),
    423: ('[#6]:[#6]-,:[#6]-,:[#6]', 0),
    424: ('[#6&!H0]-,:[#8&!H0]', 0),
    425: ('n:c:n:c', 0),
    426: ('[#8]-,:[#6]-,:[#6]=,:[#6]', 0),
    427: ('[#8]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    428: ('[#8]-,:[#6]-,:[#6]:[#6]-,:[#8]', 0),
    429: ('[#7]=,:[#6]-,:[#6]:[#6&!H0]', 0),
    430: ('c:c-,:[#7]-,:c:c', 0),
    431: ('[#6]-,:[#6]:[#6]-,:c:c', 0),
    432: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    433: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    434: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    435: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    436: ('[Cl]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
    437: ('c:c-,:[#6]=,:[#6]-,:[#6]', 0),
    438: ('[#6]-,:[#6]:[#6]-,:[#7]-,:[#6]', 0),
    439: ('[#6]-,:[#16]-,:[#6]-,:[#6]-,:[#6]', 0),
    440: ('[#7]-,:[#6]:[#6]-,:[#8&!H0]', 0),
    441: ('[#8]=,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    442: ('[#6]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
    443: ('[#6]-,:[#6]:[#6]-,:[#8&!H0]', 0),
    444: ('[Cl]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    445: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    446: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    447: ('[#6]-,:[#8]-,:[#6]-,:[#6]=,:[#6]', 0),
    448: ('c:c-,:[#6]-,:[#6]-,:[#6]', 0),
    449: ('[#7]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    450: ('[#8]=,:[#6]-,:[#6]-,:c:c', 0),
    451: ('[Cl]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
    452: ('[#6H,#6H2,#6H3]-,:[#6]=,:[#6H,#6H2,#6H3]', 0),
    453: ('[#7]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
    454: ('[#7]-,:[#6]:[#6]:[#6]-,:[#7]', 0),
    455: ('[#8]=,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    456: ('[#6]-,:c:c:[#6]-,:[#6]', 0),
    457: ('[#6]-,:[#8]-,:[#6]-,:[#6]:c', 0),
    458: ('[#8]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    459: ('[#8]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
    460: ('[#7]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    461: ('[#6]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    462: ('[Cl]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    463: ('[#6]-,:[#8]-,:[#6]-,:[#8]-,:[#6]', 0),
    464: ('[#7]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    465: ('[#7]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
    466: ('[#6]-,:[#7]-,:[#6]-,:[#6]-,:[#6]', 0),
    467: ('[#6]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
    468: ('[#7]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    469: ('c:c:n:n:c', 0),
    470: ('[#6]-,:[#6]-,:[#6]-,:[#8&!H0]', 0),
    471: ('c:[#6]-,:[#6]-,:[#6]:c', 0),
    472: ('[#8]-,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    473: ('c:c-,:[#8]-,:[#6]-,:[#6]', 0),
    474: ('[#7]-,:[#6]:c:c:n', 0),
    475: ('[#8]=,:[#6]-,:[#8]-,:[#6]:c', 0),
    476: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    477: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#7]', 0),
    478: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#8]', 0),
    479: ('[#6]-,:[#8]-,:[#6]:[#6]-,:[#6]', 0),
    480: ('[#8]=,:[#33]-,:[#6]:c:c', 0),
    481: ('[#6]-,:[#7]-,:[#6]-,:[#6]:c', 0),
    482: ('[#16]-,:[#6]:c:c-,:[#7]', 0),
    483: ('[#8]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
    484: ('[#8]-,:[#6]:[#6]-,:[#8&!H0]', 0),
    485: ('[#6]-,:[#6]-,:[#8]-,:[#6]:c', 0),
    486: ('[#7]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    487: ('[#6]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    488: ('[#7]-,:[#7]-,:[#6]-,:[#7&!H0]', 0),
    489: ('[#6]-,:[#7]-,:[#6]-,:[#7]-,:[#6]', 0),
    490: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    491: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    492: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    493: ('[#6]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    494: ('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#6]', 0),
    495: ('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    496: ('[#6&!H0]-,:[#6]-,:[#7&!H0]', 0),
    497: ('[#6]-,:[#6]=,:[#7]-,:[#7]-,:[#6]', 0),
    498: ('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    499: ('[#8]=,:[#6]-,:[#7]-,:[#6&!H0]', 0),
    500: ('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#7]', 0),
    501: ('[#8]=,:[#7]-,:[#6]:[#6]-,:[#7]', 0),
    502: ('[#8]=,:[#7]-,:c:c-,:[#8]', 0),
    503: ('[#8]=,:[#6]-,:[#7]-,:[#6]=,:[#8]', 0),
    504: ('[#8]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
    505: ('[#8]-,:[#6]:[#6]:[#6]-,:[#7]', 0),
    506: ('[#8]-,:[#6]:[#6]:[#6]-,:[#8]', 0),
    507: ('[#7]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    508: ('[#8]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    509: ('[#6]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    510: ('[#6]-,:[#7]-,:[#6]:[#6]-,:[#6]', 0),
    511: ('[#6]-,:[#6]-,:[#16]-,:[#6]-,:[#6]', 0),
    512: ('[#8]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    513: ('[#6]-,:[#6]=,:[#6]-,:[#6]-,:[#6]', 0),
    514: ('[#8]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
    515: ('[#8]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    516: ('[#8]-,:[#6]-,:[#6]-,:[#8&!H0]', 0),
    517: ('[#6]-,:[#6]=,:[#6]-,:[#6]=,:[#6]', 0),
    518: ('[#7]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
    519: ('[#6]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    520: ('[#6]=,:[#6]-,:[#6]-,:[#8&!H0]', 0),
    521: ('[#6]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
    522: ('[Cl]-,:[#6]:[#6]-,:[#6]=,:[#8]', 0),
    523: ('[Br]-,:[#6]:c:c-,:[#6]', 0),
    524: ('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    525: ('[#8]=,:[#6]-,:[#6]=,:[#6&!H0]', 0),
    526: ('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#7]', 0),
    527: ('[#7]-,:[#6]-,:[#7]-,:[#6]:c', 0),
    528: ('[Br]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    529: ('[#7]#[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    530: ('[#6]-,:[#6]=,:[#6]-,:[#6]:c', 0),
    531: ('[#6]-,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    532: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    533: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    534: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    535: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    536: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    537: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    538: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    539: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    540: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    541: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    542: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    543: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    544: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    545: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    546: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    547: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    548: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    549: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    550: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]', 0),
    551: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    552: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]', 0),
    553: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    554: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#8])-,:[#6]', 0),
    555: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    556: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#7])-,:[#6]', 0),
    557: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    558: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#8])-,:[#6]', 0),
    559: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](=,:[#8])-,:[#6]', 0),
    560: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#7])-,:[#6]', 0),
    561: ('[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]', 0),
    562: ('[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]-,:[#6]', 0),
    563: ('[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]', 0),
    564: ('[#6]-,:[#6](-,:[#6])(-,:[#6])-,:[#6]-,:[#6]', 0),
    565: ('[#6]-,:[#6](-,:[#6])-,:[#6](-,:[#6])-,:[#6]', 0),
    # ---- keys 566-628: disubstituted aromatic six-membered rings ----
    566: ('[#6]c1ccc([#6])cc1', 0),
    567: ('[#6]c1ccc([#8])cc1', 0),
    568: ('[#6]c1ccc([#16])cc1', 0),
    569: ('[#6]c1ccc([#7])cc1', 0),
    570: ('[#6]c1ccc(Cl)cc1', 0),
    571: ('[#6]c1ccc(Br)cc1', 0),
    572: ('[#8]c1ccc([#8])cc1', 0),
    573: ('[#8]c1ccc([#16])cc1', 0),
    574: ('[#8]c1ccc([#7])cc1', 0),
    575: ('[#8]c1ccc(Cl)cc1', 0),
    576: ('[#8]c1ccc(Br)cc1', 0),
    577: ('[#16]c1ccc([#16])cc1', 0),
    578: ('[#16]c1ccc([#7])cc1', 0),
    579: ('[#16]c1ccc(Cl)cc1', 0),
    580: ('[#16]c1ccc(Br)cc1', 0),
    581: ('[#7]c1ccc([#7])cc1', 0),
    582: ('[#7]c1ccc(Cl)cc1', 0),
    583: ('[#7]c1ccc(Br)cc1', 0),
    584: ('Clc1ccc(Cl)cc1', 0),
    585: ('Clc1ccc(Br)cc1', 0),
    586: ('Brc1ccc(Br)cc1', 0),
    587: ('[#6]c1cc([#6])ccc1', 0),
    588: ('[#6]c1cc([#8])ccc1', 0),
    589: ('[#6]c1cc([#16])ccc1', 0),
    590: ('[#6]c1cc([#7])ccc1', 0),
    591: ('[#6]c1cc(Cl)ccc1', 0),
    592: ('[#6]c1cc(Br)ccc1', 0),
    593: ('[#8]c1cc([#8])ccc1', 0),
    594: ('[#8]c1cc([#16])ccc1', 0),
    595: ('[#8]c1cc([#7])ccc1', 0),
    596: ('[#8]c1cc(Cl)ccc1', 0),
    597: ('[#8]c1cc(Br)ccc1', 0),
    598: ('[#16]c1cc([#16])ccc1', 0),
    599: ('[#16]c1cc([#7])ccc1', 0),
    600: ('[#16]c1cc(Cl)ccc1', 0),
    601: ('[#16]c1cc(Br)ccc1', 0),
    602: ('[#7]c1cc([#7])ccc1', 0),
    603: ('[#7]c1cc(Cl)ccc1', 0),
    604: ('[#7]c1cc(Br)ccc1', 0),
    605: ('Clc1cc(Cl)ccc1', 0),
    606: ('Clc1cc(Br)ccc1', 0),
    607: ('Brc1cc(Br)ccc1', 0),
    608: ('[#6]c1c([#6])cccc1', 0),
    609: ('[#6]c1c([#8])cccc1', 0),
    610: ('[#6]c1c([#16])cccc1', 0),
    611: ('[#6]c1c([#7])cccc1', 0),
    612: ('[#6]c1c(Cl)cccc1', 0),
    613: ('[#6]c1c(Br)cccc1', 0),
    614: ('[#8]c1c([#8])cccc1', 0),
    615: ('[#8]c1c([#16])cccc1', 0),
    616: ('[#8]c1c([#7])cccc1', 0),
    617: ('[#8]c1c(Cl)cccc1', 0),
    618: ('[#8]c1c(Br)cccc1', 0),
    619: ('[#16]c1c([#16])cccc1', 0),
    620: ('[#16]c1c([#7])cccc1', 0),
    621: ('[#16]c1c(Cl)cccc1', 0),
    622: ('[#16]c1c(Br)cccc1', 0),
    623: ('[#7]c1c([#7])cccc1', 0),
    624: ('[#7]c1c(Cl)cccc1', 0),
    625: ('[#7]c1c(Br)cccc1', 0),
    626: ('Clc1c(Cl)cccc1', 0),
    627: ('Clc1c(Br)cccc1', 0),
    628: ('Brc1c(Br)cccc1', 0),
    # ---- keys 629-691: disubstituted six-membered carbon rings ----
    629: ('[#6][#6]1[#6][#6][#6]([#6])[#6][#6]1', 0),
    630: ('[#6][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0),
    631: ('[#6][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
    632: ('[#6][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    633: ('[#6][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    634: ('[#6][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    635: ('[#8][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0),
    636: ('[#8][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
    637: ('[#8][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    638: ('[#8][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    639: ('[#8][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    640: ('[#16][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
    641: ('[#16][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    642: ('[#16][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    643: ('[#16][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    644: ('[#7][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    645: ('[#7][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    646: ('[#7][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    647: ('Cl[#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    648: ('Cl[#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    649: ('Br[#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    650: ('[#6][#6]1[#6][#6]([#6])[#6][#6][#6]1', 0),
    651: ('[#6][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0),
    652: ('[#6][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
    653: ('[#6][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    654: ('[#6][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    655: ('[#6][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    656: ('[#8][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0),
    657: ('[#8][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
    658: ('[#8][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    659: ('[#8][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    660: ('[#8][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    661: ('[#16][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
    662: ('[#16][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    663: ('[#16][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    664: ('[#16][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    665: ('[#7][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    666: ('[#7][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    667: ('[#7][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    668: ('Cl[#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    669: ('Cl[#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    670: ('Br[#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    671: ('[#6][#6]1[#6]([#6])[#6][#6][#6][#6]1', 0),
    672: ('[#6][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0),
    673: ('[#6][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
    674: ('[#6][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    675: ('[#6][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    676: ('[#6][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    677: ('[#8][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0),
    678: ('[#8][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
    679: ('[#8][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    680: ('[#8][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    681: ('[#8][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    682: ('[#16][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
    683: ('[#16][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    684: ('[#16][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    685: ('[#16][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    686: ('[#7][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    687: ('[#7][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    688: ('[#7][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    689: ('Cl[#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    690: ('Cl[#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    691: ('Br[#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    # ---- keys 692-733: disubstituted five-membered carbon rings ----
    692: ('[#6][#6]1[#6][#6]([#6])[#6][#6]1', 0),
    693: ('[#6][#6]1[#6][#6]([#8])[#6][#6]1', 0),
    694: ('[#6][#6]1[#6][#6]([#16])[#6][#6]1', 0),
    695: ('[#6][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    696: ('[#6][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    697: ('[#6][#6]1[#6][#6](Br)[#6][#6]1', 0),
    698: ('[#8][#6]1[#6][#6]([#8])[#6][#6]1', 0),
    699: ('[#8][#6]1[#6][#6]([#16])[#6][#6]1', 0),
    700: ('[#8][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    701: ('[#8][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    702: ('[#8][#6]1[#6][#6](Br)[#6][#6]1', 0),
    703: ('[#16][#6]1[#6][#6]([#16])[#6][#6]1', 0),
    704: ('[#16][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    705: ('[#16][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    706: ('[#16][#6]1[#6][#6](Br)[#6][#6]1', 0),
    707: ('[#7][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    708: ('[#7][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    709: ('[#7][#6]1[#6][#6](Br)[#6][#6]1', 0),
    710: ('Cl[#6]1[#6][#6](Cl)[#6][#6]1', 0),
    711: ('Cl[#6]1[#6][#6](Br)[#6][#6]1', 0),
    712: ('Br[#6]1[#6][#6](Br)[#6][#6]1', 0),
    713: ('[#6][#6]1[#6]([#6])[#6][#6][#6]1', 0),
    714: ('[#6][#6]1[#6]([#8])[#6][#6][#6]1', 0),
    715: ('[#6][#6]1[#6]([#16])[#6][#6][#6]1', 0),
    716: ('[#6][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    717: ('[#6][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    718: ('[#6][#6]1[#6](Br)[#6][#6][#6]1', 0),
    719: ('[#8][#6]1[#6]([#8])[#6][#6][#6]1', 0),
    720: ('[#8][#6]1[#6]([#16])[#6][#6][#6]1', 0),
    721: ('[#8][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    722: ('[#8][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    723: ('[#8][#6]1[#6](Br)[#6][#6][#6]1', 0),
    724: ('[#16][#6]1[#6]([#16])[#6][#6][#6]1', 0),
    725: ('[#16][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    726: ('[#16][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    727: ('[#16][#6]1[#6](Br)[#6][#6][#6]1', 0),
    728: ('[#7][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    # Five ring carbons, like keys 728 and 730. The previous pattern had only
    # four, turning this key into a four-membered-ring query.
    729: ('[#7][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    730: ('[#7][#6]1[#6](Br)[#6][#6][#6]1', 0),
    731: ('Cl[#6]1[#6](Cl)[#6][#6][#6]1', 0),
    732: ('Cl[#6]1[#6](Br)[#6][#6][#6]1', 0),
    733: ('Br[#6]1[#6](Br)[#6][#6][#6]1', 0),
}

# Cache of (compiled query, count) pairs; filled on the first call to
# calcPubChemFingerPart1.
PubchemKeys = None
def InitKeys(keyList, keyDict):
    """ *Internal Use Only*

    Compile the SMARTS pattern of every key and store it in ``keyList``.

    **Arguments**

      - keyList: pre-sized list with one slot per key; slot ``key - 1``
        is overwritten with ``(compiled_query, count)``

      - keyDict: mapping ``key -> (SMARTS, count)`` with 1-based keys

    Entries whose pattern is ``'?'`` are skipped. A pattern that fails to
    compile is reported on stdout and its slot is left untouched.
    """
    assert len(keyList) == len(keyDict.keys()), 'length mismatch'
    for key, (patt, count) in keyDict.items():
        if patt == '?':
            # placeholder entry: nothing to compile
            continue
        query = Chem.MolFromSmarts(patt)
        if not query:
            print('SMARTS parser error for key #%d: %s' % (key, patt))
            continue
        keyList[key - 1] = query, count
def calcPubChemFingerPart1(mol, **kwargs):
    """ Calculate the SMARTS-based part of the PubChem fingerprint.

    Every entry of ``smartsPatts`` is evaluated against ``mol``. The
    compiled queries are built once, on the first call, and cached in the
    module-level ``PubchemKeys``.

    **Arguments**

      - mol: the molecule to be fingerprinted

      - ctor (keyword, optional): callable used to build the result
        vector; defaults to ``DataStructs.SparseBitVect``. Any other
        keyword arguments are ignored.

    **Returns**

      a bit vector with ``len(smartsPatts) + 1`` positions. Key ``k`` is
      stored at index ``k``; index 0 is never written.

    Example::

        m = Chem.MolFromSmiles('CNO')
        bv = calcPubChemFingerPart1(m)
        on_bits = tuple(bv.GetOnBits())
    """
    global PubchemKeys
    if PubchemKeys is None:
        # first call: allocate one slot per key, then compile the SMARTS
        PubchemKeys = [(None, 0)] * len(smartsPatts.keys())
        InitKeys(PubchemKeys, smartsPatts)
    make_vect = kwargs.get('ctor', DataStructs.SparseBitVect)
    fingerprint = make_vect(len(PubchemKeys) + 1)
    for bit, (query, threshold) in enumerate(PubchemKeys, 1):
        if query is None:
            # pattern was skipped or failed to compile in InitKeys
            continue
        if threshold == 0:
            # presence key: a single match is enough
            fingerprint[bit] = mol.HasSubstructMatch(query)
        elif len(mol.GetSubstructMatches(query)) > threshold:
            # count key: needs strictly more than `threshold` matches
            fingerprint[bit] = 1
    return fingerprint
def func_1(mol, bits):
    """ *Internal Use Only*

    Set the "any ring" count bits of the PubChem ring section
    (the section documented here as PubChem fingerprints 116-263).

    **Arguments**

      - mol: molecule exposing ``GetRingInfo().AtomRings()``

      - bits: mutable, index-assignable sequence; modified in place

    **Returns**

      ``(ringSize, bits)``: the size of every ring reported for ``mol``
      and the same ``bits`` object.
    """
    # ring size -> offsets in `bits`; the n-th offset is switched on when
    # the molecule has at least n rings of that size
    offsets_by_size = (
        (3, (0, 7)),
        (4, (14, 21)),
        (5, (28, 35, 42, 49, 56)),
        (6, (63, 70, 77, 84, 91)),
        (7, (98, 105)),
        (8, (112, 119)),
        (9, (126,)),
        (10, (133,)),
    )
    ringSize = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
    for size, offsets in offsets_by_size:
        n_rings = ringSize.count(size)
        for offset in offsets[:n_rings]:
            bits[offset] = 1
    return ringSize, bits
def func_2(mol, bits):
    """ *Internal Use Only*

    Set the count bits for "saturated or aromatic carbon-only" rings.

    A ring is counted when every ring atom is carbon AND the ring is
    either saturated (all ring bonds SINGLE) or aromatic (all ring bonds
    AROMATIC). Previously every saturated ring was counted regardless of
    its atoms, so e.g. a saturated N-containing ring set these bits.

    **Arguments**

      - mol: molecule exposing ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx()``

      - bits: mutable, index-assignable sequence; modified in place

    **Returns**

      ``(ringSize, bits)``: the sizes of the rings that were counted and
      the same ``bits`` object.
    """
    # ring size -> offsets in `bits`; the n-th offset is switched on when
    # at least n qualifying rings of that size exist
    offsets_by_size = (
        (3, (1, 8)),
        (4, (15, 22)),
        (5, (29, 36, 43, 50, 57)),
        (6, (64, 71, 78, 85, 92)),
        (7, (99, 106)),
        (8, (113, 120)),
        (9, (127,)),
        (10, (134,)),
    )
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        bond_kinds = set(bond.GetBondType().name for bond in bonds)
        saturated = bond_kinds == {'SINGLE'}
        aromatic = bond_kinds == {'AROMATIC'}
        # every ring atom is an end point of some ring bond
        carbon_only = all(
            bond.GetBeginAtom().GetAtomicNum() == 6
            and bond.GetEndAtom().GetAtomicNum() == 6
            for bond in bonds
        )
        if carbon_only and (saturated or aromatic):
            ringSize.append(len(ring))
    for size, offsets in offsets_by_size:
        for offset in offsets[:ringSize.count(size)]:
            bits[offset] = 1
    return ringSize, bits
def func_3(mol, bits):
    """ *Internal Use Only*

    Set the count bits for "saturated or aromatic nitrogen-containing"
    rings.

    A ring is counted when it contains at least one nitrogen atom AND is
    either saturated (all ring bonds SINGLE) or aromatic (all ring bonds
    AROMATIC). Previously every saturated ring was counted even when it
    held no nitrogen, so e.g. a saturated all-carbon ring set these bits.

    **Arguments**

      - mol: molecule exposing ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx()``

      - bits: mutable, index-assignable sequence; modified in place

    **Returns**

      ``(ringSize, bits)``: the sizes of the rings that were counted and
      the same ``bits`` object.
    """
    # ring size -> offsets in `bits`; the n-th offset is switched on when
    # at least n qualifying rings of that size exist
    offsets_by_size = (
        (3, (2, 9)),
        (4, (16, 23)),
        (5, (30, 37, 44, 51, 58)),
        (6, (65, 72, 79, 86, 93)),
        (7, (100, 107)),
        (8, (114, 121)),
        (9, (128,)),
        (10, (135,)),
    )
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        bond_kinds = set(bond.GetBondType().name for bond in bonds)
        saturated = bond_kinds == {'SINGLE'}
        aromatic = bond_kinds == {'AROMATIC'}
        # every ring atom is an end point of some ring bond
        has_nitrogen = any(
            bond.GetBeginAtom().GetAtomicNum() == 7
            or bond.GetEndAtom().GetAtomicNum() == 7
            for bond in bonds
        )
        if has_nitrogen and (saturated or aromatic):
            ringSize.append(len(ring))
    for size, offsets in offsets_by_size:
        for offset in offsets[:ringSize.count(size)]:
            bits[offset] = 1
    return ringSize, bits
def func_4(mol, bits):
    """ *Internal Use Only*
    saturated or aromatic heteroatom-containing

    A ring is counted when all of its bonds are SINGLE (saturated) or all of
    its bonds are AROMATIC, and at least one ring atom is neither hydrogen
    nor carbon.  The heteroatom requirement applies to both cases: a
    saturated carbocycle is not a heteroatom-containing ring and sets no
    bit here.

    :param mol: RDKit molecule (only GetRingInfo().BondRings() and
        GetBondWithIdx() are used).
    :param bits: list of 0/1 flags; updated in place.
    :return: tuple (ringSize, bits) where ringSize lists the size of every
        counted ring (any size) and bits is the same list that was passed in.
    """
    # ring size -> (index of the ">= 1 ring" flag, number of count thresholds).
    # The flag for ">= k rings" sits 7 entries after the flag for ">= k-1".
    layout = {3: (3, 2), 4: (17, 2), 5: (31, 5), 6: (66, 5),
              7: (101, 2), 8: (115, 2), 9: (129, 1), 10: (136, 1)}
    counts = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bondIdx) for bondIdx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        saturated = all(kind == 'SINGLE' for kind in kinds)
        aromatic = all(kind == 'AROMATIC' for kind in kinds)
        has_heteroatom = any(
            bond.GetBeginAtom().GetAtomicNum() not in (1, 6)
            or bond.GetEndAtom().GetAtomicNum() not in (1, 6)
            for bond in bonds)
        if (saturated or aromatic) and has_heteroatom:
            ringSize.append(len(ring))
            # Rings outside sizes 3-10 are reported in ringSize but have no flag.
            if len(ring) in counts:
                counts[len(ring)] += 1
    for size, (first_bit, max_count) in layout.items():
        # Thresholds are cumulative: n rings switch on ">=1" ... ">=n".
        for step in range(min(counts[size], max_count)):
            bits[first_bit + 7 * step] = 1
    return ringSize, bits
def func_5(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic carbon-only

    A ring is counted when it has at least one bond that is not SINGLE, has
    no AROMATIC bond at all, and every atom on its bonds is carbon.

    :param mol: RDKit molecule (only GetRingInfo().BondRings() and
        GetBondWithIdx() are used).
    :param bits: list of 0/1 flags; updated in place.
    :return: tuple (ringSize, bits) where ringSize lists the size of every
        counted ring (any size) and bits is the same list that was passed in.
    """
    # ring size -> (index of the ">= 1 ring" flag, number of count thresholds).
    # The flag for ">= k rings" sits 7 entries after the flag for ">= k-1".
    layout = {3: (4, 2), 4: (18, 2), 5: (32, 5), 6: (67, 5),
              7: (102, 2), 8: (116, 2), 9: (130, 1), 10: (137, 1)}
    counts = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bondIdx) for bondIdx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        unsaturated = any(kind != 'SINGLE' for kind in kinds)
        # A single aromatic bond is enough to disqualify the ring.
        nonaromatic = 'AROMATIC' not in kinds
        carbon_only = all(
            bond.GetBeginAtom().GetAtomicNum() == 6
            and bond.GetEndAtom().GetAtomicNum() == 6
            for bond in bonds)
        if unsaturated and nonaromatic and carbon_only:
            ringSize.append(len(ring))
            # Rings outside sizes 3-10 are reported in ringSize but have no flag.
            if len(ring) in counts:
                counts[len(ring)] += 1
    for size, (first_bit, max_count) in layout.items():
        # Thresholds are cumulative: n rings switch on ">=1" ... ">=n".
        for step in range(min(counts[size], max_count)):
            bits[first_bit + 7 * step] = 1
    return ringSize, bits
def func_6(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic nitrogen-containing

    A ring is counted when it has at least one bond that is not SINGLE, has
    no AROMATIC bond at all, and at least one atom on its bonds is nitrogen.

    :param mol: RDKit molecule (only GetRingInfo().BondRings() and
        GetBondWithIdx() are used).
    :param bits: list of 0/1 flags; updated in place.
    :return: tuple (ringSize, bits) where ringSize lists the size of every
        counted ring (any size) and bits is the same list that was passed in.
    """
    # ring size -> (index of the ">= 1 ring" flag, number of count thresholds).
    # The flag for ">= k rings" sits 7 entries after the flag for ">= k-1".
    layout = {3: (5, 2), 4: (19, 2), 5: (33, 5), 6: (68, 5),
              7: (103, 2), 8: (117, 2), 9: (131, 1), 10: (138, 1)}
    counts = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bondIdx) for bondIdx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        unsaturated = any(kind != 'SINGLE' for kind in kinds)
        # A single aromatic bond is enough to disqualify the ring.
        nonaromatic = 'AROMATIC' not in kinds
        has_nitrogen = any(
            bond.GetBeginAtom().GetAtomicNum() == 7
            or bond.GetEndAtom().GetAtomicNum() == 7
            for bond in bonds)
        if unsaturated and nonaromatic and has_nitrogen:
            ringSize.append(len(ring))
            # Rings outside sizes 3-10 are reported in ringSize but have no flag.
            if len(ring) in counts:
                counts[len(ring)] += 1
    for size, (first_bit, max_count) in layout.items():
        # Thresholds are cumulative: n rings switch on ">=1" ... ">=n".
        for step in range(min(counts[size], max_count)):
            bits[first_bit + 7 * step] = 1
    return ringSize, bits
def func_7(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic heteroatom-containing

    A ring is counted when it has at least one bond that is not SINGLE, has
    no AROMATIC bond at all, and at least one atom on its bonds is neither
    hydrogen nor carbon.

    :param mol: RDKit molecule (only GetRingInfo().BondRings() and
        GetBondWithIdx() are used).
    :param bits: list of 0/1 flags; updated in place.
    :return: tuple (ringSize, bits) where ringSize lists the size of every
        counted ring (any size) and bits is the same list that was passed in.
    """
    # ring size -> (index of the ">= 1 ring" flag, number of count thresholds).
    # The flag for ">= k rings" sits 7 entries after the flag for ">= k-1".
    layout = {3: (6, 2), 4: (20, 2), 5: (34, 5), 6: (69, 5),
              7: (104, 2), 8: (118, 2), 9: (132, 1), 10: (139, 1)}
    counts = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bondIdx) for bondIdx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        unsaturated = any(kind != 'SINGLE' for kind in kinds)
        # A single aromatic bond is enough to disqualify the ring.
        nonaromatic = 'AROMATIC' not in kinds
        has_heteroatom = any(
            bond.GetBeginAtom().GetAtomicNum() not in (1, 6)
            or bond.GetEndAtom().GetAtomicNum() not in (1, 6)
            for bond in bonds)
        if unsaturated and nonaromatic and has_heteroatom:
            ringSize.append(len(ring))
            # Rings outside sizes 3-10 are reported in ringSize but have no flag.
            if len(ring) in counts:
                counts[len(ring)] += 1
    for size, (first_bit, max_count) in layout.items():
        # Thresholds are cumulative: n rings switch on ">=1" ... ">=n".
        for step in range(min(counts[size], max_count)):
            bits[first_bit + 7 * step] = 1
    return ringSize, bits
def func_8(mol, bits):
    """ *Internal Use Only*
    aromatic rings or hetero-aromatic rings

    Counts rings whose bonds are all AROMATIC, and separately those aromatic
    rings that also contain an atom that is neither hydrogen nor carbon.
    Entries 140/142/144/146 flag ">= 1..4 aromatic rings" and entries
    141/143/145/147 flag ">= 1..4 hetero-aromatic rings".

    The two counts are thresholded independently, so a molecule with four
    aromatic rings of which two are hetero-aromatic gets all four aromatic
    flags and the first two hetero-aromatic flags.  Non-aromatic rings with
    heteroatoms do not contribute to the hetero-aromatic count.

    :param mol: RDKit molecule (only GetRingInfo().BondRings() and
        GetBondWithIdx() are used).
    :param bits: list of 0/1 flags; updated in place.
    :return: the same bits list that was passed in.
    """
    aromatic_rings = 0
    hetero_aromatic_rings = 0
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bondIdx) for bondIdx in ring]
        if any(bond.GetBondType().name != 'AROMATIC' for bond in bonds):
            continue
        aromatic_rings += 1
        if any(bond.GetBeginAtom().GetAtomicNum() not in (1, 6)
               or bond.GetEndAtom().GetAtomicNum() not in (1, 6)
               for bond in bonds):
            hetero_aromatic_rings += 1
    # Thresholds are cumulative: n rings switch on ">=1" ... ">=min(n, 4)".
    for step in range(min(aromatic_rings, 4)):
        bits[140 + 2 * step] = 1
    for step in range(min(hetero_aromatic_rings, 4)):
        bits[141 + 2 * step] = 1
    return bits
def calcPubChemFingerPart2(mol):  # 116-263
    """ *Internal Use Only*
    Calculate PubChem Fingerprints (116-263)

    Starts from a 148-entry list of zeros and lets each ring-counting helper
    (func_1 ... func_8) switch on its own entries in turn.

    :param mol: RDKit molecule passed through to every helper.
    :return: list of 148 ints (0 or 1).
    """
    bits = [0] * 148
    # func_1 .. func_7 return (ringSize, bits); only the bits are kept.
    for ring_counter in (func_1, func_2, func_3, func_4,
                         func_5, func_6, func_7):
        bits = ring_counter(mol, bits)[1]
    # func_8 returns the bit list directly.
    return func_8(mol, bits)
def calcPubChemFingerAll(mol):
    """*Internal Use Only*
    Calculate PubChem Fingerprints

    Merges the two partial fingerprints into one 881-entry list:

    * entries 0-114    come from positions 1-115 of part 1,
    * entries 115-262  come from the 148 flags of part 2,
    * entries 263-880  come from positions 116-733 of part 1.

    Position 0 of part 1 is not copied.

    :param mol: RDKit molecule.
    :return: list of 881 ints (0 or 1).
    """
    part2_offset = 115                  # part 2 starts right after the first 115 flags
    tail_offset = part2_offset + 148    # remainder of part 1 follows the 148 part-2 flags
    AllBits = [0] * 881
    bit_string = calcPubChemFingerPart1(mol).ToBitString()
    for position, flag in enumerate(bit_string[1:116]):
        if flag == '1':
            AllBits[position] = 1
    for position, flag in enumerate(bit_string[116:734]):
        if flag == '1':
            AllBits[position + tail_offset] = 1
    for position, flag in enumerate(calcPubChemFingerPart2(mol)):
        if flag == 1:
            AllBits[position + part2_offset] = 1
    return AllBits
# ------------------------------------
if __name__ == '__main__':
    # Demo run: fingerprint one example molecule and print the result.
    # Single-argument print(...) calls produce the same output under
    # Python 2 and Python 3.
    print('-'*10+'START'+'-'*10)
    SMILES = 'C1=NC2NC3=CNCC3=CC2CC1'
    mol = Chem.MolFromSmiles(SMILES)
    # The fingerprint is computed on the molecule with explicit hydrogens.
    mol2 = Chem.AddHs(mol)
    result = calcPubChemFingerAll(mol2)
    print('Molecule: %s' % SMILES)
    print('-'*25)
    print('Results: %s' % result)
    print('-'*10+'END'+'-'*10)