Source code for AtomTypes

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
You can freely use and distribute it. If you hava  

any problem, you could contact with us timely!

Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com

contains SMARTS definitions and calculators for EState atom types

defined in: Hall and Kier JCICS _35_ 1039-1045 (1995)  Table 1
"""
from rdkit import Chem
import numpy
import sys

_rawD = [
  ('sLi','[LiD1]-*'),

  ('ssBe','[BeD2](-*)-*'),
  ('ssssBe','[BeD4](-*)(-*)(-*)-*'),

  ('ssBH', '[BD2H](-*)-*'),
  ('sssB', '[BD3](-*)(-*)-*'),
  ('ssssB','[BD4](-*)(-*)(-*)-*'),

  ('sCH3', '[CD1H3]-*'),
  ('dCH2', '[CD1H2]=*'),
  ('ssCH2','[CD2H2](-*)-*'),
  ('tCH',  '[CD1H]#*'),
  ('dsCH', '[CD2H](=*)-*'),
  ('aaCH', '[C,c;D2H](:*):*'),
  ('sssCH','[CD3H](-*)(-*)-*'),
  ('ddC',  '[CD2H0](=*)=*'),
  ('tsC',  '[CD2H0](#*)-*'),
  ('dssC', '[CD3H0](=*)(-*)-*'),  
  ('aasC', '[C,c;D3H0](:*)(:*)-*'),
  ('aaaC', '[C,c;D3H0](:*)(:*):*'),
  ('ssssC','[CD4H0](-*)(-*)(-*)-*'),

  ('sNH3', '[ND1H3]-*'),
  ('sNH2', '[ND1H2]-*'),
  ('ssNH2','[ND2H2](-*)-*'),
  ('dNH',  '[ND1H]=*'),
  ('ssNH', '[ND2H](-*)-*'),
  ('aaNH', '[N,nD2H](:*):*'),
  ('tN',   '[ND1H0]#*'),
  ('sssNH','[ND3H](-*)(-*)-*'),
  ('dsN',  '[ND2H0](=*)-*'),
  ('aaN',  '[N,nD2H0](:*):*'),
  ('sssN', '[ND3H0](-*)(-*)-*'),
  ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'),  # mod
  ('aasN', '[N,nD3H0](:*)(:*)-,:*'),              # mod
  ('ssssN','[ND4H0](-*)(-*)(-*)-*'),

  ('sOH','[OD1H]-*'),
  ('dO', '[OD1H0]=*'),
  ('ssO','[OD2H0](-*)-*'),
  ('aaO','[O,oD2H0](:*):*'),

  ('sF','[FD1]-*'),

  ('sSiH3', '[SiD1H3]-*'),
  ('ssSiH2','[SiD2H2](-*)-*'),
  ('sssSiH','[SiD3H1](-*)(-*)-*'),
  ('ssssSi','[SiD4H0](-*)(-*)(-*)-*'),

  ('sPH2',  '[PD1H2]-*'),
  ('ssPH',  '[PD2H1](-*)-*'),
  ('sssP',  '[PD3H0](-*)(-*)-*'),
  ('dsssP', '[PD4H0](=*)(-*)(-*)-*'),
  ('sssssP','[PD5H0](-*)(-*)(-*)(-*)-*'),
   
  ('sSH',  '[SD1H1]-*'),
  ('dS',   '[SD1H0]=*'),
  ('ssS',  '[SD2H0](-*)-*'),
  ('aaS',  '[S,sD2H0](:*):*'),
  ('dssS', '[SD3H0](=*)(-*)-*'),
  ('ddssS','[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'),  # mod

  ('sCl', '[ClD1]-*'),

  ('sGeH3', '[GeD1H3](-*)'),
  ('ssGeH2','[GeD2H2](-*)-*'),
  ('sssGeH','[GeD3H1](-*)(-*)-*'),
  ('ssssGe','[GeD4H0](-*)(-*)(-*)-*'),

  ('sAsH2',  '[AsD1H2]-*'),
  ('ssAsH',  '[AsD2H1](-*)-*'),
  ('sssAs',  '[AsD3H0](-*)(-*)-*'),
  ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'),
  ('sssssAs','[AsD5H0](-*)(-*)(-*)(-*)-*'),

  ('sSeH',  '[SeD1H1]-*'),
  ('dSe',   '[SeD1H0]=*'),
  ('ssSe',  '[SeD2H0](-*)-*'),
  ('aaSe',  '[SeD2H0](:*):*'),
  ('dssSe', '[SeD3H0](=*)(-*)-*'),
  ('ddssSe','[SeD4H0](=*)(=*)(-*)-*'),

  ('sBr','[BrD1]-*'),

  ('sSnH3', '[SnD1H3]-*'),
  ('ssSnH2','[SnD2H2](-*)-*'),
  ('sssSnH','[SnD3H1](-*)(-*)-*'),
  ('ssssSn','[SnD4H0](-*)(-*)(-*)-*'),

  ('sI','[ID1]-*'),

  ('sPbH3', '[PbD1H3]-*'),
  ('ssPbH2','[PbD2H2](-*)-*'),
  ('sssPbH','[PbD3H1](-*)(-*)-*'),
  ('ssssPb','[PbD4H0](-*)(-*)(-*)-*'),
]

esPatterns=None
[docs]def BuildPatts(rawV=None): """ Internal Use Only """ global esPatterns,_rawD if rawV is None: rawV = _rawD esPatterns = [None]*len(rawV) for i,(name,sma) in enumerate(rawV): try: patt = Chem.MolFromSmarts(sma) except: sys.stderr.write('WARNING: problems with pattern %s (name: %s), skipped.\n'%(sma,name)) else: esPatterns[i] = name,patt
[docs]def TypeAtoms(mol): """ assigns each atom in a molecule to an EState type **Returns:** list of tuples (atoms can possibly match multiple patterns) with atom types """ if esPatterns is None: BuildPatts() nAtoms = mol.GetNumAtoms() res = [None]*nAtoms for name,patt in esPatterns: matches = mol.GetSubstructMatches(patt,uniquify=0) for match in matches: idx = match[0] if res[idx] is None: res[idx] = [name] elif name not in res[idx]: res[idx].append(name) for i,v in enumerate(res): if v is not None: res[i] = tuple(v) else: res[i] = () return res
[docs]def GetAtomLabel(mol): """ Obtain the atom index in a molecule for the above given atom types """ if esPatterns is None: BuildPatts() res=[] for name, patt in esPatterns: matches = mol.GetSubstructMatches(patt,uniquify=0) cc=[] for match in matches: # remain=match[1:] cc.append(match[0]) bb=list(numpy.unique(numpy.array(cc))) res.append(bb) return res