Source code for topology

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
##############################################################################
The calculation of molecular topological indices based on its topological

structure. You can get 25 molecular topological descriptors. You can freely

use and distribute it. If you hava  any problem, you could contact with us timely!

Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com and oriental-cds@163.com

##############################################################################
"""

from rdkit import Chem
from rdkit.Chem import rdchem
#from rdkit.Chem import pyPeriodicTable as PeriodicTable
from rdkit.Chem import GraphDescriptors as GD
import numpy
import scipy


periodicTable = rdchem.GetPeriodicTable()

Version=1.0
################################################################

def _GetPrincipleQuantumNumber(atNum):
    """
    #################################################################
    *Internal Use Only*
    
    Get the principle quantum number of atom with atomic
    
    number equal to atNum 
    #################################################################
    """
    if atNum<=2:
        return 1
    elif atNum<=10:
        return 2
    elif atNum<=18:
        return 3
    elif atNum<=36:
        return 4
    elif atNum<=54:
        return 5
    elif atNum<=86:
        return 6
    else:
        return 7


[docs]def CalculateWeiner(mol): """ ################################################################# Calculation of Weiner number in a molecule ---->W Usage: result=CalculateWeiner(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ return 1.0/2*sum(sum(Chem.GetDistanceMatrix(mol)))
[docs]def CalculateMeanWeiner(mol): """ ################################################################# Calculation of Mean Weiner number in a molecule ---->AW Usage: result=CalculateWeiner(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ N=mol.GetNumAtoms() WeinerNumber=CalculateWeiner(mol) return 2.0*WeinerNumber/(N*(N-1))
[docs]def CalculateBalaban(mol): """ ################################################################# Calculation of Balaban index in a molecule ---->J Usage: result=CalculateBalaban(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ adjMat=Chem.GetAdjacencyMatrix(mol) Distance= Chem.GetDistanceMatrix(mol) Nbond=mol.GetNumBonds() Natom=mol.GetNumAtoms() S=numpy.sum(Distance,axis=1) mu=Nbond-Natom+1 sumk=0. for i in range(len(Distance)): si=S[i] for j in range(i,len(Distance)): if adjMat[i,j]==1: sumk += 1./numpy.sqrt(si*S[j]) if mu+1 !=0: J=float(Nbond)/float(mu+1)*sumk else: J=0 return J
[docs]def CalculateGraphDistance(mol): """ ################################################################# Calculation of graph distance index ---->Tigdi(log value) Usage: result=CalculateGraphDistance(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ Distance= Chem.GetDistanceMatrix(mol) n=int(Distance.max()) res=0.0 for i in range(n): # print Distance==i+1 temp=1./2*sum(sum(Distance==i+1)) #print temp res = res+temp**2 return numpy.log10(res)
[docs]def CalculateDiameter(mol): """ ################################################################# Calculation of diameter, which is Largest value in the distance matrix [Petitjean 1992]. ---->diametert Usage: result=CalculateDiameter(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ Distance=Chem.GetDistanceMatrix(mol) return Distance.max()
[docs]def CalculateRadius(mol): """ ################################################################# Calculation of radius based on topology. It is :If ri is the largest matrix entry in row i of the distance matrix D,then the radius is defined as the smallest of the ri [Petitjean 1992]. ---->radiust Usage: result=CalculateRadius(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ Distance=Chem.GetDistanceMatrix(mol) temp=[] for i in Distance: temp.append(max(i)) return min(temp)
[docs]def CalculatePetitjean(mol): """ ################################################################# Calculation of Petitjean based on topology. Value of (diameter - radius) / diameter as defined in [Petitjean 1992]. ---->petitjeant Usage: result=CalculatePetitjean(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ diameter=CalculateDiameter(mol) radius=CalculateRadius(mol) return 1-radius/float(diameter)
[docs]def CalculateXuIndex(mol): """ ################################################################# Calculation of Xu index ---->Xu Usage: result=CalculateXuIndex(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ nAT=mol.GetNumAtoms() deltas=[x.GetDegree() for x in mol.GetAtoms()] Distance= Chem.GetDistanceMatrix(mol) sigma=scipy.sum(Distance,axis=1) temp1=0.0 temp2=0.0 for i in range(nAT): temp1=temp1+deltas[i]*((sigma[i])**2) temp2=temp2+deltas[i]*(sigma[i]) Xu=numpy.sqrt(nAT)*numpy.log(temp1/temp2) return Xu
[docs]def CalculateGutmanTopo(mol): """ ################################################################# Calculation of Gutman molecular topological index based on simple vertex degree ---->GMTI(log value) Usage: result=CalculateGutmanTopo(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ nAT=mol.GetNumAtoms() deltas=[x.GetDegree() for x in mol.GetAtoms()] Distance= Chem.GetDistanceMatrix(mol) res=0.0 for i in range(nAT): for j in range(i+1,nAT): res=res+deltas[i]*deltas[j]*Distance[i,j] return numpy.log10(res)
[docs]def CalculatePolarityNumber(mol): """ ################################################################# Calculation of Polarity number. It is the number of pairs of vertexes at distance matrix equal to 3 ---->Pol Usage: result=CalculatePolarityNumber(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ Distance= Chem.GetDistanceMatrix(mol) res=1./2*sum(sum(Distance==3)) return res
[docs]def CalculatePoglianiIndex(mol): """ ################################################################# Calculation of Poglicani index The Pogliani index (Dz) is the sum over all non-hydrogen atoms of a modified vertex degree calculated as the ratio of the number of valence electrons over the principal quantum number of an atom [L. Pogliani, J.Phys.Chem. 1996, 100, 18065-18077]. ---->DZ Usage: result=CalculatePoglianiIndex(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ res=0.0 for atom in mol.GetAtoms(): n=atom.GetAtomicNum() nV=periodicTable.GetNOuterElecs(n) mP=_GetPrincipleQuantumNumber(n) res=res+(nV+0.0)/mP return res
[docs]def CalculateIpc(mol): """ ################################################################# This returns the information content of the coefficients of the characteristic polynomial of the adjacency matrix of a hydrogen-suppressed graph of a molecule. 'avg = 1' returns the information content divided by the total population. From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67, 4517-4533 (1977) ---->Ipc(log value) Usage: result=CalculateIpc(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ temp=GD.Ipc(mol) if temp>0: return numpy.log10(temp) else: return "NaN"
[docs]def CalculateBertzCT(mol): """ ################################################################# A topological index meant to quantify "complexity" of molecules. Consists of a sum of two terms, one representing the complexity of the bonding, the other representing the complexity of the distribution of heteroatoms. From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981) ---->BertzCT(log value) Usage: result=CalculateBertzCT(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ temp=GD.BertzCT(mol) if temp>0: return numpy.log10(temp) else: return "NaN"
[docs]def CalculateHarary(mol): """ ################################################################# Calculation of Harary number ---->Thara Usage: result=CalculateHarary(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ Distance=numpy.array(Chem.GetDistanceMatrix(mol),'d') return 1.0/2*(sum(1.0/Distance[Distance!=0]))
[docs]def CalculateSchiultz(mol): """ ################################################################# Calculation of Schiultz number ---->Tsch(log value) Usage: result=CalculateSchiultz(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ Distance=numpy.array(Chem.GetDistanceMatrix(mol),'d') Adjacent=numpy.array(Chem.GetAdjacencyMatrix(mol),'d') VertexDegree=sum(Adjacent) return sum(scipy.dot((Distance+Adjacent),VertexDegree))
[docs]def CalculateZagreb1(mol): """ ################################################################# Calculation of Zagreb index with order 1 in a molecule ---->ZM1 Usage: result=CalculateZagreb1(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ deltas=[x.GetDegree() for x in mol.GetAtoms()] return sum(numpy.array(deltas)**2)
[docs]def CalculateZagreb2(mol): """ ################################################################# Calculation of Zagreb index with order 2 in a molecule ---->ZM2 Usage: result=CalculateZagreb2(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ ke = [x.GetBeginAtom().GetDegree()*x.GetEndAtom().GetDegree() for x in mol.GetBonds()] return sum(ke)
[docs]def CalculateMZagreb1(mol): """ ################################################################# Calculation of Modified Zagreb index with order 1 in a molecule ---->MZM1 Usage: result=CalculateMZagreb1(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ deltas=[x.GetDegree() for x in mol.GetAtoms()] while 0 in deltas: deltas.remove(0) deltas=numpy.array(deltas,'d') res=sum((1./deltas)**2) return res
[docs]def CalculateMZagreb2(mol): """ ################################################################# Calculation of Modified Zagreb index with order 2 in a molecule ---->MZM2 Usage: result=CalculateMZagreb2(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ cc = [x.GetBeginAtom().GetDegree()*x.GetEndAtom().GetDegree() for x in mol.GetBonds()] while 0 in cc: cc.remove(0) cc = numpy.array(cc,'d') res = sum((1./cc)**2) return res
[docs]def CalculateQuadratic(mol): """ ################################################################# Calculation of Quadratic index in a molecule ---->Qindex Usage: result=CalculateQuadratic(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ M=CalculateZagreb1(mol) N=mol.GetNumAtoms() return 3-2*N+M/2.0
[docs]def CalculatePlatt(mol): """ ################################################################# Calculation of Platt number in a molecule ---->Platt Usage: result=CalculatePlatt(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ cc = [x.GetBeginAtom().GetDegree()+x.GetEndAtom().GetDegree()-2 for x in mol.GetBonds()] return sum(cc)
[docs]def CalculateSimpleTopoIndex(mol): """ ################################################################# Calculation of the logarithm of the simple topological index by Narumi, which is defined as the product of the vertex degree. ---->Sito Usage: result=CalculateSimpleTopoIndex(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ deltas=[x.GetDegree() for x in mol.GetAtoms()] while 0 in deltas: deltas.remove(0) deltas=numpy.array(deltas,'d') res=numpy.prod(deltas) if res>0: return numpy.log10(res) else: return "NaN"
[docs]def CalculateHarmonicTopoIndex(mol): """ ################################################################# Calculation of harmonic topological index proposed by Narnumi. ---->Hato Usage: result=CalculateHarmonicTopoIndex(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ deltas=[x.GetDegree() for x in mol.GetAtoms()] while 0 in deltas: deltas.remove(0) deltas=numpy.array(deltas,'d') nAtoms=mol.GetNumAtoms() res=nAtoms/sum(1./deltas) return res
[docs]def CalculateGeometricTopoIndex(mol): """ ################################################################# Geometric topological index by Narumi ---->Geto Usage: result=CalculateGeometricTopoIndex(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ nAtoms=mol.GetNumAtoms() deltas=[x.GetDegree() for x in mol.GetAtoms()] while 0 in deltas: deltas.remove(0) deltas=numpy.array(deltas,'d') temp=numpy.prod(deltas) res=numpy.power(temp,1./nAtoms) return res
[docs]def CalculateArithmeticTopoIndex(mol): """ ################################################################# Arithmetic topological index by Narumi ---->Arto Usage: result=CalculateArithmeticTopoIndex(mol) Input: mol is a molecule object Output: result is a numeric value ################################################################# """ nAtoms=mol.GetNumAtoms() nBonds=mol.GetNumBonds() res=2.*nBonds/nAtoms return res
_Topology={'W':CalculateWeiner, 'AW':CalculateMeanWeiner, 'J':CalculateBalaban, 'Tigdi':CalculateGraphDistance, 'Xu':CalculateXuIndex, 'GMTI':CalculateGutmanTopo, 'Pol':CalculatePolarityNumber, 'DZ':CalculatePoglianiIndex, 'Ipc':CalculateIpc, 'BertzCT':CalculateBertzCT, 'Thara':CalculateHarary, 'Tsch':CalculateSchiultz, 'ZM1':CalculateZagreb1, 'ZM2':CalculateZagreb2, 'MZM1':CalculateMZagreb1, 'MZM2':CalculateMZagreb2, 'Qindex':CalculateQuadratic, 'Platt':CalculatePlatt, 'diametert':CalculateDiameter, 'radiust':CalculateRadius, 'petitjeant':CalculatePetitjean, 'Sito':CalculateSimpleTopoIndex, 'Hato':CalculateHarmonicTopoIndex, 'Geto':CalculateGeometricTopoIndex, 'Arto':CalculateArithmeticTopoIndex }
[docs]def GetTopology(mol): """ ################################################################# Get the dictionary of constitutional descriptors for given moelcule mol Usage: result=CalculateWeiner(mol) Input: mol is a molecule object Output: result is a dict form containing all topological indices. ################################################################# """ result={} for DesLabel in _Topology.keys(): result[DesLabel]=round(_Topology[DesLabel](mol),3) return result
def _GetHTMLDoc(): """ ################################################################# Write HTML documentation for this module. ################################################################# """ import pydoc pydoc.writedoc('topology') ################################################################################ ##################################################################### if __name__ =='__main__': smis = ['CCCC','CCCCC','CCCCCC','CC(N)C(=O)O','CC(N)C(=O)[O-]'] for index, smi in enumerate(smis): m = Chem.MolFromSmiles(smi) print index+1 print smi print '\t',GetTopology(m) print '\t',len(GetTopology(m))