Source code for Getmol

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
This module reads molecules in different formats from local files and from the
web. If you have any questions, please contact me via email.

Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com
"""

import urllib
import re
import string
import os
from rdkit import Chem

# Version number of this module.
Version=1.0

def ReadMolFromSDF(filename=""):
    """
    Load the molecules stored in an SDF file.

    Note: the result is a collection of molecular objects (the RDKit
    SDMolSupplier built from the file). Iterate over it with a for
    statement to reach each molecule.

    Usage:

        res=ReadMolFromSDF(filename)

        Input: filename is a file name with path.

        Output: res is a set of molecular objects.
    """
    return Chem.SDMolSupplier(filename)
def ReadMolFromMOL(filename=""):
    """
    Load a single molecule from a file in mol format.

    Usage:

        res=ReadMolFromMOL(filename)

        Input: filename is a file name with path.

        Output: res is a molecular object, as returned by
        Chem.MolFromMolFile.
    """
    return Chem.MolFromMolFile(filename)
def ReadMolFromSmile(smi=""):
    """
    #################################################################
    Read a molecule by SMILES string.

    Leading and trailing whitespace is removed from the string before it is
    handed to RDKit.

    Usage:

        res=ReadMolFromSmile(smi)

        Input: smi is a SMILES string.

        Output: res is a molecule object, as returned by
        Chem.MolFromSmiles.
    #################################################################
    """
    # str.strip() replaces string.strip(), which only exists on Python 2;
    # the method form behaves the same on both Python 2 and Python 3.
    return Chem.MolFromSmiles(smi.strip())
def ReadMolFromInchi(inchi=""):
    """
    #################################################################
    Read a molecule by InChI string.

    The InChI is first converted to SMILES with pybel (Open Babel); the
    stripped SMILES text is then parsed by RDKit. pybel must be installed
    to use this function.

    Usage:

        res=ReadMolFromInchi(inchi)

        Input: inchi is an InChI string.

        Output: res is a molecule object, as returned by
        Chem.MolFromSmiles.
    #################################################################
    """
    try:
        import pybel
    except ImportError:
        # Open Babel 3.x ships pybel inside the openbabel package.
        from openbabel import pybel

    converted = pybel.readstring("inchi", inchi)
    smi = converted.write("smi")
    # str.strip() replaces the Python 2-only string.strip().
    return Chem.MolFromSmiles(smi.strip())
def ReadMolFromMol(filename=""):
    """
    #################################################################
    Load a molecule from a file in mol format.

    Usage:

        res=ReadMolFromMol(filename)

        Input: filename is a file name.

        Output: res is a molecule object, as returned by
        Chem.MolFromMolFile.
    #################################################################
    """
    parsed = Chem.MolFromMolFile(filename)
    return parsed
#############################################################################
def GetMolFromCAS(casid=""):
    """
    Download a molecule from http://www.chemnet.com/cas/ by CAS ID (casid).

    pybel (Open Babel) must be installed to use this function.

    The supplier page for the CAS ID is fetched and scanned line by line for
    a table cell holding an InChI string. That InChI is converted to SMILES
    with pybel. When several lines qualify, the last one wins.

    Usage:

        res=GetMolFromCAS(casid)

        Input: casid is a CAS ID string, e.g. "50-12-4".

        Output: res is the stripped SMILES text written by pybel.

    If no InChI cell is found, the placeholder "None" is passed to pybel,
    exactly as for a cell that does not start with "InChI"; any error then
    comes from pybel itself.
    """
    try:
        import pybel
    except ImportError:
        # Open Babel 3.x ships pybel inside the openbabel package.
        from openbabel import pybel
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    casid = casid.strip()
    # Start from the same placeholder the parsing loop uses, so a page with
    # no InChI line no longer leaves `res` unbound.
    res = "None"

    localfile = urlopen('http://www.chemnet.com/cas/supplier.cgi?terms=' + casid + '&l=&exact=dict')
    try:
        lines = localfile.readlines()
    finally:
        # Always release the connection, even if reading fails.
        localfile.close()

    for raw in lines:
        # Python 3 yields bytes here; Python 2 already yields str.
        line = raw if isinstance(raw, str) else raw.decode('utf-8', 'replace')
        # Only lines with exactly one 'InChI=' occurrence are considered.
        if line.count('InChI=') != 1:
            continue
        k = line.split(' <td align="left">')
        if len(k) < 2:
            # 'InChI=' appeared outside the expected table cell; skip the
            # line instead of failing with IndexError.
            continue
        cell = k[1].split('</td>\r\n')[0]
        res = cell if cell[0:5] == "InChI" else "None"

    mol = pybel.readstring('inchi', res.strip())
    smile = mol.write('smi')
    return smile.strip()
def GetMolFromEBI():
    """
    Placeholder that is not implemented yet (judging by the name, presumably
    intended for downloading molecules from EBI).

    Takes no arguments, does nothing and returns None.
    """
    return None
def GetMolFromNCBI(cid=""):
    """
    Download a molecule from http://pubchem.ncbi.nlm.nih.gov/ by cid (cid).

    The SDF returned by the server is written to a temporary file, parsed
    with RDKit and converted to SMILES.

    Usage:

        res=GetMolFromNCBI(cid)

        Input: cid is a PubChem compound ID string, e.g. "2244".

        Output: res is the isomeric SMILES string of the molecule.
    """
    import tempfile
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    cid = cid.strip()
    localfile = urlopen('http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?cid=' + cid + '&disopt=SaveSDF')
    try:
        payload = localfile.read()
    finally:
        # Always release the connection, even if reading fails.
        localfile.close()

    # A unique temporary file avoids clobbering a user's ./temp.sdf and
    # collisions between concurrent calls.
    handle, path = tempfile.mkstemp(suffix=".sdf")
    try:
        # Binary mode: the payload is raw bytes on Python 3.
        with os.fdopen(handle, 'wb') as f:
            f.write(payload)
        m = Chem.MolFromMolFile(path)
    finally:
        # Remove the file even when writing or parsing raises.
        os.remove(path)

    return Chem.MolToSmiles(m, isomericSmiles=True)
def GetMolFromDrugbank(dbid=""):
    """
    Download a molecule from http://www.drugbank.ca/ by dbid (dbid).

    The SDF returned by the server is written to a temporary file, parsed
    with RDKit and converted to SMILES.

    Usage:

        res=GetMolFromDrugbank(dbid)

        Input: dbid is a DrugBank ID string, e.g. "DB00133".

        Output: res is the isomeric SMILES string of the molecule.
    """
    import tempfile
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    dbid = dbid.strip()
    localfile = urlopen('http://www.drugbank.ca/drugs/' + dbid + '.sdf')
    try:
        payload = localfile.read()
    finally:
        # Always release the connection, even if reading fails.
        localfile.close()

    # A unique temporary file avoids clobbering a user's ./temp.sdf and
    # collisions between concurrent calls.
    handle, path = tempfile.mkstemp(suffix=".sdf")
    try:
        # Binary mode: the payload is raw bytes on Python 3.
        with os.fdopen(handle, 'wb') as f:
            f.write(payload)
        m = Chem.MolFromMolFile(path)
    finally:
        # Remove the file even when writing or parsing raises.
        os.remove(path)

    return Chem.MolToSmiles(m, isomericSmiles=True)
def GetMolFromKegg(kid=""):
    """
    Download a molecule from http://www.genome.jp/ by kegg id (kid).

    The mol file returned by the server is written to a temporary file,
    parsed with RDKit and converted to SMILES.

    Usage:

        res=GetMolFromKegg(kid)

        Input: kid is a KEGG drug ID, e.g. "D02176"; it is converted with
        str() before use.

        Output: res is the isomeric SMILES string of the molecule.
    """
    import tempfile
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    ID = str(kid)
    localfile = urlopen('http://www.genome.jp/dbget-bin/www_bget?-f+m+drug+' + ID)
    try:
        payload = localfile.read()
    finally:
        # Always release the connection, even if reading fails.
        localfile.close()

    # A unique temporary file avoids clobbering a user's ./temp.mol and
    # collisions between concurrent calls.
    handle, path = tempfile.mkstemp(suffix=".mol")
    try:
        # Binary mode: the payload is raw bytes on Python 3.
        with os.fdopen(handle, 'wb') as f:
            f.write(payload)
        m = Chem.MolFromMolFile(path)
    finally:
        # Remove the file even when writing or parsing raises.
        os.remove(path)

    return Chem.MolToSmiles(m, isomericSmiles=True)
#############################################################################

if __name__ == "__main__":
    # Demo run: calls each download function once and prints its result.
    # The single-argument print(...) form is used so the script parses and
    # behaves the same on both Python 2 and Python 3.
    print('-'*10+'START'+'-'*10)
    print('Only PyBioMed is successfully installed the code below can be run!')

    from PyBioMed.PyGetMol.GetProtein import timelimited

    # NOTE(review): timelimited(10) presumably limits each call to
    # 10 seconds - confirm against PyBioMed.PyGetMol.GetProtein.
    @timelimited(10)
    def run_GetMolFromCAS():
        temp = GetMolFromCAS(casid="50-12-4")
        print(temp)

    @timelimited(10)
    def run_GetMolFromNCBI():
        temp = GetMolFromNCBI(cid="2244")
        print(temp)

    @timelimited(10)
    def run_GetMolFromDrugbank():
        temp = GetMolFromDrugbank(dbid="DB00133")
        print(temp)

    @timelimited(10)
    def run_GetMolFromKegg():
        temp = GetMolFromKegg(kid="D02176")
        print(temp)

    run_GetMolFromCAS()
    print('-'*25)
    run_GetMolFromNCBI()
    print('-'*25)
    run_GetMolFromDrugbank()
    print('-'*25)
    run_GetMolFromKegg()
    print('-'*10+'END'+'-'*10)