# Source code for GetProteinFromUniprot

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
################################################################################################

This module is used to download protein sequences from the UniProt website
(http://www.uniprot.org/). You only need to input a protein ID, or prepare a
file (ID.txt) listing the IDs. You then obtain a .txt file (ProteinSequence.txt)
containing the protein sequences you need. You can freely use and distribute
it. If you have any problems, please contact us!
 
Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com

################################################################################################
"""

import urllib
import string


##################################################################################################
def GetProteinSequence(ProteinID):
    """
    #########################################################################################
    Get the protein sequence from the uniprot website by ID.

    The FASTA record for the ID is fetched, its first line (the FASTA header)
    is skipped, and the remaining lines are stripped of surrounding whitespace
    and concatenated into a single string.

    Usage:

    result=GetProteinSequence(ProteinID)

    Input: ProteinID is a string indicating ID such as "P48039". It is
    converted with str() before being placed in the URL.

    Output: result is a protein sequence (an empty string when the response
    has no lines after the header).

    Errors raised by urlopen while contacting the server are not caught here
    and propagate to the caller.
    #########################################################################################
    """
    # Imported locally so this function runs on both Python 3 and Python 2
    # without touching the module-level imports.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    from contextlib import closing

    ID = str(ProteinID)
    # NOTE(review): UniProt's web services may have moved since this URL was
    # written; verify that this address is still served (or redirected).
    url = 'http://www.uniprot.org/uniprot/' + ID + '.fasta'

    # closing() guarantees the connection is released even if reading fails.
    with closing(urlopen(url)) as localfile:
        temp = localfile.readlines()

    res = []
    # temp[0] is the FASTA header line; only the following lines are sequence.
    for line in temp[1:]:
        # On Python 3 the response yields bytes; on Python 2 it is already str.
        if not isinstance(line, str):
            line = line.decode('utf-8')
        res.append(line.strip())
    return ''.join(res)
##################################################################################################
def GetProteinSequenceFromTxt(path, openfile, savefile):
    """
    #########################################################################################
    Get the protein sequence from the uniprot website by the file containing ID.

    Each non-blank line of the ID file is stripped and passed to
    GetProteinSequence; the returned sequence is printed and written to the
    output file followed by a newline. Blank lines in the ID file are skipped.

    Usage:

    result=GetProteinSequenceFromTxt(path,openfile,savefile)

    Input: path is a directory path containing the ID file such as "/home/orient/protein/"
    (it is joined to the file names by plain concatenation, so it must end
    with a path separator).

    openfile is the ID file such as "proteinID.txt"

    savefile is the file saving the obtained protein sequences such as "protein.txt"

    Output: result is always 0.
    #########################################################################################
    """
    # The context manager closes both files even if a download raises.
    # The output file is opened first, as before, so it is created (and
    # truncated) even when the ID file turns out to be unreadable.
    with open(path + savefile, 'wb') as f1, open(path + openfile, 'r') as f2:
        # index counts every line of the ID file, including blank ones, so the
        # number printed below is the line number of the ID in the file.
        for index, i in enumerate(f2):
            itrim = i.strip()
            if itrim == "":
                continue
            temp = GetProteinSequence(itrim)
            print("--------------------------------------------------------")
            print("The %d protein sequence has been downloaded!" % (index + 1))
            print(temp)
            record = temp + '\n'
            # The file is opened in binary mode: Python 3 text must be encoded,
            # while a Python 2 str is already bytes and is written unchanged.
            if not isinstance(record, bytes):
                record = record.encode('utf-8')
            f1.write(record)
            print("--------------------------------------------------------")
    return 0
##################################################################################################
if __name__ == '__main__':
    # Small demonstration: download and print the sequence for each ID listed.
    localfile = ['P48039']
    for index, i in enumerate(localfile):
        itrim = i.strip()
        if itrim == "":
            continue
        temp = GetProteinSequence(itrim)
        # Parenthesised single-argument prints behave the same on Python 2 and 3.
        print("--------------------------------------------------------")
        print("The %d protein sequence has been downloaded!" % (index + 1))
        print(temp)
        print("--------------------------------------------------------")