Source code for AAIndex

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
'''
This module is used for obtaining the properties of amino acids or their pairs

from the aaindex database. You can freely use and distribute it. If you hava 

any problem, you could contact with us timely!

Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com
 
'''

import sys, os, string

AALetter = ["A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"]
_aaindex = dict()


#####################################################################################################
[docs]class Record: ''' Amino acid index (AAindex) Record ''' aakeys = 'ARNDCQEGHILKMFPSTWYV' def __init__(self): self.key = None self.desc = '' self.ref = '' self.authors = '' self.title = '' self.journal = '' self.correlated = dict() self.index = dict() self.comment = ''
[docs] def extend(self, row): i = len(self.index) for x in row: self.index[self.aakeys[i]] = x i += 1
[docs] def get(self, aai, aaj=None, d=None): assert aaj is None return self.index.get(aai, d)
def __getitem__(self, aai): return self.get(aai)
[docs] def median(self): x = sorted(filter(None, self.index.values())) half = len(x) / 2 if len(x) % 2 == 1: return x[half] return (x[half - 1] + x[half]) / 2.0
def __str__(self): desc = self.desc.replace('\n', ' ').strip() return '%s(%s: %s)' % (self.__class__.__name__, self.key, desc)
#####################################################################################################
[docs]class MatrixRecord(Record): ''' Matrix record for mutation matrices or pair-wise contact potentials ''' def __init__(self): Record.__init__(self) self.index = [] self.rows = dict() self.cols = dict()
[docs] def extend(self, row): self.index.append(row)
def _get(self, aai, aaj): i = self.rows[aai] j = self.cols[aaj] return self.index[i][j]
[docs] def get(self, aai, aaj, d=None): try: return self._get(aai, aaj) except: pass try: return self._get(aaj, aai) except: return d
def __getitem__(self, aaij): return self.get(aaij[0], aaij[1])
[docs] def median(self): x = [] for y in self.index: x.extend(filter(None, y)) x.sort() if len(x) % 2 == 1: return x[len(x) / 2] return sum(x[len(x) / 2 - 1:len(x) / 2 + 1]) / 2.0
##################################################################################################### #####################################################################################################
[docs]def grep(pattern): ''' Search for pattern in title and description of all records (case insensitive) and print results on standard output. ''' for record in search(pattern): print record
#####################################################################################################
[docs]def get(key): ''' Get record for key ''' if len(_aaindex) == 0: init() return _aaindex[key]
##################################################################################################### def _float_or_None(x): if x == 'NA' or x == '-': return None return float(x) #####################################################################################################
[docs]def init(path=None, index='123'): ''' Read in the aaindex files. You need to run this (once) before you can access any records. If the files are not within the current directory, you need to specify the correct directory path. By default all three aaindex files are read in. ''' index = str(index) if path is None: for path in [os.path.split(__file__)[0], '.']: if os.path.exists(os.path.join(path, 'aaindex' + index[0])): break print >> sys.stderr, 'path =', path if '1' in index: _parse(path + '/aaindex1', Record) if '2' in index: _parse(path + '/aaindex2', MatrixRecord) if '3' in index: _parse(path + '/aaindex3', MatrixRecord)
#####################################################################################################
[docs]def init_from_file(filename, type=Record): _parse(filename, type)
##################################################################################################### def _parse(filename, rec, quiet=True): ''' Parse aaindex input file. `rec` must be `Record` for aaindex1 and `MarixRecord` for aaindex2 and aaindex3. ''' if not os.path.exists(filename): import urllib url = 'ftp://ftp.genome.jp/pub/db/community/aaindex/' + os.path.split(filename)[1] # print 'Downloading "%s"' % (url) filename = urllib.urlretrieve(url, filename)[0] # print 'Saved to "%s"' % (filename) f = open(filename) current = rec() lastkey = None for line in f: key = line[0:2] if key[0] == ' ': key = lastkey if key == '//': _aaindex[current.key] = current current = rec() elif key == 'H ': current.key = line[2:].strip() elif key == 'R ': current.ref += line[2:] elif key == 'D ': current.desc += line[2:] elif key == 'A ': current.authors += line[2:] elif key == 'T ': current.title += line[2:] elif key == 'J ': current.journal += line[2:] elif key == '* ': current.comment += line[2:] elif key == 'C ': a = line[2:].split() for i in range(0, len(a), 2): current.correlated[a[i]] = float(a[i + 1]) elif key == 'I ': a = line[1:].split() if a[0] != 'A/L': current.extend(map(_float_or_None, a)) elif list(Record.aakeys) != [i[0] for i in a] + [i[-1] for i in a]: print 'Warning: wrong amino acid sequence for', current.key else: try: assert list(Record.aakeys[:10]) == [i[0] for i in a] assert list(Record.aakeys[10:]) == [i[2] for i in a] except: print 'Warning: wrong amino acid sequence for', current.key elif key == 'M ': a = line[2:].split() if a[0] == 'rows': if a[4] == 'rows': a.pop(4) assert a[3] == 'cols' and len(a) == 6 i = 0 for aa in a[2]: current.rows[aa] = i i += 1 i = 0 for aa in a[5]: current.cols[aa] = i i += 1 else: current.extend(map(_float_or_None, a)) elif not quiet: print 'Warning: line starts with "%s"' % (key) lastkey = key f.close() #####################################################################################################
[docs]def GetAAIndex1(name, path='.'): """ Get the amino acid property values from aaindex1 Usage: result=GetAAIndex1(name) Input: name is the name of amino acid property (e.g., KRIW790103) Output: result is a dict form containing the properties of 20 amino acids """ init(path=path) name = str(name) temp = get(string.strip(name)) res = {} for i in AALetter: res[i] = temp.get(i) return res
#####################################################################################################
[docs]def GetAAIndex23(name, path='.'): """ Get the amino acid property values from aaindex2 and aaindex3 Usage: result=GetAAIndex23(name) Input: name is the name of amino acid property (e.g.,TANS760101,GRAR740104) Output: result is a dict form containing the properties of 400 amino acid pairs """ init(path=path) name = str(name) temp = get(string.strip(name)) res = {} for i in AALetter: for j in AALetter: res[i + j] = temp.get(i, j) return res
##################################################################################################### if __name__ == "__main__": # init(path='.') # grep('volume') # x = get('KRIW790103') # print x # print x.get('W') temp1 = GetAAIndex1('KRIW790103') print len(temp1) temp2 = GetAAIndex23('TANS760101') print len(temp2) temp2 = GetAAIndex23('GRAR740104') print len(temp2)