# -*- coding: utf-8 -*-
# Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
# All rights reserved.
# This file is part of the PyBioMed.
# The contents are covered by the terms of the BSD license
# which is included in the file license.txt, found at the root
# of the PyBioMed source tree.
'''
This module is used for obtaining the properties of amino acids or their pairs
from the aaindex database. You can freely use and distribute it. If you hava
any problem, you could contact with us timely!
Authors: Zhijiang Yao and Dongsheng Cao.
Date: 2016.06.04
Email: gadsby@163.com
'''
import sys, os, string
AALetter = ["A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"]
_aaindex = dict()
#####################################################################################################
[docs]class Record:
'''
Amino acid index (AAindex) Record
'''
aakeys = 'ARNDCQEGHILKMFPSTWYV'
def __init__(self):
self.key = None
self.desc = ''
self.ref = ''
self.authors = ''
self.title = ''
self.journal = ''
self.correlated = dict()
self.index = dict()
self.comment = ''
[docs] def extend(self, row):
i = len(self.index)
for x in row:
self.index[self.aakeys[i]] = x
i += 1
[docs] def get(self, aai, aaj=None, d=None):
assert aaj is None
return self.index.get(aai, d)
def __getitem__(self, aai):
return self.get(aai)
def __str__(self):
desc = self.desc.replace('\n', ' ').strip()
return '%s(%s: %s)' % (self.__class__.__name__, self.key, desc)
#####################################################################################################
[docs]class MatrixRecord(Record):
'''
Matrix record for mutation matrices or pair-wise contact potentials
'''
def __init__(self):
Record.__init__(self)
self.index = []
self.rows = dict()
self.cols = dict()
[docs] def extend(self, row):
self.index.append(row)
def _get(self, aai, aaj):
i = self.rows[aai]
j = self.cols[aaj]
return self.index[i][j]
[docs] def get(self, aai, aaj, d=None):
try:
return self._get(aai, aaj)
except:
pass
try:
return self._get(aaj, aai)
except:
return d
def __getitem__(self, aaij):
return self.get(aaij[0], aaij[1])
#####################################################################################################
[docs]def search(pattern, searchtitle=True, casesensitive=False):
'''
Search for pattern in description and title (optional) of all records and
return matched records as list. By default search case insensitive.
'''
whatcase = lambda i: i
if not casesensitive:
pattern = pattern.lower()
whatcase = lambda i: i.lower()
matches = []
for record in _aaindex.itervalues():
if pattern in whatcase(record.desc) or searchtitle and pattern in whatcase(record.title):
matches.append(record)
return matches
#####################################################################################################
[docs]def grep(pattern):
'''
Search for pattern in title and description of all records (case
insensitive) and print results on standard output.
'''
for record in search(pattern):
print record
#####################################################################################################
[docs]def get(key):
'''
Get record for key
'''
if len(_aaindex) == 0:
init()
return _aaindex[key]
#####################################################################################################
def _float_or_None(x):
if x == 'NA' or x == '-':
return None
return float(x)
#####################################################################################################
[docs]def init(path=None, index='123'):
'''
Read in the aaindex files. You need to run this (once) before you can
access any records. If the files are not within the current directory,
you need to specify the correct directory path. By default all three
aaindex files are read in.
'''
index = str(index)
if path is None:
for path in [os.path.split(__file__)[0], '.']:
if os.path.exists(os.path.join(path, 'aaindex' + index[0])):
break
print >> sys.stderr, 'path =', path
if '1' in index:
_parse(path + '/aaindex1', Record)
if '2' in index:
_parse(path + '/aaindex2', MatrixRecord)
if '3' in index:
_parse(path + '/aaindex3', MatrixRecord)
#####################################################################################################
[docs]def init_from_file(filename, type=Record):
_parse(filename, type)
#####################################################################################################
def _parse(filename, rec, quiet=True):
'''
Parse aaindex input file. `rec` must be `Record` for aaindex1 and
`MarixRecord` for aaindex2 and aaindex3.
'''
if not os.path.exists(filename):
import urllib
url = 'ftp://ftp.genome.jp/pub/db/community/aaindex/' + os.path.split(filename)[1]
# print 'Downloading "%s"' % (url)
filename = urllib.urlretrieve(url, filename)[0]
# print 'Saved to "%s"' % (filename)
f = open(filename)
current = rec()
lastkey = None
for line in f:
key = line[0:2]
if key[0] == ' ':
key = lastkey
if key == '//':
_aaindex[current.key] = current
current = rec()
elif key == 'H ':
current.key = line[2:].strip()
elif key == 'R ':
current.ref += line[2:]
elif key == 'D ':
current.desc += line[2:]
elif key == 'A ':
current.authors += line[2:]
elif key == 'T ':
current.title += line[2:]
elif key == 'J ':
current.journal += line[2:]
elif key == '* ':
current.comment += line[2:]
elif key == 'C ':
a = line[2:].split()
for i in range(0, len(a), 2):
current.correlated[a[i]] = float(a[i + 1])
elif key == 'I ':
a = line[1:].split()
if a[0] != 'A/L':
current.extend(map(_float_or_None, a))
elif list(Record.aakeys) != [i[0] for i in a] + [i[-1] for i in a]:
print 'Warning: wrong amino acid sequence for', current.key
else:
try:
assert list(Record.aakeys[:10]) == [i[0] for i in a]
assert list(Record.aakeys[10:]) == [i[2] for i in a]
except:
print 'Warning: wrong amino acid sequence for', current.key
elif key == 'M ':
a = line[2:].split()
if a[0] == 'rows':
if a[4] == 'rows':
a.pop(4)
assert a[3] == 'cols' and len(a) == 6
i = 0
for aa in a[2]:
current.rows[aa] = i
i += 1
i = 0
for aa in a[5]:
current.cols[aa] = i
i += 1
else:
current.extend(map(_float_or_None, a))
elif not quiet:
print 'Warning: line starts with "%s"' % (key)
lastkey = key
f.close()
#####################################################################################################
[docs]def GetAAIndex1(name, path='.'):
"""
Get the amino acid property values from aaindex1
Usage:
result=GetAAIndex1(name)
Input: name is the name of amino acid property (e.g., KRIW790103)
Output: result is a dict form containing the properties of 20 amino acids
"""
init(path=path)
name = str(name)
temp = get(string.strip(name))
res = {}
for i in AALetter:
res[i] = temp.get(i)
return res
#####################################################################################################
[docs]def GetAAIndex23(name, path='.'):
"""
Get the amino acid property values from aaindex2 and aaindex3
Usage:
result=GetAAIndex23(name)
Input: name is the name of amino acid property (e.g.,TANS760101,GRAR740104)
Output: result is a dict form containing the properties of 400 amino acid pairs
"""
init(path=path)
name = str(name)
temp = get(string.strip(name))
res = {}
for i in AALetter:
for j in AALetter:
res[i + j] = temp.get(i, j)
return res
#####################################################################################################
if __name__ == "__main__":
# init(path='.')
# grep('volume')
# x = get('KRIW790103')
# print x
# print x.get('W')
temp1 = GetAAIndex1('KRIW790103')
print len(temp1)
temp2 = GetAAIndex23('TANS760101')
print len(temp2)
temp2 = GetAAIndex23('GRAR740104')
print len(temp2)