# Source code for QuasiSequenceOrder

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
##############################################################################################
This module is used for computing the quasi sequence order descriptors based on the 

given protein sequence. We can obtain two types of descriptors: Sequence-order-coupling

number and quasi-sequence-order descriptors. Two distance matrices between 20 amino acids

are employed. You can freely use and distribute it. If you have any problem, please contact

us immediately.

References:

[1]: Kuo-Chen Chou. Prediction of Protein Subcellular Locations by Incorporating

Quasi-Sequence-Order Effect. Biochemical and Biophysical Research Communications 

2000, 278, 477-483.

[2]: Kuo-Chen Chou and Yu-Dong Cai. Prediction of Protein Subcellular Locations by

GO-FunD-PseAA predictor, Biochemical and Biophysical Research Communications,

2004, 320, 1236-1239.

[3]: Gisbert Schneider and Paul Wrede. The Rational Design of Amino Acid

Sequences by Artificial Neural Networks and Simulated Molecular Evolution: De

Novo Design of an Idealized Leader Cleavage Site. Biophysical Journal, 1994, 66,

335-344.

Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com

##############################################################################################
"""

import math
import string

# One-letter codes of the 20 standard amino acids. The order of this list fixes
# the numbering of the composition-based descriptors (e.g. 'QSO1' .. 'QSO20'),
# which are generated by enumerating it.
AALetter = ["A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"]
## _Distance1 is the Schneider-Wrede physicochemical distance matrix used by Chou et al.
_Distance1 = {"GW": 0.923, "GV": 0.464, "GT": 0.272, "GS": 0.158, "GR": 1.0, "GQ": 0.467, "GP": 0.323, "GY": 0.728,
              "GG": 0.0, "GF": 0.727, "GE": 0.807, "GD": 0.776, "GC": 0.312, "GA": 0.206, "GN": 0.381, "GM": 0.557,
              "GL": 0.591, "GK": 0.894, "GI": 0.592, "GH": 0.769, "ME": 0.879, "MD": 0.932, "MG": 0.569, "MF": 0.182,
              "MA": 0.383, "MC": 0.276, "MM": 0.0, "ML": 0.062, "MN": 0.447, "MI": 0.058, "MH": 0.648, "MK": 0.884,
              "MT": 0.358, "MW": 0.391, "MV": 0.12, "MQ": 0.372, "MP": 0.285, "MS": 0.417, "MR": 1.0, "MY": 0.255,
              "FP": 0.42, "FQ": 0.459, "FR": 1.0, "FS": 0.548, "FT": 0.499, "FV": 0.252, "FW": 0.207, "FY": 0.179,
              "FA": 0.508, "FC": 0.405, "FD": 0.977, "FE": 0.918, "FF": 0.0, "FG": 0.69, "FH": 0.663, "FI": 0.128,
              "FK": 0.903, "FL": 0.131, "FM": 0.169, "FN": 0.541, "SY": 0.615, "SS": 0.0, "SR": 1.0, "SQ": 0.358,
              "SP": 0.181, "SW": 0.827, "SV": 0.342, "ST": 0.174, "SK": 0.883, "SI": 0.478, "SH": 0.718, "SN": 0.289,
              "SM": 0.44, "SL": 0.474, "SC": 0.185, "SA": 0.1, "SG": 0.17, "SF": 0.622, "SE": 0.812, "SD": 0.801,
              "YI": 0.23, "YH": 0.678, "YK": 0.904, "YM": 0.268, "YL": 0.219, "YN": 0.512, "YA": 0.587, "YC": 0.478,
              "YE": 0.932, "YD": 1.0, "YG": 0.782, "YF": 0.202, "YY": 0.0, "YQ": 0.404, "YP": 0.444, "YS": 0.612,
              "YR": 0.995, "YT": 0.557, "YW": 0.244, "YV": 0.328, "LF": 0.139, "LG": 0.596, "LD": 0.944, "LE": 0.892,
              "LC": 0.296, "LA": 0.405, "LN": 0.452, "LL": 0.0, "LM": 0.062, "LK": 0.893, "LH": 0.653, "LI": 0.013,
              "LV": 0.133, "LW": 0.341, "LT": 0.397, "LR": 1.0, "LS": 0.443, "LP": 0.309, "LQ": 0.376, "LY": 0.205,
              "RT": 0.808, "RV": 0.914, "RW": 1.0, "RP": 0.796, "RQ": 0.668, "RR": 0.0, "RS": 0.86, "RY": 0.859,
              "RD": 0.305, "RE": 0.225, "RF": 0.977, "RG": 0.928, "RA": 0.919, "RC": 0.905, "RL": 0.92, "RM": 0.908,
              "RN": 0.69, "RH": 0.498, "RI": 0.929, "RK": 0.141, "VH": 0.649, "VI": 0.135, "EM": 0.83, "EL": 0.854,
              "EN": 0.599, "EI": 0.86, "EH": 0.406, "EK": 0.143, "EE": 0.0, "ED": 0.133, "EG": 0.779, "EF": 0.932,
              "EA": 0.79, "EC": 0.788, "VM": 0.12, "EY": 0.837, "VN": 0.38, "ET": 0.682, "EW": 1.0, "EV": 0.824,
              "EQ": 0.598, "EP": 0.688, "ES": 0.726, "ER": 0.234, "VP": 0.212, "VQ": 0.339, "VR": 1.0, "VT": 0.305,
              "VW": 0.472, "KC": 0.871, "KA": 0.889, "KG": 0.9, "KF": 0.957, "KE": 0.149, "KD": 0.279, "KK": 0.0,
              "KI": 0.899, "KH": 0.438, "KN": 0.667, "KM": 0.871, "KL": 0.892, "KS": 0.825, "KR": 0.154, "KQ": 0.639,
              "KP": 0.757, "KW": 1.0, "KV": 0.882, "KT": 0.759, "KY": 0.848, "DN": 0.56, "DL": 0.841, "DM": 0.819,
              "DK": 0.249, "DH": 0.435, "DI": 0.847, "DF": 0.924, "DG": 0.697, "DD": 0.0, "DE": 0.124, "DC": 0.742,
              "DA": 0.729, "DY": 0.836, "DV": 0.797, "DW": 1.0, "DT": 0.649, "DR": 0.295, "DS": 0.667, "DP": 0.657,
              "DQ": 0.584, "QQ": 0.0, "QP": 0.272, "QS": 0.461, "QR": 1.0, "QT": 0.389, "QW": 0.831, "QV": 0.464,
              "QY": 0.522, "QA": 0.512, "QC": 0.462, "QE": 0.861, "QD": 0.903, "QG": 0.648, "QF": 0.671, "QI": 0.532,
              "QH": 0.765, "QK": 0.881, "QM": 0.505, "QL": 0.518, "QN": 0.181, "WG": 0.829, "WF": 0.196, "WE": 0.931,
              "WD": 1.0, "WC": 0.56, "WA": 0.658, "WN": 0.631, "WM": 0.344, "WL": 0.304, "WK": 0.892, "WI": 0.305,
              "WH": 0.678, "WW": 0.0, "WV": 0.418, "WT": 0.638, "WS": 0.689, "WR": 0.968, "WQ": 0.538, "WP": 0.555,
              "WY": 0.204, "PR": 1.0, "PS": 0.196, "PP": 0.0, "PQ": 0.228, "PV": 0.244, "PW": 0.72, "PT": 0.161,
              "PY": 0.481, "PC": 0.179, "PA": 0.22, "PF": 0.515, "PG": 0.376, "PD": 0.852, "PE": 0.831, "PK": 0.875,
              "PH": 0.696, "PI": 0.363, "PN": 0.231, "PL": 0.357, "PM": 0.326, "CK": 0.887, "CI": 0.304, "CH": 0.66,
              "CN": 0.324, "CM": 0.277, "CL": 0.301, "CC": 0.0, "CA": 0.114, "CG": 0.32, "CF": 0.437, "CE": 0.838,
              "CD": 0.847, "CY": 0.457, "CS": 0.176, "CR": 1.0, "CQ": 0.341, "CP": 0.157, "CW": 0.639, "CV": 0.167,
              "CT": 0.233, "IY": 0.213, "VA": 0.275, "VC": 0.165, "VD": 0.9, "VE": 0.867, "VF": 0.269, "VG": 0.471,
              "IQ": 0.383, "IP": 0.311, "IS": 0.443, "IR": 1.0, "VL": 0.134, "IT": 0.396, "IW": 0.339, "IV": 0.133,
              "II": 0.0, "IH": 0.652, "IK": 0.892, "VS": 0.322, "IM": 0.057, "IL": 0.013, "VV": 0.0, "IN": 0.457,
              "IA": 0.403, "VY": 0.31, "IC": 0.296, "IE": 0.891, "ID": 0.942, "IG": 0.592, "IF": 0.134, "HY": 0.821,
              "HR": 0.697, "HS": 0.865, "HP": 0.777, "HQ": 0.716, "HV": 0.831, "HW": 0.981, "HT": 0.834, "HK": 0.566,
              "HH": 0.0, "HI": 0.848, "HN": 0.754, "HL": 0.842, "HM": 0.825, "HC": 0.836, "HA": 0.896, "HF": 0.907,
              "HG": 1.0, "HD": 0.629, "HE": 0.547, "NH": 0.78, "NI": 0.615, "NK": 0.891, "NL": 0.603, "NM": 0.588,
              "NN": 0.0, "NA": 0.424, "NC": 0.425, "ND": 0.838, "NE": 0.835, "NF": 0.766, "NG": 0.512, "NY": 0.641,
              "NP": 0.266, "NQ": 0.175, "NR": 1.0, "NS": 0.361, "NT": 0.368, "NV": 0.503, "NW": 0.945, "TY": 0.596,
              "TV": 0.345, "TW": 0.816, "TT": 0.0, "TR": 1.0, "TS": 0.185, "TP": 0.159, "TQ": 0.322, "TN": 0.315,
              "TL": 0.453, "TM": 0.403, "TK": 0.866, "TH": 0.737, "TI": 0.455, "TF": 0.604, "TG": 0.312, "TD": 0.83,
              "TE": 0.812, "TC": 0.261, "TA": 0.251, "AA": 0.0, "AC": 0.112, "AE": 0.827, "AD": 0.819, "AG": 0.208,
              "AF": 0.54, "AI": 0.407, "AH": 0.696, "AK": 0.891, "AM": 0.379, "AL": 0.406, "AN": 0.318, "AQ": 0.372,
              "AP": 0.191, "AS": 0.094, "AR": 1.0, "AT": 0.22, "AW": 0.739, "AV": 0.273, "AY": 0.552, "VK": 0.889}

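## _Distance2 is intended to be the Grantham chemical distance matrix (Grantham et al.).
## NOTE(review): every entry below appears to be identical to _Distance1 -- verify
## these values against the published Grantham matrix before relying on them.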
_Distance2 = {"GW": 0.923, "GV": 0.464, "GT": 0.272, "GS": 0.158, "GR": 1.0, "GQ": 0.467, "GP": 0.323, "GY": 0.728,
              "GG": 0.0, "GF": 0.727, "GE": 0.807, "GD": 0.776, "GC": 0.312, "GA": 0.206, "GN": 0.381, "GM": 0.557,
              "GL": 0.591, "GK": 0.894, "GI": 0.592, "GH": 0.769, "ME": 0.879, "MD": 0.932, "MG": 0.569, "MF": 0.182,
              "MA": 0.383, "MC": 0.276, "MM": 0.0, "ML": 0.062, "MN": 0.447, "MI": 0.058, "MH": 0.648, "MK": 0.884,
              "MT": 0.358, "MW": 0.391, "MV": 0.12, "MQ": 0.372, "MP": 0.285, "MS": 0.417, "MR": 1.0, "MY": 0.255,
              "FP": 0.42, "FQ": 0.459, "FR": 1.0, "FS": 0.548, "FT": 0.499, "FV": 0.252, "FW": 0.207, "FY": 0.179,
              "FA": 0.508, "FC": 0.405, "FD": 0.977, "FE": 0.918, "FF": 0.0, "FG": 0.69, "FH": 0.663, "FI": 0.128,
              "FK": 0.903, "FL": 0.131, "FM": 0.169, "FN": 0.541, "SY": 0.615, "SS": 0.0, "SR": 1.0, "SQ": 0.358,
              "SP": 0.181, "SW": 0.827, "SV": 0.342, "ST": 0.174, "SK": 0.883, "SI": 0.478, "SH": 0.718, "SN": 0.289,
              "SM": 0.44, "SL": 0.474, "SC": 0.185, "SA": 0.1, "SG": 0.17, "SF": 0.622, "SE": 0.812, "SD": 0.801,
              "YI": 0.23, "YH": 0.678, "YK": 0.904, "YM": 0.268, "YL": 0.219, "YN": 0.512, "YA": 0.587, "YC": 0.478,
              "YE": 0.932, "YD": 1.0, "YG": 0.782, "YF": 0.202, "YY": 0.0, "YQ": 0.404, "YP": 0.444, "YS": 0.612,
              "YR": 0.995, "YT": 0.557, "YW": 0.244, "YV": 0.328, "LF": 0.139, "LG": 0.596, "LD": 0.944, "LE": 0.892,
              "LC": 0.296, "LA": 0.405, "LN": 0.452, "LL": 0.0, "LM": 0.062, "LK": 0.893, "LH": 0.653, "LI": 0.013,
              "LV": 0.133, "LW": 0.341, "LT": 0.397, "LR": 1.0, "LS": 0.443, "LP": 0.309, "LQ": 0.376, "LY": 0.205,
              "RT": 0.808, "RV": 0.914, "RW": 1.0, "RP": 0.796, "RQ": 0.668, "RR": 0.0, "RS": 0.86, "RY": 0.859,
              "RD": 0.305, "RE": 0.225, "RF": 0.977, "RG": 0.928, "RA": 0.919, "RC": 0.905, "RL": 0.92, "RM": 0.908,
              "RN": 0.69, "RH": 0.498, "RI": 0.929, "RK": 0.141, "VH": 0.649, "VI": 0.135, "EM": 0.83, "EL": 0.854,
              "EN": 0.599, "EI": 0.86, "EH": 0.406, "EK": 0.143, "EE": 0.0, "ED": 0.133, "EG": 0.779, "EF": 0.932,
              "EA": 0.79, "EC": 0.788, "VM": 0.12, "EY": 0.837, "VN": 0.38, "ET": 0.682, "EW": 1.0, "EV": 0.824,
              "EQ": 0.598, "EP": 0.688, "ES": 0.726, "ER": 0.234, "VP": 0.212, "VQ": 0.339, "VR": 1.0, "VT": 0.305,
              "VW": 0.472, "KC": 0.871, "KA": 0.889, "KG": 0.9, "KF": 0.957, "KE": 0.149, "KD": 0.279, "KK": 0.0,
              "KI": 0.899, "KH": 0.438, "KN": 0.667, "KM": 0.871, "KL": 0.892, "KS": 0.825, "KR": 0.154, "KQ": 0.639,
              "KP": 0.757, "KW": 1.0, "KV": 0.882, "KT": 0.759, "KY": 0.848, "DN": 0.56, "DL": 0.841, "DM": 0.819,
              "DK": 0.249, "DH": 0.435, "DI": 0.847, "DF": 0.924, "DG": 0.697, "DD": 0.0, "DE": 0.124, "DC": 0.742,
              "DA": 0.729, "DY": 0.836, "DV": 0.797, "DW": 1.0, "DT": 0.649, "DR": 0.295, "DS": 0.667, "DP": 0.657,
              "DQ": 0.584, "QQ": 0.0, "QP": 0.272, "QS": 0.461, "QR": 1.0, "QT": 0.389, "QW": 0.831, "QV": 0.464,
              "QY": 0.522, "QA": 0.512, "QC": 0.462, "QE": 0.861, "QD": 0.903, "QG": 0.648, "QF": 0.671, "QI": 0.532,
              "QH": 0.765, "QK": 0.881, "QM": 0.505, "QL": 0.518, "QN": 0.181, "WG": 0.829, "WF": 0.196, "WE": 0.931,
              "WD": 1.0, "WC": 0.56, "WA": 0.658, "WN": 0.631, "WM": 0.344, "WL": 0.304, "WK": 0.892, "WI": 0.305,
              "WH": 0.678, "WW": 0.0, "WV": 0.418, "WT": 0.638, "WS": 0.689, "WR": 0.968, "WQ": 0.538, "WP": 0.555,
              "WY": 0.204, "PR": 1.0, "PS": 0.196, "PP": 0.0, "PQ": 0.228, "PV": 0.244, "PW": 0.72, "PT": 0.161,
              "PY": 0.481, "PC": 0.179, "PA": 0.22, "PF": 0.515, "PG": 0.376, "PD": 0.852, "PE": 0.831, "PK": 0.875,
              "PH": 0.696, "PI": 0.363, "PN": 0.231, "PL": 0.357, "PM": 0.326, "CK": 0.887, "CI": 0.304, "CH": 0.66,
              "CN": 0.324, "CM": 0.277, "CL": 0.301, "CC": 0.0, "CA": 0.114, "CG": 0.32, "CF": 0.437, "CE": 0.838,
              "CD": 0.847, "CY": 0.457, "CS": 0.176, "CR": 1.0, "CQ": 0.341, "CP": 0.157, "CW": 0.639, "CV": 0.167,
              "CT": 0.233, "IY": 0.213, "VA": 0.275, "VC": 0.165, "VD": 0.9, "VE": 0.867, "VF": 0.269, "VG": 0.471,
              "IQ": 0.383, "IP": 0.311, "IS": 0.443, "IR": 1.0, "VL": 0.134, "IT": 0.396, "IW": 0.339, "IV": 0.133,
              "II": 0.0, "IH": 0.652, "IK": 0.892, "VS": 0.322, "IM": 0.057, "IL": 0.013, "VV": 0.0, "IN": 0.457,
              "IA": 0.403, "VY": 0.31, "IC": 0.296, "IE": 0.891, "ID": 0.942, "IG": 0.592, "IF": 0.134, "HY": 0.821,
              "HR": 0.697, "HS": 0.865, "HP": 0.777, "HQ": 0.716, "HV": 0.831, "HW": 0.981, "HT": 0.834, "HK": 0.566,
              "HH": 0.0, "HI": 0.848, "HN": 0.754, "HL": 0.842, "HM": 0.825, "HC": 0.836, "HA": 0.896, "HF": 0.907,
              "HG": 1.0, "HD": 0.629, "HE": 0.547, "NH": 0.78, "NI": 0.615, "NK": 0.891, "NL": 0.603, "NM": 0.588,
              "NN": 0.0, "NA": 0.424, "NC": 0.425, "ND": 0.838, "NE": 0.835, "NF": 0.766, "NG": 0.512, "NY": 0.641,
              "NP": 0.266, "NQ": 0.175, "NR": 1.0, "NS": 0.361, "NT": 0.368, "NV": 0.503, "NW": 0.945, "TY": 0.596,
              "TV": 0.345, "TW": 0.816, "TT": 0.0, "TR": 1.0, "TS": 0.185, "TP": 0.159, "TQ": 0.322, "TN": 0.315,
              "TL": 0.453, "TM": 0.403, "TK": 0.866, "TH": 0.737, "TI": 0.455, "TF": 0.604, "TG": 0.312, "TD": 0.83,
              "TE": 0.812, "TC": 0.261, "TA": 0.251, "AA": 0.0, "AC": 0.112, "AE": 0.827, "AD": 0.819, "AG": 0.208,
              "AF": 0.54, "AI": 0.407, "AH": 0.696, "AK": 0.891, "AM": 0.379, "AL": 0.406, "AN": 0.318, "AQ": 0.372,
              "AP": 0.191, "AS": 0.094, "AR": 1.0, "AT": 0.22, "AW": 0.739, "AV": 0.273, "AY": 0.552, "VK": 0.889}


#############################################################################################
#############################################################################################
def GetSequenceOrderCouplingNumber(ProteinSequence, d=1, distancematrix=_Distance1):
    """
    ###############################################################################
    Computing the dth-rank sequence order coupling number for a protein.

    Usage:

    result = GetSequenceOrderCouplingNumber(protein, d)

    Input: protein is a pure protein sequence.

    d is the gap between two amino acids.

    distancematrix is a dict mapping a two-letter residue pair (e.g. "AR")
    to its distance value.

    Output: result is a numeric value: the sum of the squared distances of
    all residue pairs that are d positions apart, rounded to 3 decimals.
    It is 0.0 when the sequence contains no such pair (length <= d).

    Raises: KeyError if a residue pair is not a key of distancematrix.
    ###############################################################################
    """
    NumProtein = len(ProteinSequence)
    tau = 0.0
    for i in range(NumProtein - d):
        # Key is the residue at position i followed by the residue d steps later.
        pair = ProteinSequence[i] + ProteinSequence[i + d]
        tau += math.pow(distancematrix[pair], 2)
    return round(tau, 3)
#############################################################################################
def GetSequenceOrderCouplingNumberp(ProteinSequence, maxlag=30, distancematrix={}):
    """
    ###############################################################################
    Computing the sequence order coupling numbers from 1 to maxlag
    for a given protein sequence based on the user-defined property.

    Usage:

    result = GetSequenceOrderCouplingNumberp(protein, maxlag, distancematrix)

    Input: protein is a pure protein sequence.

    maxlag is the maximum lag and the length of the protein should be larger
    than maxlag. default is 30.

    distancematrix is a dict containing 400 distance values, keyed by
    two-letter residue pairs.

    Output: result is a dict with keys 'tau1' .. 'tau<maxlag>' containing all
    sequence order coupling numbers based on the given property. It is empty
    when maxlag <= 0.
    ###############################################################################
    """
    # The default dict is only ever read, never mutated, so sharing it between
    # calls is harmless; it is kept to leave the signature unchanged.
    Tau = {}
    for lag in range(1, maxlag + 1):
        Tau["tau" + str(lag)] = GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix)
    return Tau
#############################################################################################
def GetSequenceOrderCouplingNumberSW(ProteinSequence, maxlag=30, distancematrix=_Distance1):
    """
    ###############################################################################
    Computing the sequence order coupling numbers from 1 to maxlag
    for a given protein sequence based on the Schneider-Wrede physicochemical
    distance matrix.

    Usage:

    result = GetSequenceOrderCouplingNumberSW(protein, maxlag, distancematrix)

    Input: protein is a pure protein sequence.

    maxlag is the maximum lag and the length of the protein should be larger
    than maxlag. default is 30.

    distancematrix is a dict containing the Schneider-Wrede physicochemical
    distance matrix. It can be omitted (defaults to _Distance1).

    Output: result is a dict with keys 'tausw1' .. 'tausw<maxlag>' containing
    all sequence order coupling numbers based on the Schneider-Wrede
    physicochemical distance matrix.
    ###############################################################################
    """
    Tau = {}
    for lag in range(1, maxlag + 1):
        Tau["tausw" + str(lag)] = GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix)
    return Tau
#############################################################################################
def GetSequenceOrderCouplingNumberGrant(ProteinSequence, maxlag=30, distancematrix=_Distance2):
    """
    ###############################################################################
    Computing the sequence order coupling numbers from 1 to maxlag
    for a given protein sequence based on the Grantham chemical distance
    matrix.

    Usage:

    result = GetSequenceOrderCouplingNumberGrant(protein, maxlag, distancematrix)

    Input: protein is a pure protein sequence.

    maxlag is the maximum lag and the length of the protein should be larger
    than maxlag. default is 30.

    distancematrix is a dict containing the Grantham chemical distance
    matrix. It can be omitted (defaults to _Distance2).

    Output: result is a dict with keys 'taugrant1' .. 'taugrant<maxlag>'
    containing all sequence order coupling numbers based on the Grantham
    chemical distance matrix.
    ###############################################################################
    """
    Tau = {}
    for lag in range(1, maxlag + 1):
        Tau["taugrant" + str(lag)] = GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix)
    return Tau
#############################################################################################
def GetSequenceOrderCouplingNumberTotal(ProteinSequence, maxlag=30):
    """
    ###############################################################################
    Computing the sequence order coupling numbers from 1 to maxlag
    for a given protein sequence.

    Usage:

    result = GetSequenceOrderCouplingNumberTotal(protein, maxlag)

    Input: protein is a pure protein sequence.

    maxlag is the maximum lag and the length of the protein should be larger
    than maxlag. default is 30.

    Output: result is a dict containing all sequence order coupling numbers:
    the 'tausw*' entries from GetSequenceOrderCouplingNumberSW followed by the
    'taugrant*' entries from GetSequenceOrderCouplingNumberGrant, each computed
    with that function's default distance matrix.
    ###############################################################################
    """
    Tau = {}
    Tau.update(GetSequenceOrderCouplingNumberSW(ProteinSequence, maxlag=maxlag))
    Tau.update(GetSequenceOrderCouplingNumberGrant(ProteinSequence, maxlag=maxlag))
    return Tau
#############################################################################################
def GetAAComposition(ProteinSequence):
    """
    ###############################################################################
    Calculate the composition of Amino acids for a given protein sequence.

    Usage:

    result = GetAAComposition(protein)

    Input: protein is a pure protein sequence.

    Output: result is a dict containing the composition of the 20 amino
    acids: for every letter in AALetter, its count divided by the sequence
    length, rounded to 3 decimals. For an empty sequence every composition
    is 0.0.
    ###############################################################################
    """
    LengthSequence = len(ProteinSequence)
    Result = {}
    for aa in AALetter:
        if LengthSequence == 0:
            # Avoid a ZeroDivisionError: an empty sequence contains no residues.
            Result[aa] = 0.0
        else:
            Result[aa] = round(float(ProteinSequence.count(aa)) / LengthSequence, 3)
    return Result
#############################################################################################
def GetQuasiSequenceOrder1(ProteinSequence, maxlag=30, weight=0.1, distancematrix={}):
    """
    ###############################################################################
    Computing the first 20 quasi-sequence-order descriptors for
    a given protein sequence.

    Usage:

    result = GetQuasiSequenceOrder1(protein, maxlag, weight, distancematrix)

    see method GetQuasiSequenceOrder for the choice of parameters.

    distancematrix is a dict of distance values keyed by two-letter residue
    pairs.

    Output: result is a dict with keys 'QSO1' .. 'QSO20' (one per entry of
    AALetter, in that order). Each value is the amino acid composition divided
    by (1 + weight * sum of the coupling numbers for lags 1..maxlag), rounded
    to 6 decimals.
    ###############################################################################
    """
    # The default dict is only read, never mutated.
    rightpart = 0.0
    for lag in range(1, maxlag + 1):
        rightpart += GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix)
    AAC = GetAAComposition(ProteinSequence)
    # Common normalising denominator shared by all 20 descriptors.
    temp = 1 + weight * rightpart
    result = {}
    for index, aa in enumerate(AALetter):
        result['QSO' + str(index + 1)] = round(AAC[aa] / temp, 6)
    return result
#############################################################################################
def GetQuasiSequenceOrder2(ProteinSequence, maxlag=30, weight=0.1, distancematrix={}):
    """
    ###############################################################################
    Computing the last maxlag quasi-sequence-order descriptors for
    a given protein sequence.

    Usage:

    result = GetQuasiSequenceOrder2(protein, maxlag, weight, distancematrix)

    see method GetQuasiSequenceOrder for the choice of parameters.

    distancematrix is a dict of distance values keyed by two-letter residue
    pairs.

    Output: result is a dict with keys 'QSO21' .. 'QSO<20 + maxlag>'. The value
    for lag d is weight * tau_d / (1 + weight * sum of all tau), rounded to
    6 decimals, where tau_d is the d-th sequence order coupling number. The
    dict is empty when maxlag <= 0.
    ###############################################################################
    """
    # The default dict is only read, never mutated.
    # The amino acid composition is not needed here; the original computed it
    # and discarded the result, so that call has been dropped.
    rightpart = []
    for lag in range(1, maxlag + 1):
        rightpart.append(GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix))
    temp = 1 + weight * sum(rightpart)
    result = {}
    # Numbering continues after the 20 composition-based descriptors.
    for offset, tau in enumerate(rightpart):
        result['QSO' + str(offset + 21)] = round(weight * tau / temp, 6)
    return result
#############################################################################################
def GetQuasiSequenceOrder1SW(ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance1):
    """
    ###############################################################################
    Computing the first 20 quasi-sequence-order descriptors for
    a given protein sequence, by default using the Schneider-Wrede distance
    matrix (_Distance1).

    Usage:

    result = GetQuasiSequenceOrder1SW(protein, maxlag, weight)

    see method GetQuasiSequenceOrder for the choice of parameters.

    Output: result is a dict with keys 'QSOSW1' .. 'QSOSW20' (one per entry of
    AALetter, in that order). Each value is the amino acid composition divided
    by (1 + weight * sum of the coupling numbers for lags 1..maxlag), rounded
    to 6 decimals.
    ###############################################################################
    """
    rightpart = 0.0
    for lag in range(1, maxlag + 1):
        rightpart += GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix)
    AAC = GetAAComposition(ProteinSequence)
    # Common normalising denominator shared by all 20 descriptors.
    temp = 1 + weight * rightpart
    result = {}
    for index, aa in enumerate(AALetter):
        result['QSOSW' + str(index + 1)] = round(AAC[aa] / temp, 6)
    return result
#############################################################################################
def GetQuasiSequenceOrder2SW(ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance1):
    """
    ###############################################################################
    Computing the last maxlag quasi-sequence-order descriptors for
    a given protein sequence, by default using the Schneider-Wrede distance
    matrix (_Distance1).

    Usage:

    result = GetQuasiSequenceOrder2SW(protein, maxlag, weight)

    see method GetQuasiSequenceOrder for the choice of parameters.

    Output: result is a dict with keys 'QSOSW21' .. 'QSOSW<20 + maxlag>'. The
    value for lag d is weight * tau_d / (1 + weight * sum of all tau), rounded
    to 6 decimals, where tau_d is the d-th sequence order coupling number.
    ###############################################################################
    """
    # The amino acid composition is not needed here; the original computed it
    # and discarded the result, so that call has been dropped.
    rightpart = []
    for lag in range(1, maxlag + 1):
        rightpart.append(GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix))
    temp = 1 + weight * sum(rightpart)
    result = {}
    # Numbering continues after the 20 composition-based descriptors.
    for offset, tau in enumerate(rightpart):
        result['QSOSW' + str(offset + 21)] = round(weight * tau / temp, 6)
    return result
#############################################################################################
def GetQuasiSequenceOrder1Grant(ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance2):
    """
    ###############################################################################
    Computing the first 20 quasi-sequence-order descriptors for
    a given protein sequence, by default using the Grantham distance matrix
    (_Distance2).

    Usage:

    result = GetQuasiSequenceOrder1Grant(protein, maxlag, weight)

    see method GetQuasiSequenceOrder for the choice of parameters.

    Output: result is a dict with keys 'QSOgrant1' .. 'QSOgrant20' (one per
    entry of AALetter, in that order). Each value is the amino acid composition
    divided by (1 + weight * sum of the coupling numbers for lags 1..maxlag),
    rounded to 6 decimals.
    ###############################################################################
    """
    rightpart = 0.0
    for lag in range(1, maxlag + 1):
        rightpart += GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix)
    AAC = GetAAComposition(ProteinSequence)
    # Common normalising denominator shared by all 20 descriptors.
    temp = 1 + weight * rightpart
    result = {}
    for index, aa in enumerate(AALetter):
        result['QSOgrant' + str(index + 1)] = round(AAC[aa] / temp, 6)
    return result
#############################################################################################
def GetQuasiSequenceOrder2Grant(ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance2):
    """
    ###############################################################################
    Computing the last maxlag quasi-sequence-order descriptors for
    a given protein sequence, by default using the Grantham distance matrix
    (_Distance2).

    Usage:

    result = GetQuasiSequenceOrder2Grant(protein, maxlag, weight)

    see method GetQuasiSequenceOrder for the choice of parameters.

    Output: result is a dict with keys 'QSOgrant21' .. 'QSOgrant<20 + maxlag>'.
    The value for lag d is weight * tau_d / (1 + weight * sum of all tau),
    rounded to 6 decimals, where tau_d is the d-th sequence order coupling
    number.
    ###############################################################################
    """
    # The amino acid composition is not needed here; the original computed it
    # and discarded the result, so that call has been dropped.
    rightpart = []
    for lag in range(1, maxlag + 1):
        rightpart.append(GetSequenceOrderCouplingNumber(ProteinSequence, lag, distancematrix))
    temp = 1 + weight * sum(rightpart)
    result = {}
    # Numbering continues after the 20 composition-based descriptors.
    for offset, tau in enumerate(rightpart):
        result['QSOgrant' + str(offset + 21)] = round(weight * tau / temp, 6)
    return result
#############################################################################################
def GetQuasiSequenceOrder(ProteinSequence, maxlag=30, weight=0.1):
    """
    ###############################################################################
    Computing quasi-sequence-order descriptors for a given protein.

    [1]: Kuo-Chen Chou. Prediction of Protein Subcellular Locations by
    Incorporating Quasi-Sequence-Order Effect. Biochemical and Biophysical
    Research Communications 2000, 278, 477-483.

    Usage:

    result = GetQuasiSequenceOrder(protein, maxlag, weight)

    Input: protein is a pure protein sequence.

    maxlag is the maximum lag and the length of the protein should be larger
    than maxlag. default is 30.

    weight is a weight factor. please see reference 1 for its choice. default
    is 0.1.

    Output: result is a dict containing all quasi-sequence-order descriptors:
    the 'QSOSW*' entries computed with _Distance1 and the 'QSOgrant*' entries
    computed with _Distance2.
    ###############################################################################
    """
    result = dict()
    result.update(GetQuasiSequenceOrder1SW(ProteinSequence, maxlag, weight, _Distance1))
    result.update(GetQuasiSequenceOrder2SW(ProteinSequence, maxlag, weight, _Distance1))
    result.update(GetQuasiSequenceOrder1Grant(ProteinSequence, maxlag, weight, _Distance2))
    result.update(GetQuasiSequenceOrder2Grant(ProteinSequence, maxlag, weight, _Distance2))
    return result
#############################################################################################
def GetQuasiSequenceOrderp(ProteinSequence, maxlag=30, weight=0.1, distancematrix={}):
    """
    ###############################################################################
    Computing quasi-sequence-order descriptors for a given protein, based on
    a user-defined distance matrix.

    [1]: Kuo-Chen Chou. Prediction of Protein Subcellular Locations by
    Incorporating Quasi-Sequence-Order Effect. Biochemical and Biophysical
    Research Communications 2000, 278, 477-483.

    Usage:

    result = GetQuasiSequenceOrderp(protein, maxlag, weight, distancematrix)

    Input: protein is a pure protein sequence.

    maxlag is the maximum lag and the length of the protein should be larger
    than maxlag. default is 30.

    weight is a weight factor. please see reference 1 for its choice. default
    is 0.1.

    distancematrix is a dict containing 400 distance values, keyed by
    two-letter residue pairs.

    Output: result is a dict containing all quasi-sequence-order descriptors:
    the entries of GetQuasiSequenceOrder1 followed by those of
    GetQuasiSequenceOrder2.
    ###############################################################################
    """
    # The default dict is only passed through and read, never mutated.
    result = dict()
    result.update(GetQuasiSequenceOrder1(ProteinSequence, maxlag, weight, distancematrix))
    result.update(GetQuasiSequenceOrder2(ProteinSequence, maxlag, weight, distancematrix))
    return result
#############################################################################################
if __name__ == "__main__":
    # Demo run: compute every descriptor family for one example sequence and
    # print the results.

    # Adjacent string literals are concatenated, so the sequence contains no
    # stray whitespace between the chunks.
    protein = (
        "ELRLRYCAPAGFALLKCNDADYDGFKTNCSNVSVVHCTNLMNTTVTTGLLLNGSYSENRT"
        "QIWQKHRTSNDSALILLNKHYNLTVTCKRPGNKTVLPVTIMAGLVFHSQKYNLRLRQAWC"
        "HFPSNWKGAWKEVKEEIVNLPKERYRGTNDPKRIFFQRQWGDPETANLWFNCHGEFFYCK"
        "MDWFLNYLNNLTVDADHNECKNTSGTKSGNKRAPGPCVQRTYVACHIRSVIIWLETISKK"
        "TYAPPREGHLECTSTVTGMTVELNYIPKNRTNVTLSPQIESIWAAELDRYKLVEITPIGF"
        "APTEVRRYTGGHERQKRVPFVVQSQHLLAGILQQQKNLLAAVEAQQQMLKLTIWGVK"
    )

    def _print_items(descriptors):
        # Single-argument print() behaves the same on Python 2 and Python 3.
        for key in descriptors:
            print("%s %s" % (key, descriptors[key]))

    print(len(protein))

    SCN = GetSequenceOrderCouplingNumberTotal(protein, maxlag=30)
    print(len(SCN))
    _print_items(SCN)

    QSO1 = GetQuasiSequenceOrder1(protein, maxlag=30, weight=0.1, distancematrix=_Distance1)
    print(QSO1)
    _print_items(QSO1)

    QSO2 = GetQuasiSequenceOrder2(protein, maxlag=30, weight=0.1, distancematrix=_Distance1)
    print(QSO2)
    _print_items(QSO2)

    QSO = GetQuasiSequenceOrder(protein, maxlag=30, weight=0.1)
    print(len(QSO))
    _print_items(QSO)

    SCN = GetSequenceOrderCouplingNumberp(protein, maxlag=30, distancematrix=_Distance1)
    print(len(SCN))
    _print_items(SCN)

    QSO = GetQuasiSequenceOrderp(protein, maxlag=30, distancematrix=_Distance1)
    print(len(QSO))
    _print_items(QSO)