# Source code for PyDNAacutil
# -*- coding: utf-8 -*-
# Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
# All rights reserved.
# This file is part of the PyBioMed.
# The contents are covered by the terms of the BSD license
# which is included in the file license.txt, found at the root
# of the PyBioMed source tree.
"""
##############################################################################
Utility functions used for computing different types of DNA descriptors.
You can freely use and distribute them. If you have any problems,
please contact us.
Authors: Zhijiang Yao and Dongsheng Cao.
Date: 2016.08.14
Email: gadsby@163.com and oriental-cds@163.com
##############################################################################
"""
# The four letters A, C, G and T (the DNA nucleotide alphabet); not referenced
# by the functions visible in this part of the module.
ALPHABET = 'ACGT'
def ExtendPhycheIndex(original_index, extend_index):
    """Extend {phyche:[value, ... ]} with additional values, in place.

    For every key of ``original_index`` the list stored under the same key
    in ``extend_index`` is appended to the existing list of values.

    :param original_index: dict mapping a key to a list of values. The lists
        are mutated in place.
    :param extend_index: dict providing, for every key of ``original_index``,
        the values to append. Keys that do not occur in ``original_index``
        are ignored. When it is empty nothing is changed.
    :return: ``original_index`` itself (the same object, not a copy).
    :raises KeyError: if ``extend_index`` is non-empty but lacks one of the
        keys of ``original_index``; lists handled before the failing key
        remain extended.
    """
    if len(extend_index) == 0:
        return original_index

    # Only the value lists are mutated, never the dict's key set, so the
    # mapping can be iterated directly without copying its keys first.
    for key, values in original_index.items():
        values.extend(extend_index[key])
    return original_index
def MakeACVector(sequence_list, lag, phyche_value, k):
    """Compute the AC vector (presumably "auto-covariance") of each sequence.

    For every lag ``1 .. lag`` and every property index ``j`` the function
    pairs each k-mer of the sequence with the k-mer that starts ``temp_lag``
    positions later, and averages
    ``(value_j(first k-mer) - mean_j) * value_j(second k-mer)`` over all such
    pairs. Each result is rounded to 3 decimals.

    :param sequence_list: iterable of sequences (strings) made of the letters
        used in the keys of ``phyche_value``.
    :param lag: largest distance, in positions, between the two k-mers of a
        pair; all lags from 1 up to and including ``lag`` are evaluated.
    :param phyche_value: dict mapping each k-mer to a list of property
        values. Values are passed through ``float()``, so numbers and
        numeric strings both work. The number of properties is taken from
        the first entry of the dict.
    :param k: length of the k-mers used as keys of ``phyche_value``.
    :return: one list per sequence, holding ``lag * number_of_properties``
        floats ordered by lag first and property index second.
    :raises IndexError: if ``phyche_value`` is empty.
    :raises KeyError: if a k-mer of a sequence is missing in ``phyche_value``.
    :raises ZeroDivisionError: if a sequence is empty, or if for some lag it
        has exactly ``temp_lag + k - 1`` characters (no k-mer pair exists).
        Non-empty sequences that are shorter than that yield ``-0.0``.
    """
    # The number of properties is read from the first table entry.
    len_phyche_value = len(list(phyche_value.values())[0])

    vec_ac = []
    for sequence in sequence_list:
        len_seq = len(sequence)
        each_vec = []

        for temp_lag in range(1, lag + 1):
            # Number of k-mers that still have a partner temp_lag steps ahead.
            pair_count = len_seq - temp_lag - k + 1

            for j in range(len_phyche_value):
                # Sum property j over the leading k-mer of every pair.
                # NOTE(review): the sum is divided by the sequence length,
                # not by the number of k-mers summed; kept as is so that the
                # produced descriptor values do not change.
                ave_phyche_value = 0.0
                for i in range(pair_count):
                    ave_phyche_value += float(phyche_value[sequence[i:i + k]][j])
                ave_phyche_value /= len_seq

                # NOTE(review): only the first factor is mean-centered; the
                # partner value is used as is (MakeCCVector centers both).
                # Kept unchanged to preserve existing results.
                temp_sum = 0.0
                for i in range(pair_count):
                    first = float(phyche_value[sequence[i:i + k]][j])
                    second = float(
                        phyche_value[sequence[i + temp_lag:i + temp_lag + k]][j])
                    temp_sum += (first - ave_phyche_value) * second

                each_vec.append(round(temp_sum / pair_count, 3))

        vec_ac.append(each_vec)

    return vec_ac
def MakeCCVector(sequence_list, lag, phyche_value, k):
    """Compute the CC vector (presumably "cross-covariance") of each sequence.

    For every lag ``1 .. lag`` and every ordered pair of distinct property
    indices ``(i1, i2)`` the function pairs each k-mer of the sequence with
    the k-mer that starts ``temp_lag`` positions later, and averages
    ``(value_i1(first k-mer) - mean_i1) * (value_i2(second k-mer) - mean_i2)``
    over all such pairs. Each result is rounded to 3 decimals.

    :param sequence_list: iterable of sequences (strings) made of the letters
        used in the keys of ``phyche_value``.
    :param lag: largest distance, in positions, between the two k-mers of a
        pair; all lags from 1 up to and including ``lag`` are evaluated.
    :param phyche_value: dict mapping each k-mer to a list of property
        values. Values are passed through ``float()``. The number of
        properties is taken from the first entry of the dict.
    :param k: length of the k-mers used as keys of ``phyche_value``.
    :return: one list per sequence, holding
        ``lag * n * (n - 1)`` floats for ``n`` properties, ordered by lag,
        then ``i1``, then ``i2``. With a single property every list is empty.
    :raises IndexError: if ``phyche_value`` is empty.
    :raises KeyError: if a k-mer of a sequence is missing in ``phyche_value``.
    :raises ZeroDivisionError: if (with at least two properties) a sequence is
        empty, or if for some lag it has exactly ``temp_lag + k - 1``
        characters so that no k-mer pair exists.
    """
    # The number of properties is read from the first table entry.
    len_phyche_value = len(list(phyche_value.values())[0])

    vec_cc = []
    for sequence in sequence_list:
        len_seq = len(sequence)
        each_vec = []

        for temp_lag in range(1, lag + 1):
            # Number of k-mers that still have a partner temp_lag steps ahead.
            pair_count = len_seq - temp_lag - k + 1

            for i1 in range(len_phyche_value):
                for i2 in range(len_phyche_value):
                    if i1 == i2:
                        # Only pairs of different properties are evaluated.
                        continue

                    # Sum both properties over the leading k-mer of every
                    # pair. NOTE(review): the sums are divided by the
                    # sequence length, not by the number of k-mers summed;
                    # kept as is so that results do not change.
                    ave_phyche_value1 = 0.0
                    ave_phyche_value2 = 0.0
                    for j in range(pair_count):
                        row = phyche_value[sequence[j:j + k]]
                        ave_phyche_value1 += float(row[i1])
                        ave_phyche_value2 += float(row[i2])
                    ave_phyche_value1 /= len_seq
                    ave_phyche_value2 /= len_seq

                    # Accumulate the product of the two centered values.
                    temp_sum = 0.0
                    for j in range(pair_count):
                        first = float(phyche_value[sequence[j:j + k]][i1])
                        second = float(
                            phyche_value[sequence[j + temp_lag:j + temp_lag + k]][i2])
                        temp_sum += (first - ave_phyche_value1) * \
                                    (second - ave_phyche_value2)

                    each_vec.append(round(temp_sum / pair_count, 3))

        vec_cc.append(each_vec)

    return vec_cc
if __name__ == '__main__':
    # Nothing to run when executed as a script; the guard body is a no-op.
    pass