1 import math
2 from CodonUsageIndices import SharpEcoliIndex
3 from Bio import Fasta
4
5 CodonsDict = {'TTT':0, 'TTC':0, 'TTA':0, 'TTG':0, 'CTT':0,
6 'CTC':0, 'CTA':0, 'CTG':0, 'ATT':0, 'ATC':0,
7 'ATA':0, 'ATG':0, 'GTT':0, 'GTC':0, 'GTA':0,
8 'GTG':0, 'TAT':0, 'TAC':0, 'TAA':0, 'TAG':0,
9 'CAT':0, 'CAC':0, 'CAA':0, 'CAG':0, 'AAT':0,
10 'AAC':0, 'AAA':0, 'AAG':0, 'GAT':0, 'GAC':0,
11 'GAA':0, 'GAG':0, 'TCT':0, 'TCC':0, 'TCA':0,
12 'TCG':0, 'CCT':0, 'CCC':0, 'CCA':0, 'CCG':0,
13 'ACT':0, 'ACC':0, 'ACA':0, 'ACG':0, 'GCT':0,
14 'GCC':0, 'GCA':0, 'GCG':0, 'TGT':0, 'TGC':0,
15 'TGA':0, 'TGG':0, 'CGT':0, 'CGC':0, 'CGA':0,
16 'CGG':0, 'AGT':0, 'AGC':0, 'AGA':0, 'AGG':0,
17 'GGT':0, 'GGC':0, 'GGA':0, 'GGG':0}
18
19
20
21 SynonymousCodons = {'CYS': ['TGT', 'TGC'], 'ASP': ['GAT', 'GAC'],
22 'SER': ['TCT', 'TCG', 'TCA', 'TCC', 'AGC', 'AGT'],
23 'GLN': ['CAA', 'CAG'], 'MET': ['ATG'], 'ASN': ['AAC', 'AAT'],
24 'PRO': ['CCT', 'CCG', 'CCA', 'CCC'], 'LYS': ['AAG', 'AAA'],
25 'STOP': ['TAG', 'TGA', 'TAA'], 'THR': ['ACC', 'ACA', 'ACG', 'ACT'],
26 'PHE': ['TTT', 'TTC'], 'ALA': ['GCA', 'GCC', 'GCG', 'GCT'],
27 'GLY': ['GGT', 'GGG', 'GGA', 'GGC'], 'ILE': ['ATC', 'ATA', 'ATT'],
28 'LEU': ['TTA', 'TTG', 'CTC', 'CTT', 'CTG', 'CTA'], 'HIS': ['CAT', 'CAC'],
29 'ARG': ['CGA', 'CGC', 'CGG', 'CGT', 'AGG', 'AGA'], 'TRP': ['TGG'],
30 'VAL': ['GTA', 'GTC', 'GTG', 'GTT'], 'GLU': ['GAG', 'GAA'], 'TYR': ['TAT', 'TAC']}
31
32
34 """
35
36 This class implements the codon adaptaion index (CAI) described by Sharp and
37 Li (Nucleic Acids Res. 1987 Feb 11;15(3):1281-95).
38
39 methods:
40
41 set_cai_index(Index):
42
43 This mehtod sets-up an index to be used when calculating CAI for a gene.
44 Just pass a dictionary similar to the SharpEcoliIndex in CodonUsageIndices
45 module.
46
47 generate_index(FastaFile):
48
49 This method takes a location of a FastaFile and generates an index. This
50 index can later be used to calculate CAI of a gene.
51
52 cai_for_gene(DNAsequence):
53
54 This mehtod uses the Index (either the one you set or the one you generated)
55 and returns the CAI for the DNA sequence.
56
57 print_index():
58 This method prints out the index you used.
59
60 """
62 self.index = {}
63 self.codon_count={}
64
65
68
91
92
94 caiValue = 0
95 LengthForCai = 0
96
97 if self.index=={}:
98 self.set_cai_index(SharpEcoliIndex)
99
100 if DNAsequence.islower():
101 DNAsequence = DNAsequence.upper()
102 for i in range (0,len(DNAsequence),3):
103 codon = DNAsequence[i:i+3]
104 if self.index.has_key(codon):
105 if codon!='ATG' and codon!= 'TGG':
106 caiValue += math.log(self.index[codon])
107 LengthForCai += 1
108 elif codon not in ['TGA','TAA', 'TAG']:
109 raise TypeError("illegal codon in sequence: %s.\n%s" % (codon, self.index))
110 return math.exp(caiValue*(1.0/(LengthForCai-1)))
111
113 InputFile = open(FastaFile, 'r')
114
115 parser = Fasta.RecordParser()
116 iterator = Fasta.Iterator(InputFile, parser)
117 cur_record = iterator.next()
118
119
120 self.codon_count = CodonsDict.copy()
121
122
123
124 while cur_record:
125
126 if cur_record.sequence.islower():
127 DNAsequence = cur_record.sequence.upper()
128 else:
129 DNAsequence = cur_record.sequence
130 for i in range(0,len(DNAsequence),3):
131 codon = DNAsequence[i:i+3]
132 if self.codon_count.has_key(codon):
133 self.codon_count[codon] += 1
134 else:
135 raise TypeError("illegal codon %s in gene: %s" % (codon, cur_record.title))
136
137 cur_record = iterator.next()
138 InputFile.close()
139
140
146