Package Bio :: Package Data :: Module CodonTable
[hide private]
[frames] | no frames]

Source Code for Module Bio.Data.CodonTable

  1  import string 
  2  from Bio import Alphabet 
  3  from Bio.Alphabet import IUPAC 
  4  from Bio.Data import IUPACData 
  5   
  6  unambiguous_dna_by_name = {} 
  7  unambiguous_dna_by_id = {} 
  8  unambiguous_rna_by_name = {} 
  9  unambiguous_rna_by_id = {} 
 10  generic_by_name = {} 
 11  generic_by_id = {} 
 12   
 13  # standard IUPAC unambiguous codons 
 14  standard_dna_table = None 
 15  standard_rna_table = None 
 16   
 17  # In the future, the back_table could return a statistically 
 18  # appropriate distribution of codons, so do not cache the results of 
 19  # back_table lookups! 
 20   
class TranslationError(Exception):
    """Raised when a codon has no unique, valid translation.

    Used for ambiguous codons that cover both coding and stop codons,
    and for ambiguous codons whose possible amino acids share no
    ambiguity code.
    """
23
24 -class CodonTable:
25 nucleotide_alphabet = Alphabet.generic_nucleotide 26 protein_alphabet = Alphabet.generic_protein 27 28 forward_table = {} # only includes codons which actually code 29 back_table = {} # for back translations 30 start_codons = [] 31 stop_codons = [] 32 # Not always called from derived classes!
33 - def __init__(self, nucleotide_alphabet = nucleotide_alphabet, 34 protein_alphabet = protein_alphabet, 35 forward_table = forward_table, back_table = back_table, 36 start_codons = start_codons, stop_codons = stop_codons):
43
def __str__(self):
    """Return a simple text representation of the codon table.

    The grid has one column per second base and one row per
    (first base, third base) pair.  Each cell shows the codon followed
    by its amino acid, "Stop" for a stop codon, or "?" when the forward
    table has no translation for it.  Coding start codons are tagged
    with "(s)".

    e.g.
    >>> import Bio.Data.CodonTable
    >>> print(Bio.Data.CodonTable.standard_dna_table)
    >>> print(Bio.Data.CodonTable.generic_by_id[1])
    """
    if self.id:
        answer = "Table %i" % self.id
    else:
        answer = "Table ID unknown"
    if self.names:
        # names may contain None (a table registered without an
        # alt_name), hence the filter.
        answer += " " + ", ".join(filter(None, self.names))

    # Use the main four letters (and the conventional ordering)
    # even for ambiguous tables
    letters = self.nucleotide_alphabet.letters
    if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \
            or (letters is not None and "T" in letters):
        letters = "TCAG"
    else:
        # Should be either RNA or generic nucleotides,
        # e.g. Bio.Data.CodonTable.generic_by_id[1].
        # These must be drawn with U: an RNA forward table holds no
        # T codons, so using "TCAG" here would print "?" everywhere.
        letters = "UCAG"

    # Every cell is 9 characters wide so that it lines up with the
    # "---------" rules; the header letter sits above the codon's
    # middle (second) base.
    rule = "--+" + "+".join(["---------" for c2 in letters]) + "+--"
    rows = [answer,
            "",
            "  |" + "|".join(["  %s      " % c2 for c2 in letters]) + "|",
            rule]
    for c1 in letters:
        for c3 in letters:
            line = c1 + " |"
            for c2 in letters:
                codon = c1 + c2 + c3
                line += " %s" % codon
                if codon in self.stop_codons:
                    line += " Stop|"
                    continue
                try:
                    amino = self.forward_table[codon]
                except (KeyError, TranslationError):
                    amino = "?"
                if codon in self.start_codons:
                    line += " %s(s)|" % amino
                else:
                    line += " %s   |" % amino
            rows.append(line + " " + c3)
        rows.append(rule)
    return "\n".join(rows)
100
def make_back_table(table, default_stop_codon):
    """Build an amino acid -> codon table from a codon -> amino acid table.

    table -- dictionary mapping codons to amino acids.
    default_stop_codon -- codon stored under the key None.

    ONLY RETURNS A SINGLE CODON per amino acid: codons are visited in
    sorted order and later ones overwrite earlier ones, so each amino
    acid maps to the last of its codons in sorted order.
    """
    # Do the sort so changes in the hash implementation won't affect
    # the result when one amino acid is coded by more than one codon.
    # sorted() is used because dict.keys() has no sort() on Python 3.
    back_table = {}
    for key in sorted(table):
        back_table[table[key]] = key
    back_table[None] = default_stop_codon
    return back_table
111 112
class NCBICodonTable(CodonTable):
    """Codon table describing one registered NCBI genetic code."""

    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = IUPAC.protein

    def __init__(self, id, names, table, start_codons, stop_codons):
        """Store the table data and derive the back table.

        id -- identifier of the genetic code.
        names -- list of names for the genetic code.
        table -- dictionary mapping coding codons to amino acids.
        start_codons -- list of start codons.
        stop_codons -- list of stop codons; the first one is used as
                       the default stop codon of the back table.
        """
        # CodonTable.__init__ is not called; every instance attribute
        # is assigned here instead.
        self.id, self.names = id, names
        self.forward_table = table
        self.back_table = make_back_table(table, stop_codons[0])
        self.start_codons, self.stop_codons = start_codons, stop_codons
124 125
class NCBICodonTableDNA(NCBICodonTable):
    """NCBI codon table over the unambiguous IUPAC DNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_dna
128
class NCBICodonTableRNA(NCBICodonTable):
    """NCBI codon table over the unambiguous IUPAC RNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_rna
131 132 133
def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Register one NCBI genetic code in the module-level registries.

    name -- one or more names for the code, separated by "; ".
    alt_name -- an alternative name, or None.
    id -- identifier of the genetic code.
    table -- dictionary mapping DNA codons to amino acids.
    start_codons, stop_codons -- lists of DNA codons.

    Three tables are built and stored under the id and under every
    name: a DNA table, an RNA table (every T replaced by U) and a
    generic table holding both the DNA and the RNA codons.  The table
    with id 1 also becomes standard_dna_table / standard_rna_table.
    """
    global standard_dna_table, standard_rna_table

    def _rna_and_generic(codons):
        # Return (RNA codons, DNA and RNA codons interleaved).
        rna_codons = []
        generic_codons = []
        for codon in codons:
            generic_codons.append(codon)
            codon = codon.replace("T", "U")
            generic_codons.append(codon)
            rna_codons.append(codon)
        return rna_codons, generic_codons

    # str.split replaces string.split(), which only exists on Python 2.
    names = name.split("; ")

    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    # replace all T's with U's for the RNA tables
    rna_table = {}
    generic_table = {}
    for codon, val in table.items():
        generic_table[codon] = val
        codon = codon.replace("T", "U")
        generic_table[codon] = val
        rna_table[codon] = val
    rna_start_codons, generic_start_codons = _rna_and_generic(start_codons)
    rna_stop_codons, generic_stop_codons = _rna_and_generic(stop_codons)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)
    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic

    # The tables above received their own name lists, so extending
    # names here only affects the by-name registration below.
    if alt_name is not None:
        names.append(alt_name)

    for name in names:
        unambiguous_dna_by_name[name] = dna
        unambiguous_rna_by_name[name] = rna
        generic_by_name[name] = generic
184 185 ### These tables created from the data file 186 ### ftp://ncbi.nlm.nih.gov/entrez/misc/data/gc.prt 187 ### using the following: 188 ##import re 189 ##for line in open("gc.prt").readlines(): 190 ## if line[:2] == " {": 191 ## names = [] 192 ## id = None 193 ## aa = None 194 ## start = None 195 ## bases = [] 196 ## elif line[:6] == " name": 197 ## names.append(re.search('"([^"]*)"', line).group(1)) 198 ## elif line[:8] == " name": 199 ## names.append(re.search('"(.*)$', line).group(1)) 200 ## elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n': 201 ## names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma" 202 ## elif line[:4] == " id": 203 ## id = int(re.search('(\d+)', line).group(1)) 204 ## elif line[:10] == " ncbieaa ": 205 ## aa = line[12:12+64] 206 ## elif line[:10] == " sncbieaa": 207 ## start = line[12:12+64] 208 ## elif line[:9] == " -- Base": 209 ## bases.append(line[12:12+64]) 210 ## elif line[:2] == " }": 211 ## assert names != [] and id is not None and aa is not None 212 ## assert start is not None and bases != [] 213 ## if len(names) == 1: 214 ## names.append(None) 215 ## print "register_ncbi_table(name = %s," % repr(names[0]) 216 ## print " alt_name = %s, id = %d", % \ 217 ## (repr(names[1]), id) 218 ## print " table = {" 219 ## s = " " 220 ## for i in range(64): 221 ## if aa[i] != "*": 222 ## t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i], 223 ## bases[2][i], aa[i]) 224 ## if len(s) + len(t) > 75: 225 ## print s 226 ## s = " " + t 227 ## else: 228 ## s = s + t 229 ## print s, "}," 230 231 ## s = " stop_codons = [" 232 ## for i in range(64): 233 ## if aa[i] == "*": 234 ## t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 235 ## if len(s) + len(t) > 75: 236 ## print s 237 ## s = " " + t 238 ## else: 239 ## s = s + t 240 ## print s, "]," 241 242 ## s = " start_codons = [" 243 ## for i in range(64): 244 ## if start[i] == "M": 245 ## t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 246 ## if len(s) + len(t) 
> 75: 247 ## print s 248 ## s = " " + t 249 ## else: 250 ## s = s + t 251 ## print s, "]" 252 ## print " )" 253 ## elif line[:2] == "--" or line == "\n" or line == "}\n" or \ 254 ## line == 'Genetic-code-table ::= {\n': 255 ## pass 256 ## else: 257 ## raise "Unparsed", repr(line) 258 259 register_ncbi_table(name = 'Standard', 260 alt_name = 'SGC0', id = 1, 261 table = { 262 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 263 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 264 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 265 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 266 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 267 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 268 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 269 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 270 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 271 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 272 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 273 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 274 'GGG': 'G', }, 275 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 276 start_codons = [ 'TTG', 'CTG', 'ATG', ] 277 ) 278 register_ncbi_table(name = 'Vertebrate Mitochondrial', 279 alt_name = 'SGC1', id = 2, 280 table = { 281 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 282 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 283 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 284 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 285 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 286 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 287 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 288 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 289 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V', 290 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 
291 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 292 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 293 stop_codons = [ 'TAA', 'TAG', 'AGA', 'AGG', ], 294 start_codons = [ 'ATT', 'ATC', 'ATA', 'ATG', 'GTG', ] 295 ) 296 register_ncbi_table(name = 'Yeast Mitochondrial', 297 alt_name = 'SGC2', id = 3, 298 table = { 299 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 300 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 301 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T', 302 'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P', 303 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 304 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 305 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 306 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 307 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 308 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 309 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 310 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 311 'GGA': 'G', 'GGG': 'G', }, 312 stop_codons = [ 'TAA', 'TAG', ], 313 start_codons = [ 'ATG', ] 314 ) 315 register_ncbi_table(name = 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', 316 alt_name = 'SGC3', id = 4, 317 table = { 318 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 319 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 320 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 321 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 322 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 323 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 324 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 325 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 326 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 327 'AGG': 'R', 'GTT': 'V', 
'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 328 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 329 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 330 'GGA': 'G', 'GGG': 'G', }, 331 stop_codons = [ 'TAA', 'TAG', ], 332 start_codons = [ 'TTA', 'TTG', 'CTG', 'ATT', 'ATC', 333 'ATA', 'ATG', 'GTG', ] 334 ) 335 register_ncbi_table(name = 'Invertebrate Mitochondrial', 336 alt_name = 'SGC4', id = 5, 337 table = { 338 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 339 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 340 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 341 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 342 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 343 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 344 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 345 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 346 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 347 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 348 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 349 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 350 'GGA': 'G', 'GGG': 'G', }, 351 stop_codons = [ 'TAA', 'TAG', ], 352 start_codons = [ 'TTG', 'ATT', 'ATC', 'ATA', 'ATG', 353 'GTG', ] 354 ) 355 register_ncbi_table(name = 'Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', 356 alt_name = 'SGC5', id = 6, 357 table = { 358 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 359 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 360 'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 361 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 362 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 363 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 364 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 365 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 366 'AAC': 
'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 367 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 368 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 369 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 370 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 371 stop_codons = [ 'TGA', ], 372 start_codons = [ 'ATG', ] 373 ) 374 register_ncbi_table(name = 'Echinoderm Mitochondrial', 375 alt_name = 'SGC8', id = 9, 376 table = { 377 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 378 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 379 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 380 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 381 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 382 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 383 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 384 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 385 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 386 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 387 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 388 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 389 'GGA': 'G', 'GGG': 'G', }, 390 stop_codons = [ 'TAA', 'TAG', ], 391 start_codons = [ 'ATG', ] 392 ) 393 register_ncbi_table(name = 'Euplotid Nuclear', 394 alt_name = 'SGC9', id = 10, 395 table = { 396 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 397 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 398 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L', 399 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 400 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 401 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 402 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 403 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 404 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 
'R', 405 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 406 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 407 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 408 'GGA': 'G', 'GGG': 'G', }, 409 stop_codons = [ 'TAA', 'TAG', ], 410 start_codons = [ 'ATG', ] 411 ) 412 register_ncbi_table(name = 'Bacterial', 413 alt_name = None, id = 11, 414 table = { 415 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 416 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 417 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 418 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 419 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 420 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 421 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 422 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 423 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 424 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 425 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 426 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 427 'GGG': 'G', }, 428 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 429 start_codons = [ 'TTG', 'CTG', 'ATT', 'ATC', 'ATA', 430 'ATG', 'GTG', ] 431 ) 432 register_ncbi_table(name = 'Alternative Yeast Nuclear', 433 alt_name = None, id = 12, 434 table = { 435 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 436 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 437 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 438 'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 439 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 440 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 441 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 442 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 443 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 444 'GTT': 'V', 
'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 445 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 446 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 447 'GGG': 'G', }, 448 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 449 start_codons = [ 'CTG', 'ATG', ] 450 ) 451 register_ncbi_table(name = 'Ascidian Mitochondrial', 452 alt_name = None, id = 13, 453 table = { 454 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 455 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 456 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 457 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 458 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 459 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 460 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 461 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 462 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G', 463 'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 464 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 465 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 466 'GGA': 'G', 'GGG': 'G', }, 467 stop_codons = [ 'TAA', 'TAG', ], 468 start_codons = [ 'ATG', ] 469 ) 470 register_ncbi_table(name = 'Flatworm Mitochondrial', 471 alt_name = None, id = 14, 472 table = { 473 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 474 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 475 'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 476 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 477 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 478 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 479 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 480 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 481 'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 482 'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 483 
'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 484 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 485 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 486 stop_codons = [ 'TAG', ], 487 start_codons = [ 'ATG', ] 488 ) 489 register_ncbi_table(name = 'Blepharisma Macronuclear', 490 alt_name = None, id = 15, 491 table = { 492 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 493 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 494 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 495 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 496 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 497 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 498 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 499 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 500 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 501 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 502 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 503 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 504 'GGA': 'G', 'GGG': 'G', }, 505 stop_codons = [ 'TAA', 'TGA', ], 506 start_codons = [ 'ATG', ] 507 ) 508 509 ######### Deal with ambiguous forward translations 510
class AmbiguousCodonTable(CodonTable):
    """Codon table that also translates codons written with ambiguity codes.

    Wraps an existing codon table: forward lookups go through an
    AmbiguousForwardTable built on the wrapped table's forward table,
    and any attribute not found on this object is looked up on the
    wrapped table.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Wrap codon_table.

        codon_table -- the table to extend.
        ambiguous_nucleotide_alphabet -- stored as nucleotide alphabet.
        ambiguous_nucleotide_values -- maps each nucleotide letter to
            the letters it stands for.
        ambiguous_protein_alphabet -- stored as protein alphabet.
        ambiguous_protein_values -- maps each protein letter to the
            letters it stands for.
        """
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            AmbiguousForwardTable(codon_table.forward_table,
                                                  ambiguous_nucleotide_values,
                                                  ambiguous_protein_values),
                            codon_table.back_table,

                            # These two are WRONG!  I need to get the
                            # list of ambiguous codons which code for
                            # the stop codons  XXX
                            list_ambiguous_codons(codon_table.start_codons),
                            list_ambiguous_codons(codon_table.stop_codons)
                            )
        self._codon_table = codon_table

    # Be sneaky and forward attribute lookups to the original table.
    # This lets us get the names, if the original table is an NCBI
    # table.
    def __getattr__(self, name):
        """Look up attributes missing on this object on the wrapped table."""
        # __getattr__ only runs when normal lookup fails.  If the
        # missing attribute is _codon_table itself (the instance is not
        # initialised yet, e.g. while being copied or unpickled), the
        # lookup below would call __getattr__ again without end.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
538
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return a list of the amino acids an ambiguous codon may code for.

    codon -- three-letter codon, possibly containing ambiguity codes.
    forward_table -- maps unambiguous codons to amino acids; codons
        missing from it are treated as stop codons.
    ambiguous_nucleotide_values -- maps each nucleotide letter to the
        unambiguous letters it stands for.

    Raises KeyError (with the codon) when every expansion of the codon
    is a stop codon, and TranslationError when the expansions mix
    coding and stop codons.
    """
    c1, c2, c3 = codon
    x1 = ambiguous_nucleotide_values[c1]
    x2 = ambiguous_nucleotide_values[c2]
    x3 = ambiguous_nucleotide_values[c3]
    possible = {}
    stops = []
    for y1 in x1:
        for y2 in x2:
            for y3 in x3:
                try:
                    possible[forward_table[y1 + y2 + y3]] = 1
                except KeyError:
                    # If tripping over a stop codon
                    stops.append(y1 + y2 + y3)
    if stops:
        if possible:
            raise TranslationError(
                "ambiguous codon codes for both proteins and stop codons",
                codon)
        # This is a true stop codon - tell the caller about it
        raise KeyError(codon)
    # A real list, because callers index the result and dict.keys()
    # is not indexable on Python 3.
    return list(possible)
560
def list_ambiguous_codons(codons):
    """Placeholder: return codons unchanged.

    XXX not implemented!  No ambiguous codons are added; the very same
    object that was passed in is handed back.
    """
    return codons

# Forward translation is single-valued (what this note originally called
# "onto"), that is, any given codon always maps to the same protein, or
# it doesn't map at all.  Thus, I can build off of an existing table to
# produce the ambiguous mappings.
#
# This handles the general case.  Perhaps it's overkill?
# >>> t = CodonTable.ambiguous_dna_by_id[1]
# >>> t.forward_table["AAT"]
# 'N'
# >>> t.forward_table["GAT"]
# 'D'
# >>> t.forward_table["RAT"]
# 'B'
# >>> t.forward_table["YTA"]
# 'L'

580 -class AmbiguousForwardTable:
581 - def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
582 self.forward_table = forward_table 583 584 self.ambiguous_nucleotide = ambiguous_nucleotide 585 self.ambiguous_protein = ambiguous_protein 586 587 inverted = {} 588 for name, val in ambiguous_protein.items(): 589 for c in val: 590 x = inverted.get(c, {}) 591 x[name] = 1 592 inverted[c] = x 593 for name, val in inverted.items(): 594 inverted[name] = val.keys() 595 self._inverted = inverted 596 597 self._cache = {}
598
599 - def get(self, codon, failobj = None):
600 try: 601 return self.__getitem__(codon) 602 except KeyError: 603 return failobj
604
605 - def __getitem__(self, codon):
606 try: 607 x = self._cache[codon] 608 except KeyError: 609 pass 610 else: 611 if x is TranslationError: 612 raise TranslationError, codon # no unique translation 613 if x is KeyError: 614 raise KeyError, codon # it's a stop codon 615 return x 616 try: 617 x = self.forward_table[codon] 618 self._cache[codon] = x 619 return x 620 except KeyError: 621 pass 622 623 # XXX Need to make part of this into a method which returns 624 # a list of all possible encodings for a codon! 625 try: 626 possible = list_possible_proteins(codon, 627 self.forward_table, 628 self.ambiguous_nucleotide) 629 except KeyError: 630 self._cache[codon] = KeyError 631 raise KeyError, codon # stop codon 632 except TranslationError: 633 self._cache[codon] = TranslationError 634 raise TranslationError, codon # does not code 635 assert len(possible) > 0, "unambiguous codons must code" 636 637 # Hah! Only one possible protein, so use it 638 if len(possible) == 1: 639 self._cache[codon] = possible[0] 640 return possible[0] 641 642 # See if there's an ambiguous protein encoding for the multiples. 643 # Find residues which exist in every coding set. 644 ambiguous_possible = {} 645 for amino in possible: 646 for term in self._inverted[amino]: 647 ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1 648 649 n = len(possible) 650 possible = [] 651 for amino, val in ambiguous_possible.items(): 652 if val == n: 653 possible.append(amino) 654 655 # No amino acid encoding for the results 656 if len(possible) == 0: 657 self._cache[codon] = TranslationError 658 raise TranslationError, codon # no valid translation 659 660 # All of these are valid, so choose one 661 # To be unique, sort by smallet ambiguity then alphabetically 662 # Can get this if "X" encodes for everything. 663 def _sort(x, y, table = self.ambiguous_protein): 664 a = cmp(len(table[x]), len(table[y])) 665 if a == 0: 666 return cmp(x, y) 667 return a
668 possible.sort(_sort) 669 670 x = possible[0] 671 self._cache[codon] = x 672 return x


def _wrap_ambiguous(tables, nucleotide_alphabet, nucleotide_values):
    # Build a registry with the same keys as tables, each table wrapped
    # in its own AmbiguousCodonTable using the extended protein alphabet.
    wrapped = {}
    for key, val in tables.items():
        wrapped[key] = AmbiguousCodonTable(val,
                                           nucleotide_alphabet,
                                           nucleotide_values,
                                           IUPAC.extended_protein,
                                           IUPACData.extended_protein_values)
    return wrapped


ambiguous_dna_by_name = _wrap_ambiguous(unambiguous_dna_by_name,
                                        IUPAC.ambiguous_dna,
                                        IUPACData.ambiguous_dna_values)
ambiguous_dna_by_id = _wrap_ambiguous(unambiguous_dna_by_id,
                                      IUPAC.ambiguous_dna,
                                      IUPACData.ambiguous_dna_values)

ambiguous_rna_by_name = _wrap_ambiguous(unambiguous_rna_by_name,
                                        IUPAC.ambiguous_rna,
                                        IUPACData.ambiguous_rna_values)
ambiguous_rna_by_id = _wrap_ambiguous(unambiguous_rna_by_id,
                                      IUPAC.ambiguous_rna,
                                      IUPACData.ambiguous_rna_values)

# Only needed while building the registries; keep the module namespace
# free of construction leftovers.
del _wrap_ambiguous