1
2
3
4
5
6
7
8 import string, array
9
10 import Alphabet
11 from Alphabet import IUPAC
12 from Data.IUPACData import ambiguous_dna_complement, ambiguous_rna_complement
13 from Bio.Data import CodonTable
14
23
25 return "%s(%s, %s)" % (self.__class__.__name__,
26 repr(self.data),
27 repr(self.alphabet))
29 if len(self.data) > 60:
30 s = repr(self.data[:60] + " ...")
31 else:
32 s = repr(self.data)
33 return "%s(%s, %s)" % (self.__class__.__name__, s,
34 repr(self.alphabet))
35
36
37
38
39
40
41
43
45
46
47
48 if isinstance(index, int) :
49
50 return self.data[index]
51 else :
52
53 return Seq(self.data[index], self.alphabet)
54
65
74
75
77 return self.data
78
80 return MutableSeq(self.data, self.alphabet)
81
84
86 """Seq.__maketrans(alphabet) -> translation table.
87
88 Return a translation table for use with complement()
89 and reverse_complement().
90
91 Compatible with lower case and upper case sequences.
92
93 alphabet is a dictionary as implement in Data.IUPACData
94
95 For internal use only.
96 """
97 before = ''.join(alphabet.keys())
98 after = ''.join(alphabet.values())
99 before = before + before.lower()
100 after = after + after.lower()
101 return string.maketrans(before, after)
102
121
123 """Returns the reverse complement sequence. New Seq object.
124 """
125
126 return self.complement()[::-1]
127
136 return "%s(%s, %s)" % (self.__class__.__name__,
137 repr(self.data),
138 repr(self.alphabet))
139
141 if len(self.data) > 60:
142 s = repr(string.join(self.data[:60], "") + " ...")
143 else:
144 s = repr(string.join(self.data, ""))
145 return "%s(%s, %s)" % (self.__class__.__name__, s,
146 repr(self.alphabet))
162
164
175
191
199
216
def pop(self, i = (-1)):
    """Remove the item at position i of self.data and return it.

    i defaults to -1, which selects the last item.  Any error raised by
    indexing or deleting from self.data (for example an out-of-range
    index) propagates unchanged.
    """
    removed = self.data[i]
    del self.data[i]
    return removed
226 for i in range(len(self.data)):
227 if self.data[i] == item:
228 del self.data[i]
229 return
230 raise ValueError, "MutableSeq.remove(x): x not in list"
238 for i in range(len(self.data)):
239 if self.data[i] == item:
240 return i
241 raise ValueError, "MutableSeq.index(x): x not in list"
242
244 """Modify the MutableSequence to reverse itself
245
246 No return value"""
247 self.data.reverse()
248
267
269 """Modify the MutableSequence to take on its reverse complement.
270
271 No return value"""
272 if isinstance(self.alphabet, Alphabet.ProteinAlphabet) :
273 raise ValueError, "Proteins do not have complements!"
274 self.complement()
275 self.data.reverse()
276
277
278
279
287
289 return string.join(self.data, "")
290
293
294
295
296
297
298
def transcribe(dna):
    """Transcribes a DNA sequence into RNA.

    Every 'T' is replaced by 'U' and every 't' by 'u'; all other
    characters are left untouched.

    If given a string, returns a new string object.

    Given a Seq or MutableSeq, returns a new Seq object.  Its alphabet is
    the RNA counterpart of the input alphabet: IUPAC.unambiguous_dna maps
    to IUPAC.unambiguous_rna, IUPAC.ambiguous_dna maps to
    IUPAC.ambiguous_rna, and any other alphabet maps to
    Alphabet.generic_rna.

    Raises ValueError if a Seq or MutableSeq with a protein alphabet is
    given.
    """
    if not isinstance(dna, (Seq, MutableSeq)):
        # Anything else is handled as a plain string.
        return dna.replace('T', 'U').replace('t', 'u')

    if isinstance(dna.alphabet, Alphabet.ProteinAlphabet):
        raise ValueError("Proteins cannot be transcribed!")

    rna = dna.tostring().replace('T', 'U').replace('t', 'u')
    if dna.alphabet == IUPAC.unambiguous_dna:
        alphabet = IUPAC.unambiguous_rna
    elif dna.alphabet == IUPAC.ambiguous_dna:
        alphabet = IUPAC.ambiguous_rna
    else:
        alphabet = Alphabet.generic_rna
    return Seq(rna, alphabet)
320
321
def back_transcribe(rna):
    """Back-transcribes an RNA sequence into DNA.

    Every 'U' is replaced by 'T' and every 'u' by 't'; all other
    characters are left untouched.

    If given a string, returns a new string object.

    Given a Seq or MutableSeq, returns a new Seq object.  Its alphabet is
    the DNA counterpart of the input alphabet: IUPAC.unambiguous_rna maps
    to IUPAC.unambiguous_dna, IUPAC.ambiguous_rna maps to
    IUPAC.ambiguous_dna, and any other alphabet maps to
    Alphabet.generic_dna.

    Raises ValueError if a Seq or MutableSeq with a protein alphabet is
    given.
    """
    if not isinstance(rna, (Seq, MutableSeq)):
        # Anything else is handled as a plain string.
        return rna.replace('U', 'T').replace('u', 't')

    if isinstance(rna.alphabet, Alphabet.ProteinAlphabet):
        raise ValueError("Proteins cannot be (back)transcribed!")

    # Go through tostring(), as transcribe() does, rather than touching
    # rna.data directly: only the string form is guaranteed to offer
    # str.replace for both Seq and MutableSeq.
    dna = rna.tostring().replace('U', 'T').replace('u', 't')
    if rna.alphabet == IUPAC.unambiguous_rna:
        alphabet = IUPAC.unambiguous_dna
    elif rna.alphabet == IUPAC.ambiguous_rna:
        alphabet = IUPAC.ambiguous_dna
    else:
        alphabet = Alphabet.generic_dna
    return Seq(dna, alphabet)
343
344
def _select_codon_table(by_name, by_id, table):
    """Return the codon table that `table` names or numbers.

    `table` is used as a key into by_id when int() accepts it, otherwise
    as a key into by_name.  A failed lookup propagates unchanged.
    """
    try:
        table_id = int(table)
    except (TypeError, ValueError):
        # Not a number, so treat it as a table name such as "Standard".
        return by_name[table]
    return by_id[table_id]


def _translate_codons(nucleotides, forward_table, stop_symbol):
    """Return the protein string for the complete codons of `nucleotides`."""
    nucleotides = nucleotides.upper()
    n = len(nucleotides)
    get = forward_table.get
    # n - n % 3 stops before any trailing partial codon.  A codon that is
    # missing from forward_table is emitted as stop_symbol.
    return "".join([get(nucleotides[i:i + 3], stop_symbol)
                    for i in range(0, n - n % 3, 3)])


def translate(sequence, table="Standard", stop_symbol="*"):
    """Translate a nucleotide sequence into amino acids.

    If given a string, returns a new string object.
    Given a Seq or MutableSeq, returns a new Seq object whose alphabet is
    the codon table's protein alphabet wrapped in Alphabet.HasStopCodon.

    sequence    - The nucleotides to translate.  They are upper-cased
                  before the codon lookup, so the case of the input does
                  not matter.
    table       - Which codon table to use?  This can be either a name
                  (string) or an identifier (an integer, or anything else
                  that int() accepts).
    stop_symbol - Text emitted for every codon that has no entry in the
                  codon table's forward table.

    For a Seq or MutableSeq the kind of codon table follows the sequence
    alphabet (IUPAC unambiguous/ambiguous DNA or RNA); any other alphabet,
    and every plain string, uses the generic tables.

    Raises ValueError if a Seq or MutableSeq with a protein alphabet is
    given.  An unknown table name or identifier propagates the error of
    the failed table lookup.

    NOTE - Trailing nucleotides that do not form a complete codon are
    ignored.  Any codon the forward table does not know - not only real
    stop codons - is translated as stop_symbol.
    """
    if not isinstance(sequence, (Seq, MutableSeq)):
        # Anything else is handled as a plain string.
        codon_table = _select_codon_table(CodonTable.generic_by_name,
                                          CodonTable.generic_by_id, table)
        return _translate_codons(sequence, codon_table.forward_table,
                                 stop_symbol)

    seq_alphabet = sequence.alphabet
    if isinstance(seq_alphabet, Alphabet.ProteinAlphabet):
        raise ValueError("Proteins cannot be translated!")

    if seq_alphabet == IUPAC.unambiguous_dna:
        by_name = CodonTable.unambiguous_dna_by_name
        by_id = CodonTable.unambiguous_dna_by_id
    elif seq_alphabet == IUPAC.ambiguous_dna:
        by_name = CodonTable.ambiguous_dna_by_name
        by_id = CodonTable.ambiguous_dna_by_id
    elif seq_alphabet == IUPAC.unambiguous_rna:
        by_name = CodonTable.unambiguous_rna_by_name
        by_id = CodonTable.unambiguous_rna_by_id
    elif seq_alphabet == IUPAC.ambiguous_rna:
        by_name = CodonTable.ambiguous_rna_by_name
        by_id = CodonTable.ambiguous_rna_by_id
    else:
        by_name = CodonTable.generic_by_name
        by_id = CodonTable.generic_by_id

    codon_table = _select_codon_table(by_name, by_id, table)
    protein = _translate_codons(sequence.tostring(),
                                codon_table.forward_table, stop_symbol)
    return Seq(protein, Alphabet.HasStopCodon(codon_table.protein_alphabet))
406
407
428
if __name__ == "__main__":
    # Quick self test: check the ambiguous DNA complement table against the
    # expanded letter values, then print what reverse_complement(),
    # transcribe() and back_transcribe() return for strings and Seq objects.
    print("Quick self test")
    from Bio.Data.IUPACData import ambiguous_dna_values, ambiguous_rna_values
    from Bio.Alphabet import generic_dna, generic_rna
    try:
        Set = set
    except NameError:
        # Interpreters without a builtin set type still ship sets.Set.
        from sets import Set
    print(ambiguous_dna_complement)
    for ambig_char, values in ambiguous_dna_values.items():
        # Reversing the reverse complement leaves the plain complement.
        compl_values = reverse_complement(values)[::-1]
        print("%s={%s} --> {%s}=%s"
              % (ambig_char, values, compl_values,
                 ambiguous_dna_complement[ambig_char]))
        assert Set(compl_values) == Set(ambiguous_dna_values[ambiguous_dna_complement[ambig_char]])

    # Three DNA inputs followed by the three matching RNA inputs: a plain
    # string, a Seq with the default alphabet, and a Seq with the generic
    # alphabet of that kind.  The last entry now uses the RNA letters so
    # that it matches its generic_rna alphabet.
    for s in ["".join(ambiguous_dna_values),
              Seq("".join(ambiguous_dna_values)),
              Seq("".join(ambiguous_dna_values), generic_dna),
              "".join(ambiguous_rna_values),
              Seq("".join(ambiguous_rna_values)),
              Seq("".join(ambiguous_rna_values), generic_rna)]:
        print("%s -> %s [RC]" % (repr(s), repr(reverse_complement(s))))
        print("%s -> %s [RNA]" % (repr(s), repr(transcribe(s))))
        print("%s -> %s [DNA]" % (repr(s), repr(back_transcribe(s))))
450