Package Bio :: Package Alphabet
[hide private]
[frames] | [no frames]

Source Code for Package Bio.Alphabet

  1  import string, re 
  2   
  3  # This is used by sequences which contain a finite number of similar 
  4  # words. 
  5   
class Alphabet:
    """Base class for all alphabets.

    Class attributes:
      size    -- length of one word in the alphabet; None means the word
                 size is not fixed.
      letters -- the allowed letters, exposed through a list-like
                 interface; None means the alphabet is not fixed.
    """

    size = None
    letters = None

    def __repr__(self):
        """Return the class name followed by an empty argument list."""
        return "%s()" % (self.__class__.__name__,)

    def contains(self, other):
        """Return true if `other` is an instance of this alphabet's class.

        Instances of subclasses count as well, because the check is an
        isinstance() test against self.__class__.
        """
        own_class = self.__class__
        return isinstance(other, own_class)


# Shared instance of the base alphabet.
generic_alphabet = Alphabet()

class SingleLetterAlphabet(Alphabet):
    """Alphabet whose words are exactly one letter long (size is 1)."""

    # String of all letters in the alphabet; None here because this
    # class does not fix a particular set of letters.
    letters = None
    size = 1


# Shared instance of the single-letter alphabet.
single_letter_alphabet = SingleLetterAlphabet()


########### Protein

class ProteinAlphabet(SingleLetterAlphabet):
    """Single-letter alphabet type for proteins.

    Adds nothing to SingleLetterAlphabet; it exists so that contains()
    and isinstance() checks can tell protein alphabets apart.
    """


# Shared instance of the protein alphabet.
generic_protein = ProteinAlphabet()


########### DNA

class NucleotideAlphabet(SingleLetterAlphabet):
    """Single-letter alphabet type for nucleotides.

    Adds nothing to SingleLetterAlphabet; it serves as the common base
    class of DNAAlphabet and RNAAlphabet.
    """


# Shared instance of the nucleotide alphabet.
generic_nucleotide = NucleotideAlphabet()

class DNAAlphabet(NucleotideAlphabet):
    """Nucleotide alphabet type for DNA; defines no letters of its own."""


# Shared instance of the DNA alphabet.
generic_dna = DNAAlphabet()


########### RNA

class RNAAlphabet(NucleotideAlphabet):
    """Nucleotide alphabet type for RNA; defines no letters of its own."""


# Shared instance of the RNA alphabet.
generic_rna = RNAAlphabet()



########### Other per-sequence encodings

class SecondaryStructure(SingleLetterAlphabet):
    """Single-letter alphabet with the four letters H, S, T and C.

    NOTE(review): the letters presumably stand for helix, strand, turn
    and coil -- confirm against the code that uses this alphabet.
    """

    letters = "HSTC"

class ThreeLetterProtein(Alphabet):
    """Alphabet of three-letter codes; each word has size 3.

    Unlike the single-letter alphabets, `letters` is a list of strings
    rather than one string.
    """

    size = 3
    letters = [
        "Ala", "Asx", "Cys", "Asp", "Glu", "Phe",
        "Gly", "His", "Ile", "Lys", "Leu", "Met",
        "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",
        "Sec", "Val", "Trp", "Xaa", "Tyr", "Glx",
    ]


###### Non per-sequence modifications

# (These are Decorator classes)

class AlphabetEncoder:
    """Decorator that wraps an alphabet and adds extra letters to it.

    Attributes set by __init__:
      alphabet    -- the wrapped alphabet object.
      new_letters -- the letters added on top of the wrapped alphabet.
      letters     -- alphabet.letters + new_letters, or None when the
                     wrapped alphabet has no fixed letters.

    Any other non-special attribute is looked up on the wrapped
    alphabet (see __getattr__).
    """

    def __init__(self, alphabet, new_letters):
        """Wrap `alphabet`, extending its letters with `new_letters`."""
        self.alphabet = alphabet
        self.new_letters = new_letters
        if alphabet.letters is not None:
            # NOTE(review): this needs both operands to support `+`
            # with each other; a list-valued `letters` combined with a
            # string `new_letters` raises TypeError -- confirm whether
            # such alphabets are ever wrapped.
            self.letters = alphabet.letters + new_letters
        else:
            self.letters = None

    def __getattr__(self, key):
        """Delegate attributes that are missing here to the wrapped alphabet.

        Raises AttributeError for special (double-underscore) names and
        for "alphabet" itself.
        """
        # __getattr__ only runs when normal lookup has failed.  If
        # "alphabet" is what is missing (instance created without
        # __init__, e.g. while being copied or unpickled), reading
        # self.alphabet below would re-enter this method without end.
        if key == "alphabet":
            raise AttributeError(key)
        if key[:2] == "__" and key[-2:] == "__":
            raise AttributeError(key)
        return getattr(self.alphabet, key)

    def __repr__(self):
        """Return 'ClassName(<alphabet repr>, <new_letters repr>)'."""
        return "%s(%r, %r)" % (self.__class__.__name__, self.alphabet,
                               self.new_letters)

    def contains(self, other):
        """Return False: the base encoder never claims to contain another."""
        return False

class Gapped(AlphabetEncoder):
    """Encoder that adds a gap character to the wrapped alphabet.

    Attributes:
      gap_char -- the gap character; '-' unless another one is passed
                  to __init__.
    """

    gap_char = '-'

    def __init__(self, alphabet, gap_char = gap_char):
        """Wrap `alphabet` and register `gap_char` as its extra letter."""
        AlphabetEncoder.__init__(self, alphabet, gap_char)
        # Store the value on the instance so that a non-default gap
        # character is not masked by the class-level default when
        # contains() compares gap characters.
        self.gap_char = gap_char

    def contains(self, other):
        """Return true if `other` has the same gap character and this
        encoder's wrapped alphabet contains `other`'s wrapped alphabet.

        `other` must provide `gap_char` and `alphabet` attributes.
        """
        return other.gap_char == self.gap_char and \
               self.alphabet.contains(other.alphabet)

class HasStopCodon(AlphabetEncoder):
    """Encoder that adds a stop symbol to the wrapped alphabet.

    Attributes:
      stop_symbol -- the stop symbol; "*" unless another one is passed
                     to __init__.
    """

    stop_symbol = "*"

    def __init__(self, alphabet, stop_symbol = stop_symbol):
        """Wrap `alphabet` and register `stop_symbol` as its extra letter."""
        AlphabetEncoder.__init__(self, alphabet, stop_symbol)
        # Store the value on the instance so that a non-default stop
        # symbol is not masked by the class-level default in __cmp__
        # and contains().
        self.stop_symbol = stop_symbol

    def __cmp__(self, other):
        """Order by wrapped alphabet first, then by stop symbol.

        Uses the Python 2 comparison protocol (__cmp__ and the cmp()
        builtin).
        """
        x = cmp(self.alphabet, other.alphabet)
        if x != 0:
            return x
        return cmp(self.stop_symbol, other.stop_symbol)

    def contains(self, other):
        """Return true if `other` has the same stop symbol and this
        encoder's wrapped alphabet contains `other`'s wrapped alphabet.

        `other` must provide `stop_symbol` and `alphabet` attributes.
        """
        return other.stop_symbol == self.stop_symbol and \
               self.alphabet.contains(other.alphabet)
