Package Bio :: Package Alphabet :: Module IUPAC
[hide private]
[frames | no frames]

Source Code for Module Bio.Alphabet.IUPAC

  1  # Define the IUPAC Alphabets you know and love 
  2   
  3  from Bio import Alphabet 
  4  from Bio.Data import IUPACData 
  5   
  6  ##################### Protein 
  7   
  8  # From the IUPAC definition at: 
  9  #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21 
class IUPACProtein(Alphabet.ProteinAlphabet):
    """Protein alphabet whose letters come from ``IUPACData.protein_letters``."""

    letters = IUPACData.protein_letters


# Ready-made module-level instance of the alphabet above.
protein = IUPACProtein()

# The extended alphabet that follows could be considered the base class
# for the standard IUPAC protein alphabet, except that some encodings
# use "X" to mean "unknown character", which causes a collision.  If you
# use X for selenocysteines, then you'll need a new alphabet.
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Protein alphabet using ``IUPACData.extended_protein_letters``."""

    letters = IUPACData.extended_protein_letters
    # Notes on the additional letters:
    #   B = "Asx";  aspartic acid or asparagine
    #   X = "Sec";  selenocysteine
    #               Note: IUPAC is moving to use 'U' for this
    #   Z = "Glx";  glutamic acid or glutamine (or substances such as
    #               4-carboxyglutamic acid and 5-oxoproline that yield
    #               glutamic acid on acid hydrolysis of peptides)


# Ready-made module-level instance of the alphabet above.
extended_protein = ExtendedIUPACProtein()

##################### DNA

# The next two are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """DNA alphabet whose letters come from ``IUPACData.ambiguous_dna_letters``."""

    letters = IUPACData.ambiguous_dna_letters


# Ready-made module-level instance of the alphabet above.
ambiguous_dna = IUPACAmbiguousDNA()

class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """DNA alphabet using ``IUPACData.unambiguous_dna_letters``.

    Subclasses ``IUPACAmbiguousDNA`` and overrides only ``letters``.
    """

    letters = IUPACData.unambiguous_dna_letters


# Ready-made module-level instance of the alphabet above.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """DNA alphabet whose letters come from ``IUPACData.extended_dna_letters``."""

    letters = IUPACData.extended_dna_letters
    # Notes on the additional letters:
    #   B == 5-bromouridine
    #   D == 5,6-dihydrouridine
    #   S == thiouridine
    #   W == wyosine


# Ready-made module-level instance of the alphabet above.
extended_dna = ExtendedIUPACDNA()

##################### RNA

class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """RNA alphabet whose letters come from ``IUPACData.ambiguous_rna_letters``."""

    letters = IUPACData.ambiguous_rna_letters


# Ready-made module-level instance of the alphabet above.
ambiguous_rna = IUPACAmbiguousRNA()

class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """RNA alphabet using ``IUPACData.unambiguous_rna_letters``.

    Subclasses ``IUPACAmbiguousRNA`` and overrides only ``letters``.
    """

    letters = IUPACData.unambiguous_rna_letters


# Ready-made module-level instance of the alphabet above.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
#class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
#    letters = extended_rna_letters
#    # B == 5-bromouridine
#    # D == 5,6-dihydrouridine
#    # S == thiouridine
#    # W == wyosine


# We need to load the property resolution information, but we need to
# wait until after the systems have been loaded.  (There's a nasty loop
# where, e.g., translation objects need an alphabet, which need to be
# associated with translators.)

from Bio.PropertyManager import default_manager

def _bootstrap(manager, klass, property):
    """Stand-in resolver that defers loading the IUPAC encodings.

    When called it removes the ``default_manager.class_resolver`` entries
    for all seven IUPAC alphabet classes, imports
    ``Bio.Encodings.IUPACEncoding``, and then hands the original request
    back to ``manager.resolve_class``.

    NOTE(review): importing ``IUPACEncoding`` presumably installs the real
    property resolvers for these classes -- confirm against that module.

    Args:
        manager: Must be the very same object as ``default_manager``.
        klass: Alphabet class whose property is being resolved.
        property: Name of the requested property (shadows the builtin; the
            parameter name is kept because it is part of the signature).

    Returns:
        Whatever ``manager.resolve_class(klass, property)`` returns.

    Raises:
        AssertionError: If ``manager`` is not ``default_manager``.
    """
    assert manager is default_manager

    # Drop every bootstrap entry, in the same order they were registered.
    for alphabet_class in (
        IUPACProtein,
        ExtendedIUPACProtein,
        IUPACAmbiguousDNA,
        IUPACUnambiguousDNA,
        ExtendedIUPACDNA,
        IUPACAmbiguousRNA,
        IUPACUnambiguousRNA,
    ):
        del default_manager.class_resolver[alphabet_class]

    # Imported lazily, inside the function; the bound name itself is not
    # used here, so the import is relied on only for its import-time effects.
    from Bio.Encodings import IUPACEncoding

    return manager.resolve_class(klass, property)

# Point the resolver entry of every IUPAC alphabet class at _bootstrap.
for _alphabet_class in (
    IUPACProtein,
    ExtendedIUPACProtein,
    IUPACAmbiguousDNA,
    IUPACUnambiguousDNA,
    ExtendedIUPACDNA,
    IUPACAmbiguousRNA,
    IUPACUnambiguousRNA,
):
    default_manager.class_resolver[_alphabet_class] = _bootstrap
# Keep the module namespace free of the temporary loop variable.
del _alphabet_class
