Package Bio :: Package Align :: Module Generic
[hide private]
[frames] | no frames]

Source Code for Module Bio.Align.Generic

  1  """ 
  2  Contains classes to deal with generic sequence alignment stuff not 
  3  specific to a particular program or format. 
  4   
  5  classes: 
  6  o Alignment 
  7  """ 
  8   
  9  # standard library 
 10  import string 
 11   
 12  # biopython 
 13  from Bio.Seq import Seq 
 14  from Bio.SeqRecord import SeqRecord 
 15  from Bio import Alphabet 
 16  from Bio.Alphabet import IUPAC 
 17   
18 -class Alignment:
19 """Represent a set of alignments. 20 21 This is a base class to represent alignments, which can be subclassed 22 to deal with an alignment in a specific format. 23 """
24 - def __init__(self, alphabet):
25 """Initialize a new Alignment object. 26 27 Arguments: 28 o alphabet - The alphabet to use for the sequence objects that are 29 created. This alphabet must be a gapped type. 30 """ 31 self._alphabet = alphabet 32 # hold everything at a list of seq record objects 33 self._records = []
34
35 - def get_all_seqs(self):
36 """Return all of the sequences involved in the alignment. 37 38 The return value is a list of SeqRecord objects. 39 """ 40 return self._records
41
42 - def __iter__(self) :
43 """Iterate over alignment rows as SeqRecord objects 44 45 e.g. 46 47 for record in align : 48 print record.id 49 print record.seq 50 """ 51 return iter(self._records)
52
53 - def get_seq_by_num(self, number):
54 """Retrieve a sequence by the number of the sequence in the consensus. 55 56 Returns: 57 o A Seq object for the requested sequence. 58 59 Raises: 60 o IndexError - If the specified number is out of range. 61 """ 62 return self._records[number].seq
63
64 - def get_alignment_length(self):
65 """Return the maximum length of the alignment. 66 67 All objects in the alignment should (hopefully) have the same 68 length. This function will go through and find this length 69 by finding the maximum length of sequences in the alignment. 70 """ 71 max_length = 0 72 73 for record in self._records: 74 if len(record.seq) > max_length: 75 max_length = len(record.seq) 76 77 return max_length
78
79 - def add_sequence(self, descriptor, sequence, start = None, end = None, 80 weight = 1.0):
81 """Add a sequence to the alignment. 82 83 This doesn't do any kind of alignment, it just adds in the sequence 84 object, which is assumed to be prealigned with the existing 85 sequences. 86 87 Arguments: 88 o descriptor - The descriptive id of the sequence being added. 89 This will be used as the resulting SeqRecord's 90 .id property (and, for historical compatibility, 91 also the .description property) 92 o sequence - A string with sequence info. 93 o start - You can explicitly set the start point of the sequence. 94 This is useful (at least) for BLAST alignments, which can just 95 be partial alignments of sequences. 96 o end - Specify the end of the sequence, which is important 97 for the same reason as the start. 98 o weight - The weight to place on the sequence in the alignment. 99 By default, all sequences have the same weight. (0.0 => no weight, 100 1.0 => highest weight) 101 """ 102 new_seq = Seq(sequence, self._alphabet) 103 104 #We are now effectively using the SeqRecord's .id as 105 #the primary identifier (e.g. in Bio.SeqIO) so we should 106 #populate it with the descriptor. 107 #For backwards compatibility, also store this in the 108 #SeqRecord's description property. 109 new_record = SeqRecord(new_seq, 110 id = descriptor, 111 description = descriptor) 112 113 # hack! We really need to work out how to deal with annotations 114 # and features in biopython. Right now, I'll just use the 115 # generic annotations dictionary we've got to store the start 116 # and end, but we should think up something better. I don't know 117 # if I'm really a big fan of the LocatableSeq thing they've got 118 # in BioPerl, but I'm not positive what the best thing to do on 119 # this is... 120 if start: 121 new_record.annotations['start'] = start 122 if end: 123 new_record.annotations['end'] = end 124 125 # another hack to add weight information to the sequence 126 new_record.annotations['weight'] = weight 127 128 self._records.append(new_record)
129
130 - def get_column(self,col):
131 """Returns a string containing a given column""" 132 col_str = '' 133 assert col >= 0 and col <= self.get_alignment_length() 134 for rec in self._records: 135 col_str += rec.seq[col] 136 return col_str
137 138 if __name__ == "__main__" : 139 print "Mini self test..." 140 141 raw_data = ["ACGATCAGCTAGCT", "CCGATCAGCTAGCT", "ACGATGAGCTAGCT"] 142 a = Alignment(Alphabet.generic_dna) 143 a.add_sequence("Alpha", raw_data[0], weight=2) 144 a.add_sequence("Beta", raw_data[1]) 145 a.add_sequence("Gamma", raw_data[2]) 146 147 #Iterating over the rows... 148 for rec in a : 149 assert isinstance(rec, SeqRecord) 150 for r,rec in enumerate(a) : 151 assert isinstance(rec, SeqRecord) 152 assert raw_data[r] == rec.seq.tostring() 153 if r==0 : assert rec.annotations['weight']==2 154 print "Alignment iteraction as SeqRecord OK" 155