Package Bio :: Package Align :: Module AlignInfo
[hide private]
[frames] | no frames]

Source Code for Module Bio.Align.AlignInfo

  1  """Extract information from alignment objects. 
  2   
  3  In order to try and avoid huge alignment objects with tons of functions, 
  4  functions which return summary type information about alignments should 
  5  be put into classes in this module. 
  6   
  7  classes: 
  8  o SummaryInfo 
  9  o PSSM 
 10  """ 
 11   
 12   
 13  # standard library 
 14  import string 
 15  import math 
 16  import sys 
 17   
 18  # biopython modules 
 19  from Bio import Alphabet 
 20  from Bio.Alphabet import IUPAC 
 21  from Bio.Seq import Seq 
 22   
 23  # Expected random distributions for 20-letter protein, and 
 24  # for 4-letter nucleotide alphabets 
 25  Protein20Random = 0.05 
 26  Nucleotide4Random = 0.25 
27 -class SummaryInfo:
28 """Calculate summary info about the alignment. 29 30 This class should be used to caclculate information summarizing the 31 results of an alignment. This may either be straight consensus info 32 or more complicated things. 33 """
34 - def __init__(self, alignment):
35 """Initialize with the alignment to calculate information on. 36 ic_vector attribute. A dictionary. Keys: column numbers. Values: 37 """ 38 self.alignment = alignment 39 self.ic_vector = {}
40
41 - def dumb_consensus(self, threshold = .7, ambiguous = "X", 42 consensus_alpha = None, require_multiple = 0):
43 """Output a fast consensus sequence of the alignment. 44 45 This doesn't do anything fancy at all. It will just go through the 46 sequence residue by residue and count up the number of each type 47 of residue (ie. A or G or T or C for DNA) in all sequences in the 48 alignment. If the percentage of the most common residue type is 49 greater then the passed threshold, then we will add that residue type, 50 otherwise an ambiguous character will be added. 51 52 This could be made a lot fancier (ie. to take a substitution matrix 53 into account), but it just meant for a quick and dirty consensus. 54 55 Arguments: 56 o threshold - The threshold value that is required to add a particular 57 atom. 58 o ambiguous - The ambiguous character to be added when the threshold is 59 not reached. 60 o consensus_alpha - The alphabet to return for the consensus sequence. 61 If this is None, then we will try to guess the alphabet. 62 o require_multiple - If set as 1, this will require that more than 63 1 sequence be part of an alignment to put it in the consensus (ie. 64 not just 1 sequence and gaps). 65 """ 66 # Iddo Friedberg, 1-JUL-2004: changed ambiguous default to "X" 67 consensus = '' 68 69 # find the length of the consensus we are creating 70 con_len = self.alignment.get_alignment_length() 71 72 # go through each seq item 73 for n in range(con_len): 74 # keep track of the counts of the different atoms we get 75 atom_dict = {} 76 num_atoms = 0 77 78 for record in self.alignment._records: 79 # make sure we haven't run past the end of any sequences 80 # if they are of different lengths 81 if n < len(record.seq): 82 if record.seq[n] != '-' and record.seq[n] != '.': 83 if record.seq[n] not in atom_dict.keys(): 84 atom_dict[record.seq[n]] = 1 85 else: 86 atom_dict[record.seq[n]] = \ 87 atom_dict[record.seq[n]] + 1 88 89 num_atoms = num_atoms + 1 90 91 max_atoms = [] 92 max_size = 0 93 94 for atom in atom_dict.keys(): 95 if atom_dict[atom] > max_size: 96 max_atoms = [atom] 97 max_size = atom_dict[atom] 98 elif atom_dict[atom] == max_size: 99 max_atoms.append(atom) 100 101 if require_multiple and num_atoms == 1: 102 consensus = consensus + ambiguous 103 elif (len(max_atoms) == 1) and ((float(max_size)/float(num_atoms)) 104 >= threshold): 105 consensus = consensus + max_atoms[0] 106 else: 107 consensus = consensus + ambiguous 108 109 # we need to guess a consensus alphabet if one isn't specified 110 if consensus_alpha is None: 111 consensus_alpha = self._guess_consensus_alphabet(ambiguous) 112 113 return Seq(consensus, consensus_alpha)
114
115 - def gap_consensus(self, threshold = .7, ambiguous = "X", 116 consensus_alpha = None, require_multiple = 0):
117 """Same as dumb_consensus(), but allows gap on the output. 118 119 Things to do: Let the user define that with only one gap, the result 120 character in consensus is gap. Let the user select gap character, now 121 it takes the same is input. 122 """ 123 # Iddo Friedberg, 1-JUL-2004: changed ambiguous default to "X" 124 consensus = '' 125 126 # find the length of the consensus we are creating 127 con_len = self.alignment.get_alignment_length() 128 129 # go through each seq item 130 for n in range(con_len): 131 # keep track of the counts of the different atoms we get 132 atom_dict = {} 133 num_atoms = 0 134 135 for record in self.alignment._records: 136 # make sure we haven't run past the end of any sequences 137 # if they are of different lengths 138 if n < len(record.seq): 139 if record.seq[n] not in atom_dict.keys(): 140 atom_dict[record.seq[n]] = 1 141 else: 142 atom_dict[record.seq[n]] = \ 143 atom_dict[record.seq[n]] + 1 144 145 num_atoms = num_atoms + 1 146 147 max_atoms = [] 148 max_size = 0 149 150 for atom in atom_dict.keys(): 151 if atom_dict[atom] > max_size: 152 max_atoms = [atom] 153 max_size = atom_dict[atom] 154 elif atom_dict[atom] == max_size: 155 max_atoms.append(atom) 156 157 if require_multiple and num_atoms == 1: 158 consensus = consensus + ambiguous 159 elif (len(max_atoms) == 1) and ((float(max_size)/float(num_atoms)) 160 >= threshold): 161 consensus = consensus + max_atoms[0] 162 else: 163 consensus = consensus + ambiguous 164 165 # we need to guess a consensus alphabet if one isn't specified 166 if consensus_alpha is None: 167 #TODO - Should we make this into a Gapped alphabet? 168 consensus_alpha = self._guess_consensus_alphabet(ambiguous) 169 170 return Seq(consensus, consensus_alpha)
171
172 - def _guess_consensus_alphabet(self, ambiguous):
173 """Pick an (ungapped) alphabet for an alignment consesus sequence. 174 175 This just looks at the sequences we have, checks their type, and 176 returns as appropriate type which seems to make sense with the 177 sequences we've got. 178 """ 179 #Start with the (un-gapped version of) the alignment alphabet 180 if isinstance(self.alignment._alphabet, Alphabet.Gapped) : 181 a = self.alignment._alphabet.alphabet 182 elif isinstance(self.alignment._alphabet, Alphabet.Alphabet) : 183 a = self.alignment._alphabet 184 else : 185 raise ValueError \ 186 ("Alignment's alphabet property is invalid.") 187 188 #Now check its compatible with all the rest of the sequences 189 for record in self.alignment : 190 #Get the (un-gapped version of) the sequence's alphabet 191 if isinstance(record.seq.alphabet, Alphabet.Gapped) : 192 alt = record.seq.alphabet.alphabet 193 elif isinstance(record.seq.alphabet, Alphabet.Alphabet) : 194 alt = record.seq.alphabet 195 else : 196 raise ValueError \ 197 ("Non-alphabet found in first of the alignment sequences.") 198 199 if not isinstance(alt, a.__class__) : 200 raise ValueError \ 201 ("Alignment contains a sequence with an incompatible alphabet.") 202 203 #Check the ambiguous character we are going to use in the consensus 204 #is in the alphabet's list of valid letters (if defined). 205 if hasattr(a, "letters") and a.letters is not None \ 206 and ambiguous not in a.letters : 207 #We'll need to pick a more generic alphabet... 208 if isinstance(a, IUPAC.IUPACUnambiguousDNA) : 209 if ambiguous in IUPAC.IUPACUnambiguousDNA().letters : 210 a = IUPAC.IUPACUnambiguousDNA() 211 else : 212 a = Alphabet.generic_dna 213 elif isinstance(a, IUPAC.IUPACUnambiguousRNA) : 214 if ambiguous in IUPAC.IUPACUnambiguousRNA().letters : 215 a = IUPAC.IUPACUnambiguousRNA() 216 else : 217 a = Alphabet.generic_rna 218 elif isinstance(a, IUPAC.IUPACProtein) : 219 if ambiguous in IUPAC.ExtendedIUPACProtein().letters : 220 a = IUPAC.ExtendedIUPACProtein() 221 else : 222 a = Alphabet.generic_protein 223 else : 224 a = Alphabet.single_letter_alphabet 225 return a
226
227 - def replacement_dictionary(self, skip_chars = []):
228 """Generate a replacement dictionary to plug into a substitution matrix 229 230 This should look at an alignment, and be able to generate the number 231 of substitutions of different residues for each other in the 232 aligned object. 233 234 Will then return a dictionary with this information: 235 {('A', 'C') : 10, ('C', 'A') : 12, ('G', 'C') : 15 ....} 236 237 This also treats weighted sequences. The following example shows how 238 we calculate the replacement dictionary. Given the following 239 multiple sequence alignments: 240 241 GTATC 0.5 242 AT--C 0.8 243 CTGTC 1.0 244 245 For the first column we have: 246 ('A', 'G') : 0.5 * 0.8 = 0.4 247 ('C', 'G') : 0.5 * 1.0 = 0.5 248 ('A', 'C') : 0.8 * 1.0 = 0.8 249 250 We then continue this for all of the columns in the alignment, summing 251 the information for each substitution in each column, until we end 252 up with the replacement dictionary. 253 254 Arguments: 255 o skip_chars - A list of characters to skip when creating the dictionary. 256 For instance, you might have Xs (screened stuff) or Ns, and not want 257 to include the ambiguity characters in the dictionary. 258 """ 259 # get a starting dictionary based on the alphabet of the alignment 260 rep_dict, skip_items = self._get_base_replacements(skip_chars) 261 262 # iterate through each record 263 for rec_num1 in range(len(self.alignment._records)): 264 # iterate through each record from one beyond the current record 265 # to the end of the list of records 266 for rec_num2 in range(rec_num1 + 1, len(self.alignment._records)): 267 # for each pair of records, compare the sequences and add 268 # the pertinent info to the dictionary 269 rep_dict = self._pair_replacement( 270 self.alignment._records[rec_num1].seq, 271 self.alignment._records[rec_num2].seq, 272 self.alignment._records[rec_num1].annotations.get('weight',1), 273 self.alignment._records[rec_num2].annotations.get('weight',1), 274 rep_dict, skip_items) 275 276 return rep_dict
277
278 - def _pair_replacement(self, seq1, seq2, weight1, weight2, 279 start_dict, ignore_chars):
280 """Compare two sequences and generate info on the replacements seen. 281 282 Arguments: 283 o seq1, seq2 - The two sequences to compare. 284 o weight1, weight2 - The relative weights of seq1 and seq2. 285 o start_dict - The dictionary containing the starting replacement 286 info that we will modify. 287 o ignore_chars - A list of characters to ignore when calculating 288 replacements (ie. '-'). 289 290 Returns: 291 o A replacment dictionary which is modified from initial_dict with 292 the information from the sequence comparison. 293 """ 294 # loop through each residue in the sequences 295 for residue_num in range(len(seq1)): 296 residue1 = seq1[residue_num] 297 try: 298 residue2 = seq2[residue_num] 299 # if seq2 is shorter, then we just stop looking at replacements 300 # and return the information 301 except IndexError: 302 return start_dict 303 304 # if the two residues are characters we want to count 305 if (residue1 not in ignore_chars) and (residue2 not in ignore_chars): 306 try: 307 # add info about the replacement to the dictionary, 308 # modified by the sequence weights 309 start_dict[(residue1, residue2)] = \ 310 start_dict[(residue1, residue2)] + \ 311 weight1 * weight2 312 313 # if we get a key error, then we've got a problem with alphabets 314 except KeyError: 315 raise ValueError("Residues %s, %s not found in alphabet %s" 316 % (residue1, residue2, 317 self.alignment._alphabet)) 318 319 return start_dict
320 321
322 - def _get_all_letters(self):
323 """Returns a string containing the expected letters in the alignment.""" 324 all_letters = self.alignment._alphabet.letters 325 if all_letters is None \ 326 or (isinstance(self.alignment._alphabet, Alphabet.Gapped) \ 327 and all_letters == self.alignment._alphabet.gap_char): 328 #We are dealing with a generic alphabet class where the 329 #letters are not defined! We must build a list of the 330 #letters used... 331 from sets import Set 332 set_letters = Set() 333 for record in self.alignment : 334 set_letters.union_update(record.seq) 335 list_letters = list(set_letters) 336 list_letters.sort() 337 all_letters = "".join(list_letters) 338 return all_letters
339
340 - def _get_base_replacements(self, skip_items = []):
341 """Get a zeroed dictonary of all possible letter combinations. 342 343 This looks at the type of alphabet and gets the letters for it. 344 It then creates a dictionary with all possible combinations of these 345 letters as keys (ie. ('A', 'G')) and sets the values as zero. 346 347 Returns: 348 o The base dictionary created 349 o A list of alphabet items to skip when filling the dictionary.Right 350 now the only thing I can imagine in this list is gap characters, but 351 maybe X's or something else might be useful later. This will also 352 include any characters that are specified to be skipped. 353 """ 354 base_dictionary = {} 355 all_letters = self._get_all_letters() 356 357 # if we have a gapped alphabet we need to find the gap character 358 # and drop it out 359 if isinstance(self.alignment._alphabet, Alphabet.Gapped): 360 skip_items.append(self.alignment._alphabet.gap_char) 361 all_letters = string.replace(all_letters, 362 self.alignment._alphabet.gap_char, 363 '') 364 365 # now create the dictionary 366 for first_letter in all_letters: 367 for second_letter in all_letters: 368 if (first_letter not in skip_items and 369 second_letter not in skip_items): 370 base_dictionary[(first_letter, second_letter)] = 0 371 372 return base_dictionary, skip_items
373 374
375 - def pos_specific_score_matrix(self, axis_seq = None, 376 chars_to_ignore = []):
377 """Create a position specific score matrix object for the alignment. 378 379 This creates a position specific score matrix (pssm) which is an 380 alternative method to look at a consensus sequence. 381 382 Arguments: 383 o chars_to_ignore - A listing of all characters not to include in 384 the pssm. If the alignment alphabet declares a gap character, 385 then it will be excluded automatically. 386 o axis_seq - An optional argument specifying the sequence to 387 put on the axis of the PSSM. This should be a Seq object. If nothing 388 is specified, the consensus sequence, calculated with default 389 parameters, will be used. 390 391 Returns: 392 o A PSSM (position specific score matrix) object. 393 """ 394 # determine all of the letters we have to deal with 395 all_letters = self._get_all_letters() 396 assert all_letters 397 398 if not isinstance(chars_to_ignore, list) : 399 raise TypeError("chars_to_ignore should be a list.") 400 401 # if we have a gap char, add it to stuff to ignore 402 if isinstance(self.alignment._alphabet, Alphabet.Gapped): 403 chars_to_ignore.append(self.alignment._alphabet.gap_char) 404 405 for char in chars_to_ignore: 406 all_letters = string.replace(all_letters, char, '') 407 408 if axis_seq: 409 left_seq = axis_seq 410 assert len(axis_seq) == self.alignment.get_alignment_length() 411 else: 412 left_seq = self.dumb_consensus() 413 414 pssm_info = [] 415 # now start looping through all of the sequences and getting info 416 for residue_num in range(len(left_seq)): 417 score_dict = self._get_base_letters(all_letters) 418 for record in self.alignment._records: 419 try: 420 this_residue = record.seq[residue_num] 421 # if we hit an index error we've run out of sequence and 422 # should not add new residues 423 except IndexError: 424 this_residue = None 425 426 if this_residue and this_residue not in chars_to_ignore: 427 weight = record.annotations.get('weight', 1) 428 try: 429 score_dict[this_residue] += weight 430 # if we get a KeyError then we have an alphabet problem 431 except KeyError: 432 raise ValueError("Residue %s not found in alphabet %s" 433 % (this_residue, 434 self.alignment._alphabet)) 435 436 pssm_info.append((left_seq[residue_num], 437 score_dict)) 438 439 440 return PSSM(pssm_info)
441
442 - def _get_base_letters(self, letters):
443 """Create a zeroed dictionary with all of the specified letters. 444 """ 445 base_info = {} 446 for letter in letters: 447 base_info[letter] = 0 448 449 return base_info
450
451 - def information_content(self, start = 0, 452 end = None, 453 e_freq_table = None, log_base = 2, 454 chars_to_ignore = []):
455 """Calculate the information content for each residue along an alignment. 456 457 Arguments: 458 o start, end - The starting an ending points to calculate the 459 information content. These points should be relative to the first 460 sequence in the alignment, starting at zero (ie. even if the 'real' 461 first position in the seq is 203 in the initial sequence, for 462 the info content, we need to use zero). This defaults to the entire 463 length of the first sequence. 464 o e_freq_table - A FreqTable object specifying the expected frequencies 465 for each letter in the alphabet we are using (ie. {'G' : 0.4, 466 'C' : 0.4, 'T' : 0.1, 'A' : 0.1}). Gap characters should not be 467 included, since these should not have expected frequencies. 468 o log_base - The base of the logathrim to use in calculating the 469 information content. This defaults to 2 so the info is in bits. 470 o chars_to_ignore - A listing of characterw which should be ignored 471 in calculating the info content. 472 473 Returns: 474 o A number representing the info content for the specified region. 475 476 Please see the Biopython manual for more information on how information 477 content is calculated. 478 """ 479 # if no end was specified, then we default to the end of the sequence 480 if end is None: 481 end = len(self.alignment._records[0].seq) 482 483 if start < 0 or end > len(self.alignment._records[0].seq): 484 raise ValueError \ 485 ("Start (%s) and end (%s) are not in the range %s to %s" 486 % (start, end, 0, len(self.alignment._records[0].seq))) 487 # determine random expected frequencies, if necessary 488 # Iddo Friedberg, 1-JUL-2004: fixed self.alignment._alphabet.alphabet to 489 # self.alignment._alphabet 490 random_expected = None 491 if not e_freq_table: 492 if isinstance(self.alignment._alphabet, 493 Alphabet.ProteinAlphabet): 494 random_expected = Protein20Random 495 elif isinstance(self.alignment._alphabet, Alphabet.RNAAlphabet) or \ 496 isinstance(self.alignment._alphabet, Alphabet.DNAAlphabet): 497 random_expected = Nucleotide4Random 498 # Iddo, 15-FEB-2005: added the following for encoded Alpahbets (e.g. Gapped()) 499 elif isinstance(self.alignment._alphabet, Alphabet.AlphabetEncoder): 500 if isinstance(self.alignment._alphabet.alphabet, Alphabet.ProteinAlphabet): 501 random_expected = Protein20Random 502 elif isinstance(self.alignment._alphabet.alphabet, Alphabet.RNAAlphabet) or \ 503 isinstance(self.alignment._alphabet.alphabet, Alphabet.DNAAlphabet): 504 random_expected = Nucleotide4Random 505 if not random_expected : 506 errstr = "Error in alphabet: not Nucleotide or Protein, " 507 errstr += "supply expected frequencies" 508 raise ValueError, errstr 509 510 # determine all of the letters we have to deal with 511 all_letters = self._get_all_letters() 512 for char in chars_to_ignore: 513 all_letters = string.replace(all_letters, char, '') 514 515 info_content = {} 516 for residue_num in range(start, end): 517 freq_dict = self._get_letter_freqs(residue_num, 518 self.alignment._records, 519 all_letters, chars_to_ignore) 520 # print freq_dict, 521 column_score = self._get_column_info_content(freq_dict, 522 e_freq_table, 523 log_base, 524 random_expected) 525 526 info_content[residue_num] = column_score 527 # sum up the score 528 total_info = 0 529 for column_info in info_content.values(): 530 total_info = total_info + column_info 531 # fill in the ic_vector member: holds IC for each column 532 for i in info_content.keys(): 533 self.ic_vector[i] = info_content[i] 534 return total_info
535
536 - def _get_letter_freqs(self, residue_num, all_records, letters, to_ignore):
537 """Determine the frequency of specific letters in the alignment. 538 539 Arguments: 540 o residue_num - The number of the column we are getting frequencies 541 from. 542 o all_records - All of the SeqRecords in the alignment. 543 o letters - The letters we are interested in getting the frequency 544 for. 545 o to_ignore - Letters we are specifically supposed to ignore. 546 547 This will calculate the frequencies of each of the specified letters 548 in the alignment at the given frequency, and return this as a 549 dictionary where the keys are the letters and the values are the 550 frequencies. 551 """ 552 freq_info = self._get_base_letters(letters) 553 554 total_count = 0 555 # collect the count info into the dictionary for all the records 556 for record in all_records: 557 try: 558 if record.seq[residue_num] not in to_ignore: 559 weight = record.annotations.get('weight',1) 560 freq_info[record.seq[residue_num]] += weight 561 total_count += weight 562 # getting a key error means we've got a problem with the alphabet 563 except KeyError: 564 raise ValueError("Residue %s not found in alphabet %s" 565 % (record.seq[residue_num], 566 self.alignment._alphabet)) 567 568 # now convert the counts into frequencies 569 for letter in freq_info.keys(): 570 freq_info[letter] = freq_info[letter] / total_count 571 572 return freq_info
573
574 - def _get_column_info_content(self, obs_freq, e_freq_table, log_base, 575 random_expected):
576 """Calculate the information content for a column. 577 578 Arguments: 579 o obs_freq - The frequencies observed for each letter in the column. 580 o e_freq_table - An optional argument specifying the expected 581 frequencies for each letter. This is a SubsMat.FreqTable instance. 582 o log_base - The base of the logathrim to use in calculating the 583 info content. 584 """ 585 if e_freq_table: 586 # check the expected freq information to make sure it is good 587 for key in obs_freq.keys(): 588 if (key != self.alignment._alphabet.gap_char and 589 key not in e_freq_table): 590 raise ValueError("Expected frequency letters %s" + 591 " do not match observed %s" 592 % (e_freq_table.keys(), obs_freq.keys() - 593 [self.alignment._alphabet.gap_char])) 594 595 total_info = 0 596 597 for letter in obs_freq.keys(): 598 inner_log = 0. 599 # if we have expected frequencies, modify the log value by them 600 # gap characters do not have expected frequencies, so they 601 # should just be the observed frequency. 602 if letter != self.alignment._alphabet.gap_char: 603 if e_freq_table: 604 inner_log = obs_freq[letter] / e_freq_table[letter] 605 else: 606 inner_log = obs_freq[letter] / random_expected 607 # if the observed frequency is zero, we don't add any info to the 608 # total information content 609 if inner_log > 0: 610 letter_info = (obs_freq[letter] * 611 math.log(inner_log) / math.log(log_base)) 612 total_info = total_info + letter_info 613 return total_info
614
615 - def get_column(self,col):
616 return self.alignment.get_column(col)
617
618 -class PSSM:
619 """Represent a position specific score matrix. 620 621 This class is meant to make it easy to access the info within a PSSM 622 and also make it easy to print out the information in a nice table. 623 624 Let's say you had an alignment like this: 625 GTATC 626 AT--C 627 CTGTC 628 629 The position specific score matrix (when printed) looks like: 630 631 G A T C 632 G 1 1 0 1 633 T 0 0 3 0 634 A 1 1 0 0 635 T 0 0 2 0 636 C 0 0 0 3 637 638 You can access a single element of the PSSM using the following: 639 640 your_pssm[sequence_number][residue_count_name] 641 642 For instance, to get the 'T' residue for the second element in the 643 above alignment you would need to do: 644 645 your_pssm[1]['T'] 646 """
647 - def __init__(self, pssm):
648 """Initialize with pssm data to represent. 649 650 The pssm passed should be a list with the following structure: 651 652 list[0] - The letter of the residue being represented (for instance, 653 from the example above, the first few list[0]s would be GTAT... 654 list[1] - A dictionary with the letter substitutions and counts. 655 """ 656 self.pssm = pssm
657
658 - def __getitem__(self, pos):
659 return self.pssm[pos][1]
660
661 - def __str__(self):
662 out = " " 663 all_residues = self.pssm[0][1].keys() 664 all_residues.sort() 665 666 # first print out the top header 667 for res in all_residues: 668 out = out + " %s" % res 669 out = out + "\n" 670 671 # for each item, write out the substitutions 672 for item in self.pssm: 673 out = out + "%s " % item[0] 674 for res in all_residues: 675 out = out + " %.1f" % item[1][res] 676 677 out = out + "\n" 678 return out
679
680 - def get_residue(self, pos):
681 """Return the residue letter at the specified position. 682 """ 683 return self.pssm[pos][0]
684 685 698 699 if __name__ == "__main__" : 700 print "Quick test" 701 from Bio import AlignIO 702 703 filename = "../../Tests/GFF/multi.fna" 704 format = "fasta" 705 706 #filename = "../../Tests/Phylip/horses.phy" 707 #format = "phylip" 708 709 alignment = AlignIO.read(open(filename), format) 710 for record in alignment : 711 print record.seq.tostring() 712 print "="*alignment.get_alignment_length() 713 714 summary = SummaryInfo(alignment) 715 consensus = summary.dumb_consensus(ambiguous="N") 716 print consensus 717 consensus = summary.gap_consensus(ambiguous="N") 718 print consensus 719 print 720 print summary.pos_specific_score_matrix(chars_to_ignore=['-'], 721 axis_seq=consensus) 722 print 723 print summary.information_content() 724 print 725 print "Done" 726