Package Bio :: Package Wise :: Module dnal
[hide private]
[frames] | no frames]

Source Code for Module Bio.Wise.dnal

  1  #!/usr/bin/env python 
  2   
  3  from __future__ import division 
  4   
  5  __version__ = "$Revision: 1.10 $" 
  6   
  7  import commands 
  8  import itertools 
  9  import os 
 10  import re 
 11   
 12  from Bio import Wise 
 13   
 14  _SCORE_MATCH = 4 
 15  _SCORE_MISMATCH = -1 
 16  _SCORE_GAP_START = -5 
 17  _SCORE_GAP_EXTENSION = -1 
 18   
 19  _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"] 
 20   
21 -def _build_dnal_cmdline(match, mismatch, gap, extension):
22 res = _CMDLINE_DNAL[:] 23 res.extend(["-match", str(match)]) 24 res.extend(["-mis", str(mismatch)]) 25 res.extend(["-gap", str(-gap)]) # negative: convert score to penalty 26 res.extend(["-ext", str(-extension)]) # negative: convert score to penalty 27 28 return res
29 30 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
31 -def _fgrep_count(pattern, file):
32 return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
33 34 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
35 -def _alb_line2coords(line):
36 return tuple([int(coord)+1 # one-based -> zero-based 37 for coord 38 in _re_alb_line2coords.match(line).groups()])
39
40 -def _get_coords(filename):
41 alb = file(filename) 42 43 start_line = None 44 end_line = None 45 46 for line in alb: 47 if line.startswith("["): 48 if not start_line: 49 start_line = line # rstrip not needed 50 else: 51 end_line = line 52 53 if end_line is None: # sequence is too short 54 return [(0, 0), (0, 0)] 55 56 return zip(*map(_alb_line2coords, [start_line, end_line])) # returns [(start0, end0), (start1, end1)]
57
58 -def _any(seq, pred=bool):
59 "Returns True if pred(x) is True at least one element in the iterable" 60 return True in itertools.imap(pred, seq)
61
62 -class Statistics(object):
63 """ 64 Calculate statistics from an ALB report 65 """
66 - def __init__(self, filename, match, mismatch, gap, extension):
67 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename) 68 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename) 69 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename) 70 71 if gap == extension: 72 self.extensions = 0 73 else: 74 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename) 75 76 self.score = (match*self.matches + 77 mismatch*self.mismatches + 78 gap*self.gaps + 79 extension*self.extensions) 80 81 if _any([self.matches, self.mismatches, self.gaps, self.extensions]): 82 self.coords = _get_coords(filename) 83 else: 84 self.coords = [(0, 0), (0,0)]
85
86 - def identity_fraction(self):
87 return self.matches/(self.matches+self.mismatches)
88 89 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions" 90
91 - def __str__(self):
92 return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
93
94 -def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
95 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension) 96 temp_file = Wise.align(cmdline, pair, **keywds) 97 try: 98 return Statistics(temp_file.name, match, mismatch, gap, extension) 99 except AttributeError: 100 try: 101 keywds['dry_run'] 102 return None 103 except KeyError: 104 raise
105
106 -def main():
107 import sys 108 stats = align(sys.argv[1:3]) 109 print "\n".join(["%s: %s" % (attr, getattr(stats, attr)) 110 for attr in 111 ("matches", "mismatches", "gaps", "extensions")]) 112 print "identity_fraction: %s" % stats.identity_fraction() 113 print "coords: %s" % stats.coords
114
115 -def _test(*args, **keywds):
116 import doctest, sys 117 doctest.testmod(sys.modules[__name__], *args, **keywds)
118 119 if __name__ == "__main__": 120 if __debug__: 121 _test() 122 main() 123