1
2
3 from __future__ import division
4
5 __version__ = "$Revision: 1.10 $"
6
7 import commands
8 import itertools
9 import os
10 import re
11
12 from Bio import Wise
13
14 _SCORE_MATCH = 4
15 _SCORE_MISMATCH = -1
16 _SCORE_GAP_START = -5
17 _SCORE_GAP_EXTENSION = -1
18
19 _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"]
20
29
30 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
33
34 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
39
41 alb = file(filename)
42
43 start_line = None
44 end_line = None
45
46 for line in alb:
47 if line.startswith("["):
48 if not start_line:
49 start_line = line
50 else:
51 end_line = line
52
53 if end_line is None:
54 return [(0, 0), (0, 0)]
55
56 return zip(*map(_alb_line2coords, [start_line, end_line]))
57
58 -def _any(seq, pred=bool):
59 "Returns True if pred(x) is True at least one element in the iterable"
60 return True in itertools.imap(pred, seq)
61
63 """
64 Calculate statistics from an ALB report
65 """
66 - def __init__(self, filename, match, mismatch, gap, extension):
67 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename)
68 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename)
69 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename)
70
71 if gap == extension:
72 self.extensions = 0
73 else:
74 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename)
75
76 self.score = (match*self.matches +
77 mismatch*self.mismatches +
78 gap*self.gaps +
79 extension*self.extensions)
80
81 if _any([self.matches, self.mismatches, self.gaps, self.extensions]):
82 self.coords = _get_coords(filename)
83 else:
84 self.coords = [(0, 0), (0,0)]
85
87 return self.matches/(self.matches+self.mismatches)
88
89 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"
90
92 return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
93
95 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension)
96 temp_file = Wise.align(cmdline, pair, **keywds)
97 try:
98 return Statistics(temp_file.name, match, mismatch, gap, extension)
99 except AttributeError:
100 try:
101 keywds['dry_run']
102 return None
103 except KeyError:
104 raise
105
107 import sys
108 stats = align(sys.argv[1:3])
109 print "\n".join(["%s: %s" % (attr, getattr(stats, attr))
110 for attr in
111 ("matches", "mismatches", "gaps", "extensions")])
112 print "identity_fraction: %s" % stats.identity_fraction()
113 print "coords: %s" % stats.coords
114
115 -def _test(*args, **keywds):
116 import doctest, sys
117 doctest.testmod(sys.modules[__name__], *args, **keywds)
118
119 if __name__ == "__main__":
120 if __debug__:
121 _test()
122 main()
123