1
2
3
4
5
6
7
8
9
10
11
12
13
14 from __future__ import division
15
16 __version__ = "$Revision: 1.11 $"
17
18 import commands
19 import itertools
20 import os
21 import re
22
23 from Bio import Wise
24
25 _SCORE_MATCH = 4
26 _SCORE_MISMATCH = -1
27 _SCORE_GAP_START = -5
28 _SCORE_GAP_EXTENSION = -1
29
30 _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"]
31
40
41 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
44
45 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
50
52 alb = file(filename)
53
54 start_line = None
55 end_line = None
56
57 for line in alb:
58 if line.startswith("["):
59 if not start_line:
60 start_line = line
61 else:
62 end_line = line
63
64 if end_line is None:
65 return [(0, 0), (0, 0)]
66
67 return zip(*map(_alb_line2coords, [start_line, end_line]))
68
69 -def _any(seq, pred=bool):
70 "Returns True if pred(x) is True at least one element in the iterable"
71 return True in itertools.imap(pred, seq)
72
74 """
75 Calculate statistics from an ALB report
76 """
77 - def __init__(self, filename, match, mismatch, gap, extension):
78 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename)
79 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename)
80 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename)
81
82 if gap == extension:
83 self.extensions = 0
84 else:
85 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename)
86
87 self.score = (match*self.matches +
88 mismatch*self.mismatches +
89 gap*self.gaps +
90 extension*self.extensions)
91
92 if _any([self.matches, self.mismatches, self.gaps, self.extensions]):
93 self.coords = _get_coords(filename)
94 else:
95 self.coords = [(0, 0), (0,0)]
96
98 return self.matches/(self.matches+self.mismatches)
99
100 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"
101
103 return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
104
106 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension)
107 temp_file = Wise.align(cmdline, pair, **keywds)
108 try:
109 return Statistics(temp_file.name, match, mismatch, gap, extension)
110 except AttributeError:
111 try:
112 keywds['dry_run']
113 return None
114 except KeyError:
115 raise
116
118 import sys
119 stats = align(sys.argv[1:3])
120 print "\n".join(["%s: %s" % (attr, getattr(stats, attr))
121 for attr in
122 ("matches", "mismatches", "gaps", "extensions")])
123 print "identity_fraction: %s" % stats.identity_fraction()
124 print "coords: %s" % stats.coords
125
126 -def _test(*args, **keywds):
127 import doctest, sys
128 doctest.testmod(sys.modules[__name__], *args, **keywds)
129
130 if __name__ == "__main__":
131 if __debug__:
132 _test()
133 main()
134