1 """Martel expression for the hmmpfam database search program in hmmer.
2
3 This has been tested with version 2.2g. I should also make it work with
4 2.1.1 output.
5
XXX This isn't completely finished and doesn't do everything quite right.
The two main problems are that it isn't well tested against a wide variety
of outputs, and that the family line is not parsed into its respective parts
(see the multitude of comments on this below).
10 """
11
12 import warnings
# Emit the deprecation notice as soon as the module is imported.  The
# message text is unchanged; it is only split across adjacent string
# literals so that it fits on readable lines.
warnings.warn(
    "Bio.expressions was deprecated, as it does not work with recent "
    "versions of mxTextTools. If you want to continue to use this module, "
    "please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this module "
    "from Biopython",
    DeprecationWarning,
)
14
15
16
17 from Martel import *
18 from Martel import RecordReader
19 from Bio import Std
20
21
22
# First header line: the literal program name "hmmpfam", tagged as the
# application name, followed by whatever else is on that line.
program_description = (
    Std.application_name(Str("hmmpfam"))
    + ToEol()
)
25
26
# Version line: the literal "HMMER " and then a version number, tagged as
# the application version.  Two spellings are accepted:
#   digit "." digit word-char   (e.g. "2.2g")
#   digit "." digit "." digit   (e.g. "2.1.1")
# Anything after the version up to the end of the line is consumed.
program_version = (
    Str("HMMER ")
    + Std.application_version(Re(r"\d\.\d\w") | Re(r"\d\.\d\.\d"))
    + ToEol()
)
31
32
33
34
# Two complete lines are consumed here without tagging any of their
# content.  (The name shadows the interactive ``copyright`` helper, but it
# is kept because other code may refer to it by this name.)
copyright = ToEol() + ToEol()
37
38
39
40
41
42
# The file-information part of the header, assembled piece by piece in the
# same left-to-right order as a single chained "+" expression.
#
# One line is skipped, then the labelled HMM file line is read ...
files = ToEol() + Str("HMM file:") + Spaces()
files = files + Std.database_name(UntilEol()) + AnyEol()
# ... followed by the labelled sequence file line ...
files = files + Str("Sequence file:") + Spaces()
files = files + Group("inputfile_name", UntilEol()) + AnyEol()
# ... and finally one more line is skipped.
files = files + ToEol()
49
# The complete search header: description, version, copyright and file
# sections in that order, wrapped as a Std search header.
header = Std.search_header(
    program_description + program_version + copyright + files
)
52
53
54
55
56
57
58
59
# Per-query information at the start of each record, assembled step by
# step in the same left-to-right order as one chained "+" expression.
#
# One line is skipped, then three labelled lines follow; the value after
# each label (up to, but not including, the end of line) is tagged.
sequence_info = ToEol() + Str("Query sequence:") + Spaces()
sequence_info = sequence_info + Group("query_name", UntilEol()) + AnyEol()
sequence_info = sequence_info + Str("Accession:") + Spaces()
sequence_info = (sequence_info +
                 Group("query_accession", UntilEol()) + AnyEol())
sequence_info = sequence_info + Str("Description:") + Spaces()
sequence_info = (sequence_info +
                 Std.query_description(UntilEol()) + AnyEol())
67
68
69
70
# A model name: one or more word characters and/or hyphens.  Shared by the
# family table, the domain table and the alignment headers.
model_name = Re(r"[\w-]+")
72
73
74
75
76
77
# Heading of the per-family score table: one skipped line, then three
# lines that must begin with "Scores", "Model" and "-----" respectively
# (the remainder of each of those lines is ignored).
family_header = (
    ToEol()
    + Str("Scores") + ToEol()
    + Str("Model") + ToEol()
    + Str("-----") + ToEol()
)
82
83
84
85
86
87
# Placeholder row used in place of table/alignment content when nothing
# was found: leading whitespace, the fixed marker text, end of line.
no_hit_line = (
    Spaces()
    + Str("[no hits above thresholds]")
    + AnyEol()
)
89
# One row of the per-family score table.  Only the leading model name is
# tagged on its own ("family_model"); after the separating whitespace the
# rest of the row is captured as a single "family_information" field
# instead of being split into individual columns.
family_hit_line = (
    Group("family_model", model_name)
    + Spaces()
    + ToEol("family_information")
)
106
107
108
109
110
111
112
113
114
115
116
117
# Heading of the per-domain table: one skipped line, the exact line
# "Parsed for domains:", then two lines beginning with "Model" and
# "-----" (the remainder of those two lines is ignored).
domain_header = (
    ToEol()
    + Str("Parsed for domains:") + AnyEol()
    + Str("Model") + ToEol()
    + Str("-----") + ToEol()
)
122
123
124
125
126
127
128
# Two-character boundary marker used in the domain table.
# Second character: "." or "]".
symbol_reverse = Str(".") | Str("]")
# First character: "." or "[".
symbol_forward = Str(".") | Str("[")
# The full marker is the first character followed by the second.
match_symbols = symbol_forward + symbol_reverse
132
# One row of the per-domain table, assembled column by column in the same
# left-to-right order as a single chained "+" expression.
#
# Model name and the "N/M" domain counter.
domain_hit_line = Group("domain_model", model_name) + Spaces()
domain_hit_line = (domain_hit_line +
                   Group("domain_domain",
                         Integer() + Str("/") + Integer()) +
                   Spaces())
# Two integers for the sequence columns, then their boundary marker.
# NOTE(review): the second tag is "domain_seq-r" whereas the HMM columns
# use "-f"/"-t"; this may be a typo for "domain_seq-t", but the tag is left
# untouched because downstream consumers may depend on it -- confirm.
domain_hit_line = domain_hit_line + Integer("domain_seq-f") + Spaces()
domain_hit_line = domain_hit_line + Integer("domain_seq-r") + Spaces()
domain_hit_line = (domain_hit_line +
                   Group("domain_seq_symbols", match_symbols) + Spaces())
# Two integers for the HMM columns, then their boundary marker.
domain_hit_line = domain_hit_line + Integer("domain_hmm-f") + Spaces()
domain_hit_line = domain_hit_line + Integer("domain_hmm-t") + Spaces()
domain_hit_line = (domain_hit_line +
                   Group("domain_hmm_symbols", match_symbols) + Spaces())
# Score and E-value close the row.
domain_hit_line = domain_hit_line + Float("domain_score") + Spaces()
domain_hit_line = domain_hit_line + Float("domain_evalue") + AnyEol()
144
145
146
# Heading of the alignment section: one skipped line followed by the exact
# line "Alignments of top-scoring domains:".
alignment_header = (
    ToEol()
    + Str("Alignments of top-scoring domains:")
    + AnyEol()
)
150
151
152
153
154
155
156
157
158
159
# Title line of a single domain alignment, of the form
#   <model>: domain <i> of <n>, from <start> to <end>: score <s>, E = <e>
# It is assembled in stages, in the same left-to-right order as a single
# chained "+" expression; every variable part gets its own tag.
domain_align_header = Group("dalign_name", model_name) + Str(": domain ")
domain_align_header = (domain_align_header +
                       Integer("dalign_of_domain") +
                       Str(" of ") +
                       Integer("dalign_total_domain"))
domain_align_header = (domain_align_header +
                       Str(", from ") +
                       Integer("dalign_domain_start") +
                       Str(" to ") +
                       Integer("dalign_domain_end"))
domain_align_header = (domain_align_header +
                       Str(": score ") +
                       Float("dalign_score") +
                       Str(", E = ") +
                       Float("dalign_evalue"))
domain_align_header = domain_align_header + AnyEol()
174
175
176
177
# An "RF " annotation row: leading whitespace, the literal "RF " and then
# the rest of the line, none of it tagged.  Its use is optional inside an
# alignment block.
rf_line = (
    Spaces()
    + Str("RF ")
    + ToEol()
)
179
# First row of an alignment triple: leading whitespace is skipped and the
# remainder of the line is tagged "dalign_match_top".
domain_align_top = (
    Spaces()
    + UntilEol("dalign_match_top")
    + AnyEol()
)
182
# Second row of an alignment triple: leading whitespace is skipped and the
# remainder of the line is tagged "dalign_match_middle".
domain_align_middle = (
    Spaces()
    + UntilEol("dalign_match_middle")
    + AnyEol()
)
185
# Third row of an alignment triple (the query row), plus the line after it.
#
# After leading whitespace the query name is read up to the next space and
# tagged "dalign_query_name".  The rest of the row takes one of two forms:
#   * <start> <residues> <end> -- the integers are tagged
#     "dalign_query_start" / "dalign_query_end" and the run of word
#     characters and hyphens between them "dalign_match_bottom";
#   * a row beginning "- ", whose remainder is consumed untagged.
# One further line is then consumed by the trailing ToEol() -- presumably
# the blank separator between alignment blocks; confirm against real output.
#
# The residue pattern is written as a raw string: as a plain literal,
# "\w" and "\-" are invalid escape sequences that trigger a
# DeprecationWarning/SyntaxWarning on newer Python versions.  The pattern
# text handed to Re() is unchanged.
domain_align_bottom = (Spaces() +
                       ToSep("dalign_query_name", " ") + Spaces() +
                       Alt(Integer("dalign_query_start") + Spaces() +
                           Group("dalign_match_bottom",
                                 Re(r"[\w\-]+")) + Spaces() +
                           Integer("dalign_query_end") +
                           Spaces() + AnyEol(),
                           Str("- ") + ToEol()) +
                       ToEol())
199
# A full domain alignment: its title line followed by one or more blocks,
# each consisting of an optional RF row and the top / middle / bottom rows.
domain_alignment = (
    domain_align_header
    + Rep1(
        Opt(rf_line)
        + domain_align_top
        + domain_align_middle
        + domain_align_bottom
    )
)
205
206
# Record terminator: a line consisting of "//".
record_end = (
    Str("//")
    + AnyEol()
)
208
209
# One result record, assembled section by section in the same
# left-to-right order as a single chained "+" expression.  Each of the
# three sections is a heading followed by either the "no hits" placeholder
# row or one or more content entries.
record = sequence_info + family_header
record = record + (no_hit_line | Rep1(family_hit_line))
record = record + domain_header
record = record + (no_hit_line | Rep1(domain_hit_line))
record = record + alignment_header
record = record + (no_hit_line | Rep1(domain_alignment))
record = record + record_end
# Allow the whole sequence to repeat, and wrap it as a Std record.
record = Std.record(Rep(record))
219
# Top-level format definition tying the pieces together.
#   * header: split off by counting 8 lines, which matches what the
#     ``header`` expression consumes (1 description + 1 version +
#     2 copyright + 4 file-section lines);
#   * records: each one ends at a line reading "//";
#   * footer: none (all three footer arguments are None).
# The name shadows the ``format`` builtin but is kept as is, since other
# code may look the expression up under this name.
format = HeaderFooter(
    "hmmpfam", {},
    header, RecordReader.CountLines, (8,),
    record, RecordReader.EndsWith, ("//\n",),
    None, None, None,
)
224