1
2
3
4
5
6 """Martel based parser to read MetaTool output files.
7
This is a huge regular expression for MetaTool 3.5 output, built using
the 'regular expressions on steroids' capabilities of Martel.
10
11 http://www2.bioinf.mdc-berlin.de/metabolic/metatool/
12
13
14 This helps us have endlines be consistent across platforms.
15
16 """
17
18 import string
19
20
21 from Martel import Opt, Alt, Digits, Integer, Group, Str, MaxRepeat
22 from Martel import Any, AnyBut, RepN, Rep, Rep1, ToEol, AnyEol
23 from Martel import Expression
24 from Martel import RecordReader
25
# Primitive character sets and whitespace helpers shared by the patterns
# defined in the rest of this module.
blank = " "
tab = "\t"

# Between 1 and 80 spaces/tabs: a mandatory separator between fields.
blank_space = MaxRepeat(Any(blank + tab), 1, 80)

# Any number (including zero) of spaces/tabs: optional padding.
optional_blank_space = Rep(Any(blank + tab))

# Space, tab, line feed (10) and carriage return (13); passed to AnyBut()
# elsewhere in the module to match a run of non-whitespace characters.
white_space = " \t\n\r"

# A line that contains nothing but optional padding.
blank_line = optional_blank_space + AnyEol()

lower_case_letter = Group("lower_case_letter",
                          Any("abcdefghijklmnopqrstuvwxyz"))
digits = "0123456789"
34
def _numbered_entry(group_name):
    """Return a Group for '<padding><digits><padding>:<rest of line>'."""
    return Group(
        group_name,
        optional_blank_space + Digits() + optional_blank_space + Str(":") +
        ToEol())


# Numbered entry lines.  Both patterns have the same shape and differ only
# in the group name they are reported under.
enzyme = _numbered_entry("enzyme")
reaction = _numbered_entry("reaction")

# The literal placeholder line "- not found -" (followed by the rest of the
# line); used below as the alternative to a list of entries.
not_found_line = Group(
    "not_found_line",
    optional_blank_space + Str("- not found -") + ToEol())
41
# "enzymes" section: a heading line, optional blank lines, then either one
# or more enzyme entries or the "- not found -" placeholder.
enzymes_header = Group(
    "enzymes_header",
    optional_blank_space + Str("enzymes") + ToEol())
enzymes_list = Group("enzymes_list", Alt(Rep1(enzyme), not_found_line))
enzymes_block = Group(
    "enzymes_block",
    enzymes_header + Rep(blank_line) + enzymes_list)
48
# "overall reaction" section: a heading line, optional blank lines, then
# either one or more reaction entries or the "- not found -" placeholder.
reactions_header = Group(
    "reactions_header",
    optional_blank_space + Str("overall reaction") + ToEol())
reactions_list = Group("reactions_list", Alt(Rep1(reaction), not_found_line))
reactions_block = Group(
    "reactions_block",
    reactions_header + Rep(blank_line) + reactions_list)
55
# Version number: "<major>.<minor>" followed by an optional single
# lower-case revision letter.
rev = Group("rev", Opt(lower_case_letter))
version = Group(
    "version",
    Digits("version_major") + Any(".") + Digits("version_minor") + rev)

# Banner line: "METATOOL OUTPUT", the word "Version", then the version
# number; anything after it on the line is consumed by ToEol().
metatool_tag = Str("METATOOL OUTPUT")
metatool_line = Group(
    "metatool_line",
    metatool_tag + blank_space + Str("Version") + blank_space + version +
    ToEol())
62
# "INPUT FILE:" line; the remainder of the line is captured under the
# name "input_file_name".
input_file_tag = Str("INPUT FILE:")
input_file_line = Group(
    "input_file_line",
    input_file_tag + blank_space + ToEol("input_file_name"))

# "INTERNAL METABOLITES:" line carrying the count "num_int_metabolites".
metabolite_count_tag = Str("INTERNAL METABOLITES:")
metabolite_count_line = Group(
    "metabolite_count_line",
    metabolite_count_tag + blank_space + Digits("num_int_metabolites") +
    ToEol())

# "REACTIONS:" line carrying the count "num_reactions".
reaction_count_tag = Str("REACTIONS:")
reaction_count_line = Group(
    "reaction_count_line",
    reaction_count_tag + blank_space + Digits("num_reactions") + ToEol())
74
# Metabolite listing.  Each row is: a number, the metabolite type ("int" or
# "external"), and a whitespace-free token; the rest of the line is skipped.
type_metabolite = Group("type_metabolite", Alt(Str("int"), Str("external")))
metabolite_info = Group(
    "metabolite_info",
    optional_blank_space + Digits() + blank_space + type_metabolite +
    blank_space + Rep1(AnyBut(white_space)))
metabolite_line = Group("metabolite_line", metabolite_info + ToEol())

# Closing summary row: "<number> metabolites".
metabolites_summary = Group(
    "metabolites_summary",
    optional_blank_space + Digits() + blank_space + Str("metabolites") +
    ToEol())

# One or more metabolite rows, the summary row, then optional blank lines.
metabolites_block = Group(
    "metabolites_block",
    Rep1(metabolite_line) + metabolites_summary + Rep(blank_line))
86
# Graph structure table: an "edges / frequency of nodes" heading followed by
# one or more rows of two numbers ("edge_count" and "num_nodes").
graph_structure_heading = Group(
    "graph_structure_heading",
    optional_blank_space + Str("edges") + blank_space +
    Str("frequency of nodes") + ToEol())
graph_structure_line = Group(
    "graph_structure_line",
    optional_blank_space + Digits("edge_count") + blank_space +
    Digits("num_nodes") + ToEol())
graph_structure_block = Group(
    "graph_structure_block",
    graph_structure_heading + Rep(blank_line) +
    Rep1(graph_structure_line) + Rep(blank_line))
94
# A line of the form "<number> : <token> + <token> + ... = <rest of line>".
# Tokens are runs of non-whitespace characters and each "+" must be
# surrounded by blank space.
sum_is_constant_line = Group(
    "sum_is_constant_line",
    optional_blank_space + Digits() + optional_blank_space + Any(":") +
    optional_blank_space + Rep1(AnyBut(white_space)) +
    Rep(blank_space + Any("+") + blank_space + Rep1(AnyBut(white_space))) +
    optional_blank_space + Str("=") + ToEol())

# Zero or more such lines.
sum_is_constant_block = Group("sum_is_constant_block",
                              Rep(sum_is_constant_line))
101
102
# Section headings.  Every section is introduced by a fixed upper-case tag;
# the "*_line" pattern is that tag plus whatever follows it on the line.

stoichiometric_tag = Group("stoichiometric_tag", Str("STOICHIOMETRIC MATRIX"))
stoichiometric_line = Group("stoichiometric_line",
                            stoichiometric_tag + ToEol())

not_balanced_tag = Group("not_balanced_tag",
                         Str("NOT BALANCED INTERNAL METABOLITES"))
not_balanced_line = Group("not_balanced_line", not_balanced_tag + ToEol())

subsets_tag = Group("subsets_tag", Str("SUBSETS OF REACTIONS"))
subsets_line = Group("subsets_line", subsets_tag + ToEol())

# The "REDUCED SYSTEM" heading also carries two numbers on the same line.
# The first is captured as "branch_points"; the second is matched but not
# given a name.
reduced_system_tag = Group("reduced_system_tag", Str("REDUCED SYSTEM"))
reduced_system_line = Group(
    "reduced_system_line",
    reduced_system_tag +
    Rep1(AnyBut(digits)) + Digits("branch_points") +
    Rep1(AnyBut(digits)) + Digits() + ToEol())

kernel_tag = Group("kernel_tag", Str("KERNEL"))
kernel_line = Group("kernel_line", kernel_tag + ToEol())

convex_basis_tag = Group("convex_basis_tag", Str("CONVEX BASIS"))
convex_basis_line = Group("convex_basis_line", convex_basis_tag + ToEol())

conservation_relations_tag = Group("conservation_relations_tag",
                                   Str("CONSERVATION RELATIONS"))
conservation_relations_line = Group("conservation_relations_line",
                                    conservation_relations_tag + ToEol())

elementary_modes_tag = Group("elementary_modes_tag", Str("ELEMENTARY MODES"))
elementary_modes_line = Group("elementary_modes_line",
                              elementary_modes_tag + ToEol())
136
# Matrix header: "matrix dimension r<rows> x c<cols>".
num_rows = Group("num_rows", Digits())
num_cols = Group("num_cols", Digits())
matrix_header = Group(
    "matrix_header",
    optional_blank_space + Str("matrix dimension") + blank_space +
    Any("r") + num_rows + blank_space + Any("x") + blank_space +
    Any("c") + num_cols + optional_blank_space + AnyEol())

# Matrix body.  The repeat bounds are passed as the group names "num_cols"
# and "num_rows" rather than as integers.
# NOTE(review): presumably Martel resolves these from the values matched in
# the header above -- confirm against the Martel MaxRepeat documentation.
matrix_element = Group("matrix_element", Integer())
matrix_row = Group(
    "matrix_row",
    MaxRepeat(optional_blank_space + matrix_element,
              "num_cols", "num_cols") + ToEol())
matrix = Group("matrix", MaxRepeat(matrix_row, "num_rows", "num_rows"))

matrix_block = Group("matrix_block", matrix_header + matrix)

# A single row of "num_cols" integers, each preceded by mandatory blank
# space (unlike matrix_row, where the leading padding is optional).
irreversible_vector = Group(
    "irreversible_vector",
    MaxRepeat(blank_space + matrix_element, "num_cols", "num_cols") +
    ToEol())
152
# Unbalanced metabolites listed on one line.  A name is one token, or two
# tokens joined by exactly one space ("little gap").  Names are separated
# from each other by a tab or by 2 to 80 spaces ("big gap").
little_gap = Str(" ")
big_gap = Alt(Str("\t"), MaxRepeat(Str(" "), 2, 80))
unbalanced_metabolite = Group(
    "unbalanced_metabolite",
    Rep1(AnyBut(white_space)) +
    Opt(little_gap + Rep1(AnyBut(white_space))))
not_balanced_data = Group(
    "not_balanced_data",
    optional_blank_space + unbalanced_metabolite +
    Rep(big_gap + unbalanced_metabolite) + ToEol())
160
def _metabolite_role_row(group_name):
    """Return a Group for one row of the metabolite-role table.

    A row is: a whitespace-free token, three numbers (each followed by
    blank space), one or more of the characters "i"/"r", then the rest of
    the line.
    """
    return Group(
        group_name,
        optional_blank_space + Rep1(AnyBut(white_space)) + blank_space +
        RepN(Digits() + blank_space, 3) + Rep1(Any("ir")) + ToEol())


# Heading line starting with "->" and the column-title line
# "met  cons  built  reactions" that follows it.
metabolite_roles_heading = Group("metabolite_roles_heading",
                                 Str("->") + ToEol())
metabolite_role_cols = Group(
    "metabolite_role_cols",
    optional_blank_space + Str("met") + blank_space + Str("cons") +
    blank_space + Str("built") +
    blank_space + Str("reactions") + ToEol())

# The branch and non-branch tables use the same row pattern; only the
# group names differ.
branch_metabolite = _metabolite_role_row("branch_metabolite")
non_branch_metabolite = _metabolite_role_row("non_branch_metabolite")

branch_metabolite_block = Group(
    "branch_metabolite_block",
    metabolite_roles_heading + metabolite_role_cols +
    Rep(branch_metabolite))
non_branch_metabolite_block = Group(
    "non_branch_metabolite_block",
    metabolite_roles_heading + metabolite_role_cols +
    Rep(non_branch_metabolite))
179
def _skip_lines_until(*stop_tags):
    """Return Rep(<Assert(tag, 1) for each stop tag> + ToEol()).

    NOTE(review): Expression.Assert(tag, 1) is presumably an inverted
    (negative) lookahead, so the result swallows lines until one begins
    with any of the stop tags -- confirm against Martel's documentation.
    """
    guard = Expression.Assert(stop_tags[0], 1)
    for tag in stop_tags[1:]:
        guard = guard + Expression.Assert(tag, 1)
    return Rep(guard + ToEol())


# Trailers: each consumes whatever is left of its section, up to the tag
# that opens the following section.
# NOTE(review): the first group name is spelled "end_stochiometric" (sic);
# it is a runtime tag name, so the spelling is deliberately left unchanged.
end_stoichiometric = Group("end_stochiometric",
                           _skip_lines_until(not_balanced_tag, kernel_tag))
end_not_balanced = Group("end_not_balanced", _skip_lines_until(kernel_tag))
end_kernel = Group("end_kernel", _skip_lines_until(subsets_tag))
end_subsets = Group("end_subsets", _skip_lines_until(reduced_system_tag))
end_reduced_system = Group("end_reduced_system",
                           _skip_lines_until(convex_basis_tag))
end_convex_basis = Group("end_convex_basis",
                         _skip_lines_until(conservation_relations_tag))
end_conservation_relations = Group("end_conservation_relations",
                                   _skip_lines_until(elementary_modes_tag))

# The last section has no following tag, so its trailer takes every
# remaining line.
end_elementary_modes = Group("end_elementary_modes", Rep(ToEol()))
196
197
# Section-level blocks: each heading line combined with the body patterns
# that follow it.

input_file_block = Group("input_file_block",
                         input_file_line + Rep(blank_line))

# The banner must be followed by at least one blank line (Rep1).
metatool_block = Group("metatool_block", metatool_line + Rep1(blank_line))

metabolite_count_block = Group("metabolite_count_block",
                               metabolite_count_line + Rep(blank_line))

# The reaction count is followed by the metabolite listing and the graph
# structure table.
reaction_count_block = Group(
    "reaction_count_block",
    reaction_count_line + Rep(blank_line) +
    metabolites_block + Rep(blank_line) +
    graph_structure_block + Rep(blank_line))

stoichiometric_block = Group(
    "stoichiometric_block",
    stoichiometric_line + Rep(blank_line) +
    matrix_block + ToEol() + irreversible_vector +
    end_stoichiometric)

not_balanced_block = Group(
    "not_balanced_block",
    not_balanced_line + Rep(blank_line) +
    not_balanced_data + Rep(blank_line))

kernel_block = Group(
    "kernel_block",
    kernel_line + Rep(blank_line) +
    matrix_block + ToEol() + Rep(blank_line) +
    enzymes_block + Rep(blank_line) +
    reactions_block + end_kernel)

subsets_block = Group(
    "subsets_block",
    subsets_line + Rep(blank_line) +
    matrix_block + ToEol() + Rep(blank_line) +
    enzymes_block + Rep(blank_line) +
    reactions_block + end_subsets)

reduced_system_block = Group(
    "reduced_system_block",
    reduced_system_line + Rep(blank_line) +
    matrix_block + ToEol() + irreversible_vector + Rep(blank_line) +
    branch_metabolite_block + Rep(blank_line) +
    non_branch_metabolite_block + end_reduced_system)

# In the convex-basis and elementary-modes sections the extra line after
# the matrix is optional (Opt), unlike in the kernel and subsets sections.
convex_basis_block = Group(
    "convex_basis_block",
    convex_basis_line + Rep(blank_line) +
    matrix_block + Opt(ToEol()) + Rep(blank_line) +
    enzymes_block + Rep(blank_line) +
    reactions_block + end_convex_basis)

conservation_relations_block = Group(
    "conservation_relations_block",
    conservation_relations_line + Rep(blank_line) +
    matrix_block + Rep(blank_line) +
    sum_is_constant_block +
    end_conservation_relations)

elementary_modes_block = Group(
    "elementary_modes_block",
    elementary_modes_line + Rep(blank_line) +
    matrix_block + Opt(ToEol()) + Rep(blank_line) +
    enzymes_block + Rep(blank_line) +
    reactions_block + end_elementary_modes)
232
233
# Top-level pattern: every section block in the order it is expected to
# appear.  Only the "not balanced" section is optional.
metatool_record = Group(
    "metatool_record",
    metatool_block +
    input_file_block +
    metabolite_count_block +
    reaction_count_block +
    stoichiometric_block +
    Opt(not_balanced_block) +
    kernel_block +
    subsets_block +
    reduced_system_block +
    convex_basis_block +
    conservation_relations_block +
    elementary_modes_block)
239