5 """Parser for output from MetaTool, a program which defines metabolic routes
6 within networks.
7
8 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&
9 list_uids=10222413&dopt=Abstract
10 """

import string
import Matrix

from xml.sax import handler

import Martel
from Martel import RecordReader

from Bio.ParserSupport import EventGenerator
from Bio import File
import metatool_format
import Record

29 """Iterator interface to move over a file of MetaTool entries one at a time.
30 """
31 - def __init__(self, handle, parser = None):
32 """Initialize the iterator.
33
34 Arguments:
35 o handle - A handle with Kabat entries to iterate through.
36 o parser - An optional parser to pass the entries through before
37 returning them. If None, then the raw entry will be returned.
38 """
39 self._reader = RecordReader.StartsWith(handle, "METATOOL")
40
41 self._parser = parser
42
44 """Return the next MetaTool record from the handle.
45
46 Will return None if we ran out of records.
47 """
48 data = self._reader.next()
49
50 if self._parser is not None:
51 if data:
52 dumpfile = open( 'dump', 'w' )
53 dumpfile.write( data )
54 dumpfile.close()
55 return self._parser.parse(File.StringHandle(data))
56
57 return data
58
60 return iter(self.next, None)
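
# Sketch of iterating raw entries (parser omitted), in which case each item is
# the unparsed text of one METATOOL record; "metatool.out" is again only a
# placeholder file name:
#
#     for entry_text in Iterator(open("metatool.out")):
#         ...  # entry_text is the raw record string starting with "METATOOL"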


class _RecordConsumer:
    """Create a MetaTool Record object from scanner generated information.
    """

        self.state = "reaction_count_state"

    def stoichiometric_tag(self, content):
        self.state = "stoichiometry_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def kernel_tag(self, content):
        self.state = "kernel_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def subsets_tag(self, content):
        self.state = "subsets_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def reduced_system_tag(self, content):
        self.state = "reduced_system_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def convex_basis_tag(self, content):
        self.state = "convex_basis_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def conservation_relations_tag(self, content):
        self.state = "conservation_relations_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def elementary_modes_tag(self, content):
        self.state = "elementary_modes_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def irreversible_vector(self, content):
        self._irreversible_vector = content[ 0 ].split()

    def end_conservation_relations(self, content):
        if( self._vectors != [] ):
            self.data.conservation_relations.matrix = Matrix.Matrix( self._vectors )
            self.data.conservation_relations.enzymes = []
            for enzyme in self._enzymes:
                self.data.conservation_relations.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.conservation_relations.reactions.append( reaction )


class _Scanner:
    """Start up Martel to do the scanning of the file.

    This initializes the Martel based parser and connects it to a handler
    that will generate events for a Feature Consumer.
    """
292 """Initialize the scanner by setting up our caches.
293
294 Creating the parser takes a long time, so we want to cache it
295 to reduce parsing time.
296
297 Arguments:
298 o debug - The level of debugging that the parser should
299 display. Level 0 is no debugging, Level 2 displays the most
300 debugging info (but is much slower). See Martel documentation
301 for more info on this.
302 """
        self.interest_tags = [ "input_file_name", "num_int_metabolites",
            "num_reactions", "metabolite_line", "unbalanced_metabolite",
            "num_rows", "num_cols", "irreversible_vector",
            "branch_metabolite", "non_branch_metabolite",
            "stoichiometric_tag", "kernel_tag", "subsets_tag",
            "reduced_system_tag", "convex_basis_tag",
            "conservation_relations_tag", "elementary_modes_tag",
            "reaction", "enzyme", "matrix_row", "sum_is_constant_line",
            "end_stochiometric", "end_kernel", "end_subsets",
            "end_reduced_system", "end_convex_basis",
            "end_conservation_relations", "end_elementary_modes" ]

        expression = Martel.select_names( metatool_format.metatool_record,
                                          self.interest_tags)
        self._parser = expression.make_parser(debug_level = debug)

    def feed(self, handle, consumer):
        """Feed a set of data into the scanner.

        Arguments:
        o handle - A handle with the information to parse.
        o consumer - The consumer that should be informed of events.
        """
        self._parser.setContentHandler(EventGenerator(consumer,
                                                      self.interest_tags))
        self._parser.setErrorHandler(handler.ErrorHandler())

        self._parser.parseFile(handle)


class RecordParser:
    """Parse MetaTool files into Record objects.
    """
340 """Initialize the parser.
341
342 Arguments:
343 o debug_level - An optional argument that species the amount of
344 debugging information Martel should spit out. By default we have
345 no debugging info (the fastest way to do things), but if you want
346 you can set this as high as two and see exactly where a parse fails.
347 """
348 self._scanner = _Scanner(debug_level)
349

    def parse(self, handle):
        """Parse the specified handle into a MetaTool record.
        """
        self._consumer = _RecordConsumer()
        self._scanner.feed(handle, self._consumer)
        return self._consumer.data
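
# Usage sketch for parsing a single entry directly (an illustration only;
# "metatool.out" is a placeholder file name, and debug_level = 2 gives the
# most verbose Martel output as described above):
#
#     parser = RecordParser(debug_level = 2)
#     record = parser.parse(open("metatool.out"))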


def _strip_and_combine(line_list):
    """Combine multiple lines of content separated by spaces.

    This function is used by the EventGenerator callback function to
    combine multiple lines of information. The lines are first
    stripped to remove whitespace, and then combined so they are separated
    by a space. This is a simple-minded way to combine lines, but should
    work for most cases.
    """
    stripped_line_list = map(string.strip, line_list)

    return string.join(stripped_line_list)
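
# For illustration, a sketch of the helper's behaviour as defined above (the
# strings below are arbitrary sample values, not taken from real MetaTool
# output):
#
#     _strip_and_combine(["  ADP  ", "ATP", " Pi "])   # -> "ADP ATP Pi"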