1
2
3
4
5 """Parser for output from MetaTool, a program which defines metabolic routes
6 within networks.
7
8 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&
9 list_uids=10222413&dopt=Abstract
10 """
11
12
13 import string
14 import array
15 import os
16 import re
17 import sys
18 import sgmllib
19 import urlparse
20 import copy
21 import Matrix
22
23
24 from xml.sax import handler
25
26
27 import Martel
28
29
30 from Martel import RecordReader
31
32 from Bio.ParserSupport import EventGenerator
33 from Bio.ParserSupport import AbstractConsumer
34 from Bio.SeqFeature import Reference
35 from Bio import File
36 import metatool_format
37 import Record
38
40 """Iterator interface to move over a file of MetaTool entries one at a time.
41 """
def __init__(self, handle, parser=None):
    """Set up iteration over the MetaTool records in a handle.

    Arguments:
    o handle - A handle with MetaTool entries to iterate through.
    o parser - An optional parser applied to each entry before it is
    returned. If None, the raw entry text is returned instead.
    """
    # Kept exactly as given; next() compares it against None per record.
    self._parser = parser

    # Reader keyed on the "METATOOL" marker (presumably the line that
    # opens each record -- confirm against Martel's RecordReader).
    self._reader = RecordReader.StartsWith(handle, "METATOOL")
53
def next(self):
    """Return the next MetaTool record from the handle.

    Returns the raw record text when no parser was supplied, otherwise
    the result of running the parser over that text.

    Will return None if we ran out of records.
    """
    data = self._reader.next()

    # Nothing to parse: either the caller asked for raw records, or the
    # reader produced a falsy value (end of input). Hand it back as-is so
    # that iter(self.next, None) can see the end-of-records sentinel.
    if self._parser is None or not data:
        return data

    # The parser works on handles, so wrap the record text in one.
    # No copy of the record is written to disk: iterating must not
    # depend on (or litter) the current working directory.
    return self._parser.parse(File.StringHandle(data))
69
# Two-argument form of iter(): self.next is called repeatedly and the
# iteration stops as soon as it returns the sentinel value None.
71 return iter(self.next, None)
72
74 """Create a MetaTool Record object from scanner generated information.
75 """
80
83
86
89
91 self.state = "reaction_count_state"
92
100
104
118
130
132 self.state = "stoichiometry_state"
133 self._vectors = []
134 self._enzymes = []
135 self._reactions = []
136
138 self.state = "kernel_state"
139 self._vectors = []
140 self._enzymes = []
141 self._reactions = []
142
144 self.state = "subsets_state"
145 self._vectors = []
146 self._enzymes = []
147 self._reactions = []
148
150 self.state = "reduced_system_state"
151 self._vectors = []
152 self._enzymes = []
153 self._reactions = []
154
156 self.state = "convex_basis_state"
157 self._vectors = []
158 self._enzymes = []
159 self._reactions = []
160
162 self.state = "conservation_relations_state"
163 self._vectors = []
164 self._enzymes = []
165 self._reactions = []
166
168 self.state = "elementary_modes_state"
169 self._vectors = []
170 self._enzymes = []
171 self._reactions = []
172
184
185
189
193
195 self._irreversible_vector = content[ 0 ].split()
196
202
208
214
217
220
224
236
245
254
255
266
267
276
278 if( self._vectors != [] ):
279 self.data.conservation_relations.matrix = Matrix.Matrix( self._vectors )
280 self.data.conservation_relations.enzymes = []
281 for enzyme in self._enzymes:
282 self.data.conservation_relations.enzymes.append( enzyme )
283 for reaction in self._reactions:
284 self.data.conservation_relations.reactions.append( reaction )
285
286
295
297 """Start up Martel to do the scanning of the file.
298
299 This initializes the Martel-based parser and connects it to a handler
300 that will generate events for a Feature Consumer.
301 """
303 """Initialize the scanner by setting up our caches.
304
305 Creating the parser takes a long time, so we want to cache it
306 to reduce parsing time.
307
308 Arguments:
309 o debug - The level of debugging that the parser should
310 display. Level 0 is no debugging, Level 2 displays the most
311 debugging info (but is much slower). See Martel documentation
312 for more info on this.
313 """
314
315
# Tag names this scanner cares about. The same list is used twice: to
# narrow the Martel expression below, and in feed() to tell the
# EventGenerator which events to pass on to the consumer.
# NOTE(review): "end_stochiometric" is spelled differently from
# "stoichiometric_tag"; presumably it mirrors the tag name declared in
# metatool_format -- confirm before "correcting" either spelling.
316 self.interest_tags = [ "input_file_name", "num_int_metabolites", \
317 "num_reactions", "metabolite_line", "unbalanced_metabolite", \
318 "num_rows", "num_cols", "irreversible_vector", \
319 "branch_metabolite", "non_branch_metabolite", \
320 "stoichiometric_tag", "kernel_tag", "subsets_tag", \
321 "reduced_system_tag", "convex_basis_tag", \
322 "conservation_relations_tag", "elementary_modes_tag", \
323 "reaction", "enzyme", "matrix_row", "sum_is_constant_line", \
324 "end_stochiometric", "end_kernel", "end_subsets", \
325 "end_reduced_system", "end_convex_basis", \
326 "end_conservation_relations", "end_elementary_modes" ]
327
328
# Build the parser once, here, from the MetaTool record expression limited
# to the tags above; feed() reuses self._parser for every handle.
329 expression = Martel.select_names( metatool_format.metatool_record,
330 self.interest_tags)
331 self._parser = expression.make_parser(debug_level = debug)
332
def feed(self, handle, consumer):
    """Feed a set of data into the scanner.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    parser = self._parser

    # Events for the tags of interest are forwarded to the consumer.
    event_source = EventGenerator(consumer, self.interest_tags)
    parser.setContentHandler(event_source)

    # Stock xml.sax error handler; no custom error reporting is set up.
    parser.setErrorHandler(handler.ErrorHandler())

    parser.parseFile(handle)
346
348 """Parse MetaTool files into Record objects
349 """
351 """Initialize the parser.
352
353 Arguments:
354 o debug_level - An optional argument that specifies the amount of
355 debugging information Martel should print. By default we have
356 no debugging info (the fastest way to do things), but if you want
357 you can set this as high as two and see exactly where a parse fails.
358 """
# The scanner is created once here; parse() reuses it for every handle.
359 self._scanner = _Scanner(debug_level)
360
def parse(self, handle):
    """Parse the specified handle into a MetaTool record.

    A new consumer is created on every call; the record it builds while
    the scanner runs over the handle (its `data` attribute) is returned.
    """
    consumer = _RecordConsumer()
    # The consumer is also kept on the instance, as before.
    self._consumer = consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
367
369 """Combine multiple lines of content separated by spaces.
370
371 This function is used by the EventGenerator callback function to
372 combine multiple lines of information. The lines are first
373 stripped to remove whitespace, and then combined so they are separated
374 by a space. This is a simple-minded way to combine lines, but should
375 work for most cases.
376 """
377
# NOTE(review): string.strip and string.join are module-level functions
# that exist only in the Python 2 `string` module; the str methods
# (s.strip(), ' '.join(...)) are the portable spelling.
378 stripped_line_list = map(string.strip, line_list)
379
380
381 return string.join(stripped_line_list, ' ')
382