
Source Code for Package Bio.MetaTool

# Copyright 2001 by Katharine Lindner.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Parser for output from MetaTool, a program which defines metabolic routes
within networks.

http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&
list_uids=10222413&dopt=Abstract
"""

# standard library
import string
import array
import os
import re
import sys
import sgmllib
import urlparse
import copy
import Matrix

# XML from python 2.0
from xml.sax import handler

# Martel
import Martel
# from Martel import Opt, Alt, Integer, SignedInteger, Group, Str, MaxRepeat
# from Martel import Any, AnyBut, RepN, Rep, Rep1, ToEol
from Martel import RecordReader

from Bio.ParserSupport import EventGenerator
from Bio.ParserSupport import AbstractConsumer
from Bio.SeqFeature import Reference
from Bio import File
import metatool_format
import Record

class Iterator:
    """Iterator interface to move over a file of MetaTool entries one at a time.
    """
    def __init__(self, handle, parser = None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with MetaTool entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        self._reader = RecordReader.StartsWith(handle, "METATOOL")
        # self._reader = RecordReader.EndsWith(handle, "RECEND|\n")
        self._parser = parser

    def next(self):
        """Return the next MetaTool record from the handle.

        Will return None if we ran out of records.
        """
        data = self._reader.next()

        if self._parser is not None:
            if data:
                # write a debug copy of the raw record before parsing it
                dumpfile = open( 'dump', 'w' )
                dumpfile.write( data )
                dumpfile.close()
                return self._parser.parse(File.StringHandle(data))

        return data

    def __iter__(self):
        return iter(self.next, None)
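
# A minimal usage sketch (illustrative only, not part of the original module):
# iterating over the raw, unparsed records in a MetaTool output file.  The
# file name "metatool.out" is a placeholder for an actual MetaTool output file.
#
#     handle = open("metatool.out")
#     for entry in Iterator(handle):
#         print entry[:60]   # first 60 characters of the raw record text
#     handle.close()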

class _RecordConsumer:
    """Create a MetaTool Record object from scanner generated information.
    """
    def __init__(self):
        self.data = Record.Record()
        self.data.internal_metabolites = []
        self.data.external_metabolites = []

    def input_file_name( self, content ):
        self.data.input_file_name = content[ 0 ]

    def input_file_tag( self, content ):
        self.state = "input_file_state"

    def metabolite_count_tag( self, content ):
        self.state = "metabolite_count_state"

    def reaction_count_tag( self, content ):
        self.state = "reaction_count_state"

    def matrix_row( self, matrix_rows ):
        for matrix_row in matrix_rows:
            elements = matrix_row.split()
            vector = []
            for element in elements:
                vector.append( int( element ) )
            self._vectors.append( vector )

    def unbalanced_metabolite( self, content ):
        for metabolite in content:
            self.data.unbalanced_metabolites.append( metabolite )

    def branch_metabolite( self, content ):
        for metabolite in content:
            items = metabolite.split()
            name = items[ 0 ]
            consumed = int( items[ 1 ] )
            built = int( items[ 2 ] )
            # translate the 'r'/'i' flag characters into a '0'/'1' vector
            vector = items[ 4 ].replace( 'r', '0' )
            vector = vector.replace( 'i', '1' )
            vector = list( vector )
            entry = Record.MetaboliteRole( name, consumed, built, vector )
            self.data.branch_metabolites.append( entry )

    def non_branch_metabolite( self, content ):
        for metabolite in content:
            items = metabolite.split()
            name = items[ 0 ]
            consumed = int( items[ 1 ] )
            built = int( items[ 2 ] )
            vector = items[ 4 ].replace( 'r', '0' )
            vector = vector.replace( 'i', '1' )
            vector = list( vector )
            entry = Record.MetaboliteRole( name, consumed, built, vector )
            self.data.non_branch_metabolites.append( entry )

    def stoichiometric_tag( self, content ):
        self.state = "stoichiometry_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def kernel_tag( self, kernel_tag ):
        self.state = "kernel_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def subsets_tag( self, content ):
        self.state = "subsets_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def reduced_system_tag( self, content ):
        self.state = "reduced_system_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def convex_basis_tag( self, content ):
        self.state = "convex_basis_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def conservation_relations_tag( self, content ):
        self.state = "conservation_relations_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def elementary_modes_tag( self, content ):
        self.state = "elementary_modes_state"
        self._vectors = []
        self._enzymes = []
        self._reactions = []

    def metabolite_line( self, content ):
        self.data.external_metabolites = []
        self.data.internal_metabolites = []
        for metabolite in content:
            items = metabolite.split()
            entry = Record.Metabolite( int( items[ 0 ] ), items[ 2 ] )
            if( items[ 1 ] == "external" ):
                self.data.external_metabolites.append( entry )
            else:
                self.data.internal_metabolites.append( entry )

    def num_int_metabolites( self, content ):
        num_int_metabolites = content[ 0 ]
        self.data.num_int_metabolites = int( num_int_metabolites )

    def num_reactions( self, content ):
        num_reactions = content[ 0 ]
        self.data.num_reactions = int( num_reactions )

    def irreversible_vector( self, content ):
        self._irreversible_vector = content[ 0 ].split()

    def reaction( self, reactions ):
        for reaction in reactions:
            items = reaction.split()
            item = string.join( items[ 1: ] )
            self._reactions.append( item.strip() )

    def enzyme( self, enzymes ):
        for enzyme in enzymes:
            items = enzyme.split()
            item = string.join( items[ 1: ] )
            self._enzymes.append( item.strip() )

    def sum_is_constant_line( self, lines ):
        for line in lines:
            items = line.split( ':' )
            items = items[ 1 ].split( '=' )
            self.data.sum_is_constant_lines.append( items[ 0 ] )

    def num_rows( self, num_rows ):
        pass

    def num_cols( self, num_cols ):
        pass

    def metabolite_roles( self, content ):
        for metabolite_role in content:
            cols = metabolite_role.split()

    def end_stochiometric( self, content ):
        if( self._vectors != [] ):
            self.data.stochiometric.matrix = Matrix.Matrix( self._vectors )
            self.data.stochiometric.enzymes = []
            for enzyme in self._enzymes:
                self.data.stochiometric.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.stochiometric.reactions.append( reaction )
            for col in self._irreversible_vector:
                self.data.stochiometric.irreversible_vector.append( col )

    def end_kernel( self, content ):
        if( self._vectors != [] ):
            self.data.kernel.matrix = Matrix.Matrix( self._vectors )
            self.data.kernel.enzymes = []
            for enzyme in self._enzymes:
                self.data.kernel.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.kernel.reactions.append( reaction )

    def end_subsets( self, content ):
        if( self._vectors != [] ):
            self.data.subsets.matrix = Matrix.Matrix( self._vectors )
            self.data.subsets.enzymes = []
            for enzyme in self._enzymes:
                self.data.subsets.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.subsets.reactions.append( reaction )

    def end_reduced_system( self, content ):
        if( self._vectors != [] ):
            self.data.reduced_system.matrix = Matrix.Matrix( self._vectors[:14] )
            self.data.reduced_system.enzymes = []
            for enzyme in self._enzymes:
                self.data.reduced_system.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.reduced_system.reactions.append( reaction )
            for col in self._irreversible_vector:
                self.data.reduced_system.irreversible_vector.append( col )

    def end_convex_basis( self, content ):
        if( self._vectors != [] ):
            self.data.convex_basis.matrix = Matrix.Matrix( self._vectors )
            self.data.convex_basis.enzymes = []
            for enzyme in self._enzymes:
                self.data.convex_basis.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.convex_basis.reactions.append( reaction )

    def end_conservation_relations( self, content ):
        if( self._vectors != [] ):
            self.data.conservation_relations.matrix = Matrix.Matrix( self._vectors )
            self.data.conservation_relations.enzymes = []
            for enzyme in self._enzymes:
                self.data.conservation_relations.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.conservation_relations.reactions.append( reaction )

    def end_elementary_modes( self, content ):
        if( self._vectors != [] ):
            self.data.elementary_modes.matrix = Matrix.Matrix( self._vectors )
            self.data.elementary_modes.enzymes = []
            for enzyme in self._enzymes:
                self.data.elementary_modes.enzymes.append( enzyme )
            for reaction in self._reactions:
                self.data.elementary_modes.reactions.append( reaction )

class _Scanner:
    """Start up Martel to do the scanning of the file.

    This initializes the Martel-based parser and connects it to a handler
    that will generate events for a Feature Consumer.
    """
    def __init__(self, debug = 0):
        """Initialize the scanner by setting up our caches.

        Creating the parser takes a long time, so we want to cache it
        to reduce parsing time.

        Arguments:
        o debug - The level of debugging that the parser should
        display. Level 0 is no debugging, Level 2 displays the most
        debugging info (but is much slower). See Martel documentation
        for more info on this.
        """
        # a listing of all tags we are interested in scanning for
        # in the MartelParser
        self.interest_tags = [ "input_file_name", "num_int_metabolites",
            "num_reactions", "metabolite_line", "unbalanced_metabolite",
            "num_rows", "num_cols", "irreversible_vector",
            "branch_metabolite", "non_branch_metabolite",
            "stoichiometric_tag", "kernel_tag", "subsets_tag",
            "reduced_system_tag", "convex_basis_tag",
            "conservation_relations_tag", "elementary_modes_tag",
            "reaction", "enzyme", "matrix_row", "sum_is_constant_line",
            "end_stochiometric", "end_kernel", "end_subsets",
            "end_reduced_system", "end_convex_basis",
            "end_conservation_relations", "end_elementary_modes" ]

        # make a parser that returns only the tags we are interested in
        expression = Martel.select_names( metatool_format.metatool_record,
                                          self.interest_tags )
        self._parser = expression.make_parser(debug_level = debug)

    def feed(self, handle, consumer):
        """Feed a set of data into the scanner.

        Arguments:
        o handle - A handle with the information to parse.
        o consumer - The consumer that should be informed of events.
        """
        self._parser.setContentHandler( EventGenerator( consumer,
                                                        self.interest_tags ) )
        # _strip_and_combine ))
        self._parser.setErrorHandler(handler.ErrorHandler())

        self._parser.parseFile(handle)

class RecordParser:
    """Parse MetaTool files into Record objects.
    """
    def __init__(self, debug_level = 0):
        """Initialize the parser.

        Arguments:
        o debug_level - An optional argument that specifies the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        """
        self._scanner = _Scanner(debug_level)

    def parse(self, handle):
        """Parse the specified handle into a MetaTool record.
        """
        self._consumer = _RecordConsumer()
        self._scanner.feed(handle, self._consumer)
        return self._consumer.data
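
# A minimal usage sketch (illustrative only, not part of the original module):
# parsing every record in a MetaTool output file into Record objects via
# RecordParser and Iterator.  The file name "metatool.out" is a placeholder.
#
#     parser = RecordParser()
#     handle = open("metatool.out")
#     for record in Iterator(handle, parser):
#         print record.input_file_name
#         print record.num_int_metabolites, record.num_reactions
#     handle.close()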

def _strip_and_combine(line_list):
    """Combine multiple lines of content separated by spaces.

    This function is used by the EventGenerator callback function to
    combine multiple lines of information. The lines are first
    stripped to remove whitespace, and then combined so they are separated
    by a space. This is a simple-minded way to combine lines, but should
    work for most cases.
    """
    # first strip out extra whitespace
    stripped_line_list = map(string.strip, line_list)

    # now combine everything with spaces
    return string.join(stripped_line_list, ' ')
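
# For example (illustrative, using generic placeholder strings):
#
#     _strip_and_combine(["  alpha ", " beta  "])  returns  "alpha beta"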