Package Bio :: Package MetaTool
[hide private]
[frames] | [no frames]

Source Code for Package Bio.MetaTool

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Parser for output from MetaTool, a program which defines metabolic routes 
  6  within networks. 
  7   
  8  http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed& 
  9  list_uids=10222413&dopt=Abstract 
 10  """ 
 11   
 12  # standard library 
 13  import string 
 14  import Matrix 
 15   
 16  # XML from python 2.0 
 17  from xml.sax import handler 
 18   
 19  # Martel 
 20  import Martel 
 21  from Martel import RecordReader 
 22   
 23  from Bio.ParserSupport import EventGenerator 
 24  from Bio import File 
 25  import metatool_format 
 26  import Record 
 27   
class Iterator:
    """Iterator interface to move over a file of MetaTool entries one at a time.
    """
    def __init__(self, handle, parser = None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with MetaTool entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        # every MetaTool record begins with a line starting with "METATOOL"
        self._reader = RecordReader.StartsWith(handle, "METATOOL")
        self._parser = parser

    def next(self):
        """Return the next MetaTool record from the handle.

        Will return None if we ran out of records.
        """
        data = self._reader.next()

        # The original wrote each record to a debug file named 'dump' in
        # the current working directory before parsing; that leftover
        # debug side effect has been removed.
        if self._parser is not None and data:
            return self._parser.parse(File.StringHandle(data))

        return data

    def __iter__(self):
        # keep yielding records until next() returns None
        return iter(self.next, None)
62 -class _RecordConsumer:
63 """Create a MetaTool Record object from scanner generated information. 64 """
65 - def __init__(self):
66 self.data = Record.Record() 67 self.data.internal_metabolites = [] 68 self.data.external_metabolites = []
69
70 - def input_file_name( self, content ):
71 self.data.input_file_name = content[ 0 ]
72
73 - def input_file_tag( self, content ):
74 self.state = "input_file_state"
75
76 - def metabolite_count_tag( self, content ):
77 self.state = "metabolite_count_state"
78
79 - def reaction_count_tag( self, content ):
80 self.state = "reaction_count_state"
81
82 - def matrix_row( self, matrix_rows ):
83 for matrix_row in matrix_rows: 84 elements = matrix_row.split() 85 vector = [] 86 for element in elements: 87 vector.append( int( element ) ) 88 self._vectors.append( vector )
89
90 - def unbalanced_metabolite( self, content ):
91 for metabolite in content: 92 self.data.unbalanced_metabolites.append( metabolite )
93
94 - def branch_metabolite( self, content ):
95 for metabolite in content: 96 items = metabolite.split() 97 name = items[ 0 ] 98 consumed = int( items[ 1 ] ) 99 built = int( items[ 2 ] ) 100 vector = items[ 4 ].replace( 'r', '0' ) 101 102 vector = vector.replace( 'i', '1' ) 103 vector = list( vector ) 104 map( int, vector ) 105 entry = Record.MetaboliteRole( name, consumed, built, vector ) 106 self.data.branch_metabolites.append( entry )
107
108 - def non_branch_metabolite( self, content ):
109 for metabolite in content: 110 items = metabolite.split() 111 name = items[ 0 ] 112 consumed = int( items[ 1 ] ) 113 built = int( items[ 2 ] ) 114 vector = items[ 4 ].replace( 'r', '0' ) 115 vector = vector.replace( 'i', '1' ) 116 vector = list( vector ) 117 entry = Record.MetaboliteRole( name, consumed, built, vector ) 118 self.data.non_branch_metabolites.append( entry )
119
120 - def stoichiometric_tag( self, content ):
121 self.state = "stoichiometry_state" 122 self._vectors = [] 123 self._enzymes = [] 124 self._reactions = []
125
126 - def kernel_tag( self, kernel_tag ):
127 self.state = "kernel_state" 128 self._vectors = [] 129 self._enzymes = [] 130 self._reactions = []
131
132 - def subsets_tag( self, content ):
133 self.state = "subsets_state" 134 self._vectors = [] 135 self._enzymes = [] 136 self._reactions = []
137
138 - def reduced_system_tag( self, content ):
139 self.state = "reduced_system_state" 140 self._vectors = [] 141 self._enzymes = [] 142 self._reactions = []
143
144 - def convex_basis_tag( self, content ):
145 self.state = "convex_basis_state" 146 self._vectors = [] 147 self._enzymes = [] 148 self._reactions = []
149
150 - def conservation_relations_tag( self, content ):
151 self.state = "conservation_relations_state" 152 self._vectors = [] 153 self._enzymes = [] 154 self._reactions = []
155
156 - def elementary_modes_tag( self, content ):
157 self.state = "elementary_modes_state" 158 self._vectors = [] 159 self._enzymes = [] 160 self._reactions = []
161
162 - def metabolite_line( self, content ):
163 self.data.external_metabolites = [] 164 self.data.internal_metabolites = [] 165 for metabolite in content: 166 items = metabolite.split() 167 entry = Record.Metabolite( int( items[ 0 ] ), items[ 2 ] ) 168 169 if( items[ 1 ] == "external" ): 170 self.data.external_metabolites.append( entry ) 171 else: 172 self.data.internal_metabolites.append( entry )
173 174
175 - def num_int_metabolites( self, content ):
176 num_int_metabolites = content[ 0 ] 177 self.data.num_int_metabolites = int( num_int_metabolites )
178
179 - def num_reactions( self, content ):
180 num_reactions = content[ 0 ] 181 self.data.num_reactions = int( num_reactions )
182
183 - def irreversible_vector( self, content ):
184 self._irreversible_vector = content[ 0 ].split()
185
186 - def reaction( self, reactions ):
187 for reaction in reactions: 188 items = reaction.split() 189 item = string.join( items[ 1: ] ) 190 self._reactions.append(item)
191
192 - def enzyme( self, enzymes ):
193 for enzyme in enzymes: 194 items = enzyme.split() 195 item = string.join( items[ 1: ] ) 196 self._enzymes.append(item)
197
198 - def sum_is_constant_line( self, lines ):
199 for line in lines: 200 items = line.split( ':') 201 items = items[ 1 ].split( '=' ) 202 self.data.sum_is_constant_lines.append( items[ 0 ] )
203
204 - def num_rows( self, num_rows ):
205 pass
206
207 - def num_cols( self, num_cols ):
208 pass
209
210 - def metabolite_roles( self, content ):
211 for metabolite_role in content: 212 cols = metabolite_role.split()
213
214 - def end_stochiometric( self, content ):
215 if( self._vectors != [] ): 216 self.data.stochiometric.matrix = Matrix.Matrix( self._vectors ) 217 self.data.stochiometric.enzymes = [] 218 for enzyme in self._enzymes: 219 self.data.stochiometric.enzymes.append( enzyme ) 220 self.data.stochiometric.enzymes = [] 221 for reaction in self._reactions: 222 self.data.stochiometric.reactions.append( reaction ) 223 for col in self._irreversible_vector: 224 self.data.stochiometric.irreversible_vector.append( col )
225
226 - def end_kernel( self, content ):
227 if( self._vectors != [] ): 228 self.data.kernel.matrix = Matrix.Matrix( self._vectors ) 229 self.data.kernel.enzymes = [] 230 for enzyme in self._enzymes: 231 self.data.kernel.enzymes.append( enzyme ) 232 for reaction in self._reactions: 233 self.data.kernel.reactions.append( reaction )
234
235 - def end_subsets( self, content ):
236 if( self._vectors != [] ): 237 self.data.subsets.matrix = Matrix.Matrix( self._vectors ) 238 self.data.subsets.enzymes = [] 239 for enzyme in self._enzymes: 240 self.data.subsets.enzymes.append( enzyme ) 241 for reaction in self._reactions: 242 self.data.subsets.reactions.append( reaction )
243 244
245 - def end_reduced_system( self, content ):
246 if( self._vectors != [] ): 247 self.data.reduced_system.matrix = Matrix.Matrix( self._vectors[:14] ) 248 self.data.reduced_system.enzymes = [] 249 for enzyme in self._enzymes: 250 self.data.reduced_system.enzymes.append( enzyme ) 251 for reaction in self._reactions: 252 self.data.reduced_system.reactions.append( reaction ) 253 for col in self._irreversible_vector: 254 self.data.reduced_system.irreversible_vector.append( col )
255 256
257 - def end_convex_basis( self, content ):
258 if( self._vectors != [] ): 259 self.data.convex_basis.matrix = Matrix.Matrix( self._vectors ) 260 self.data.convex_basis.enzymes = [] 261 for enzyme in self._enzymes: 262 self.data.convex_basis.enzymes.append( enzyme ) 263 for reaction in self._reactions: 264 self.data.convex_basis.reactions.append( reaction )
265
266 - def end_conservation_relations( self, content ):
267 if( self._vectors != [] ): 268 self.data.conservation_relations.matrix = Matrix.Matrix( self._vectors ) 269 self.data.conservation_relations.enzymes = [] 270 for enzyme in self._enzymes: 271 self.data.conservation_relations.enzymes.append( enzyme ) 272 for reaction in self._reactions: 273 self.data.conservation_relations.reactions.append( reaction )
274 275
276 - def end_elementary_modes( self, content ):
277 if( self._vectors != [] ): 278 self.data.elementary_modes.matrix = Matrix.Matrix( self._vectors ) 279 self.data.elementary_modes.enzymes = [] 280 for enzyme in self._enzymes: 281 self.data.elementary_modes.enzymes.append( enzyme ) 282 for reaction in self._reactions: 283 self.data.elementary_modes.reactions.append( reaction )
284
class _Scanner:
    """Start up Martel to do the scanning of the file.

    This initializes the Martel based parser and connects it to a handler
    that will generate events for a Feature Consumer.
    """
    def __init__(self, debug = 0):
        """Initialize the scanner by setting up our caches.

        Creating the parser takes a long time, so we want to cache it
        to reduce parsing time.

        Arguments:
        o debug - The level of debugging that the parser should
        display. Level 0 is no debugging, Level 2 displays the most
        debugging info (but is much slower). See Martel documentation
        for more info on this.
        """
        # every tag the MartelParser should report events for
        self.interest_tags = [
            "input_file_name", "num_int_metabolites", "num_reactions",
            "metabolite_line", "unbalanced_metabolite", "num_rows",
            "num_cols", "irreversible_vector", "branch_metabolite",
            "non_branch_metabolite", "stoichiometric_tag", "kernel_tag",
            "subsets_tag", "reduced_system_tag", "convex_basis_tag",
            "conservation_relations_tag", "elementary_modes_tag",
            "reaction", "enzyme", "matrix_row", "sum_is_constant_line",
            "end_stochiometric", "end_kernel", "end_subsets",
            "end_reduced_system", "end_convex_basis",
            "end_conservation_relations", "end_elementary_modes",
        ]

        # build a parser that reports only the tags listed above
        expression = Martel.select_names(metatool_format.metatool_record,
                                         self.interest_tags)
        self._parser = expression.make_parser(debug_level = debug)

    def feed(self, handle, consumer):
        """Feed a set of data into the scanner.

        Arguments:
        o handle - A handle with the information to parse.
        o consumer - The consumer that should be informed of events.
        """
        generator = EventGenerator(consumer, self.interest_tags)
        self._parser.setContentHandler(generator)
        self._parser.setErrorHandler(handler.ErrorHandler())

        self._parser.parseFile(handle)
class RecordParser:
    """Parse MetaTool files into Record objects
    """
    def __init__(self, debug_level = 0):
        """Set up the parser.

        Arguments:
        o debug_level - An optional argument that species the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        """
        self._scanner = _Scanner(debug_level)

    def parse(self, handle):
        """Parse the specified handle into a MetaTool record.
        """
        consumer = _RecordConsumer()
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
357 -def _strip_and_combine(line_list):
358 """Combine multiple lines of content separated by spaces. 359 360 This function is used by the EventGenerator callback function to 361 combine multiple lines of information. The lines are first 362 stripped to remove whitepsace, and then combined so they are separated 363 by a space. This is a simple minded way to combine lines, but should 364 work for most cases. 365 """ 366 # first strip out extra whitespace 367 stripped_line_list = map(string.strip, line_list) 368 369 # now combine everything with spaces 370 return string.join(stripped_line_list)
371