Package Bio :: Package ECell
[hide private]
[frames | no frames]

Source Code for Package Bio.ECell

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  # standard library 
  7  import sys 
  8  import string 
  9  import copy 
 10  import array 
 11  import os 
 12  import re 
 13  import sgmllib 
 14  import urlparse 
 15   
 16  # XML from python 2.0 
 17  from xml.sax import handler 
 18   
 19  # Martel 
 20  import Martel 
 21  from Martel import RecordReader 
 22   
 23  from Bio.ParserSupport import EventGenerator 
 24  from Bio.ParserSupport import AbstractConsumer 
 25  from Bio import File 
 26  from Bio.Align.Generic import Alignment 
 27  import Bio.Alphabet 
 28  import ecell_format 
 29  import Record 
 30   
 31  """ 
 32  Ecell converts the ECell input from spreadsheet format to an intermediate format, described in 
 33  http://www.e-cell.org/manual/chapter2E.html#3.2.  It provides an alternative to the perl script 
 34  supplied with the Ecell2 distribution at http://bioinformatics.org/project/?group_id=49. 
 35   
 36  ECell expects a spreadsheet exported in delimited text format. The file should be read with 
 37  FilteredReader using the default filter chain to remove extraneous characters. 
 38  """ 
 39   
class Error(Exception):
    """Base class for the exceptions defined in this module.

    The constructor accepts no arguments and stores nothing.
    """

    def __init__(self):
        # Intentionally a no-op: nothing is forwarded to Exception.__init__.
        pass
45
class ECellError(Error):
    """Error raised for invalid ECell input (for example an invalid header).

    Attributes:
    o message - description of error
    """

    def __init__(self, message):
        """Store the error description.

        Arguments:
        o message - description of error
        """
        # Error.__init__ takes no arguments, so the built-in base class is
        # initialised directly.  Without this, str(error) and tracebacks
        # show an empty message under Python 2.
        Exception.__init__(self, message)
        self.message = message
54 55 56
class Iterator:
    """Iterator interface to move over a file of ecell entries one at a time.
    """

    def __init__(self, handle, parser=None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with ECell entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        self.handle = File.UndoHandle(handle)
        self._reader = RecordReader.Everything(self.handle)
        self._parser = parser

    def next(self):
        """Return the next ecell record from the handle.

        Returns whatever the reader yields when no parser was supplied (or
        when the reader yields an empty value); otherwise returns the result
        of parser.parse() on that data.  Iteration via __iter__ stops when
        this method returns None.
        """
        data = self._reader.next()
        # Only non-empty data is worth parsing; an empty or None value is
        # handed back untouched.  No copy of the record is written to disk.
        if self._parser is not None and data:
            return self._parser.parse(File.StringHandle(data))
        return data

    def __iter__(self):
        """Iterate by calling next() until it returns None."""
        return iter(self.next, None)
90 91 92
class _Scanner:
    """Run the Martel based parser over ECell input.

    The scanner builds a Martel parser restricted to the tags listed in
    interest_tags and, when fed a handle, routes the parse events for those
    tags to a consumer through an EventGenerator.
    """

    def __init__(self, debug=0):
        """Build the Martel parser once so it can be reused for every feed.

        Arguments:
        o debug - The level of debugging that the parser should
        display. Level 0 is no debugging, Level 2 displays the most
        debugging info (but is much slower). See Martel documentation
        for more info on this.
        """
        # The only tags the consumer is told about.
        self.interest_tags = [
            'header_line',
            'system_line',
            'substance_multiline',
            'reactor_multiline',
            'include_line',
        ]

        # Restrict the ECell record expression to those tags before the
        # parser is built.
        selected = Martel.select_names(ecell_format.ecell_record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level=debug)

    def feed(self, handle, consumer):
        """Parse the contents of a handle, reporting events to a consumer.

        Arguments:
        o handle - A handle with the information to parse.
        o consumer - The consumer that should be informed of events.
        """
        event_generator = EventGenerator(consumer, self.interest_tags)
        self._parser.setContentHandler(event_generator)
        self._parser.parseFile(handle)
132
class _RecordConsumer:
    """Create an ECell Record object from scanner generated information.

    The handler methods (header_line, system_line, substance_multiline,
    reactor_multiline, include_line) carry the same names as the tags the
    scanner selects; presumably the event generator dispatches to them by
    name -- confirm against Bio.ParserSupport.EventGenerator.

    Parsed cells are stored in self.data.cell_dict under composite keys of
    the form <section><section number><column name><index>.

    NOTE(review): the statement nesting of this class was reconstructed from
    a flattened listing; spots where the nesting could not be determined
    with certainty carry their own NOTE(review) comment.
    """

    # (counter key, column whose presence on a line increments the counter)
    _COUNTED_COLUMNS = (
        ('s_', 's_id'),
        ('p_', 'p_id'),
        ('c_', 'c_id'),
        ('e_', 'e_id'),
        ('o_', 'o_type'),
        ('arg_tag', 'arg_tag'),
    )

    def __init__(self):
        self.data = Record.Record()
        # Lower-cased, stripped column names of the most recent header line.
        self._header = []
        self._database = {}
        # Lower-cased first column of the most recent header line.
        self._state = ''

    def include_line(self, line):
        """Append the given text to the record's include buffer."""
        self.data.include_buf = self.data.include_buf + line

    def header_line(self, lines):
        """Remember the column names of a header line.

        Arguments:
        o lines - Tab separated header lines; the last one wins.
        """
        for line in lines:
            columns = line.split('\t')
            self._state = columns[0].lower()
            self._header = [column.strip().lower() for column in columns]

    def system_line(self, lines):
        """Add one system entry per line.

        Arguments:
        o lines - Tab separated system lines matching the current header.

        Raises ECellError if a line fails the header check.
        """
        for line in lines:
            line_dict = self._make_line_dict(line)
            if not self._check_missing_header(line_dict):
                raise ECellError("invalid header")
            self.data.num_systems = self.data.num_systems + 1
            _set_defaults(line_dict)
            self._build_system_entry(line_dict)

    def substance_multiline(self, multiline):
        """Parse every block of substance lines in multiline."""
        for line in multiline:
            self.parse_substance_lines(line)

    def parse_substance_lines(self, multiline):
        """Add the substance entries found in a block of lines.

        A line whose qty and conc are both different from 'fix' starts a new
        substance; any other line increments the line index used in the
        composite keys.

        Arguments:
        o multiline - Newline separated, tab delimited substance lines.
        """
        line_no = 0
        for line in multiline.splitlines():
            line_dict = self._make_line_dict(line)
            if not _is_valid_substance(line_dict):
                # Best effort: report the problem on stderr and carry on.
                sys.stderr.write(
                    "quantity and concentration are mutually exclusive\n")

            qty = Record.get_entry(line_dict, 'qty')
            conc = Record.get_entry(line_dict, 'conc')
            if qty.lower() != 'fix' and conc.lower() != 'fix':
                self.data.num_substances = self.data.num_substances + 1
            else:
                line_no = line_no + 1
            if line.lower().startswith('substance'):
                _set_defaults(line_dict)
                self._convert_conc(line_dict)

            self._build_substance_entry(line_dict, line_no)

    def reactor_multiline(self, multiline):
        """Parse every block of reactor lines in multiline."""
        for line in multiline:
            self.parse_reactor_lines(line)

    def parse_reactor_lines(self, multiline):
        """Add the reactor entries found in a block of lines.

        A line with a 'class' column starts a new reactor and resets the
        per-reactor counters; later lines add to the current reactor.

        Arguments:
        o multiline - Newline separated, tab delimited reactor lines.

        Raises ECellError for an invalid header, for a non-numeric s_coeff
        or p_coeff, and for a block that does not start with a line that
        has a class (which would otherwise fail on unset counters).
        """
        counter_dict = None
        for line in multiline.splitlines():
            line_dict = self._make_line_dict(line)
            is_reactor_line = line.lower().startswith('reactor')
            if is_reactor_line and not self._check_missing_header(line_dict):
                raise ECellError("invalid header")
            # Each coefficient is validated on its own, so a missing s_coeff
            # does not hide a malformed p_coeff.
            for column in ('s_coeff', 'p_coeff'):
                if column in line_dict:
                    if not is_only_digits(line_dict[column]):
                        raise ECellError(
                            column + ' must contain only digits')
            if is_reactor_line:
                _set_reactor_defaults(line_dict)
            # NOTE(review): assumed to run for every line, not only for
            # lines starting with 'reactor' -- confirm against the original.
            line_dict = self._remove_if_inconsistent(line_dict)

            if 'class' in line_dict:
                self.data.num_reactors = self.data.num_reactors + 1
                counter_dict = {}
                for counter_key, column in self._COUNTED_COLUMNS:
                    counter_dict[counter_key] = 0
            elif counter_dict is None:
                raise ECellError(
                    'reactor entry must start with a line that has a class')
            for counter_key, column in self._COUNTED_COLUMNS:
                if column in line_dict:
                    counter_dict[counter_key] = counter_dict[counter_key] + 1
            self._set_max(counter_dict)
            self._build_reactor_entry(line_dict, counter_dict)

    def _set_max(self, counter_dict):
        """Record the current counters for the current reactor."""
        suffix = str(self.data.num_reactors)
        for key in counter_dict.keys():
            self.data._max_dict[key + suffix] = counter_dict[key]

    def _build_system_entry(self, line_dict):
        """Store the cells of a system line; existing keys are kept."""
        stem = 'system' + str(self.data.num_systems)
        for key in line_dict.keys():
            composite_key = stem + key + '0'
            # cell_dict belongs to the Record object, whose type is not
            # visible here, so its has_key() interface is left untouched.
            if not self.data.cell_dict.has_key(composite_key):
                self.data.cell_dict[composite_key] = line_dict[key]

    def _build_substance_entry(self, line_dict, line_no):
        """Store the cells of a substance line; existing keys are kept."""
        stem = 'substance' + str(self.data.num_substances)
        for key in line_dict.keys():
            composite_key = stem + key + str(line_no)
            if not self.data.cell_dict.has_key(composite_key):
                self.data.cell_dict[composite_key] = line_dict[key]

    def _convert_conc(self, line_dict):
        """Express a concentration as a QTY(conc,path) quantity entry.

        Does nothing unless the line has a 'conc' column and no 'qty' column.
        """
        if 'conc' not in line_dict or 'qty' in line_dict:
            return
        contents = 'QTY(%s,%s)' % (line_dict['conc'], line_dict['path'])
        composite_key = 'substance' + str(self.data.num_substances) + \
                        'qty' + '0'
        self.data.cell_dict[composite_key] = contents
        # NOTE(review): assumed to be set only when a conversion happened --
        # confirm against the original nesting.
        self.data.contains_concentration = 1

    def _build_reactor_entry(self, line_dict, counter_dict):
        """Store the cells of a reactor line; existing keys are kept.

        The index appended to each key is the counter matching the column
        ('arg_tag' for arg_* columns, otherwise the two character prefix),
        or '0' when no counter applies.
        """
        stem = 'reactor' + str(self.data.num_reactors)
        for key in line_dict.keys():
            prefix = key[:2]
            if key.startswith('arg_'):
                index = counter_dict['arg_tag']
            elif prefix in counter_dict:
                index = counter_dict[prefix]
            else:
                index = '0'
            composite_key = stem + str(key) + str(index)
            if not self.data.cell_dict.has_key(composite_key):
                self.data.cell_dict[composite_key] = line_dict[key]

    def _check_missing_header(self, line_dict):
        """Return 0 if 'id' or 'path' is missing while a sibling is present.

        The siblings are 'id', 'path' and 'class'.  A missing 'class' is
        always accepted, as is a line with none of the three.  Returns 1
        otherwise.
        """
        siblings = ['id', 'path', 'class']
        for item in ['id', 'path']:
            if item in line_dict:
                continue
            for other in siblings:
                if other != item and other in line_dict:
                    return 0
        return 1

    def _remove_if_inconsistent(self, list_dict):
        """Delete substrate/product/catalyst/effector cells that are incomplete.

        The dictionary is modified in place and also returned.
        """
        valid_keys = list(list_dict.keys())
        for label in ['id', 'path', 'type']:
            for prefix in ['s_', 'p_', 'c_', 'e_']:
                valid_keys = self._consistency_filter(
                    prefix, prefix + label, valid_keys)
        # Iterate over a snapshot because entries are deleted on the way.
        for key in list(list_dict.keys()):
            if key not in valid_keys:
                del list_dict[key]
        return list_dict

    def _consistency_filter(self, prefix, tag, valid_keys):
        """Drop a prefix's id/path/coeff/type keys when tag is absent.

        Nothing is dropped for an absent '*type' tag unless the prefix is
        'o_'.  valid_keys is modified in place and returned.
        """
        block = [prefix + suffix
                 for suffix in ['id', 'path', 'coeff', 'type']]
        for node in block:
            if tag not in valid_keys and node in valid_keys:
                if prefix == 'o_' or not tag.endswith('type'):
                    valid_keys.remove(node)
        return valid_keys

    def _make_line_dict(self, line):
        """Map the non-empty cells of a tab separated line to header names.

        Raises IndexError if a non-empty cell lies beyond the last header
        column.
        """
        line_dict = {}
        column = 0
        for cell in line.split('\t'):
            cell = cell.strip()
            if cell != '':
                line_dict[self._header[column]] = cell
            column = column + 1
        return line_dict
332
def _clear_bad_block(block, items):
    """Remove every label of block from items.

    Arguments:
    o block - The labels to remove.
    o items - The list to remove them from; it is modified in place.

    Returns the same list object.  Raises ValueError if a label is not
    present in items.
    """
    for label in block:
        # list.remove() takes the value itself (not its index) and returns
        # None, so its result must not be assigned back to items.
        items.remove(label)
    return items
337
def _is_valid_substance(line_dict):
    """Return 0 when both 'qty' and 'conc' are given and qty is not 'QTY'.

    Returns 1 in every other case.
    """
    has_both = 'qty' in line_dict and 'conc' in line_dict
    if has_both and line_dict['qty'] != 'QTY':
        return 0
    return 1
344
def is_only_digits(line):
    """Return 1 if line, ignoring surrounding whitespace, is empty or all digits.

    Returns 0 otherwise.
    """
    stripped = line.strip()
    if stripped == '' or stripped.isdigit():
        return 1
    return 0
352
def _set_reactor_defaults(line_dict):
    """Fill in the defaults of a reactor line and return the dictionary.

    After the general defaults are applied, every substrate, product,
    catalyst or effector that has an id gets a coefficient of 1 and the
    reactor's own path unless the line already provides them.
    """
    line_dict = _set_defaults(line_dict)
    for prefix in ('s_', 'p_', 'c_', 'e_'):
        if prefix + 'id' not in line_dict:
            continue
        coeff_key = prefix + 'coeff'
        path_key = prefix + 'path'
        if coeff_key not in line_dict:
            line_dict[coeff_key] = 1
        if path_key not in line_dict:
            line_dict[path_key] = line_dict['path']

    return line_dict
366
def _set_defaults(line_dict):
    """Fill in the general defaults of a line and return the dictionary.

    A missing name defaults to the id; when an arg_tag is present, a
    missing arg_coeff defaults to 0.  The dictionary is modified in place.
    """
    if 'name' not in line_dict:
        line_dict['name'] = line_dict['id']
    if 'arg_tag' in line_dict:
        line_dict.setdefault('arg_coeff', 0)

    return line_dict
375 376 377 378 379 380 381
class RecordParser:
    """Turn ECell files into Record objects.
    """

    def __init__(self, debug_level=0):
        """Create the scanner used for every parse.

        Arguments:
        o debug_level - An optional argument that specifies the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        """
        self._scanner = _Scanner(debug_level)

    def parse(self, handle):
        """Scan the given handle and return the resulting ECell record.
        """
        consumer = _RecordConsumer()
        # Kept on the instance before scanning starts.
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
402