Package Bio :: Package ECell
[hide private]
[frames] | [no frames]

Source Code for Package Bio.ECell

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  import warnings 
  7  warnings.warn("Bio.ECell was deprecated, as it does not seem to have any users. If you do use this module, please contact the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module") 
  8   
  9   
 10  # standard library 
 11  import sys 
 12  import string 
 13  import copy 
 14  import array 
 15  import os 
 16  import re 
 17  import sgmllib 
 18  import urlparse 
 19   
 20  # XML from python 2.0 
 21  from xml.sax import handler 
 22   
 23  # Martel 
 24  import Martel 
 25  from Martel import RecordReader 
 26   
 27  from Bio.ParserSupport import EventGenerator 
 28  from Bio.ParserSupport import AbstractConsumer 
 29  from Bio import File 
 30  from Bio.Align.Generic import Alignment 
 31  import Bio.Alphabet 
 32  import ecell_format 
 33  import Record 
 34   
 35  """ 
  36  Bio.ECell converts ECell input from spreadsheet format to an intermediate format, described in 
  37  http://www.e-cell.org/manual/chapter2E.html#3.2.  It provides an alternative to the Perl script 
 38  supplied with the Ecell2 distribution at http://bioinformatics.org/project/?group_id=49. 
 39   
 40  ECell expects a spreadsheet exported in delimited text format. The file should be read with 
 41  FilteredReader using the default filter chain to remove extraneous characters. 
 42  """ 
 43   
class Error(Exception):
    """Base class for the exceptions defined in this module.

    Any positional arguments are forwarded to Exception, so that
    str(error) and error.args behave as they do for built-in exceptions.
    Calling Error() with no arguments keeps working as before.
    """

    def __init__(self, *args):
        Exception.__init__(self, *args)


class ECellError(Error):
    """Error raised when ECell input fails validation.

    message - description of error
    """

    def __init__(self, message):
        # Hand the message to the Exception machinery as well; previously
        # only the attribute was set, so str(error) and tracebacks showed
        # an empty message.
        Error.__init__(self, message)
        self.message = message
58 59 60
class Iterator:
    """Iterator interface to move over a file of ecell entries one at a time.
    """

    def __init__(self, handle, parser=None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with ECell entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        self.handle = File.UndoHandle(handle)
        self._reader = RecordReader.Everything(self.handle)
        self._parser = parser

    def next(self):
        """Return the next ecell record from the handle.

        Will return None if we ran out of records.

        When a parser was supplied and the reader produced non-empty data,
        the data is wrapped in a File.StringHandle and the parser's result
        is returned; otherwise the reader's value is returned unchanged.
        """
        data = self._reader.next()

        # The record used to be written to a file called 'dump' in the
        # current directory before parsing.  That was debugging residue:
        # it clobbered any existing file of that name and failed in
        # read-only directories, so it has been removed.
        if self._parser is not None and data:
            return self._parser.parse(File.StringHandle(data))

        return data

    def __iter__(self):
        # Two-argument iter(): call self.next until it returns None.
        return iter(self.next, None)
94 95 96
class _Scanner:
    """Start up Martel to do the scanning of the file.

    This initializes the Martel based parser and connects it to a handler
    that will generate events for a consumer.
    """

    def __init__(self, debug=0):
        """Build the Martel parser once and keep it on the instance.

        Creating the parser takes a long time, so it is cached here and
        reused by every call to feed().

        Arguments:
        o debug - The level of debugging that the parser should
        display. Level 0 is no debugging, Level 2 displays the most
        debugging info (but is much slower). See Martel documentation
        for more info on this.
        """
        # the only tags we want the Martel parser to report
        self.interest_tags = [
            'header_line',
            'system_line',
            'substance_multiline',
            'reactor_multiline',
            'include_line',
        ]

        # restrict the full ECell record expression to those tags
        selected = Martel.select_names(ecell_format.ecell_record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level=debug)

    def feed(self, handle, consumer):
        """Feed a set of data into the scanner.

        Arguments:
        o handle - A handle with the information to parse.
        o consumer - The consumer that should be informed of events.
        """
        # No error handler is installed on the parser.
        event_source = EventGenerator(consumer, self.interest_tags)
        self._parser.setContentHandler(event_source)
        self._parser.parseFile(handle)
136
class _RecordConsumer:
    """Create an ECell Record object from scanner generated information.

    The public methods carry the same names as the tags listed in
    _Scanner.interest_tags.  Each one folds the text it is handed into
    self.data, which starts out as a fresh Record.Record.
    """

    # Keys of the per-reactor counter dictionary.
    _COUNTER_KEYS = ('s_', 'p_', 'c_', 'e_', 'o_', 'arg_tag')

    # (column whose presence bumps a counter, counter key)
    _COUNTED_TAGS = (
        ('s_id', 's_'),
        ('p_id', 'p_'),
        ('c_id', 'c_'),
        ('e_id', 'e_'),
        ('o_type', 'o_'),
        ('arg_tag', 'arg_tag'),
    )

    def __init__(self):
        self.data = Record.Record()
        # lower-cased column names of the most recent header line
        self._header = []
        self._database = {}
        # lower-cased first column of the most recent header line
        self._state = ''

    def include_line(self, line):
        """Append line to the record's include buffer."""
        self.data.include_buf = self.data.include_buf + line

    def header_line(self, lines):
        """Remember the column names found in a header line.

        Each element of lines is split on tabs.  Every line replaces the
        header stored by the previous one, so only the last line counts.
        """
        for line in lines:
            items = line.split('\t')
            self._state = items[0].lower()
            self._header = [item.strip().lower() for item in items]

    def system_line(self, lines):
        """Add one system entry to the record for every line in lines.

        Raises ECellError("invalid header") when a line fails
        _check_missing_header.
        """
        for line in lines:
            line_dict = self._make_line_dict(line)
            if not self._check_missing_header(line_dict):
                # This used to raise the misspelt name 'EcellError', which
                # would have surfaced as a NameError instead.
                raise ECellError("invalid header")
            self.data.num_systems = self.data.num_systems + 1
            _set_defaults(line_dict)
            self._build_system_entry(line_dict)

    def substance_multiline(self, multiline):
        """Parse every element of multiline as a block of substance lines."""
        for block in multiline:
            self.parse_substance_lines(block)

    def parse_substance_lines(self, multiline):
        """Store the substance described by a block of text lines.

        A line whose 'qty' and 'conc' entries are both different from
        'fix' starts a new substance; any other line increments the line
        number used in the keys of the entries that follow.
        """
        line_no = 0
        for line in multiline.splitlines():
            line_dict = self._make_line_dict(line)
            if not _is_valid_substance(line_dict):
                # Best effort: report the problem and keep parsing.  The
                # old code raised and immediately caught an ECellError and
                # then printed the sys.stderr object itself to stdout.
                sys.stderr.write(
                    "quantity and concentration are mutually exclusive\n")

            qty = Record.get_entry(line_dict, 'qty')
            conc = Record.get_entry(line_dict, 'conc')
            if qty.lower() != 'fix' and conc.lower() != 'fix':
                self.data.num_substances = self.data.num_substances + 1
            else:
                line_no = line_no + 1
            if line.lower().startswith('substance'):
                _set_defaults(line_dict)
                self._convert_conc(line_dict)

            self._build_substance_entry(line_dict, line_no)

    def reactor_multiline(self, multiline):
        """Parse every element of multiline as a block of reactor lines."""
        for block in multiline:
            self.parse_reactor_lines(block)

    def parse_reactor_lines(self, multiline):
        """Store the reactor described by a block of text lines.

        A line that still has a 'class' entry after consistency filtering
        starts a new reactor and resets the per-reactor counters; every
        line then bumps the counter of each kind of entry it carries.

        Raises ECellError when a line starting with 'reactor' fails
        _check_missing_header, or when s_coeff / p_coeff is present and
        is not made of digits only.
        """
        # Start at zero so that a block whose first line lacks a 'class'
        # column does not hit unbound counters.
        counts = dict.fromkeys(self._COUNTER_KEYS, 0)
        for line in multiline.splitlines():
            line_dict = self._make_line_dict(line)
            is_reactor_line = line.lower().startswith('reactor')
            if is_reactor_line and not self._check_missing_header(line_dict):
                raise ECellError("invalid header")
            # Validate each coefficient on its own.  Previously a missing
            # s_coeff raised KeyError, which skipped the p_coeff check.
            for tag in ('s_coeff', 'p_coeff'):
                if not is_only_digits(line_dict.get(tag, '')):
                    raise ECellError(tag + ' must contain only digits')
            if is_reactor_line:
                _set_reactor_defaults(line_dict)
            line_dict = self._remove_if_inconsistent(line_dict)

            if 'class' in line_dict:
                self.data.num_reactors = self.data.num_reactors + 1
                counts = dict.fromkeys(self._COUNTER_KEYS, 0)
            for tag, counter_key in self._COUNTED_TAGS:
                if tag in line_dict:
                    counts[counter_key] = counts[counter_key] + 1
            # hand out a snapshot so later lines cannot alter it
            counter_dict = counts.copy()
            self._set_max(counter_dict)
            self._build_reactor_entry(line_dict, counter_dict)

    def _set_max(self, counter_dict):
        """Record the current counters under keys suffixed with the
        current reactor number."""
        suffix = str(self.data.num_reactors)
        for key, value in counter_dict.items():
            self.data._max_dict[key + suffix] = value

    def _store_once(self, composite_key, item):
        """Set cell_dict[composite_key] unless it already holds a value."""
        if composite_key not in self.data.cell_dict:
            self.data.cell_dict[composite_key] = item

    def _build_system_entry(self, line_dict):
        """Store line_dict under 'system<n><key>0' keys."""
        stem = 'system' + str(self.data.num_systems)
        for key, item in line_dict.items():
            self._store_once(stem + key + '0', item)

    def _build_substance_entry(self, line_dict, line_no):
        """Store line_dict under 'substance<n><key><line_no>' keys."""
        stem = 'substance' + str(self.data.num_substances)
        for key, item in line_dict.items():
            self._store_once(stem + key + str(line_no), item)

    def _convert_conc(self, line_dict):
        """Turn a concentration without a quantity into a QTY() entry.

        Raises KeyError if 'conc' is present but 'path' is not.
        """
        if 'conc' in line_dict and 'qty' not in line_dict:
            contents = 'QTY(%s,%s)' % (line_dict['conc'], line_dict['path'])
            composite_key = ('substance' + str(self.data.num_substances) +
                             'qty' + '0')
            self.data.cell_dict[composite_key] = contents
            # NOTE(review): assumed to belong to the conversion branch;
            # the original nesting of this assignment should be confirmed.
            self.data.contains_concentration = 1

    def _build_reactor_entry(self, line_dict, counter_dict):
        """Store line_dict under 'reactor<n><key><index>' keys.

        The index is the 'arg_tag' counter for keys starting with 'arg_',
        the counter of the key's two-character prefix when there is one,
        and '0' otherwise.
        """
        stem = 'reactor' + str(self.data.num_reactors)
        for key, item in line_dict.items():
            prefix = key[:2]
            if key.startswith('arg_'):
                index = counter_dict['arg_tag']
            elif prefix in counter_dict:
                index = counter_dict[prefix]
            else:
                index = '0'
            self._store_once(stem + str(key) + str(index), item)

    def _check_missing_header(self, line_dict):
        """Return 1 if the id/path/class columns are acceptable, else 0.

        A line with none of 'id', 'path' and 'class' is accepted.  Once
        any of the three is present, both 'id' and 'path' are required;
        'class' may always be left out.
        """
        present = [tag for tag in ('id', 'path', 'class') if tag in line_dict]
        if not present:
            return 1
        if 'id' in line_dict and 'path' in line_dict:
            return 1
        return 0

    def _remove_if_inconsistent(self, list_dict):
        """Delete incomplete s_/p_/c_/e_ column groups from list_dict.

        The dictionary is modified in place and also returned.
        """
        valid_keys = list(list_dict.keys())
        for label in ['id', 'path', 'type']:
            for prefix in ['s_', 'p_', 'c_', 'e_']:
                valid_keys = self._consistency_filter(
                    prefix, prefix + label, valid_keys)
        # iterate over a copy of the keys: entries are deleted on the way
        for key in list(list_dict.keys()):
            if key not in valid_keys:
                del list_dict[key]
        return list_dict

    def _consistency_filter(self, prefix, tag, valid_keys):
        """Drop a prefix group from valid_keys when tag is missing.

        If tag is absent, the id/path/coeff/type keys of that prefix are
        removed from valid_keys.  A missing '...type' tag is tolerated
        unless the prefix is 'o_'.  valid_keys is modified in place and
        returned.
        """
        if tag in valid_keys:
            return valid_keys
        if prefix != 'o_' and tag.endswith('type'):
            return valid_keys
        for suffix in ['id', 'path', 'coeff', 'type']:
            node = prefix + suffix
            if node in valid_keys:
                valid_keys.remove(node)
        return valid_keys

    def _make_line_dict(self, line):
        """Map the current header names to the non-empty columns of line.

        Columns are separated by tabs and stripped; empty columns are
        skipped but still occupy their position.  Raises IndexError if a
        non-empty column has no matching header entry.
        """
        line_dict = {}
        for num, item in enumerate(line.split('\t')):
            item = item.strip()
            if item != '':
                line_dict[self._header[num]] = item
        return line_dict
336
def _clear_bad_block(block, items):
    """Remove every label in block from the list items.

    The first occurrence of each label is removed in place and the same
    list is returned.  Raises ValueError if a label is not in items.

    The previous version rebound items to the result of list.remove()
    (always None) and passed remove() an index instead of the label, so
    it could not get past the first label.
    """
    for label in block:
        items.remove(label)
    return items
341
def _is_valid_substance(line_dict):
    """Return 1 if the qty/conc columns of a substance line agree, else 0.

    The line is rejected only when it has both a 'qty' and a 'conc'
    entry and the 'qty' entry is not the literal string 'QTY'.
    """
    # 'in' replaces dict.has_key(), which no longer exists in Python 3.
    if 'qty' in line_dict and 'conc' in line_dict:
        if line_dict['qty'] != 'QTY':
            return 0
    return 1
348
def is_only_digits(line):
    """Return 1 if line, once stripped, is empty or all digits; else 0."""
    stripped = line.strip()
    if stripped == '' or stripped.isdigit():
        return 1
    return 0
356
def _set_reactor_defaults(line_dict):
    """Fill in the default entries of a reactor line.

    After applying _set_defaults, every s_/p_/c_/e_ group that has an
    '<prefix>id' entry gets '<prefix>coeff' = 1 and '<prefix>path' set to
    the line's own 'path' when those entries are missing.  line_dict is
    modified in place and returned.

    Raises KeyError if a group path has to be defaulted and the line has
    no 'path' entry.
    """
    line_dict = _set_defaults(line_dict)
    for prefix in ['s_', 'p_', 'c_', 'e_']:
        # local names chosen so that the builtin id() is not shadowed;
        # 'in' replaces dict.has_key(), which is gone in Python 3
        id_key = prefix + 'id'
        coeff_key = prefix + 'coeff'
        path_key = prefix + 'path'
        if id_key in line_dict:
            if coeff_key not in line_dict:
                line_dict[coeff_key] = 1
            if path_key not in line_dict:
                line_dict[path_key] = line_dict['path']

    return line_dict
370
def _set_defaults(line_dict):
    """Fill in the default 'name' and 'arg_coeff' entries of a line.

    A missing 'name' is copied from 'id'; when 'arg_tag' is present a
    missing 'arg_coeff' becomes 0.  line_dict is modified in place and
    returned.

    Raises KeyError if both 'name' and 'id' are missing.
    """
    # 'in' replaces dict.has_key(), which no longer exists in Python 3.
    if 'name' not in line_dict:
        line_dict['name'] = line_dict['id']
    if 'arg_tag' in line_dict and 'arg_coeff' not in line_dict:
        line_dict['arg_coeff'] = 0

    return line_dict
379 380 381 382 383 384 385
class RecordParser:
    """Parse ECell files into Record objects.
    """

    def __init__(self, debug_level=0):
        """Create the scanner used by every call to parse().

        Arguments:
        o debug_level - An optional argument that specifies the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        """
        self._scanner = _Scanner(debug_level)

    def parse(self, handle):
        """Parse the specified handle into an ECell record.

        A new consumer is created for each call and kept on the instance
        as self._consumer; the data it collected is returned.
        """
        consumer = _RecordConsumer()
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
406