1
2
3
4
5
6
7 import sys
8 import string
9 import copy
10 import array
11 import os
12 import re
13 import sgmllib
14 import urlparse
15
16
17 from xml.sax import handler
18
19
20 import Martel
21 from Martel import RecordReader
22
23 from Bio.ParserSupport import EventGenerator
24 from Bio.ParserSupport import AbstractConsumer
25 from Bio import File
26 from Bio.Align.Generic import Alignment
27 import Bio.Alphabet
28 import ecell_format
29 import Record
30
31 """
32 Ecell converts the ECell input from spreadsheet format to an intermediate format, described in
33 http://www.e-cell.org/manual/chapter2E.html#3.2. It provides an alternative to the perl script
34 supplied with the Ecell2 distribution at http://bioinformatics.org/project/?group_id=49.
35
36 ECell expects a spreadsheet exported in delimited text format. The file should be read with
37 FilteredReader using the default filter chain to remove extraneous characters.
38 """
39
45
47
48 """
49 message - description of error
50 """
51
54
55
56
58 """Iterator interface to move over a file of ecell entries one at a time.
59 """
def __init__(self, handle, parser=None):
    """Set up iteration over a handle containing ECell entries.

    Arguments:
    o handle - A handle with ECell entries to iterate through. It is
    wrapped in a File.UndoHandle before being given to the reader.
    o parser - An optional parser that each entry is passed through
    before it is returned. When it is None the raw entry is returned.
    """
    undo_handle = File.UndoHandle(handle)
    self.handle = undo_handle
    # The reader is built on the wrapped handle, not the caller's original.
    self._reader = RecordReader.Everything(undo_handle)
    self._parser = parser
71
73 """Return the next ecell record from the handle.
74
75 Will return None if we ran out of records.
76 """
77 data = self._reader.next()
78
79 if self._parser is not None:
80 if data:
81 dumpfile = open( 'dump', 'w' )
82 dumpfile.write( data )
83 dumpfile.close()
84 return self._parser.parse(File.StringHandle(data))
85
86 return data
87
89 return iter(self.next, None)
90
91
92
94 """Start up Martel to do the scanning of the file.
95
96 This initializes the Martel based parser and connects it to a handler
97 that will generate events for a Feature Consumer.
98 """
100 """Initialize the scanner by setting up our caches.
101
102 Creating the parser takes a long time, so we want to cache it
103 to reduce parsing time.
104
105 Arguments:
106 o debug - The level of debugging that the parser should
107 display. Level 0 is no debugging, Level 2 displays the most
108 debugging info (but is much slower). See Martel documentation
109 for more info on this.
110 """
111
112
113 self.interest_tags = [ 'header_line', 'system_line', 'substance_multiline', \
114 'reactor_multiline', 'include_line' ]
115
116
117 expression = Martel.select_names( ecell_format.ecell_record, self.interest_tags)
118 self._parser = expression.make_parser(debug_level = debug)
119
def feed(self, handle, consumer):
    """Run the scanner over a handle, reporting events to a consumer.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    # Only the tags listed in self.interest_tags are forwarded to the consumer.
    event_handler = EventGenerator(consumer, self.interest_tags)
    parser = self._parser
    parser.setContentHandler(event_handler)
    parser.parseFile(handle)
132
134 """Create an ECell Record object from scanner generated information.
135 """
141
143 self.data.include_buf = self.data.include_buf + line
144
154
155
164
165
169
171 lines = multiline.splitlines()
172 line_no = 0
173 for line in lines:
174 line_dict = self._make_line_dict( line )
175 try:
176 if( not _is_valid_substance( line_dict ) ):
177 raise ECellError( "quantity and concentration are mutually exclusive" )
178 except ECellError, e:
179 print sys.stderr, e.message
180
181 qty = Record.get_entry( line_dict, 'qty' )
182 conc = Record.get_entry( line_dict, 'conc' )
183 if( ( qty.lower() != 'fix' ) and ( conc.lower() != 'fix' ) ):
184 self.data.num_substances = self.data.num_substances + 1
185 else:
186 line_no = line_no + 1
187 if( line.lower().startswith( 'substance' ) ):
188 _set_defaults( line_dict )
189 self._convert_conc( line_dict )
190
191 self._build_substance_entry( line_dict, line_no )
192
196
198 lines = multiline.splitlines()
199 for line in lines:
200 line_dict = self._make_line_dict( line )
201 if( line.lower().startswith( 'reactor' ) ):
202 if( not self._check_missing_header( line_dict ) ):
203 raise ECellError( "invalid header" )
204 try:
205 if( not is_only_digits( line_dict[ 's_coeff' ] ) ):
206 raise ECellError( 's_coeff must contain only digits' )
207 if( not is_only_digits( line_dict[ 'p_coeff' ] ) ):
208 raise ECellError( 'p_coeff must contain only digits' )
209 except KeyError:
210 pass
211 if( line.lower().startswith( 'reactor' ) ):
212 _set_reactor_defaults( line_dict )
213 line_dict = self._remove_if_inconsistent( line_dict )
214
215 if( line_dict.has_key( 'class' ) ):
216 self.data.num_reactors = self.data.num_reactors + 1
217 num_substrates = 0
218 num_products = 0
219 num_catalysts = 0
220 num_effectors = 0
221 num_options = 0
222 num_args = 0
223 if( line_dict.has_key( 's_id' ) ): num_substrates = num_substrates + 1
224 if( line_dict.has_key( 'p_id' ) ): num_products = num_products + 1
225 if( line_dict.has_key( 'c_id' ) ): num_catalysts = num_catalysts + 1
226 if( line_dict.has_key( 'e_id' ) ): num_effectors = num_effectors + 1
227 if( line_dict.has_key( 'o_type' ) ): num_options = num_options + 1
228 if( line_dict.has_key( 'arg_tag' ) ): num_args = num_args + 1
229 counter_dict = { \
230 's_' : num_substrates, \
231 'p_' : num_products, \
232 'c_' : num_catalysts, \
233 'e_' : num_effectors, \
234 'o_' : num_options, \
235 'arg_tag' : num_args
236 }
237 self._set_max( counter_dict )
238 self._build_reactor_entry( line_dict, counter_dict )
239
240
242 num_reactors = self.data.num_reactors
243 for key in counter_dict.keys():
244 composite_key = key + str( num_reactors )
245 self.data._max_dict[ composite_key ] = counter_dict[ key ]
246
247 - def _build_system_entry( self, line_dict ):
248 for key in line_dict.keys():
249 item = line_dict[ key ]
250 composite_key = 'system' + str( self.data.num_systems ) + key + '0'
251
252 if( not self.data.cell_dict.has_key( composite_key ) ):
253 self.data.cell_dict[ composite_key ] = item
254
255 - def _build_substance_entry( self, line_dict, line_no ):
256 for key in line_dict.keys():
257 item = line_dict[ key ]
258 composite_key = 'substance' + str( self.data.num_substances ) + key + \
259 str( line_no )
260 if( not self.data.cell_dict.has_key( composite_key ) ):
261 self.data.cell_dict[ composite_key ] = item
262
264 if( line_dict.has_key( 'conc' ) ):
265 if( not line_dict.has_key( 'qty' ) ):
266 contents = 'QTY(%s,%s)' % ( line_dict[ 'conc' ], line_dict[ 'path' ] )
267 composite_key = 'substance' + str( self.data.num_substances ) + 'qty' + '0'
268 self.data.cell_dict[ composite_key ] = contents
269 self.data.contains_concentration = 1
270
271 - def _build_reactor_entry( self, line_dict, counter_dict ):
272 for key in line_dict.keys():
273 item = line_dict[ key ]
274 prefix = key[ :2 ]
275 if( key.startswith( 'arg_' ) ):
276 index = counter_dict[ 'arg_tag' ]
277 elif( counter_dict.has_key( prefix ) ):
278 index = counter_dict[ prefix ]
279 else:
280 index = '0'
281 composite_key = 'reactor' + str( self.data.num_reactors ) + str( key ) + str( index )
282 if( not self.data.cell_dict.has_key( composite_key ) ):
283 self.data.cell_dict[ composite_key ] = item
284
285
287 ok = 1
288 items = [ 'id', 'path', 'class' ]
289 for item in items:
290 if( line_dict.has_key( item ) == 0 ):
291 others = copy.deepcopy( items )
292 others.remove( item )
293 for other in others:
294 if( line_dict.has_key( other ) ):
295 if( item.lower() != 'class' ):
296 ok = 0
297 break
298 return ok
299
301 valid_keys = list_dict.keys()
302 for label in [ 'id', 'path', 'type' ]:
303 for prefix in [ 's_', 'p_', 'c_', 'e_' ]:
304 node = prefix + label
305 valid_keys = self._consistency_filter( prefix, node, valid_keys )
306 for key in list_dict.keys():
307 if( not key in valid_keys ):
308 del list_dict[ key ]
309 return list_dict
310
312 block = []
313 for suffix in [ 'id', 'path', 'coeff', 'type' ]:
314 node = prefix + suffix
315 block.append( node )
316 for node in block:
317 if( ( not tag in valid_keys ) and ( node in valid_keys ) ):
318 if( ( prefix == 'o_' ) or ( not tag.endswith( 'type' ) ) ):
319 valid_keys.remove( node )
320 return valid_keys
321
323 line_dict = {}
324 items = line.split( '\t' )
325 num = 0
326 for item in items:
327 item = item.strip()
328 if( item != '' ):
329 line_dict[ self._header[ num ] ] = item
330 num = num + 1
331 return line_dict
332
337
339 ok = 1
340 if( line_dict.has_key( 'qty' ) and line_dict.has_key( 'conc' ) ):
341 if( not ( line_dict[ 'qty' ] == 'QTY' ) ):
342 ok = 0
343 return ok
344
346 ok = 1
347 text = line.strip()
348 if( text != '' ):
349 if( not text.isdigit() ):
350 ok = 0
351 return ok
352
354 line_dict = _set_defaults( line_dict )
355 for item in [ 's_', 'p_', 'c_', 'e_' ]:
356 id = item + 'id'
357 coeff = item + 'coeff'
358 path = item + 'path'
359 if( line_dict.has_key( id ) ):
360 if( not line_dict.has_key( coeff ) ):
361 line_dict[ coeff ] = 1
362 if( not line_dict.has_key( path ) ):
363 line_dict[ path ] = line_dict[ 'path' ]
364
365 return( line_dict )
366
368 if( not line_dict.has_key( 'name' ) ):
369 line_dict[ 'name' ] = line_dict[ 'id' ]
370 if( line_dict.has_key( 'arg_tag' ) ):
371 if( not line_dict.has_key( 'arg_coeff' ) ):
372 line_dict[ 'arg_coeff' ] = 0
373
374 return( line_dict )
375
376
377
378
379
380
381
383 """Parse ECell files into Record objects
384 """
386 """Initialize the parser.
387
388 Arguments:
389 o debug_level - An optional argument that specifies the amount of
390 debugging information Martel should spit out. By default we have
391 no debugging info (the fastest way to do things), but if you want
392 you can set this as high as two and see exactly where a parse fails.
393 """
394 self._scanner = _Scanner(debug_level)
395
def parse(self, handle):
    """Scan the given handle and return the resulting ECell record.

    A new _RecordConsumer is created for every call, kept on
    self._consumer, and fed by the scanner; its data attribute is
    returned.
    """
    consumer = _RecordConsumer()
    self._consumer = consumer
    self._scanner.feed(handle, consumer)
    return self._consumer.data
402