Package Bio :: Package Saf
[hide private]
[frames | no frames]

Source Code for Package Bio.Saf

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Parser for SAF (Simple Alignment Format). 
  6   
  7  http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html 
  8  """ 
  9   
 10  # Martel 
 11  import Martel 
 12  from Martel import RecordReader 
 13  from Martel import Dispatch 
 14   
 15   
 16  from Bio import File 
 17  import saf_format 
 18  import Record 
 19   
 20   
class Iterator:
    """Iterator interface to move over a file of Saf entries one at a time.

    Entries are returned raw unless a parser was supplied, in which case
    each non-empty entry is passed through ``parser.parse`` first.
    """

    def __init__(self, handle, parser = None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with Saf entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        self.handle = File.UndoHandle(handle)
        self._reader = RecordReader.Everything(self.handle)
        self._parser = parser

    def next(self):
        """Return the next Saf record from the handle.

        The raw entry comes from the underlying record reader.  If there is
        no parser, or the reader produced nothing (an empty/None value), that
        value is returned unchanged; callers iterating with ``iter(self)``
        rely on None to mark the end of the data.  Otherwise the entry is
        wrapped in a string handle and the parser's result is returned.
        """
        data = self._reader.next()

        # Nothing to parse: either raw mode, or the reader is exhausted.
        if self._parser is None or not data:
            return data

        # The entry is handed to the parser in memory only; no copy of it
        # is written to disk.
        return self._parser.parse(File.StringHandle(data))

    def __iter__(self):
        """Return an iterator that calls next() until it yields None."""
        return iter(self.next, None)
54
55 -class _Scanner:
56 """Start up Martel to do the scanning of the file. 57 58 This initialzes the Martel based parser and connects it to a handler 59 that will generate events for a Feature Consumer. 60 """
61 - def __init__(self, debug = 0):
62 """Initialize the scanner by setting up our caches. 63 64 Creating the parser takes a long time, so we want to cache it 65 to reduce parsing time. 66 67 Arguments: 68 o debug - The level of debugging that the parser should 69 display. Level 0 is no debugging, Level 2 displays the most 70 debugging info (but is much slower). See Martel documentation 71 for more info on this. 72 """ 73 # a listing of all tags we are interested in scanning for 74 # in the MartelParser 75 self.interest_tags = [ 'candidate_line', 'saf_record' ] 76 77 # make a parser that returns only the tags we are interested in 78 expression = Martel.select_names( saf_format.saf_record, self.interest_tags) 79 self._parser = expression.make_parser(debug_level = debug)
80
81 - def feed(self, handle, consumer):
82 """Feed a set of data into the scanner. 83 84 Arguments: 85 o handle - A handle with the information to parse. 86 o consumer - The consumer that should be informed of events. 87 """ 88 consumer.set_interest_tags( self.interest_tags ) 89 self._parser.setContentHandler( consumer ) 90 # self._parser.setErrorHandler(handle.ErrorHandler()) 91 92 self._parser.parseFile(handle)
93
class _RecordConsumer( Dispatch.Dispatcher ):
    """Create a Saf Record object from scanner generated information.

    Candidate lines are accumulated per sequence name; when the enclosing
    saf_record ends, the sequences are added to the record's alignment in
    the order their names were first seen.
    """

    def __init__(self ):
        Dispatch.Dispatcher.__init__( self )
        self.data = Record.Record()
        self._refresh()

    def _refresh( self ):
        """Reset all per-record bookkeeping to its empty state."""
        self._sequences = {}    # name -> sequence text accumulated so far
        self._names = {}        # ordinal (first-seen order) -> name
        self._history = []      # names that already have an ordinal
        self._guide = ''        # name found on the first candidate line
        self._ref_length = 0    # length of the latest guide-line sequence;
                                # written here but not read in this class
        self._ordinal = 0       # next ordinal to hand out

    def set_interest_tags( self, interest_tags ):
        """Remember the tags the scanner will report."""
        self.interest_tags = interest_tags

    def startDocument(self):
        """Start a fresh Record and clear the bookkeeping."""
        self.data = Record.Record()
        self._refresh()

    def start_candidate_line(self, name, attrs):
        """Begin capturing the characters of a candidate line."""
        self.save_characters()

    def end_candidate_line(self, candidate_lines ):
        """Split a captured line into name and sequence and store them.

        The name is everything up to the first space; the rest of the line,
        with spaces removed and surrounding whitespace stripped, is appended
        to whatever sequence text is already stored under that name.
        """
        candidate_line = self.get_characters()
        name = candidate_line.split( ' ' )[ 0 ]
        sequence = candidate_line[ len( name ): ]
        name = name.strip()
        sequence = sequence.replace( " ", "" )

        if self._guide == '':
            # The first name encountered becomes the guide.
            self._guide = name
            self._ref_length = len( sequence )
        elif name == self._guide:
            self._ref_length = len( sequence )

        # Give each name an ordinal the first time it appears only, so a
        # name repeated on later lines is not registered twice.
        if name not in self._history:
            self._names[ self._ordinal ] = name
            self._ordinal = self._ordinal + 1
            self._history.append( name )

        sequence = sequence.strip()
        self._sequences[ name ] = self._sequences.get( name, '' ) + sequence

    def start_saf_record( self, sequence, attrs ):
        """Discard any sequence text left over before a new record."""
        self._sequences = {}

    def end_saf_record( self, saf_record ):
        """Add the collected sequences to the alignment, then reset."""
        # sorted() works whether keys() returns a list or a view.
        for ordinal in sorted( self._names ):
            name = self._names[ ordinal ]
            self.data.alignment.add_sequence( name, self._sequences[ name ] )
        self._refresh()
157
class RecordParser:
    """Turn SAF data into Record objects.

    Holds a single scanner and runs it with a fresh consumer on each
    parse() call.
    """

    def __init__(self, debug_level = 0):
        """Create the scanner used by every parse() call.

        Arguments:
        o debug_level - Optional amount of debugging output requested from
        Martel; it is passed straight to the scanner.  The original
        documentation describes the default of 0 as no debugging output
        (the fastest) and 2 as the most detailed.
        """
        self._scanner = _Scanner(debug_level)

    def parse(self, handle):
        """Parse the specified handle and return the resulting record.

        A new consumer is created for the call, kept on the instance as
        ``_consumer``, and its ``data`` attribute is returned once the
        scanner has finished feeding it.
        """
        consumer = _RecordConsumer()
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
178