Package Bio :: Package Saf
[hide private]
[frames | no frames]

Source Code for Package Bio.Saf

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Parser for SAF (Simple Alignment Format). 
  6   
  7  http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html 
  8  """ 
  9   
 10  # standard library 
 11  import string 
 12  import array 
 13  import os 
 14  import re 
 15  import sgmllib 
 16  import urlparse 
 17   
 18  # XML from python 2.0 
 19  from xml.sax import handler 
 20  from xml.sax.handler import ContentHandler 
 21   
 22  # Martel 
 23  import Martel 
 24  from Martel import RecordReader 
 25  from Martel import Dispatch 
 26   
 27   
 28  from Bio.ParserSupport import EventGenerator 
 29  from Bio.ParserSupport import AbstractConsumer 
 30  from Bio import File 
 31  from Bio.Align.Generic import Alignment 
 32  import Bio.Alphabet 
 33  import saf_format 
 34  import Record 
 35   
 36   
class Iterator:
    """Iterator interface to move over a file of Saf entries one at a time.

    Each call to next() pulls one entry from the underlying record reader.
    Iterating over the object keeps calling next() and stops as soon as it
    returns None.
    """

    def __init__(self, handle, parser = None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with Saf entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        self.handle = File.UndoHandle(handle)
        self._reader = RecordReader.Everything(self.handle)
        self._parser = parser

    def next(self):
        """Return the next Saf record from the handle.

        Will return None if we ran out of records.

        When a parser was supplied and the reader produced non-empty data,
        the data is wrapped in a string handle and the parser's result is
        returned. Otherwise the raw data (possibly empty or None) is
        returned unchanged.
        """
        data = self._reader.next()

        # Empty data is never handed to the parser; it falls through and is
        # returned as-is so that iteration can terminate on None.
        # No copy of the record is written to disk here: iterating over a
        # file must not create or overwrite files in the working directory.
        if self._parser is not None and data:
            return self._parser.parse(File.StringHandle(data))

        return data

    def __iter__(self):
        """Iterate by calling next() until it returns None."""
        return iter(self.next, None)
70
71 -class _Scanner:
72 """Start up Martel to do the scanning of the file. 73 74 This initialzes the Martel based parser and connects it to a handler 75 that will generate events for a Feature Consumer. 76 """
77 - def __init__(self, debug = 0):
78 """Initialize the scanner by setting up our caches. 79 80 Creating the parser takes a long time, so we want to cache it 81 to reduce parsing time. 82 83 Arguments: 84 o debug - The level of debugging that the parser should 85 display. Level 0 is no debugging, Level 2 displays the most 86 debugging info (but is much slower). See Martel documentation 87 for more info on this. 88 """ 89 # a listing of all tags we are interested in scanning for 90 # in the MartelParser 91 self.interest_tags = [ 'candidate_line', 'saf_record' ] 92 93 # make a parser that returns only the tags we are interested in 94 expression = Martel.select_names( saf_format.saf_record, self.interest_tags) 95 self._parser = expression.make_parser(debug_level = debug)
96
97 - def feed(self, handle, consumer):
98 """Feed a set of data into the scanner. 99 100 Arguments: 101 o handle - A handle with the information to parse. 102 o consumer - The consumer that should be informed of events. 103 """ 104 consumer.set_interest_tags( self.interest_tags ) 105 self._parser.setContentHandler( consumer ) 106 # self._parser.setErrorHandler(handle.ErrorHandler()) 107 108 self._parser.parseFile(handle)
109
class _RecordConsumer( Dispatch.Dispatcher ):
    """Create a Saf Record object from scanner generated information.

    Candidate lines are accumulated per sequence name while a record is
    being scanned; when the record ends, the collected sequences are added
    to self.data.alignment in the order the names were first seen.
    """

    def __init__(self):
        Dispatch.Dispatcher.__init__(self)
        self.data = Record.Record()
        self._refresh()

    def _refresh(self):
        """Reset all per-record accumulation state."""
        self._sequences = {}    # name -> sequence text concatenated so far
        self._names = {}        # ordinal -> name, in order of first appearance
        self._history = []      # names that already have an ordinal
        self._guide = ''        # name found on the first candidate line
        self._ref_length = 0    # length of the latest chunk seen for the guide
        self._ordinal = 0       # ordinal to assign to the next new name

    def set_interest_tags(self, interest_tags):
        """Remember the tags the scanner will report."""
        self.interest_tags = interest_tags

    def startDocument(self):
        """Start with a fresh Record and empty accumulation state."""
        self.data = Record.Record()
        self._refresh()

    def start_candidate_line(self, name, attrs):
        """Begin capturing the characters of a candidate line."""
        # save_characters/get_characters are not defined in this class;
        # presumably inherited from Dispatch.Dispatcher.
        self.save_characters()

    def end_candidate_line(self, candidate_lines):
        """Split a captured candidate line into name and sequence chunk.

        The name is everything before the first space; the remainder, with
        spaces removed, is appended to whatever has already been collected
        for that name.
        """
        candidate_line = self.get_characters()
        name = candidate_line.split(' ')[0]
        sequence = candidate_line[len(name):]
        name = name.strip()
        sequence = sequence.replace(" ", "")

        # The first name encountered becomes the guide; the reference
        # length is refreshed each time the guide shows up again.
        if self._guide == '':
            self._guide = name
        if name == self._guide:
            self._ref_length = len(sequence)

        # Give each name an ordinal only the first time it is seen, so
        # that later chunks of the same name do not create new entries.
        if name not in self._history:
            self._names[self._ordinal] = name
            self._ordinal = self._ordinal + 1
            self._history.append(name)

        sequence = sequence.strip()
        self._sequences[name] = self._sequences.get(name, '') + sequence

    def start_saf_record(self, sequence, attrs):
        """Clear the collected sequences at the start of a record."""
        self._sequences = {}

    def end_saf_record(self, saf_record):
        """Add the collected sequences to the alignment and reset state."""
        # sorted() yields the ordinals in ascending order whether
        # dict.keys() returns a list or a view.
        for ordinal in sorted(self._names):
            name = self._names[ordinal]
            self.data.alignment.add_sequence(name, self._sequences[name])
        self._refresh()
173
class RecordParser:
    """Turn Saf files into Record objects.
    """

    def __init__(self, debug_level = 0):
        """Set up the parser with its scanner.

        Arguments:
        o debug_level - An optional argument that specifies the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        """
        scanner = _Scanner(debug_level)
        self._scanner = scanner

    def parse(self, handle):
        """Scan the given handle and return the resulting SAF record.
        """
        # a new consumer is created for every parse and kept on the instance
        consumer = _RecordConsumer()
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
194