1
2
3
4
5 """Parser for SAF (Simple Alignment Format).
6
7 http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html
8 """
9
10
11 import string
12 import array
13 import os
14 import re
15 import sgmllib
16 import urlparse
17
18
19 from xml.sax import handler
20 from xml.sax.handler import ContentHandler
21
22
23 import Martel
24 from Martel import RecordReader
25 from Martel import Dispatch
26
27
28 from Bio.ParserSupport import EventGenerator
29 from Bio.ParserSupport import AbstractConsumer
30 from Bio import File
31 from Bio.Align.Generic import Alignment
32 import Bio.Alphabet
33 import saf_format
34 import Record
35
36
38 """Iterator interface to move over a file of Saf entries one at a time.
39 """
def __init__(self, handle, parser=None):
    """Set up iteration over a handle containing Saf entries.

    Arguments:
    o handle - A handle with Saf entries to iterate through.
    o parser - An optional parser to pass the entries through before
    returning them. If None, then the raw entry will be returned.
    """
    # The caller's handle is wrapped in File.UndoHandle; the same wrapper
    # is kept on the instance and given to the record reader.
    undo_handle = File.UndoHandle(handle)
    self.handle = undo_handle
    self._reader = RecordReader.Everything(undo_handle)
    self._parser = parser
51
53 """Return the next Saf record from the handle.
54
55 Will return None if we ran out of records.
56 """
57 data = self._reader.next()
58
59 if self._parser is not None:
60 if data:
61 dumpfile = open( 'dump', 'w' )
62 dumpfile.write( data )
63 dumpfile.close()
64 return self._parser.parse(File.StringHandle(data))
65
66 return data
67
69 return iter(self.next, None)
70
72 """Start up Martel to do the scanning of the file.
73
74 This initializes the Martel based parser and connects it to a handler
75 that will generate events for a Feature Consumer.
76 """
78 """Initialize the scanner by setting up our caches.
79
80 Creating the parser takes a long time, so we want to cache it
81 to reduce parsing time.
82
83 Arguments:
84 o debug - The level of debugging that the parser should
85 display. Level 0 is no debugging, Level 2 displays the most
86 debugging info (but is much slower). See Martel documentation
87 for more info on this.
88 """
89
90
91 self.interest_tags = [ 'candidate_line', 'saf_record' ]
92
93
94 expression = Martel.select_names( saf_format.saf_record, self.interest_tags)
95 self._parser = expression.make_parser(debug_level = debug)
96
def feed(self, handle, consumer):
    """Run the scanner's parser over a handle, reporting to a consumer.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    # The consumer is told which tags are of interest before it is
    # installed as the parser's content handler.
    consumer.set_interest_tags(self.interest_tags)

    martel_parser = self._parser
    martel_parser.setContentHandler(consumer)
    martel_parser.parseFile(handle)
109
111 """Create a Saf Record object from scanner generated information.
112 """
117
119 self._sequences = {}
120 self._names = {}
121 self._history = []
122 self._guide = ''
123 self._ref_length = 0
124 self._ordinal = 0
125
128
132
133
136
161
164
173
175 """Parse Saf files into Record objects
176 """
178 """Initialize the parser.
179
180 Arguments:
181 o debug_level - An optional argument that specifies the amount of
182 debugging information Martel should spit out. By default we have
183 no debugging info (the fastest way to do things), but if you want
184 you can set this as high as two and see exactly where a parse fails.
185 """
186 self._scanner = _Scanner(debug_level)
187
def parse(self, handle):
    """Parse the specified handle into a SAF record.

    A new _RecordConsumer is created on every call, stored on
    self._consumer, fed by the scanner, and its data attribute is
    returned.
    """
    consumer = _RecordConsumer()
    # Stored before scanning so the attribute is set even if feed raises.
    self._consumer = consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
194