1 import string
2 import operator
3 from Bio import File
4 import Martel
5 from Martel.Dispatch import Dispatcher
6 from Martel import RecordReader
7 from mx import TextTools
8 from locus_format import locus_record
9
10 """Parser for NCBI's LocusLink, curated sequence and descriptive information
11 about genetic loci.
12
13 The LocusLink site is:
14 http://www.ncbi.nlm.nih.gov/LocusLink/
15 """
16
18
21
23 queue_keys = self.keys()
24 queue_keys.sort()
25 out = ''
26 for key in queue_keys:
27 out = out + '%s:\n' % key
28 out = out + self.print_item( self[ key ] )
29 out = out + '\n'
30 return out
31
33 indent = ' '
34 out = ''
35 for j in range( 0, level ):
36 indent = indent + ' '
37 if( type( item ) == type( '' ) ):
38 if( item != '' ):
39 out = out + '%s%s\n' % ( indent, item )
40 elif( type( item ) == type([])):
41 for subitem in item:
42 out = out + self.print_item( subitem, level + 1 )
43 out = out + '\n'
44 elif( isinstance( item, dict ) ):
45 keys = item.keys()
46 keys.sort()
47 for subitem in keys:
48 out = out + '%s %s:\n' % ( indent, subitem )
49 out = out + self.print_item( item[ subitem ], level + 1 )
50 out = out + '\n'
51 elif( type( item ) == type( {} ) ):
52 keys = item.keys()
53 keys.sort()
54 for subitem in keys:
55 out = out + '%s %s:\n' % ( indent, subitem )
56 out = out + self.print_item( item[ subitem ], level + 1 )
57 out = out + '\n'
58 else:
59 out = out + '%s\n' % str( item )
60 return out
61
63 """Iterator interface to move over a file of LocusLink entries one at a time.
64
65 """
def __init__(self, handle, parser = None):
    """Set up iteration over a handle containing LocusLink entries.

    Arguments:
    o handle - A handle with LocusLink entries to iterate through.
    o parser - An optional parser that each entry is passed through
    before being returned. If None, the raw entry is returned instead.
    """
    # Wrap the caller's handle once and share the wrapper with the reader.
    undo_handle = File.UndoHandle(handle)
    self.handle = undo_handle
    # NOTE(review): presumably splits the stream at lines starting with
    # '>>' -- confirm against Martel's RecordReader.StartsWith.
    self._reader = RecordReader.StartsWith(undo_handle, '>>')
    self._parser = parser
77
79 """Return the next LocusLink record from the handle.
80
81 Will return None if we ran out of records.
82 """
83 data = self._reader.next()
84 if self._parser is not None:
85 if data:
86 dumpfile = open( 'dump', 'w' )
87 dumpfile.write( data )
88 dumpfile.close()
89 return self._parser.parse(File.StringHandle(data))
90
91 return data
92
94 return iter(self.next, None)
95
97 """Start up Martel to do the scanning of the file.
98
99 This initializes the Martel based parser and connects it to a handler
100 that will generate events for a Feature Consumer.
101 """
103 """Initialize the scanner by setting up our caches.
104
105 Creating the parser takes a long time, so we want to cache it
106 to reduce parsing time.
107
108 Arguments:
109 o debug_level - The level of debugging that the parser should
110 display. Level 0 is no debugging, Level 2 displays the most
111 debugging info (but is much slower). See Martel documentation
112 for more info on this.
113 """
114
115
116 self.interest_tags = [ "locus_line", "accnum_block", "phenotype_block", "db_block" ]
117
118
119 expression = Martel.select_names( locus_format.locus_record, self.interest_tags)
120 self._parser = expression.make_parser(debug_level )
121
def feed(self, handle, consumer):
    """Feed a set of data into the scanner.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    # The consumer is told which tags to expect before parsing starts.
    consumer.set_interest_tags(self.interest_tags)

    # Attach the consumer as content handler, then run the parse.
    martel_parser = self._parser
    martel_parser.setContentHandler(consumer)
    martel_parser.parseFile(handle)
134
136 """Create a LocusLink Record object from scanner generated information.
137 """
140
143
144
147
150
171
174
178
181
185
188
192
194 lines = block.splitlines()
195 entry = {}
196 for line in lines:
197 cols = line.split( ':', 1 )
198
199 key = cols[ 0 ]
200 key = key.strip()
201 newval = cols[ 1 ]
202 newval = newval.strip()
203 entry[ key ] = newval
204
205 if not self.data.has_key( block_key ):
206 self.data[ block_key ] = [ entry, ]
207 else:
208
209 val = self.data[ block_key ]
210 val.append( entry )
211 self.data[ block_key ] = val
212
214 """Parse LocusLink files into Record objects
215 """
217 """Initialize the parser.
218
219 Arguments:
220 o debug_level - An optional argument that specifies the amount of
221 debugging information Martel should spit out. By default we have
222 no debugging info (the fastest way to do things), but if you want
223 you can set this as high as two and see exactly where a parse fails.
224 """
225 self._scanner = _Scanner(debug_level)
226
def parse(self, handle):
    """Parse the specified handle into a LocusLink record.

    Arguments:
    o handle - A handle with the entry to parse.

    Returns the data attribute of the consumer after the scanner has
    fed the handle's contents through it.
    """
    # A new consumer is created on every call and kept on the instance.
    self._consumer = _RecordConsumer()
    scanner = self._scanner
    scanner.feed(handle, self._consumer)
    return self._consumer.data
233