1
2
3 from xml.sax import handler
4 from Martel import Parser, Dispatch
5 from Bio import Std, Decode
6
7
8
9
10
12 assert not hasattr(klass, "start_" +tag), "existing method exists"
13 assert not hasattr(klass, "end_" +tag), "existing method exists"
14 s = """if 1:
15 def start(self, tag, attrs):
16 self.save_characters()
17 def end(self, tag):
18 self.%s = int(self.get_characters())
19 """ % attrname
20 d = {}
21 exec s in d
22 setattr(klass, "start_" + tag, d["start"])
23 setattr(klass, "end_" + tag, d["end"])
24
def add_text_handler(klass, tag, attrname):
    """Add start/end handlers to klass which store an element's text.

    Two methods are generated and attached to klass:
      start_<tag>(self, tag, attrs) -- calls self.save_characters()
      end_<tag>(self, tag) -- stores self.get_characters() in self.<attrname>

    klass -- the class which receives the generated methods
    tag -- element name, used to build the two method names
    attrname -- attribute assigned by the end handler; it is pasted into
        the generated source, so it must be a valid attribute name

    Raises AssertionError if klass already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"
    # The handlers are built from source text so that the attribute name is
    # compiled in as an ordinary attribute assignment.
    source = (
        "def start(self, tag, attrs):\n"
        "    self.save_characters()\n"
        "def end(self, tag):\n"
        "    self.%s = self.get_characters()\n"
    ) % attrname
    namespace = {}
    # Call form of exec: accepted by Python 2 (as "exec source in namespace")
    # and by Python 3, unlike the statement-only spelling.
    exec(source, namespace)
    setattr(klass, "start_" + tag, namespace["start"])
    setattr(klass, "end_" + tag, namespace["end"])
38
def add_text_dict_handler(klass, tag, attrname, key):
    """Add start/end handlers to klass which store text in a mapping.

    Two methods are generated and attached to klass:
      start_<tag>(self, tag, attrs) -- calls self.save_characters()
      end_<tag>(self, tag) -- stores self.get_characters() in
          self.<attrname>[<key>]

    klass -- the class which receives the generated methods
    tag -- element name, used to build the two method names
    attrname -- attribute holding the mapping; it is pasted into the
        generated source, so it must be a valid attribute name
    key -- the key to store the text under; it is converted to its string
        form, as before

    Raises AssertionError if klass already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"
    # Embed the key as a proper string literal.  Pasting it between double
    # quotes breaks the generated code when the key itself contains a quote
    # or a backslash.
    key_literal = repr("%s" % (key,))
    source = (
        "def start(self, tag, attrs):\n"
        "    self.save_characters()\n"
        "def end(self, tag):\n"
        "    self.%s[%s] = self.get_characters()\n"
    ) % (attrname, key_literal)
    namespace = {}
    # Call form of exec: accepted by Python 2 (as "exec source in namespace")
    # and by Python 3, unlike the statement-only spelling.
    exec(source, namespace)
    setattr(klass, "start_" + tag, namespace["start"])
    setattr(klass, "end_" + tag, namespace["end"])
52
def add_text_decode_handler(klass, tag, attrname):
    """Add start/end handlers to klass which store optionally decoded text.

    Two methods are generated and attached to klass:
      start_<tag>(self, tag, attrs) -- calls self.save_characters() and
          remembers the element's "bioformat:decode" attribute (None when
          absent) in self._decode_<tag>
      end_<tag>(self, tag) -- takes self.get_characters(), passes it through
          Decode.make_decoder(<decode attribute>) when one was given, and
          stores the result in self.<attrname>

    klass -- the class which receives the generated methods
    tag -- element name; it is pasted into an attribute name in the
        generated source, so it must consist of identifier characters
    attrname -- attribute assigned by the end handler (valid identifier)

    Raises AssertionError if klass already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"
    # The text must be fetched *before* decoding and the decoded value is
    # what gets stored.  Previously the end handler decoded an undefined
    # local and then stored the raw text regardless.
    source = (
        "def start(self, tag, attrs):\n"
        "    self.save_characters()\n"
        "    self._decode_%(tag)s = attrs.get(\"bioformat:decode\", None)\n"
        "def end(self, tag):\n"
        "    s = self.get_characters()\n"
        "    if self._decode_%(tag)s is not None:\n"
        "        s = Decode.make_decoder(self._decode_%(tag)s)(s)\n"
        "    self.%(attrname)s = s\n"
    ) % {"tag": tag, "attrname": attrname}
    # The generated code looks up Decode as a global.
    namespace = {"Decode": Decode}
    # Call form of exec: accepted by Python 2 (as "exec source in namespace")
    # and by Python 3, unlike the statement-only spelling.
    exec(source, namespace)
    setattr(klass, "start_" + tag, namespace["start"])
    setattr(klass, "end_" + tag, namespace["end"])
69
def add_first_text_handler(klass, tag, attrname):
    """Add start/end handlers to klass which keep only the first text seen.

    Two methods are generated and attached to klass:
      start_<tag>(self, tag, attrs) -- calls self.save_characters(), but
          only while self.<attrname> is None
      end_<tag>(self, tag) -- stores self.get_characters() in
          self.<attrname>, but only while self.<attrname> is None

    The instance must therefore already have <attrname> (set to None) before
    the first element is handled.

    klass -- the class which receives the generated methods
    tag -- element name, used to build the two method names
    attrname -- attribute tested and assigned; it is pasted into the
        generated source, so it must be a valid attribute name

    Raises AssertionError if klass already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"
    source = (
        "def start(self, tag, attrs):\n"
        "    if self.%(attr)s is None:\n"
        "        self.save_characters()\n"
        "def end(self, tag):\n"
        "    if self.%(attr)s is None:\n"
        "        self.%(attr)s = self.get_characters()\n"
    ) % {"attr": attrname}
    namespace = {}
    # Call form of exec: accepted by Python 2 (as "exec source in namespace")
    # and by Python 3, unlike the statement-only spelling.
    exec(source, namespace)
    setattr(klass, "start_" + tag, namespace["start"])
    setattr(klass, "end_" + tag, namespace["end"])
85
def add_text_block_handler(klass, tag, joinattr, defaultjoin, attrname):
    """Add handlers to klass which join the text lines of a block element.

    Four methods are generated and attached to klass:
      start_<tag>_block(self, tag, attrs) -- builds a join function with
          Decode.make_decoder from the block's <joinattr> attribute
          (<defaultjoin> when the attribute is absent) and starts an empty
          list of lines
      end_<tag>_block(self, tag) -- applies the join function to the
          collected lines and stores the result in self.<attrname>
      start_<tag>(self, tag, attrs) -- calls self.save_characters()
      end_<tag>(self, tag) -- appends self.get_characters() to the lines

    klass -- the class which receives the generated methods
    tag -- element name; it is pasted into attribute names in the
        generated source, so it must consist of identifier characters
    joinattr -- name of the block attribute which selects the decoder
    defaultjoin -- decoder description used when joinattr is not present
    attrname -- attribute assigned by the block end handler

    Raises AssertionError if klass already has any of the four methods.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"
    assert not hasattr(klass, "start_" + tag + "_block"), \
        "existing method exists"
    assert not hasattr(klass, "end_" + tag + "_block"), \
        "existing method exists"
    # Explicit substitution table instead of locals(), so the template does
    # not depend on the names of this function's local variables.
    fields = {
        "tag": tag,
        "joinattr": joinattr,
        "defaultjoin": defaultjoin,
        "attrname": attrname,
    }
    source = (
        "def start_block(self, tag, attrs):\n"
        "    self._%(tag)s_join_func = Decode.make_decoder("
        "attrs.get(%(joinattr)r, %(defaultjoin)r))\n"
        "    self._%(tag)s_lines = []\n"
        "def end_block(self, tag):\n"
        "    self.%(attrname)s = "
        "self._%(tag)s_join_func(self._%(tag)s_lines)\n"
        "def start(self, tag, attrs):\n"
        "    self.save_characters()\n"
        "def end(self, tag):\n"
        "    self._%(tag)s_lines.append(self.get_characters())\n"
    ) % fields
    # The generated code looks up Decode as a global.
    namespace = {"Decode": Decode}
    # Call form of exec: accepted by Python 2 (as "exec source in namespace")
    # and by Python 3, unlike the statement-only spelling.
    exec(source, namespace)
    setattr(klass, "start_" + tag, namespace["start"])
    setattr(klass, "end_" + tag, namespace["end"])
    setattr(klass, "start_" + tag + "_block", namespace["start_block"])
    setattr(klass, "end_" + tag + "_block", namespace["end_block"])
108
110 assert not hasattr(klass, "start_" +tag), "existing method exists"
111 assert not hasattr(klass, "end_" +tag), "existing method exists"
112 s = """if 1:
113 def start(self, tag, attrs):
114 self._%(tag)s_name = attrs["name"]
115 self._%(tag)s_decode = attrs.get("bioformat:decode", None)
116 self.save_characters()
117 def end(self, tag):
118 s = self.get_characters()
119 if self._%(tag)s_decode is not None:
120 s = Decode.make_decoder(self._%(tag)s_decode)(s)
121 self.%(attrname)s[self._%(tag)s_name] = s
122 """ % locals()
123 d = {"Decode": Decode}
124 exec s in d
125 setattr(klass, "start_" + tag, d["start"])
126 setattr(klass, "end_" + tag, d["end"])
127
128
129
130
132 """Used to read records and produce output"""
def __init__(self, record_builder, writer, record_tag = "record"):
    """Set up the SAX content handler and remember its collaborators.

    record_builder -- kept as self.record_builder
    writer -- kept as self.writer
    record_tag -- kept as self.record_tag (defaults to "record")
    """
    # Initialise the SAX base class first, then record the configuration.
    handler.ContentHandler.__init__(self)
    self.record_tag = record_tag
    self.writer = writer
    self.record_builder = record_builder
138
142
151
160
163
165 """Used to read records and produce output through a Dispatcher"""
def __init__(self, record_builder, writer, record_tag = "record"):
    """Set up the dispatcher so each finished record is written out.

    record_builder -- acquired by this dispatcher; kept as
        self.record_builder
    writer -- kept as self.writer
    record_tag -- name of the record element (defaults to "record"); kept
        as self.record_tag
    """
    # The end handler for the record element has to exist on the instance
    # before the dispatcher base class is initialised.
    end_method_name = "end_" + record_tag
    setattr(self, end_method_name, self.write_record)

    remap_table = {record_tag: "bioformat:"}
    Dispatch.Dispatcher.__init__(self, remap = remap_table)

    self.acquire(record_builder)
    self.record_builder = record_builder
    self.writer = writer
    self.record_tag = record_tag
176 self.writer.write(self.record_builder.document)
177
178
179
182 self.recognized = 1
183 self.exc = None
184
192
193 error = fatalError
194
198
199
200
209
210
225 self.callback(self.join_fctn(self.descriptions))
226
227
228
229
230
231
232
233
234
235
236
237
238
239
262
263
264
265
266
267 _fast_dbxref_sp_general_data = None
269 global _fast_dbxref_sp_general_data
270 if _fast_dbxref_sp_general_data is None:
271 from Bio.expressions.swissprot import sprot38
272 _fast_dbxref_sp_general_data = _fixup_sp_pattern(
273 sprot38.real_DR_general)
274
275 pat, dbstyle, primary_type, secondary_type = _fast_dbxref_sp_general_data
276
277 m = pat.match(s)
278 assert m is not None, "Ill-formated sp-general dxbref: %r" % s
279 return (
280 (dbstyle, m.group("dbname"), primary_type,
281 m.group("primary_dbid"), 0),
282 (dbstyle, m.group("dbname"), secondary_type,
283 m.group("secondary_dbid"), 0)
284 )
285
286
287
288
289 _fast_dbxref_sp_prosite_data = None
291 global _fast_dbxref_sp_prosite_data
292
293 if _fast_dbxref_sp_prosite_data is None:
294 from Bio.expressions.swissprot import sprot38
295 _fast_dbxref_sp_prosite_data = _fixup_sp_pattern(
296 sprot38.real_DR_prosite)
297
298 pat, dbstyle, primary_type, secondary_type = _fast_dbxref_sp_prosite_data
299 m = pat.match(s)
300 assert m is not None, "Ill-formated sp-prosite dxbref: %r" % s
301 return (
302 (dbstyle, m.group("dbname"), primary_type,
303 m.group("primary_dbid"), 0),
304 (dbstyle, m.group("dbname"), secondary_type,
305 m.group("secondary_dbid"), 0)
306 )
307
308
309
310 _fast_dbxref_sp_embl_data = None
312 global _fast_dbxref_sp_embl_data
313
314 if _fast_dbxref_sp_embl_data is None:
315 from Bio.expressions.swissprot import sprot38
316 _fast_dbxref_sp_embl_data = _fixup_sp_pattern(
317 sprot38.real_DR_embl)
318
319 pat, dbstyle, primary_type, secondary_type = _fast_dbxref_sp_embl_data
320 m = pat.match(s)
321 assert m is not None, "Ill-formated sp-embl dxbref: %r" % s
322 return (
323 (dbstyle, m.group("dbname"), primary_type,
324 m.group("primary_dbid"), 0),
325 (dbstyle, m.group("dbname"), secondary_type,
326 m.group("secondary_dbid"), 0)
327 )
328
329 _fast_dbxref_parser_table = {
330 "sp-general": _fast_dbxref_sp_general,
331 "sp-prosite": _fast_dbxref_sp_prosite,
332 "sp-embl": _fast_dbxref_sp_embl,
333 }
334
341 self.negate = 0
342 self.dbname = None
343 self.dbids = []
344 self.info = []
345
347 assert self.dbname is None, "cannot set the dbname twice"
348 self.dbname_style = attrs.get("style", "unknown")
349 self.save_characters()
352
354 d = attrs.get("dbname", None)
355 if d is None:
356 assert self.dbname is not None, "must set the dbname"
357 self.info.append( (self.dbname_style, self.dbname,
358 attrs.get("type", "primary")) )
359 else:
360 self.info.append( ("bioformat", d,
361 attrs.get("type", "primary")) )
362 self.save_characters()
363
366
369
371 cb = self.slow_callback
372 if cb is None:
373 return
374 negate = self.negate
375 for ( (dbname_style, dbname, idtype), dbid) in zip(self.info,
376 self.dbids):
377 self.slow_callback(dbname_style, dbname, idtype, dbid, negate)
378
385 for info in self._fast_parser(self.get_characters()):
386 self.callback(*info)
387 self.slow_callback = self.callback
388
389
422
424 - def __init__(self, name, description, location, qualifiers):
430 return "Feature %r %r %s num_qualifiers = %d" % \
431 (self.name, self.description, self.location,
432 len(self.qualifiers))
433
434
436 - def __init__(self, callback, settings = {}):
439
441 self.location_style = attrs.get("location-style",
442 self.settings["location-style"])
443 j = attrs.get("join-feature", None)
444 if j is None:
445 self.text_join_func = "".join
446 else:
447 self.text_join_func = Decode.make_typechecked_decoder(j, list, str)
448
449 self.location_start = None
450 self.location_end = None
451 self.text_lines = []
452
454 if self.location_start or self.location_end:
455 if self.text_lines:
456 raise TypeError("Cannot have both location text and start/end")
457 self.callback(self.location_style,
458 (self.location_start, self.location_end))
459 else:
460 self.callback(self.location_style,
461 (self.text_join_func(self.text_lines), None))
462
467
# The text of the explicit start and end elements of a feature location is
# stored on the handler as "location_start" and "location_end".
add_text_handler(
    Handle_feature_location, "feature_location_start", "location_start")
add_text_handler(
    Handle_feature_location, "feature_location_end", "location_end")
472
473
474
476 - def __init__(self, callback, settings):
479
488
491
496
497 add_text_handler(Handle_feature_qualifier, "feature_qualifier_name", "name")
498
499
500
510
528
532
538
543
550
553
555 self.location = (style, location_info)
556
557 add_text_handler(Handle_features, "feature_name", "name")
558
559
560
561
564 self.query_name = None
565 self.subject_name = None
566
567 self.query_seq = ""
568 self.homology_seq = ""
569 self.subject_seq = ""
570
571 self.query_start_loc = None
572 self.query_end_loc = None
573
574 self.subject_start_loc = None
575 self.subject_end_loc = None
576
579
581 self.sub_leader = None
582
586 s = self.get_characters()
587 self.query_seq += s
588 self.sub_query_seq_len = len(s)
589
593 query_leader = self.leader_size
594 query_seq_len = self.sub_query_seq_len
595 line = self.get_characters()
596 s = line[query_leader:query_leader+query_seq_len]
597 assert len(s) == query_seq_len, (len(s), query_seq_len, line)
598 self.homology_seq += s
599
604
609
# Names: only the first value seen is kept (the attribute must start out
# as None for the value to be stored).
add_first_text_handler(
    Handle_hsp_seqalign, "hsp_seqalign_query_name", "query_name")
add_first_text_handler(
    Handle_hsp_seqalign, "hsp_seqalign_subject_name", "subject_name")

# Query range: the first start seen is kept, while every end element
# overwrites the previous one, so the last end seen is the one retained.
add_first_text_handler(
    Handle_hsp_seqalign, "hsp_seqalign_query_start", "query_start_loc")
add_text_handler(
    Handle_hsp_seqalign, "hsp_seqalign_query_end", "query_end_loc")

# Subject range: handled the same way as the query range.
add_first_text_handler(
    Handle_hsp_seqalign, "hsp_seqalign_subject_start", "subject_start_loc")
add_text_handler(
    Handle_hsp_seqalign, "hsp_seqalign_subject_end", "subject_end_loc")
625
626
627
628
629
630
635
637 self.hsp_values = {}
638 self.strands = {}
639 self.frames = {}
640
642 self.callback(self.hsp_values,
643 self.hsp_info,
644 self.strands, self.frames,
645 )
646
648 self.strands[attrs["which"]] = attrs["strand"]
649
653
655 self.frames[self.getting_frame] = self.get_characters()
656 self.getting_frame = None
657
660
665
667 s = self.get_characters()
668 if self.value_name is not None:
669 if self.value_name == "float":
670 s = float(s)
671 else:
672 s = Decode.make_decoder(self.value_convert)(s)
673 self.hsp_values[self.value_name] = s
674
675
676
677
708
709 add_text_handler(Handle_search_table, "search_table_description",
710 "description")
711
712
713
715 - def start_(self, tag, attrs):
718
723
# The query description may span several lines; they are joined with the
# decoder named by the block's "join-query" attribute ("join|fixspaces"
# when the attribute is missing) and stored as "query_description".
add_text_block_handler(
    Handle_search_header, "query_description",
    "join-query", "join|fixspaces", "query_description")

# Each of the remaining header fields is stored as text in the handler's
# "dict" mapping under the key given as the last argument.
add_text_dict_handler(
    Handle_search_header, "application_name", "dict", "appname")
add_text_dict_handler(
    Handle_search_header, "application_version", "dict", "appversion")
add_text_dict_handler(
    Handle_search_header, "database_name", "dict", "dbname")
add_text_dict_handler(
    Handle_search_header, "database_num_sequences", "dict",
    "db_num_sequences")
add_text_dict_handler(
    Handle_search_header, "database_num_letters", "dict", "db_num_letters")
add_text_dict_handler(
    Handle_search_header, "query_size", "dict", "query_size")
739
740
741
742
744 - def start_(self, tag, attrs):
747
748 - def end_(self, tag):
750
# Register value handlers for the search parameter and search statistic
# elements.
# NOTE(review): presumably each value ends up in the "parameters" /
# "statistics" mapping keyed by the element's "name" attribute -- confirm
# against add_value_handler.
add_value_handler(
    Handle_search_info, "search_parameter", "parameters")
add_value_handler(
    Handle_search_info, "search_statistic", "statistics")
753