1
2
3
4 """Implement Martel parsers.
5
6 The classes in this module are used by other Martel modules and not
7 typically by external users.
8
There are two major parsers, 'Parser' and 'RecordParser'. The first
is the standard one, which parses the file as one string in memory
and then generates the SAX events. The other reads one record at a
time using a RecordReader and generates events after each read. The
generated event callbacks are identical.
14
15 At some level, both parsers use "_do_callback" to convert mxTextTools
16 tags into SAX events.
17
18 XXX finish this documentation
19
20 XXX need a better way to get closer to the likely error position when
21 parsing.
22
23 XXX need to implement Locator
24
25 """
26 from __future__ import generators
27
28 import urllib, traceback, sys
29 from xml.sax import handler, saxutils
30 import Parser, RecordReader
31
32 try:
33 from cStringIO import StringIO
34 except ImportError:
35 from StringIO import StringIO
36
37
def __init__(self, record_parser, make_reader, reader_args, marker_tag):
    """Remember the objects needed to iterate over records.

    record_parser -- kept as self.record_parser
    make_reader -- kept as self.make_reader
    reader_args -- kept as self.reader_args
    marker_tag -- kept as self.marker_tag

    Nothing is called or validated here; the arguments are only stored.
    """
    self.record_parser = record_parser
    # The reader factory and its extra arguments travel together, so
    # they are stored in one grouped assignment with the marker tag.
    (self.make_reader,
     self.reader_args,
     self.marker_tag) = make_reader, reader_args, marker_tag
44
# Build a new IterRecords around a copy of the record parser; the reader
# factory, the reader arguments and the marker tag are passed through
# unchanged (they are shared with this instance, not copied).
# NOTE(review): the "def" line that owns this statement is not present in
# this listing -- confirm the enclosing method against the full file.
return IterRecords(self.record_parser.copy(),
                   self.make_reader,
                   self.reader_args,
                   self.marker_tag)
50
def iterate(self, source, cont_handler = None):
    """Parse the input one record at a time, yielding after each record.

    cont_handler -- content handler installed on self.record_parser.
        When it is None, a LAX.LAX() instance is created and used.

    This is a generator: the same cont_handler object is yielded once
    for every record that was read and parsed.

    Raises Parser.ParserPositionException when the reader fails, when a
    record fails to parse, or when input is left over after the last
    record.
    """
    # NOTE(review): "fileobj" is used below but is not bound in the lines
    # visible here, and "source" is never read.  The numbering embedded in
    # this listing skips several lines at this point, so statements appear
    # to have been elided -- confirm against the full file.

    # Offset of the record currently being processed.
    self.start_position = 0
    if cont_handler is None:
        # Imported lazily, only when the caller supplied no handler.
        import LAX
        cont_handler = LAX.LAX()
    self.record_parser.setContentHandler(cont_handler)

    reader = self.make_reader(fileobj, *self.reader_args)
    while 1:
        try:
            rec = reader.next()
        except RecordReader.ReaderError:
            # Report reader failures at the start of the unread record.
            raise Parser.ParserPositionException(self.start_position)
        if rec is None:
            # The reader signals the end of the records with None.
            break
        self.end_position = self.start_position + len(rec)
        try:
            self.record_parser.parseString(rec)
        except Parser.ParserPositionException, exc:
            # Add this record's starting offset to the exception, then
            # re-raise it.  Presumably this turns a record-relative
            # position into one relative to the whole input -- the "+="
            # behaviour is defined by ParserPositionException; verify.
            exc += self.start_position
            raise

        yield cont_handler
        # The next record begins where this one ended.
        self.start_position = self.end_position

    # After the last record, anything still buffered by the reader or
    # still readable from the file is reported as an error.
    fileobj, lookahead = reader.remainder()
    if lookahead or fileobj.read(1):
        raise Parser.ParserPositionException(self.start_position)
87
88
225