1
2
3
4
5
6
7
8
9
10
11 import string
12 from mx import TextTools as TT
13
14 SIZEHINT = 100000
15
18
23 raise NotImplementedError
def remainder(self):
    """Abstract placeholder; this implementation always raises.

    The other `remainder` methods in this file return an
    ``(infile, unread_text)`` pair.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
26
def _startswith_tagtable_rest_of_line(text):
    """Return the tag table marking every line that begins with `text`.

    NOTE(review): the `def` line is absent from the listing; the name comes
    from the call in the StartsWith constructor and the parameter from its
    use below -- confirm against the real file.

    Each entry is (tagobj, command, argument, jump-if-no-match,
    jump-if-match), with jumps relative to the entry itself (mxTextTools
    tag-table convention).  The input must open with `text`; whatever
    follows `text` on its line is skipped, and every later line that starts
    with `text` is tagged "begin" as well.
    """
    not_eol = TT.invset('\r\n')
    return (
        # 0: the input has to open with the marker text
        ("begin", TT.Word, text, TT.MatchFail, +1),

        # 1: skip the rest of the marker line
        (None, TT.AllInSet, not_eol, +1, +1),

        # 2-4: consume one line ending ("\n", "\r\n" or a lone "\r")
        (None, TT.Is, '\n', +1, +4),
        (None, TT.Is, '\r', +2, +1),
        (None, TT.Is, '\n', +2, +2),

        # 5: no line ending found -- succeed if this is the end of input
        (None, TT.EOF, TT.Here, +1, TT.MatchOk),

        # 6: start of a line -- tag it when it begins with the marker text
        #    and go back to entry 1
        ("begin", TT.Word, text, +1, -5),

        # 7: otherwise skip the rest of this ordinary line
        (None, TT.AllInSet, not_eol, +1, +1),

        # 8-10: consume the line ending and loop back to entry 6
        (None, TT.Is, '\n', +1, -2),
        (None, TT.Is, '\r', +2, +1),
        (None, TT.Is, '\n', -4, -4),

        # 11: anything other than end of input is a failure
        (None, TT.EOF, TT.Here, TT.MatchFail, TT.MatchOk),
    )
56
def _startswith_tagtable_newline(text):
    """Return the tag table for lines consisting of `text` then a newline.

    NOTE(review): the `def` line is absent from the listing; the name comes
    from the call in the StartsWith constructor and the parameter from its
    use below -- confirm against the real file.

    Same entry layout as the other tag tables in this file: (tagobj,
    command, argument, jump-if-no-match, jump-if-match), jumps relative.
    Unlike the rest-of-line variant, a match of `text` must be followed
    directly by a line ending or the end of input.
    """
    not_eol = TT.invset('\r\n')
    return (
        # 0: the input has to open with the marker text
        ("begin", TT.Word, text, TT.MatchFail, +1),

        # 1-3: the marker must be followed by "\n", "\r\n" or a lone "\r"
        (None, TT.Is, '\n', +1, +4),
        (None, TT.Is, '\r', +2, +1),
        (None, TT.Is, '\n', +2, +2),

        # 4: ... or by the end of input; anything else fails the parse
        (None, TT.EOF, TT.Here, TT.MatchFail, TT.MatchOk),

        # 5: start of a line -- tag it when it begins with the marker text
        #    and go back to entry 1
        ("begin", TT.Word, text, +1, -4),

        # 6: otherwise skip the rest of this ordinary line
        (None, TT.AllInSet, not_eol, +1, +1),

        # 7-9: consume the line ending and loop back to entry 5
        (None, TT.Is, '\n', +1, -2),
        (None, TT.Is, '\r', +2, +1),
        (None, TT.Is, '\n', -4, -4),

        # 10: anything other than end of input is a failure
        (None, TT.EOF, TT.Here, TT.MatchFail, TT.MatchOk),
    )
86
87
def _find_begin_positions(text, tagtable):
    """Run `tagtable` over `text` and return element 1 of every tag found.

    NOTE(review): the `def` line is absent from the listing; the signature
    is reconstructed from the calls `_find_begin_positions(self.lookahead,
    self.tagtable)` -- confirm.  In the mxTextTools tag list, element 1 of
    an entry is presumably the left (start) offset of the match.

    Raises:
        ReaderError: if the table does not match, or matching stops before
            the end of `text`.
    """
    success, tags, pos = TT.tag(text, tagtable)

    if not success:
        raise ReaderError("invalid format starting with %s" % repr(text[:50]))
    if pos != len(text):
        # Call-style raise: same exception and message as before, but valid
        # on both Python 2 and Python 3.
        raise ReaderError(
            "could not parse to end of text (ended at %d of %d)" %
            (pos, len(text)))
    return [tag[1] for tag in tags]
98
99
def __init__(self, infile, text, sizehint=SIZEHINT, lookahead=""):
    """Set up a reader whose records begin at lines starting with `text`.

    NOTE(review): the `def` line (and the enclosing class header) are
    absent from the listing.  Parameter order follows the visible call
    `StartsWith(self.infile, self.text, self.sizehint, ...)`; the default
    values are an assumption -- confirm against the real file.

    Args:
        infile: object with a `read(size)` method.
        text: marker text; may end in a single "\\n" (currently rejected,
            see below) but must not contain one anywhere else.
        sizehint: number of characters requested per read.
        lookahead: text already read from `infile` but not yet consumed.

    Raises:
        AssertionError: for a misplaced newline, an empty marker, or a
            marker too long for `sizehint`.
        NotImplementedError: if `text` ends with "\\n".
    """
    RecordReader.__init__(self, infile)
    self.text = text
    self.sizehint = sizehint

    pos = text.find("\n")
    if pos != -1:
        if pos != len(text) - 1:
            raise AssertionError(
                "'\\n' can only exist at the end of the string")
        text = text[:-1]
        has_newline = 1
    else:
        has_newline = 0

    # Explicit raises (same exception type and messages as the former
    # assert statements) so the checks are not stripped under "python -O".
    if not len(text):
        raise AssertionError("StartsWith text size is too short")
    if not len(text) < sizehint - 2:
        raise AssertionError(
            "StartsWith text size larger than sizehint allows")

    if has_newline:
        # Deliberately disabled; _startswith_tagtable_newline(text) is the
        # table meant for this case.
        raise NotImplementedError("there's a bug in the '\\n' option")
    self.tagtable = _startswith_tagtable_rest_of_line(text)

    self.lookahead = lookahead

    # Make sure there is enough text to decide whether the input starts
    # with the marker before scanning for record starts.
    if len(self.lookahead) < len(text) + 2:
        self.lookahead += infile.read(sizehint)
    if self.lookahead:
        self.positions = _find_begin_positions(self.lookahead,
                                               self.tagtable)
    else:
        self.positions = [0]
    self.index = 0
137
139
def next(self):
    """Return the next record as a string, or None when nothing is left.

    NOTE(review): the `def` line is absent from the listing; the name is
    taken from the visible call `reader.next()` -- confirm.

    A record runs from one entry of `self.positions` to the next.  When the
    buffered positions are used up, more text is read in `self.sizehint`
    chunks until a second record start appears or the input ends.
    """
    # Fast path: both ends of the record are already known.
    if self.index + 1 < len(self.positions):
        start = self.positions[self.index]
        stop = self.positions[self.index + 1]
        self.index += 1
        return self.lookahead[start:stop]

    # Drop everything before the last known record start, then keep
    # reading until the end of that record is visible.
    self.lookahead = self.lookahead[self.positions[-1]:]

    # Only the length of this list matters if no more data arrives.
    positions = [self.positions[-1]]
    while True:
        data = self.infile.read(self.sizehint)
        if not data:
            break
        self.lookahead = self.lookahead + data
        positions = _find_begin_positions(self.lookahead, self.tagtable)
        if len(positions) > 1:
            break

    if len(positions) > 1:
        self.positions = positions
        self.index = 1
        return self.lookahead[positions[0]:positions[1]]

    if not self.lookahead:
        # Nothing buffered and nothing left to read.
        self.positions = [0]
        self.index = 0
        return None

    # End of input: whatever is buffered is the final record.
    assert len(positions) == 1
    self.positions = [0]
    self.index = 0
    s = self.lookahead
    self.lookahead = ""
    return s
179
def remainder(self):
    """Return ``(infile, text)``: the input object and the unread buffer.

    The text is the part of `self.lookahead` starting at the current
    record position, `self.positions[self.index]`.
    """
    return self.infile, self.lookahead[self.positions[self.index]:]
182
def _endswith_tagtable_newline(text):
    """Return the tag table marking the end of lines made of `text` alone.

    NOTE(review): the `def` line is absent from the listing; the name comes
    from the call in the EndsWith constructor and the parameter from its
    use below -- confirm against the real file.

    Entries are (tagobj, command, argument, jump-if-no-match,
    jump-if-match) with relative jumps.  An "end" tag is produced when a
    line starts with `text` and is immediately followed by a line ending.
    """
    return (
        # 0: does this line start with the marker?  If not, go to entry 6.
        (None, TT.Word, text, +6, +1),

        # 1-5: the marker must be followed directly by a line ending, which
        #      is tagged "end"; then loop back to entry 0
        ("end", TT.Is, '\n', +1, -1),
        (None, TT.Is, '\r', +4, +1),
        ("end", TT.Is, '\n', +1, -3),
        # lone "\r": step back over it, then tag it while stepping forward
        (None, TT.Skip, -1, +1, +1),
        ("end", TT.Skip, +1, -5, -5),

        # 6: skip the rest of the line
        (None, TT.AllInSet, TT.invset('\r\n'), +1, +1),

        # 7: succeed at end of input
        (None, TT.EOF, TT.Here, +1, TT.MatchOk),

        # 8: consume the newline characters and restart at entry 0
        (None, TT.AllInSet, TT.set('\r\n'), TT.MatchFail, -8),
    )
204
def _endswith_tagtable_rest_of_line(text):
    """Return the tag table marking the end of lines starting with `text`.

    NOTE(review): the `def` line is absent from the listing; the name comes
    from the call in the EndsWith constructor and the parameter from its
    use below -- confirm against the real file.

    Entries are (tagobj, command, argument, jump-if-no-match,
    jump-if-match) with relative jumps.  When a line starts with `text`,
    the rest of that line is skipped and its line ending (or the end of
    input) is tagged "end".
    """
    return (
        # 0: does this line start with the marker?  If not, go to entry 8.
        (None, TT.Word, text, +8, +1),

        # 1: skip whatever follows the marker on this line
        (None, TT.AllInSet, TT.invset('\r\n'), +1, +1),

        # 2-6: tag the line ending as "end", then loop back to entry 0
        ("end", TT.Is, '\n', +1, -2),
        (None, TT.Is, '\r', +4, +1),
        ("end", TT.Is, '\n', +1, -4),
        # lone "\r": step back over it, then tag it while stepping forward
        (None, TT.Skip, -1, +1, +1),
        ("end", TT.Skip, +1, -6, -6),

        # 7: a marker line without a line ending must be the end of input
        ("end", TT.EOF, TT.Here, TT.MatchFail, TT.MatchOk),

        # 8: ordinary line -- skip its contents
        (None, TT.AllInSet, TT.invset('\r\n'), +1, +1),

        # 9: succeed at end of input
        (None, TT.EOF, TT.Here, +1, TT.MatchOk),

        # 10: consume the newline characters and restart at entry 0
        (None, TT.AllInSet, TT.set('\r\n'), TT.MatchFail, -10),
    )
233
234
def _find_end_positions(text, tagtable):
    """Run `tagtable` over `text` and return element 2 of every tag found.

    NOTE(review): the `def` line is absent from the listing; the signature
    is reconstructed from the calls `_find_end_positions(lookahead,
    self.tagtable)` -- confirm.  In the mxTextTools tag list, element 2 of
    an entry is presumably the right (end) offset of the match.

    Raises:
        ReaderError: if the table does not match, or matching stops before
            the end of `text`.
    """
    success, tags, pos = TT.tag(text, tagtable)

    if not success:
        raise ReaderError("invalid format starting with %s" % repr(text[:50]))
    if pos != len(text):
        # Call-style raise: same exception and message as before, but valid
        # on both Python 2 and Python 3.
        raise ReaderError(
            "could not parse to end of text (ended at %d of %d)" %
            (pos, len(text)))
    return [tag[2] for tag in tags]
245
def __init__(self, infile, text, sizehint=SIZEHINT, lookahead=""):
    """Set up a reader whose records end at lines starting with `text`.

    NOTE(review): the `def` line (and the enclosing class header) are
    absent from the listing.  The parameter names come from the body; their
    order and default values are assumed to mirror the StartsWith reader
    -- confirm against the real file.

    Args:
        infile: object with a `read(size)` method.
        text: marker text; a single trailing "\\n" selects the table that
            requires the marker to be the whole line.
        sizehint: number of characters requested per read.
        lookahead: text already read from `infile` but not yet consumed.

    Raises:
        AssertionError: for a newline anywhere but at the end of `text`,
            or a marker too long for `sizehint`.
    """
    RecordReader.__init__(self, infile)
    self.text = text
    self.sizehint = sizehint

    pos = text.find("\n")
    if pos != -1:
        if pos != len(text) - 1:
            raise AssertionError(
                "'\\n' can only exist at the end of the string")
        text = text[:-1]
        has_newline = 1
    else:
        has_newline = 0

    # Explicit raise (same exception type and message as the former assert)
    # so the check is not stripped under "python -O".
    if not len(text) < sizehint - 2:
        raise AssertionError(
            "EndsWith text size larger than sizehint allows")

    if has_newline:
        self.tagtable = _endswith_tagtable_newline(text)
    else:
        self.tagtable = _endswith_tagtable_rest_of_line(text)

    self.lookahead = lookahead
    self.positions = []   # end offsets of records found in self.lookahead
    self.index = 0        # next entry of self.positions to hand out
    self.pos = 0          # offset in self.lookahead where the next record starts
273
275
def next(self):
    """Return the next record as a string, or None when nothing is left.

    NOTE(review): the `def` line is absent from the listing; the name
    mirrors the `next()` call made on other readers in this file --
    confirm.

    Raises:
        ReaderError: if the input ends with text that is not terminated by
            the end marker.
    """
    # Fast path: the end of the next record is already known.
    if self.index < len(self.positions):
        newpos = self.positions[self.index]
        s = self.lookahead[self.pos:newpos]
        self.pos = newpos
        self.index = self.index + 1
        return s

    # Discard what has been handed out and read until a record end shows up.
    lookahead = self.lookahead[self.pos:]
    positions = []

    while True:
        data = self.infile.read(self.sizehint)
        if not data:
            # End of input: scan what is buffered if that was not done yet.
            if not positions:
                positions = _find_end_positions(lookahead, self.tagtable)
            break
        lookahead = lookahead + data
        positions = _find_end_positions(lookahead, self.tagtable)
        if len(positions) > 1:
            # The last end found is dropped -- presumably because it may
            # sit on a chunk boundary and move once more text is read.
            del positions[-1]
            break

    self.lookahead = lookahead
    self.positions = positions

    if positions:
        self.index = 1
        self.pos = positions[0]
        return lookahead[:positions[0]]
    elif not lookahead:
        # Nothing buffered and nothing left to read.
        self.pos = 0
        self.index = 0
        return None

    # Leftover text with no record end.  Accept it when the only thing
    # missing is the final newline of the input.
    if lookahead[-1:] not in "\r\n":
        special_case = lookahead + "\n"
        positions = _find_end_positions(special_case, self.tagtable)
        if positions:
            assert len(positions) == 1, "this case should not occur"
            pos = positions[0]
            assert pos == len(special_case), "wrong sizes: %d and %d" % \
                   (pos, len(special_case))
            self.lookahead = ""
            self.positions = []
            self.pos = 0
            self.index = 0
            return lookahead

    self.index = 0
    self.pos = 0
    raise ReaderError("Last record not terminated: at %s ..." %
                      repr(self.lookahead[:50]))
338
def remainder(self):
    """Return ``(infile, text)``: the input object and the unread buffer.

    The text is the part of `self.lookahead` from offset `self.pos` on.
    """
    return self.infile, self.lookahead[self.pos:]
341
342
343
class Until(RecordReader):
    """Read one record: the text before the first line starting with `text`.

    NOTE(review): the `def` lines of `__init__` and `next` are absent from
    the listing.  Their names and parameters are reconstructed from the
    bodies and from the `reader.next()` call below; the parameter order and
    default values are assumptions -- confirm against the real file.
    """

    def __init__(self, infile, text, sizehint=SIZEHINT, lookahead=""):
        """Store the reader state and reject marker texts with a newline.

        Raises:
            NotImplementedError: if `text` ends with "\\n".
            AssertionError: if `text` contains "\\n" anywhere else.
        """
        RecordReader.__init__(self, infile)
        self.text = text
        self.lookahead = lookahead
        self.sizehint = sizehint
        self.found = 0   # set to 1 once the single record was returned

        if text[-1] == "\n":
            raise NotImplementedError("Until reader does not support '\\n'")
        if "\n" in text:
            raise AssertionError(
                "'\\n' can only exist at the end of the string")

    def next(self):
        """Return the record on the first call and None afterwards."""
        if self.found:
            return None

        # A StartsWith reader needs its input to begin with the marker, so
        # a fake marker line is put in front of the data and cut off from
        # the record it returns.
        fake = self.text + "\n"
        reader = StartsWith(self.infile, self.text, self.sizehint,
                            fake + self.lookahead)
        rec = reader.next()
        rec = rec[len(fake):]
        self.infile, self.lookahead = reader.remainder()
        self.found = 1
        return rec

    def remainder(self):
        """Return ``(infile, lookahead)``: the input and the unread text."""
        return self.infile, self.lookahead
374
375
# Tag table that tags the line endings in a text as "newline".
# Entries are (tagobj, command, argument, jump-if-no-match, jump-if-match)
# with jumps relative to the entry itself (mxTextTools convention).
_tag_lines_tagtable = (
    # 0: skip the contents of the line
    (None, TT.AllInSet, TT.invset('\r\n'), +1, +1),

    # 1-4: tag the line ending ("\n", "\r\n" or a lone "\r"), then loop
    #      back to entry 0
    ("newline", TT.Is, '\n', +1, -1),
    (None, TT.Is, '\r', +3, +1),
    ("newline", TT.Is, '\n', +1, -3),
    ("newline", TT.Skip, 0, -4, -4),

    # 5: no line ending here -- succeed at end of input, else start over
    (None, TT.EOF, TT.Here, -5, TT.MatchOk),
)
387
388
390 """Read a specified (fixed) number of lines"""
def __init__(self, infile, count, sizehint=SIZEHINT, lookahead=""):
    """Set up a reader that returns `count` lines per record.

    NOTE(review): the `def` line (and the enclosing class header) are
    absent from the listing.  The parameter names come from the body; their
    order and default values are assumptions -- confirm against the real
    file.

    Args:
        infile: object with a `read(size)` method.
        count: number of lines per record; must be positive.
        sizehint: number of characters requested per read; must be
            positive.
        lookahead: text already read from `infile` but not yet consumed.

    Raises:
        AssertionError: if `count` or `sizehint` is not positive.
    """
    # Explicit raises (same exception type and messages as the former
    # assert statements) so the checks are not stripped under "python -O".
    if not count > 0:
        raise AssertionError("CountLines reader must read at least one line")
    # The former "assert lookahead > 0" is dropped: `lookahead` is a text
    # buffer (it is sliced and concatenated with read data elsewhere), so
    # comparing it with 0 never failed on Python 2 and raises TypeError on
    # Python 3.
    if not sizehint > 0:
        raise AssertionError("sizehint must be positive")
    RecordReader.__init__(self, infile)
    self.sizehint = sizehint
    self.lookahead = lookahead
    self.count = count
    self.pos = 0          # offset in self.lookahead where the next record starts
    self.positions = []   # line-end offsets found in self.lookahead
    self.index = 0        # number of entries of self.positions already used
402
def next(self):
    """Return the next `self.count` lines as one string, or None at the end.

    NOTE(review): the `def` line is absent from the listing; the name
    mirrors the `next()` call made on other readers in this file --
    confirm.

    Raises:
        ReaderError: if the input ends with fewer than `self.count` lines
            left.
    """
    # Fast path: enough line ends are already known.
    if self.index + self.count < len(self.positions):
        self.index = self.index + self.count
        endpos = self.positions[self.index - 1]
        s = self.lookahead[self.pos:endpos]
        self.pos = endpos
        return s

    # Discard what has been handed out and read until more than
    # `self.count` line ends are visible.
    lookahead = self.lookahead[self.pos:]
    while True:
        positions = _find_end_positions(lookahead, _tag_lines_tagtable)
        if len(positions) > self.count:
            # The last end found is dropped -- presumably because it may
            # sit on a chunk boundary and move once more text is read.
            del positions[-1]
            break
        data = self.infile.read(self.sizehint)
        if not data:
            break
        lookahead = lookahead + data

    self.lookahead = lookahead
    self.pos = 0
    self.positions = positions

    if not lookahead:
        return None

    if len(positions) >= self.count:
        self.index = self.count
        endpos = self.positions[self.count - 1]
        s = lookahead[0:endpos]
        self.pos = endpos
        return s

    # Call-style raise: same exception and message as before, but valid on
    # both Python 2 and Python 3.
    raise ReaderError(
        "Only found %d lines, expecting %d (starting with %s ...)" %
        (len(positions), self.count, repr(lookahead[:20])))
451
def remainder(self):
    """Return ``(infile, text)``: the input object and the unread buffer.

    The text is the part of `self.lookahead` from offset `self.pos` on.
    """
    return self.infile, self.lookahead[self.pos:]
454
456 """Reads nothing"""
460
463
def remainder(self):
    """Return ``(infile, lookahead)``: the input object and the unread text."""
    return self.infile, self.lookahead
466
468 """Reads everything"""
473
def next(self):
    """Return all remaining text on the first call and None afterwards.

    NOTE(review): the `def` line is absent from the listing; the name
    mirrors the `next()` call made on other readers in this file --
    confirm.

    The record is the buffered `self.lookahead` followed by everything
    `self.infile.read()` returns.
    """
    if self.found:
        return None
    s = self.lookahead + self.infile.read()
    self.lookahead = ""
    self.found = 1
    return s
481
def remainder(self):
    """Return ``(infile, lookahead)``: the input object and the unread text."""
    return self.infile, self.lookahead
484