1
2
3
4 __version__ = "1.47"
5
6 import Expression
7 import convert_re
8 import string
9 from xml.sax import xmlreader
10
11
12
16
18 """(s1, s2, ...) -> match s1 or s2 or ..."""
19 if len(args) == 1:
20 return Str1(args[0])
21 return Expression.Alt(tuple(map(Str, args)))
22
28
30 """s -> match any character not in s"""
31 return Expression.Any(s, 1)
32
33
34
35
36
37
38
40 """exp1, exp2, ... -> match exp1 followed by exp2 followed by ..."""
41
42
43 for arg in args:
44 assert isinstance(arg, Expression.Expression), \
45 "expecting an Expression, not a %s" % type(arg)
46
47 return Expression.Seq(args)
48
50 """exp1, exp2, ... -> match exp1 or (if that fails) match exp2 or ..."""
51
52 for arg in args:
53 assert isinstance(arg, Expression.Expression), \
54 "expecting an Expression, not a %s" % type(arg)
55 return Expression.Alt(args)
56
62
68
70 """expr -> match 'expr' as many times as possible, but at least once"""
71 assert isinstance(expr, Expression.Expression), \
72 "expecting an Expression, not a %s" % type(expr)
73 return Expression.MaxRepeat(expr, 1)
74
75
76
77
78 NoCase = Expression.NoCase
79
81 raise NotImplementedError
82
84 raise NotImplementedError
85
87 raise NotImplementedError
88
90 raise NotImplementedError
91
95
96
97
98
99 AnyEol = Expression.AnyEol
100
102 """expr, min_count, max_count = 65535 -> match between min- and max_count times
103
104 If max_count == 65535 (which is Expression.MAXREPEAT) then there
105 is no upper limit.
106 """
107 assert isinstance(expr, Expression.Expression), \
108 "expecting an Expression, not a %s" % type(expr)
109 return Expression.MaxRepeat(expr, min_count, max_count)
110
def RepN(expr, count):
    """expr, count -> match 'expr' exactly 'count' times

    Handy for named group repeats: 'count' is passed as both the
    minimum and the maximum repeat, so the name is written only once.
    """
    # identical lower and upper bounds pin the repeat to one value
    lower = upper = count
    return Expression.MaxRepeat(expr, lower, upper)
118
119
120 -def Group(name, expr, attrs = None):
125
126
128
129
130 s = string.replace(s, "\r\n", "\n")
131 s = string.replace(s, "\r", "\n")
132 return string.replace(s, "\n", r"\R")
133
134
135 -def Re(pattern, fix_newlines = 0):
140
141 NullOp = Expression.NullOp
142 Debug = Expression.Debug
143
146
149
150
156
def Digits(name = None, attrs = None):
    r"""match a run of one or more decimal digits

    This is the same as writing (?P<name?attrs>\d+).

    When 'name' is not None the matching text is placed inside a group
    of that name; 'attrs' may optionally supply group attributes.
    """
    digits = Re(r"\d+")
    return _group(name, digits, attrs)
167
def Integer(name = None, attrs = None):
    r"""match an integer: digits with an optional leading + or - sign

    The pattern used is [+-]?\d+.

    When 'name' is not None the matching text is placed inside a group
    of that name; 'attrs' may optionally supply group attributes.
    """
    return _group(name, Re(r"[+-]?\d+"), attrs)
177
def Float(name = None, attrs = None):
    """match a floating point number such as 6, 6., -.1, 2.3 or +4E-5

    When 'name' is not None the matching text is placed inside a group
    of that name; 'attrs' may optionally supply group attributes.
    """
    # optional sign, then either digits with an optional fraction or a
    # bare fraction, then an optional exponent
    pattern = r"[+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)?"
    return _group(name, Re(pattern), attrs)
187
def Word(name = None, attrs = None):
    r"""match a 'word'

    A 'word' is '\w+', where \w stands for [a-zA-Z0-9_].  This is the
    short way to write (?P<name>\w+).

    When 'name' is not None the matching text is placed inside a group
    of that name; 'attrs' may optionally supply group attributes.
    """
    word = Re(r"\w+")
    return _group(name, word, attrs)
201
def Spaces(name = None, attrs = None):
    r"""match one or more whitespace characters (except newline)

    "Spaces" is defined by the pattern [\t\v\f ]+ (tab, vertical tab,
    form feed and space).  That is *not* the same as '\s+': neither
    '\n' nor '\r' is included, which is useful since you almost never
    mean for whitespace to go beyond the end of the line.

    When 'name' is not None the matching text is placed inside a group
    of that name; 'attrs' may optionally supply group attributes.
    """
    return _group(name, Re(r"[\t\v\f ]+"), attrs)
215
217 """match an unprintable character (characters not in string.printable)
218
219 If 'name' is not None, the matching text will be put inside of a
220 group of the given name. You can optionally include group
221 attributes.
222 """
223 return _group(name, AnyBut(string.printable), attrs)
224
226 """match a punctuation character (characters in string.punctuation)
227
228 If 'name' is not None, the matching text will be put inside of a
229 group of the given name. You can optionally include group
230 attributes.
231 """
232 return _group(name, Any(string.punctuation), attrs)
233
234
def ToEol(name = None, attrs = None):
    """match everything up to and including the end of line

    When 'name' is not None, the matching text, minus the newline, is
    placed inside a group of that name; 'attrs' may optionally supply
    group attributes.  Attributes without a name are rejected with an
    assertion.
    """
    if name is not None:
        # only the text before the line ending goes into the group
        line_text = Group(name, Re(r"[^\R]*"), attrs)
        return line_text + AnyEol()

    assert not attrs, "Attributes (%s) require a group name" % (attrs,)
    return Re(r"[^\R]*\R")
247
def UntilEol(name = None, attrs = None):
    """match everything up to but not including the end of line

    When 'name' is not None, the matching text is placed inside a
    group of that name; 'attrs' may optionally supply group
    attributes.  Attributes without a name are rejected with an
    assertion.
    """
    if name is not None:
        return Group(name, Re(r"[^\R]*"), attrs)

    assert not attrs, "Attributes (%s) require a group name" % (attrs,)
    return Re(r"[^\R]*")
260
262 """read and ignore lines up to, but excluding, the line matching expr"""
263 return Rep(AssertNot(expr) + ToEol())
264
266 """read and ignore lines up to and including, the line matching expr"""
267 return Rep(AssertNot(expr) + ToEol()) + expr + ToEol()
268
269
def ToSep(name = None, sep = None, attrs = None):
    """match all characters up to the given separator(s)

    This is useful for parsing space, tab, colon, or other character
    delimited fields.  There is no default separator character.

    If 'name' is not None, the matching text, except for the separator,
    will be put inside a group of the given name.  You can optionally
    include group attributes.  The separator will also be consumed.

    Neither "\\r" nor "\\n" may be used as a separator.

    Raises TypeError when 'sep' is not given and AssertionError when
    'sep' contains "\\r" or "\\n".
    """
    if sep is None:
        # there is no sensible default, so an explicit value is required
        raise TypeError("Must specify a seperator (the 'sep' parameter)")

    # The message has to format 'sep'; naming any other variable here
    # would turn the intended AssertionError into a NameError.
    assert "\r" not in sep and "\n" not in sep, \
           "cannot use %s as a seperator" % (repr(sep),)

    # field characters: anything not in 'sep' and not a line-ending char
    exp = Rep(AnyBut(sep + "\r\n"))
    # NOTE(review): the field stops at any single character of 'sep',
    # while Str(sep) presumably matches 'sep' as one whole string --
    # confirm this is intended for a multi-character 'sep'.
    return _group(name, exp, attrs) + Str(sep)
292
def UntilSep(name = None, sep = None, attrs = None):
    """match all characters up to the given separator(s)

    This is useful for parsing space, tab, colon, or other character
    delimited fields.  There is no default separator.

    If 'name' is not None, the matching text, except for the separator,
    will be put inside a group of the given name.  You can optionally
    include group attributes.  The separator itself is not consumed.

    Neither "\\r" nor "\\n" may be used as a separator.

    Raises TypeError when 'sep' is not given and AssertionError when
    'sep' contains "\\r" or "\\n".
    """
    if sep is None:
        # there is no sensible default, so an explicit value is required
        raise TypeError("Must specify a seperator (the 'sep' parameter)")

    assert not ("\r" in sep or "\n" in sep), \
           "cannot use %s as a seperator" % (repr(sep),)

    # field characters: anything not in 'sep' and not a line-ending char
    field = Rep(AnyBut(sep + "\r\n"))
    return _group(name, field, attrs)
315
316
318 """match 0 or more fields seperated by the given seperator(s)
319
320 This is useful for parsing space, tab, color, or other character
321 delimited fields. There is no default seperator.
322
323 If 'name' is not None, the delimited text, excluding the seperator,
324 will be put inside groups of the given name. You can optionally
325 include group attributes. The seperator character is consumed,
326 but not accessible using a group.
327
328 Neither "\\r" nor "\\n" may be used as a seperator.
329 The line as a whole is not included in a group.
330 """
331 if sep is None:
332
333 raise TypeError("Must specify a sep (via the 'sep' parameter)")
334
335 assert "\r" not in sep and "\n" not in sep, \
336 "cannot use %s as a seperator" % (repr(sep),)
337
338 term = _group(name, Rep(AnyBut(sep + "\r\n")), attrs)
339 rep = Rep(Any(sep) + term)
340 return term + rep + AnyEol()
341
342
343 FastFeature = Expression.FastFeature
344
345
346
347 ParseRecords = Expression.ParseRecords
348 HeaderFooter = Expression.HeaderFooter
349
350
351
352
364
366 expr = expr.copy()
367 for tagname, replacement_expr in replacements:
368 matches = expr._find_groups(tagname)
369 for match in matches:
370 match.expression = replacement_expr
371 return expr
372
374 return ParseRecords("dataset", {"format": "*filter*"},
375 Group("record", expr + Rep(ToEol())),
376 make_reader, reader_args)
377