1
2
3 import string
4 from Bio.Parsers.spark import GenericScanner, GenericParser
5
def unescape_C(s):
    """Expand backslash escapes in ``s`` and return the resulting string.

    Recognised escapes:

    * ``\\xHH``  -- exactly two hexadecimal digits
    * ``\\O``, ``\\OO``, ``\\OOO`` -- one to three octal digits
    * ``\\<any other character>`` -- stands for that character itself,
      so ``\\\\`` gives a backslash and ``\\"`` gives a double quote

    Raises ValueError for a malformed ``\\x`` escape or for a backslash
    that is the last character of ``s``.
    """
    # NOTE(review): named escapes such as \n or \t are not translated;
    # they fall through to "the character itself", as before.
    hex_digits = "0123456789abcdefABCDEF"
    octal_digits = "01234567"

    result = []
    pos = 0
    end = len(s)
    # An explicit index is needed so that the characters consumed by an
    # escape are skipped instead of being visited (and emitted) again.
    while pos < end:
        ch = s[pos]
        if ch != "\\":
            result.append(ch)
            pos += 1
            continue

        pos += 1  # step over the backslash
        if pos >= end:
            raise ValueError("trailing backslash")
        c = s[pos]

        if c == "x":
            x = s[pos + 1:pos + 3]
            if (len(x) != 2 or x[0] not in hex_digits
                    or x[1] not in hex_digits):
                raise ValueError("invalid \\x escape")
            result.append(chr(int(x, 16)))
            pos += 3
        elif c in octal_digits:
            # Take at most three digits, and only octal ones.
            stop = pos
            while stop < end and stop < pos + 3 and s[stop] in octal_digits:
                stop += 1
            result.append(chr(int(s[pos:stop], 8)))
            pos = stop
        else:
            result.append(c)
            pos += 1
    return "".join(result)
28
def join_english(fields):
    """Join a list of text fragments into one whitespace-normalised string.

    Fragments are appended left to right.  A fragment is glued on without
    a separating space when either

    * the text so far ends in "-" and also has "-" two characters before
      that, or
    * neither the text so far nor the new fragment contains a space.

    Otherwise a single space is inserted.  Runs of whitespace in the
    final text are collapsed and the ends are trimmed.  An empty list
    gives "".
    """
    if not fields:
        return ""

    text = fields[0]
    for piece in fields[1:]:
        ends_hyphenated = text[-1:] == "-" and text[-3:-2] == "-"
        both_spaceless = " " not in text and " " not in piece
        if ends_hyphenated or both_spaceless:
            text += piece
        else:
            text += " " + piece

    # split()/join() already leaves no leading or trailing blanks.
    return " ".join(text.split())
42
43
44
def chomp(s, c):
    """Return ``s`` minus its last character if that character equals ``c``.

    ``s`` is returned unchanged otherwise (including when it is empty).
    """
    return s[:-1] if s[-1:] == c else s
49
def lchomp(s, c):
    """Return ``s`` minus its first character if that character equals ``c``.

    ``s`` is returned unchanged otherwise (including when it is empty).
    """
    return s[1:] if s[:1] == c else s
54
def chompchomp(s, c):
    """Strip one ``c`` from each end of ``s`` when both ends are ``c``.

    If either end differs from ``c`` the string is returned unchanged.
    """
    wrapped = s[:1] == c and s[-1:] == c
    if not wrapped:
        return s
    return s[1:-1]
59
65
67 return " ".join((" ".join(lines)).split()).strip()
68
72
def safe_int(s):
    """Convert ``s`` to an integer of whatever size is needed.

    Raises ValueError when ``s`` is a string that is not an integer
    literal.  Callers rely on that exception type to fall back to float
    parsing, so no other exception may leak out for such input.
    """
    # int() already promotes to an arbitrary-precision integer, so the
    # old ``long(s)`` fallback added nothing; where ``long`` does not
    # exist it turned the expected ValueError into a NameError.
    return int(s)
79
# Table of the primitive decoders, keyed by the name used to look them up.
# Each value is (callable, accepted input type(s), produced output type(s)).
# A type entry may be a single type, a list or a tuple; _fixup_defs()
# rewrites every entry so that both are tuples.
decode_functions = {
    # String -> string
    "chomp": (chomp, str, str),
    "chompchomp": (chompchomp, str, str),
    "chop": (lambda s: s[:-1], str, str),
    "chopchop": (lambda s: s[1:-1], str, str),
    "fixspaces": (fixspaces, str, str),
    "lchomp": (lchomp, str, str),
    "lchop": (lambda s: s[1:], str, str),
    "lower": (lambda s: s.lower(), str, str),
    "lstrip": (lambda s: s.lstrip(), str, str),
    "replace": (lambda s, old, new: s.replace(old, new), str, str),
    "rstrip": (lambda s: s.rstrip(), str, str),
    "str": (str, str, str),
    "strip": (lambda s: s.strip(), str, str),
    "tr": (tr, str, str),
    "unescape.c": (unescape_C, str, str),
    "unescape.doublequote": (lambda s: s.replace('""', '"'), str, str),
    "unescape.singlequote": (lambda s: s.replace("''", "'"), str, str),
    "upper": (lambda s: s.upper(), str, str),

    # List -> string
    "join": (lambda lst, s = " ": s.join(lst), list, str),
    "join.english": (join_english, list, str),

    # Anything -> int
    "int": (safe_int, [float, str, int], int),
    # NOTE(review): the callable uses str.replace although float and int
    # are listed as accepted inputs -- confirm which is intended.
    "int.comma": (lambda s: safe_int(s.replace(",", "")),
                  [float, str, int], int),
    # Parse the text in the given base.  The builtins hex()/oct() go the
    # other way (int -> str) and reject a str argument, which contradicts
    # the str -> int types declared here.
    "hex": (lambda s: int(s, 16), str, int),
    "oct": (lambda s: int(s, 8), str, int),
    "add": ((lambda i, j: i+j), int, int),

    # Anything -> float
    "float": (float, (float, str, int), float),
}
116
118
def _fixup_defs():
    """Rewrite every decode_functions entry so its type specs are tuples.

    A list becomes a tuple with the same members, a tuple is kept, and
    anything else is wrapped in a one-element tuple.  The table is
    updated in place; the callable in each entry is left alone.
    """
    def as_tuple(spec):
        if isinstance(spec, list):
            return tuple(spec)
        if isinstance(spec, tuple):
            return spec
        return (spec,)

    # Only existing keys are reassigned, so the table's size is stable
    # while it is being walked.
    for name, entry in decode_functions.items():
        func, accepts, produces = entry
        decode_functions[name] = (func, as_tuple(accepts), as_tuple(produces))
132 _fixup_defs()
133
138 return cmp(self.type, other)
140 return "Token(%r)" % (self.type,)
141
147 return cmp(self.type, other)
149 return "%s(%r)" % (self.__class__.__name__, self.val)
152
156
160
164
168
172
177
181
185
189
193
197
201
209
211 r""" [+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)? """
212
213 try:
214 self.rv.append(Integer(safe_int(input)))
215 except ValueError:
216 self.rv.append(Float(float(input)))
217
220 self.name = name
221 self.args = args
223 args = self.args
224 if not args:
225 s = ""
226 else:
227 s = str(args)[1:-1]
228 return "%s(x, %s)" % (self.name, s)
229 __repr__ = __str__
230
232 - def __init__(self, start = "expression"):
235
237 """
238 expression ::= term
239 expression ::= term pipe expression
240 """
241 if len(args) == 1:
242 return [args[0]]
243 return [args[0]] + args[2]
244
246 """
247 term ::= functionname
248 term ::= functionname open_paren args close_paren
249 """
250 if len(args) == 1:
251 return Function(args[0].val)
252 return Function(args[0].val, tuple([x.val for x in args[2]]))
253
255 """
256 args ::= arg
257 args ::= arg comma args
258 """
259 if len(args) == 1:
260 return [args[0]]
261 return [args[0]] + args[2]
262
264 """
265 arg ::= string
266 arg ::= integer
267 arg ::= float
268 """
269 return args[0]
270
274
278
279 _decoder_cache = {}
280
283 self.f = f
284 self.args = args
286 return self.f(x, *self.args)
287
290 self.inner_f = inner_f
291 self.f = f
292 self.args = args
294 return self.f(self.inner_f(x), *self.args)
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
341
343 for x in subset:
344 if x not in total:
345 raise TypeError("%s can produce a %r value not accepted by %s" %
346 (old_name, x.__name__, new_name))
347
# Decoders that have already been built, keyed by the tuple
# (s, input_types, output_types) exactly as the caller passed them.
_typechecked_decoder_cache = {}
350 cache_lookup = (s, input_types, output_types)
351 try:
352 return _typechecked_decoder_cache[cache_lookup]
353 except KeyError:
354 pass
355 if input_types is not None and not isinstance(input_types, type( () )):
356 input_types = (input_types,)
357 if output_types is not None and not isinstance(output_types, type( () )):
358 output_types = (output_types,)
359
360 functions = parse(scan(s))
361
362
363 f = functions[0]
364 fc, in_types, out_types = decode_functions[f.name]
365 if input_types is not None:
366 for x in input_types:
367 if x not in in_types:
368 raise TypeError(
369 "the input type includes %r which isn't supported by %s" %
370 (x.__name__, f.name))
371
372
373 old_name = f.name
374 input_types = out_types
375 args = functions[0].args
376 if args:
377 fc = FunctionCall(fc, args)
378
379 for f in functions[1:]:
380 transform_func, in_types, out_types = decode_functions[f.name]
381 _verify_subtypes(input_types, in_types, old_name, f.name)
382 old_name = f.name
383 input_types = out_types
384 fc = FunctionCallChain(fc, transform_func, f.args)
385
386 if output_types is not None:
387 _verify_subtypes(input_types, output_types, old_name, "the output")
388 _typechecked_decoder_cache[cache_lookup] = fc
389 return fc
390
391
405
406 if __name__ == "__main__":
407 test()
408