1
2
3
4
5
6
7
8
9
10
11 """Internal support module for sre"""
12
13 import _sre, sys
14
15 from sre_constants import *
16
17 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
18
19 if _sre.CODESIZE == 2:
20 MAXCODE = 65535
21 else:
22 MAXCODE = 0xFFFFFFFFL
23
26
def set(seq):
    """Return a dict mapping every element of *seq* to 1.

    Used as a minimal set stand-in: only membership tests (``x in s``)
    are relied on by the module-level opcode groups built with it.
    """
    s = {}
    for elem in seq:
        s[elem] = 1
    return s
32
# Opcode groups that the compiler tests with ``op in ...``; each group is
# handled by a single branch.
_LITERAL_CODES = set([LITERAL, NOT_LITERAL])
_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])
_SUCCESS_CODES = set([SUCCESS, FAILURE])
_ASSERT_CODES = set([ASSERT, ASSERT_NOT])
37
59 else:
60 emit(OPCODES[op])
61 fixup = _identityfunction
62 skip = _len(code); emit(0)
63 _compile_charset(av, flags, code, fixup)
64 code[skip] = _len(code) - skip
65 elif op is ANY:
66 if flags & SRE_FLAG_DOTALL:
67 emit(OPCODES[ANY_ALL])
68 else:
69 emit(OPCODES[ANY])
70 elif op in REPEATING_CODES:
71 if flags & SRE_FLAG_TEMPLATE:
72 raise error, "internal: unsupported template operator"
73 emit(OPCODES[REPEAT])
74 skip = _len(code); emit(0)
75 emit(av[0])
76 emit(av[1])
77 _compile(code, av[2], flags)
78 emit(OPCODES[SUCCESS])
79 code[skip] = _len(code) - skip
80 elif _simple(av) and op is not REPEAT:
81 if op is MAX_REPEAT:
82 emit(OPCODES[REPEAT_ONE])
83 else:
84 emit(OPCODES[MIN_REPEAT_ONE])
85 skip = _len(code); emit(0)
86 emit(av[0])
87 emit(av[1])
88 _compile(code, av[2], flags)
89 emit(OPCODES[SUCCESS])
90 code[skip] = _len(code) - skip
91 else:
92 emit(OPCODES[REPEAT])
93 skip = _len(code); emit(0)
94 emit(av[0])
95 emit(av[1])
96 _compile(code, av[2], flags)
97 code[skip] = _len(code) - skip
98 if op is MAX_REPEAT:
99 emit(OPCODES[MAX_UNTIL])
100 else:
101 emit(OPCODES[MIN_UNTIL])
102 elif op is SUBPATTERN:
103 if av[0]:
104 emit(OPCODES[MARK])
105 emit((av[0]-1)*2)
106
107 _compile(code, av[1], flags)
108 if av[0]:
109 emit(OPCODES[MARK])
110 emit((av[0]-1)*2+1)
111 elif op in SUCCESS_CODES:
112 emit(OPCODES[op])
113 elif op in ASSERT_CODES:
114 emit(OPCODES[op])
115 skip = _len(code); emit(0)
116 if av[0] >= 0:
117 emit(0)
118 else:
119 lo, hi = av[1].getwidth()
120 if lo != hi:
121 raise error, "look-behind requires fixed-width pattern"
122 emit(lo)
123 _compile(code, av[1], flags)
124 emit(OPCODES[SUCCESS])
125 code[skip] = _len(code) - skip
126 elif op is CALL:
127 emit(OPCODES[op])
128 skip = _len(code); emit(0)
129 _compile(code, av, flags)
130 emit(OPCODES[SUCCESS])
131 code[skip] = _len(code) - skip
132 elif op is AT:
133 emit(OPCODES[op])
134 if flags & SRE_FLAG_MULTILINE:
135 av = AT_MULTILINE.get(av, av)
136 if flags & SRE_FLAG_LOCALE:
137 av = AT_LOCALE.get(av, av)
138 elif flags & SRE_FLAG_UNICODE:
139 av = AT_UNICODE.get(av, av)
140 emit(ATCODES[av])
141 elif op is BRANCH:
142 emit(OPCODES[op])
143 tail = []
144 tailappend = tail.append
145 for av in av[1]:
146 skip = _len(code); emit(0)
147
148 _compile(code, av, flags)
149 emit(OPCODES[JUMP])
150 tailappend(_len(code)); emit(0)
151 code[skip] = _len(code) - skip
152 emit(0)
153 for tail in tail:
154 code[tail] = _len(code) - tail
155 elif op is CATEGORY:
156 emit(OPCODES[op])
157 if flags & SRE_FLAG_LOCALE:
158 av = CH_LOCALE[av]
159 elif flags & SRE_FLAG_UNICODE:
160 av = CH_UNICODE[av]
161 emit(CHCODES[av])
162 elif op is GROUPREF:
163 if flags & SRE_FLAG_IGNORECASE:
164 emit(OPCODES[OP_IGNORE[op]])
165 else:
166 emit(OPCODES[op])
167 emit(av-1)
168 elif op is GROUPREF_EXISTS:
169 emit(OPCODES[op])
170 emit(av[0]-1)
171 skipyes = _len(code); emit(0)
172 _compile(code, av[1], flags)
173 if av[2]:
174 emit(OPCODES[JUMP])
175 skipno = _len(code); emit(0)
176 code[skipyes] = _len(code) - skipyes + 1
177 _compile(code, av[2], flags)
178 code[skipno] = _len(code) - skipno
179 else:
180 code[skipyes] = _len(code) - skipyes + 1
181 else:
182 raise ValueError, ("unsupported operand type", op)
183
212
214
def _optimize_charset(charset, fixup):
    """Return a more compact equivalent of *charset* when one is found.

    *charset* is a sequence of ``(op, av)`` items and *fixup* is applied
    to every literal / range endpoint before it is used as an index.

    The items are folded into a 256-entry map.  The result is one of:

    * *charset* unchanged, when it contains a CATEGORY item or when the
      literal/range form would not be shorter than the input;
    * whatever ``_optimize_unicode(charset, fixup)`` returns, when an
      index does not fit in the 256-entry map;
    * a new list of LITERAL / RANGE items (at most two runs), or a
      single CHARSET item built by ``_mk_bitmap``.  NEGATE items are
      copied to the front of the new list.
    """
    out = []
    outappend = out.append
    charmap = [0]*256
    try:
        for op, av in charset:
            if op is NEGATE:
                outappend((op, av))
            elif op is LITERAL:
                charmap[fixup(av)] = 1
            elif op is RANGE:
                for i in range(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op is CATEGORY:
                # categories are not folded into the map; give up
                return charset
    except IndexError:
        # an index fell outside the 256-entry map
        return _optimize_unicode(charset, fixup)
    # compress the map into (start, length) runs of set entries
    i = p = n = 0
    runs = []
    runsappend = runs.append
    for c in charmap:
        if c:
            if n == 0:
                p = i
            n = n + 1
        elif n:
            runsappend((p, n))
            n = 0
        i = i + 1
    if n:
        # the last run reaches the end of the map
        runsappend((p, n))
    if len(runs) <= 2:
        # few runs: express them as literals / ranges
        for p, n in runs:
            if n == 1:
                outappend((LITERAL, p))
            else:
                outappend((RANGE, (p, p+n-1)))
        if len(out) < len(charset):
            return out
    else:
        # many runs: emit a bitmap
        data = _mk_bitmap(charmap)
        outappend((CHARSET, data))
        return out
    return charset
263
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def _optimize_unicode(charset, fixup):
    """Try to turn *charset* into a single BIGCHARSET item.

    *charset* is a sequence of ``(op, av)`` items; *fixup* is applied to
    every literal / range endpoint before it is used as an index into a
    65536-entry map.

    Returns *charset* unchanged when the ``array`` module is missing,
    when a CATEGORY item is present, when an index does not fit in the
    map, or when the set is negated and ``sys.maxunicode`` is not 65535.
    Otherwise returns ``[(BIGCHARSET, data)]`` where *data* is laid out
    as: number of distinct 256-entry blocks, the 256 per-block indices
    packed into code words, then one ``_mk_bitmap`` result per distinct
    block.
    """
    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op is NEGATE:
                negate = 1
            elif op is LITERAL:
                charmap[fixup(av)] = 1
            elif op is RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op is CATEGORY:
                # categories are not folded into the map; give up
                return charset
    except IndexError:
        # an index fell outside the 65536-entry map
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # the map only covers 65536 entries, so it cannot be
            # inverted when larger code points exist
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    # split the map into 256 chunks and store each distinct chunk once
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            # first time this chunk is seen: allocate a new block
            block = block + 1
            data.extend(_mk_bitmap(chunk))
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    # Pack the 256 block indices as bytes.  Indices range over 0..255,
    # so the unsigned type code is required: the signed 'b' code raises
    # OverflowError as soon as there are more than 128 distinct blocks.
    mapping = array.array('B', mapping).tostring()
    # Reinterpret the byte string as an array of code words
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]
359
361
def _simple(av):
    """Tell whether the repeat argument *av* has a "simple" body.

    ``av[2]`` is the repeated sub-pattern.  The body counts as simple
    when its width is exactly one and its first item is not a
    SUBPATTERN.

    Raises ``error`` ("nothing to repeat") when the body's width range
    is 0..MAXREPEAT.
    """
    lo, hi = av[2].getwidth()
    if lo == 0 and hi == MAXREPEAT:
        raise error("nothing to repeat")
    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
366
368
369
370
def _branch_first_literals(branches):
    # Return [(LITERAL, av), ...] with the first item of every branch, or
    # None if some branch is empty or does not start with a LITERAL.
    firsts = []
    for p in branches:
        if not p:
            return None
        op, av = p[0]
        if op is not LITERAL:
            return None
        firsts.append((op, av))
    return firsts

def _compile_info(code, pattern, flags):
    """Append an INFO block describing *pattern* to the list *code*.

    Nothing is emitted when the pattern's minimum width is 0.
    Otherwise the block holds: the INFO opcode, a skip word, a flag
    mask, the minimum and maximum width (capped with MAXCODE; a maximum
    that does not fit is written as 0), and then either

    * the literal prefix (length, prefix_skip, the literals and their
      overlap table), or
    * a compiled character set for the first position,

    whichever could be derived.  Prefix / charset detection is skipped
    entirely when SRE_FLAG_IGNORECASE is set.
    """
    lo, hi = pattern.getwidth()
    if lo == 0:
        return
    prefix = []
    prefixappend = prefix.append
    prefix_skip = 0
    charset = []
    if not (flags & SRE_FLAG_IGNORECASE):
        # collect the leading run of literals; a one-item SUBPATTERN
        # holding a literal contributes to the prefix but stops
        # prefix_skip from growing
        for op, av in pattern.data:
            if op is LITERAL:
                if len(prefix) == prefix_skip:
                    prefix_skip = prefix_skip + 1
                prefixappend(av)
            elif op is SUBPATTERN and len(av[1]) == 1:
                op, av = av[1][0]
                if op is LITERAL:
                    prefixappend(av)
                else:
                    break
            else:
                break
        # no literal prefix: try to derive a charset for the first item
        if not prefix and pattern.data:
            op, av = pattern.data[0]
            if op is SUBPATTERN and av[1]:
                op, av = av[1][0]
                if op is LITERAL:
                    charset = [(op, av)]
                elif op is BRANCH:
                    firsts = _branch_first_literals(av[1])
                    if firsts is not None:
                        charset = firsts
            elif op is BRANCH:
                firsts = _branch_first_literals(av[1])
                if firsts is not None:
                    charset = firsts
            elif op is IN:
                charset = av
    # emit the info block; the skip word is patched at the very end
    emit = code.append
    emit(OPCODES[INFO])
    skip = len(code); emit(0)
    # flag mask
    mask = 0
    if prefix:
        mask = SRE_INFO_PREFIX
        if len(prefix) == prefix_skip == len(pattern.data):
            # the whole pattern is the literal prefix
            mask = mask + SRE_INFO_LITERAL
    elif charset:
        mask = mask + SRE_INFO_CHARSET
    emit(mask)
    # pattern width
    if lo < MAXCODE:
        emit(lo)
    else:
        emit(MAXCODE)
        prefix = prefix[:MAXCODE]
    if hi < MAXCODE:
        emit(hi)
    else:
        emit(0)
    if prefix:
        emit(len(prefix))
        emit(prefix_skip)
        code.extend(prefix)
        # overlap table: table[i+1] is the length of the longest proper
        # prefix of prefix[:i+1] that is also its suffix
        table = [-1] + ([0]*len(prefix))
        for i in xrange(len(prefix)):
            table[i+1] = table[i]+1
            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
                table[i+1] = table[table[i+1]-1]+1
        code.extend(table[1:])
    elif charset:
        _compile_charset(charset, flags, code)
    code[skip] = len(code) - skip
472
# STRING_TYPES lists the types accepted as pattern strings.  The unicode
# type is included only when the interpreter provides it.
try:
    unicode
except NameError:
    STRING_TYPES = (type(""),)
else:
    STRING_TYPES = (type(""), type(unicode("")))

def isstring(obj):
    """Return 1 if *obj* is an instance of a type in STRING_TYPES, else 0."""
    if isinstance(obj, STRING_TYPES):
        return 1
    return 0
485
487
def _code(p, flags):
    """Compile the parsed pattern *p* into a list of code words.

    *flags* is OR-ed with the flags recorded on ``p.pattern``.  The
    returned list holds the info block (if ``_compile_info`` emits
    one), the compiled pattern body, and a final SUCCESS opcode.
    """
    flags = p.pattern.flags | flags
    code = []
    # info block first
    _compile_info(code, p, flags)
    # then the pattern itself
    _compile(code, p.data, flags)
    code.append(OPCODES[SUCCESS])
    return code
500
532