Package Bio :: Module Decode
[hide private]
[frames] | no frames]

Source Code for Module Bio.Decode

  1  # Decode elements from a Std/Martel parsed XML stream 
  2   
  3  import string 
  4  from Bio.Parsers.spark import GenericScanner, GenericParser 
  5   
def unescape_C(s):
    r"""Expand C-style backslash escapes in s and return the new string.

    Recognised escapes:

      \xHH       exactly two hexadecimal digits, replaced by chr(0xHH)
      \O .. \OOO one to three octal digits, replaced by chr of that value
      \c         any other character c is replaced by c itself, which
                 covers \\ and escaped quotes; named escapes such as \n
                 are NOT translated to control characters

    Every character that belongs to an escape is consumed exactly once.

    Raises ValueError for a malformed \x escape or a trailing backslash.
    """
    octal_digits = "01234567"
    result = []
    n = len(s)
    i = 0
    while i < n:
        ch = s[i]
        if ch != "\\":
            result.append(ch)
            i += 1
            continue

        c = s[i+1:i+2]
        if not c:
            # A lone backslash at the end has nothing to escape.
            raise ValueError("trailing backslash in escaped string")

        if c == "x":
            digits = s[i+2:i+4]
            if len(digits) != 2:
                raise ValueError("invalid \\x escape")
            result.append(chr(int(digits, 16)))
            i += 4  # backslash, "x" and the two hex digits
            continue

        if c in octal_digits:
            # Take at most three characters, stopping at the first
            # one that is not an octal digit.
            j = i + 1
            while j < n and j < i + 4 and s[j] in octal_digits:
                j += 1
            result.append(chr(int(s[i+1:j], 8)))
            i = j
            continue

        # Unknown escape: keep the escaped character literally.
        result.append(c)
        i += 2
    return "".join(result)
28
def join_english(fields):
    """Merge a list of text fragments into one whitespace-normalized string.

    A fragment is glued on without a separator when the text so far ends
    in a "-" that has another "-" two characters before it, or when
    neither the text so far nor the fragment contains a space.  Otherwise
    a single space is inserted.  An empty list gives "".
    """
    if not fields:
        return ""
    text = fields[0]
    for piece in fields[1:]:
        hyphen_tail = text[-1:] == "-" and text[-3:-2] == "-"
        spaceless = " " not in text and " " not in piece
        if hyphen_tail or spaceless:
            text = text + piece
        else:
            text = text + " " + piece
    # Joining the split words already leaves no leading/trailing blanks.
    return " ".join(text.split())
42 43 44
def chomp(s, c):
    """Return s without its last character when that character equals c."""
    if s[-1:] != c:
        return s
    return s[:-1]
49
def lchomp(s, c):
    """Return s without its first character when that character equals c."""
    if s[:1] != c:
        return s
    return s[1:]
54
def chompchomp(s, c):
    """Strip one c from both ends of s, but only if it is present at both."""
    wrapped = s[:1] == c and s[-1:] == c
    if not wrapped:
        return s
    return s[1:-1]
59
def fixspaces(s):
    """Collapse every run of whitespace in s to a single space.

    Leading and trailing whitespace disappears as well, because the
    words produced by split() are re-joined with exactly one space.
    """
    words = s.split()
    return " ".join(words)
65
def join_fixspaces(lines):
    """Join lines with spaces, then collapse all whitespace runs to one space."""
    combined = " ".join(lines)
    words = combined.split()
    return " ".join(words)
68
def tr(s, frm, to):
    """Return s with each character of frm replaced by the character at
    the same position in to (like the Unix tr command).

    frm and to must have the same length; the underlying maketrans call
    raises ValueError otherwise.
    """
    if hasattr(string, "maketrans"):
        # Python 2: keep the original module-level functions.
        return string.translate(s, string.maketrans(frm, to))
    # Python 3 removed string.maketrans/string.translate; the str
    # methods provide the same character-for-character mapping.
    return s.translate(str.maketrans(frm, to))
72
def safe_int(s):
    """converts to int if the number is small, long if it's large

    Raises ValueError when s cannot be read as an integer; the scanner's
    t_float method depends on that to fall back to float parsing.
    """
    try:
        return int(s)
    except ValueError:
        pass
    try:
        convert = long  # Python 2 fallback, as in the original code
    except NameError:
        # Python 3 has no separate long type; calling int again
        # re-raises the ValueError instead of leaking a NameError.
        convert = int
    return convert(s)
# Registry of the operations that may appear in a decoder specification.
# Each key is the name used in the specification string; each value is a
# 3-tuple (callable, accepted input type(s), produced output type(s)).
# The type entries may be a single type, a list or a tuple here;
# _fixup_defs() normalizes them to tuples.
decode_functions = {
    "chomp": (chomp, str, str),
    "chompchomp": (chompchomp, str, str),
    "chop": (lambda s: s[:-1], str, str),
    "chopchop": (lambda s: s[1:-1], str, str),
    "fixspaces": (fixspaces, str, str),
    "lchomp": (lchomp, str, str),
    "lchop": (lambda s: s[1:], str, str),
    "lower": (lambda s: s.lower(), str, str),
    "lstrip": (lambda s: s.lstrip(), str, str),
    "replace": (lambda s, old, new: s.replace(old, new), str, str),
    "rstrip": (lambda s: s.rstrip(), str, str),
    "str": (str, str, str),
    "strip": (lambda s: s.strip(), str, str),
    "tr": (tr, str, str),
    "unescape.c": (unescape_C, str, str),
    "unescape.doublequote": (lambda s: s.replace('""', '"'), str, str),
    "unescape.singlequote": (lambda s: s.replace("''", "'"), str, str),
    "upper": (lambda s: s.upper(), str, str),

    # List operations
    "join": (lambda lst, s = " ": s.join(lst), list, str),
    "join.english": (join_english, list, str),

    # Integer operations
    "int": (safe_int, [float, str, int], int),
    "int.comma": (lambda s: safe_int(s.replace(",", "")),
                  [float, str, int], int),
    # NOTE(review): the builtins hex() and oct() take an integer and
    # return a string, which looks like the reverse of the (str -> int)
    # types declared for them here -- confirm the intent.
    "hex": (hex, str, int),
    "oct": (oct, str, int),
    "add": ((lambda i, j: i+j), int, int),

    # Float operations
    "float": (float, (float, str, int), float),

    }
def _fixup_defs():
    """Rewrite decode_functions so both type entries of every value are tuples.

    A list becomes a tuple with the same items, a tuple is kept as it
    is, and anything else is wrapped in a one-element tuple.
    """
    def as_tuple(types):
        if isinstance(types, list):
            return tuple(types)
        if isinstance(types, tuple):
            return types
        return (types,)

    # Only existing keys are reassigned, so the dictionary never changes
    # size while it is being walked.
    for name, entry in decode_functions.items():
        func, accepted, produced = entry
        decode_functions[name] = (func, as_tuple(accepted), as_tuple(produced))

# Normalize the table once, at import time.
_fixup_defs()
class Token:
    """A scanner token identified only by its symbol name.

    A Token compares like its type string, so it can be compared
    directly against a symbol name such as "pipe" or against another
    Token.  __cmp__ provides this under Python 2; the rich comparison
    methods give the same results and also work on Python 3, where
    __cmp__ and cmp() no longer exist.
    """
    def __init__(self, type):
        self.type = type

    def __cmp__(self, other):
        # Python 2 three-way comparison protocol (unused on Python 3).
        return cmp(self.type, other)

    def __eq__(self, other):
        return self.type == other

    def __ne__(self, other):
        return self.type != other

    def __lt__(self, other):
        return self.type < other

    def __le__(self, other):
        return self.type <= other

    def __gt__(self, other):
        return self.type > other

    def __ge__(self, other):
        return self.type >= other

    def __repr__(self):
        return "Token(%r)" % (self.type,)
141
class ValueToken(Token):
    """A Token that also carries a value (stored in the val attribute).

    Comparison still looks only at the token type; repr() shows the
    concrete class name with the value, and str() gives the value as a
    string.
    """
    def __init__(self, type, val):
        self.val = val
        Token.__init__(self, type)

    def __cmp__(self, other):
        # Same type-only comparison as the base class.
        return Token.__cmp__(self, other)

    def __repr__(self):
        class_name = self.__class__.__name__
        return "%s(%r)" % (class_name, self.val)

    def __str__(self):
        return str(self.val)
152
class Integer(ValueToken):
    """Value token whose type is "integer"."""
    def __init__(self, val):
        ValueToken.__init__(self, type="integer", val=val)
156
class Float(ValueToken):
    """Value token whose type is "float"."""
    def __init__(self, val):
        ValueToken.__init__(self, type="float", val=val)
160
class String(ValueToken):
    """Value token whose type is "string"."""
    def __init__(self, val):
        ValueToken.__init__(self, type="string", val=val)
164
class FunctionName(ValueToken):
    """Value token whose type is "functionname"."""
    def __init__(self, val):
        ValueToken.__init__(self, type="functionname", val=val)
168
class DecodeScanner(GenericScanner):
    # Tokenizer for decoder specifications such as 'chop|chomp("A")'.
    #
    # tokenize() returns a list of Token / ValueToken objects collected
    # in self.rv by the t_* callbacks below.
    #
    # NOTE(review): the string that opens each t_* method is presumably
    # the regular expression GenericScanner uses to recognise that
    # token (the spark convention), not documentation -- do not edit
    # those strings or add docstrings to these methods.

    def __init__(self):
        # Initialise the base scanner, mirroring how DecodeParser
        # initialises GenericParser.
        GenericScanner.__init__(self)

    def tokenize(self, input):
        # Start a fresh result list for every scan.
        self.rv = []
        GenericScanner.tokenize(self, input)
        return self.rv

    def t_functionname(self, input):
        r" \w+(\.\w+)*"
        self.rv.append(FunctionName(input))

    def t_pipe(self, input):
        r" \| "
        self.rv.append(Token("pipe"))

    def t_open_paren(self, input):
        r" \( "
        self.rv.append(Token("open_paren"))

    def t_close_paren(self, input):
        r" \) "
        self.rv.append(Token("close_paren"))

    def t_comma(self, input):
        r" , "
        self.rv.append(Token("comma"))

    def t_whitespace(self, input):
        r" \s+ "
        # Whitespace produces no token.
        pass

    def t_string(self, input):
        r""" "([^"\\]+|\\.)*"|'([^'\\]+|\\.)*' """
        # "'   # emacs cruft
        # Drop the surrounding quote characters, then expand escapes.
        s = input[1:-1]
        s = unescape_C(s)

        self.rv.append(String(s))

    def t_float(self, input):
        r""" [+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)? """
        # See if this is an integer
        try:
            self.rv.append(Integer(safe_int(input)))
        except ValueError:
            self.rv.append(Float(float(input)))
217
class Function:
    """One parsed step of a decoder specification: a name plus its arguments.

    str() and repr() both render the step as "name(x, <args>)", where
    <args> is the text of str(args) with its first and last character
    removed, or the empty string when there are no arguments.
    """
    def __init__(self, name, args = ()):
        self.name = name
        self.args = args

    def __str__(self):
        arg_text = ""
        if self.args:
            arg_text = str(self.args)[1:-1]
        return "%s(x, %s)" % (self.name, arg_text)

    __repr__ = __str__
230
class DecodeParser(GenericParser):
    # Parser turning the scanner's token list into a list of Function
    # objects, one per "|"-separated step.
    #
    # NOTE(review): the docstrings of the p_* methods are presumably the
    # grammar rules GenericParser reads (the spark convention) -- keep
    # them exactly as grammar text.

    def __init__(self, start = "expression"):
        GenericParser.__init__(self, start)
        self.begin_pos = 0

    def p_expression(self, args):
        """
        expression ::= term
        expression ::= term pipe expression
        """
        head = [args[0]]
        if len(args) != 1:
            # args[1] is the pipe token; args[2] is the rest of the chain.
            return head + args[2]
        return head

    def p_term(self, args):
        """
        term ::= functionname
        term ::= functionname open_paren args close_paren
        """
        name = args[0].val
        if len(args) != 1:
            values = tuple([x.val for x in args[2]])
            return Function(name, values)
        return Function(name)

    def p_args(self, args):
        """
        args ::= arg
        args ::= arg comma args
        """
        head = [args[0]]
        if len(args) != 1:
            # args[1] is the comma token; args[2] is the remaining list.
            return head + args[2]
        return head

    def p_arg(self, args):
        """
        arg ::= string
        arg ::= integer
        arg ::= float
        """
        return args[0]
270
def scan(input):
    """Tokenize a decoder specification string with a new DecodeScanner."""
    return DecodeScanner().tokenize(input)
274
def parse(tokens):
    """Parse a token list with a new DecodeParser and return its result."""
    return DecodeParser().parse(tokens)
# Decoders already built by make_decoder, keyed by their specification string.
_decoder_cache = {}
class FunctionCall:
    """Callable that applies f to a value followed by fixed extra arguments."""
    def __init__(self, f, args):
        self.f = f
        self.args = args

    def __call__(self, x):
        func, extra = self.f, self.args
        return func(x, *extra)
287
class FunctionCallChain:
    """Callable that feeds the result of inner_f into f with extra arguments."""
    def __init__(self, inner_f, f, args):
        self.inner_f = inner_f
        self.f = f
        self.args = args

    def __call__(self, x):
        intermediate = self.inner_f(x)
        return self.f(intermediate, *self.args)
295 296 #### I don't think this is the right way to do things 297 ##class CheckTypes: 298 ## def __init__(self, f, call_types, return_types): 299 ## self.f = f 300 ## self.call_types = call_types 301 ## self.return_types = return_types 302 ## def __call__(self, x): 303 ## if self.call_types is not None: 304 ## for T in self.call_types: 305 ## if isinstance(x, T): 306 ## break 307 ## else: 308 ## raise TypeError( 309 ## "Call value %s of type %s, expecting one of %s" % 310 ## (x, type(x).__name__, 311 ## [T.name for T in self.call_types])) 312 ## y = self.f(x) 313 314 ## if not self.return_types: 315 ## return y 316 317 ## for T in self.return_types: 318 ## if isinstance(y, T): 319 ## return y 320 ## raise TypeError("Return value %s of type %s, expecting one of %s" % 321 ## (y, type(y).__name__, 322 ## [T.name for T in self.return_types])) 323
def make_decoder(s):
    """Build, cache and return the callable described by specification s.

    s is scanned and parsed into a list of steps.  Each step's callable
    is looked up by name in decode_functions and the steps are composed
    left to right.  Results are memoized in _decoder_cache, so the same
    string always yields the same object.
    """
    if s in _decoder_cache:
        return _decoder_cache[s]

    steps = parse(scan(s))

    first = steps[0]
    decoder = decode_functions[first.name][0]
    if first.args:
        # Bind the extra arguments; a step without arguments is used as is.
        decoder = FunctionCall(decoder, first.args)

    for step in steps[1:]:
        step_func = decode_functions[step.name][0]
        decoder = FunctionCallChain(decoder, step_func, step.args)

    _decoder_cache[s] = decoder
    return decoder
341
def _verify_subtypes(subset, total, old_name, new_name):
    """Check that every type in subset also appears in total.

    Raises TypeError naming the first offending type; old_name and
    new_name identify the producer and the consumer in the message.
    """
    rejected = [t for t in subset if t not in total]
    if rejected:
        raise TypeError("%s can produce a %r value not accepted by %s" %
                        (old_name, rejected[0].__name__, new_name))
# Decoders built by make_typechecked_decoder, keyed by the tuple
# (spec, input_types, output_types) exactly as the caller passed them.
_typechecked_decoder_cache = {}
def make_typechecked_decoder(s, input_types = None, output_types = None):
    """Like make_decoder, but also check that the steps' types line up.

    input_types / output_types may each be None (no check), a single
    type, or a tuple of types.  Every given input type must be accepted
    by the first step, every type a step can produce must be accepted by
    the next step, and every type the last step can produce must be in
    output_types.  Raises TypeError on the first mismatch.
    """
    cache_lookup = (s, input_types, output_types)
    if cache_lookup in _typechecked_decoder_cache:
        return _typechecked_decoder_cache[cache_lookup]

    # Wrap a single type in a tuple so both arguments can be iterated.
    if not (input_types is None or isinstance(input_types, tuple)):
        input_types = (input_types,)
    if not (output_types is None or isinstance(output_types, tuple)):
        output_types = (output_types,)

    steps = parse(scan(s))

    # Make sure the input type(s) are allowed
    first = steps[0]
    decoder, accepted, produced = decode_functions[first.name]
    if input_types is not None:
        for given in input_types:
            if given not in accepted:
                raise TypeError(
                    "the input type includes %r which isn't supported by %s" %
                    (given.__name__, first.name))

    # Do the composition
    if first.args:
        decoder = FunctionCall(decoder, first.args)

    previous_name = first.name
    for step in steps[1:]:
        step_func, accepted, step_produced = decode_functions[step.name]
        # What the previous step can produce must be accepted here.
        _verify_subtypes(produced, accepted, previous_name, step.name)
        previous_name = step.name
        produced = step_produced
        decoder = FunctionCallChain(decoder, step_func, step.args)

    if output_types is not None:
        _verify_subtypes(produced, output_types, previous_name, "the output")
    _typechecked_decoder_cache[cache_lookup] = decoder
    return decoder
390 391
def test():
    """Self-test: run a few decoder specifications and check their results."""
    # (specification, input value, expected result), checked in order.
    cases = [
        ("chop", "Andrew", "Andre"),
        ("int", "9", 9),
        ('join(" ")', ["Andrew", "Dalke"], "Andrew Dalke"),
        ('chomp("|")', "|test|", "|test"),
        ('chomp("|")', "|test", "|test"),
        ('chomp("A")|chop', "BA", ""),
        ('chomp("A")|chop', "AB", "A"),
        ('chop|chomp("A")', "AB", ""),
        ('chop|chomp("A")', "BA", "B"),
        ('add(5)', 2, 7),
        ('add(-2)', 5, 3),
    ]
    for spec, value, expected in cases:
        assert make_decoder(spec)(value) == expected

if __name__ == "__main__":
    test()