Package Martel
[hide private]
[frames | no frames]

Source Code for Package Martel

  1  # Copyright 2000-2001, Dalke Scientific Software, LLC 
  2  # Distributed under the Biopython License Agreement (see the LICENSE file). 
  3   
  4  __version__ = "0.85" 
  5   
  6  import Expression 
  7  import convert_re 
  8  import string 
  9  from xml.sax import xmlreader 
 10   
 11  # This interface is based off of Greg Ewing's Plex parser. 
 12   
def Str1(s):
    """(s) -> match the literal string

    Builds and returns the Expression.Str node for 's'.
    """
    literal = Expression.Str(s)
    return literal
16
def Str(*args):
    """(s1, s2, ...) -> match s1 or s2 or ...

    A single argument is handed straight to Str1.  Any other number of
    arguments becomes an Expression.Alt over the per-string expressions.
    """
    if len(args) != 1:
        alternatives = tuple([Str(arg) for arg in args])
        return Expression.Alt(alternatives)
    return Str1(args[0])
22
def Any(s):
    """(s) -> match any character in s

    A one-character 's' is returned as an Expression.Literal; anything
    else is returned as Expression.Any(s, 0).
    """
    if len(s) != 1:
        return Expression.Any(s, 0)
    return Expression.Literal(s)
28
def AnyBut(s):
    """s -> match any character not in s

    Returns Expression.Any(s, 1); compare Any(), which passes 0 as the
    second argument.
    """
    excluded = Expression.Any(s, 1)
    return excluded

## Untested!
##def AnyChar():
##    """match any character, including newline"""
##    return Expression.Re("(?:.|\n)")

def Seq(*args):
    """exp1, exp2, ... -> match exp1 followed by exp2 followed by ...

    Every argument must already be an Expression.Expression instance;
    anything else (a plain string, for example) trips an assertion.
    """
    # Passing a bare string here is an easy slip, hence the type check.
    for item in args:
        assert isinstance(item, Expression.Expression), \
               "expecting an Expression, not a %s" % type(item)

    return Expression.Seq(args)
48
def Alt(*args):
    """exp1, exp2, ... -> match exp1 or (if that fails) match exp2 or ...

    Every argument must be an Expression.Expression instance; this is
    checked with an assertion before the Expression.Alt node is built.
    """
    # Reject anything that is not an expression before building the node.
    for candidate in args:
        assert isinstance(candidate, Expression.Expression), \
               "expecting an Expression, not a %s" % type(candidate)
    return Expression.Alt(args)
56
def Opt(expr):
    """expr -> match 'expr' 1 or 0 times

    'expr' must be an Expression.Expression instance.  The result is
    Expression.MaxRepeat(expr, 0, 1).
    """
    is_expression = isinstance(expr, Expression.Expression)
    assert is_expression, \
           "expecting an Expression, not a %s" % type(expr)
    return Expression.MaxRepeat(expr, 0, 1)
62
def Rep(expr):
    """expr -> match 'expr' as many times as possible, even 0 times

    'expr' must be an Expression.Expression instance.  The result is
    Expression.MaxRepeat(expr, 0).
    """
    is_expression = isinstance(expr, Expression.Expression)
    assert is_expression, \
           "expecting an Expression, not a %s" % type(expr)
    return Expression.MaxRepeat(expr, 0)
68
def Rep1(expr):
    """expr -> match 'expr' as many times as possible, but at least once

    'expr' must be an Expression.Expression instance.  The result is
    Expression.MaxRepeat(expr, 1).
    """
    is_expression = isinstance(expr, Expression.Expression)
    assert is_expression, \
           "expecting an Expression, not a %s" % type(expr)
    return Expression.MaxRepeat(expr, 1)


# These are in Plex, but I don't (yet?) implement them
# NOTE(review): NoCase itself is a plain alias for Expression.NoCase; the
# comment above appears to describe the stub functions that follow it
# (Case, Bol, Eol, Empty, Eof), which raise NotImplementedError -- confirm.

NoCase = Expression.NoCase

def Case(expr):
    """Placeholder for the Plex-style 'Case' operation.

    Not implemented: always raises NotImplementedError.
    """
    raise NotImplementedError()
82
def Bol():
    """Placeholder for the Plex-style 'Bol' operation.

    Not implemented: always raises NotImplementedError.
    """
    raise NotImplementedError()
85
def Eol():
    """Placeholder for the Plex-style 'Eol' operation.

    Not implemented: always raises NotImplementedError.
    """
    raise NotImplementedError()
88
def Empty():
    """Placeholder for the Plex-style 'Empty' operation.

    Not implemented: always raises NotImplementedError.
    """
    raise NotImplementedError()
91
def Eof():
    """Placeholder for the Plex-style 'Eof' operation.

    Not implemented: always raises NotImplementedError.

    The previous body had a 'return Expression.AtEnd()' statement after
    the raise.  It could never execute, so it has been dropped; calling
    this function behaves exactly as before.
    """
    raise NotImplementedError


# Not in Plex, but useful

# Alias for Expression.AnyEol, exposed under this package's namespace.
AnyEol = Expression.AnyEol

def MaxRepeat(expr, min_count, max_count = Expression.MAXREPEAT):
    """expr, min_count, max_count -> match between min- and max_count times

    'max_count' defaults to Expression.MAXREPEAT (given as 65535 in the
    original documentation); with that value there is no upper limit.

    'expr' must be an Expression.Expression instance.
    """
    is_expression = isinstance(expr, Expression.Expression)
    assert is_expression, \
           "expecting an Expression, not a %s" % type(expr)
    return Expression.MaxRepeat(expr, min_count, max_count)
110
def RepN(expr, count):
    """expr, count -> match the expression exactly 'count' times

    'count' is passed as both the minimum and the maximum of an
    Expression.MaxRepeat.  This is handy for named group repeats since
    the name is given once rather than for both the min_count and
    max_count fields.
    """
    lower = upper = count
    return Expression.MaxRepeat(expr, lower, upper)
118 119
def Group(name, expr, attrs = None):
    """name, expr -> use 'name' to describe a successful match of the expression

    'expr' must be an Expression.Expression instance.  'attrs' is passed
    through unchanged to Expression.Group.
    """
    is_expression = isinstance(expr, Expression.Expression)
    assert is_expression, \
           "expecting an Expression, not a %s" % type(expr)
    return Expression.Group(name, expr, attrs)
125 126
127 -def _fix_newlines(s):
128 # Replace the characters "\r\n", "\r" and "\n" with \R. 129 # Does not affect the substrings '\' + 'n' or '\' + 'n' 130 s = string.replace(s, "\r\n", "\n") 131 s = string.replace(s, "\r", "\n") 132 return string.replace(s, "\n", r"\R")
133 134
def Re(pattern, fix_newlines = 0):
    """pattern -> the expression tree for the regexp pattern string

    When 'fix_newlines' is true the pattern is first run through
    _fix_newlines.  The pattern is then converted with
    convert_re.make_expression.
    """
    if not fix_newlines:
        return convert_re.make_expression(pattern)
    return convert_re.make_expression(_fix_newlines(pattern))


# Aliases for Expression.NullOp and Expression.Debug, exposed under this
# package's namespace.
NullOp = Expression.NullOp
Debug = Expression.Debug

def Assert(expression):
    """Return Expression.Assert(expression).

    See AssertNot for the variant that passes invert = 1.
    """
    assertion = Expression.Assert(expression)
    return assertion
146
def AssertNot(expression):
    """Return Expression.Assert(expression, invert = 1).

    See Assert for the variant that does not pass 'invert'.
    """
    inverted = Expression.Assert(expression, invert = 1)
    return inverted
149 150 # helper function
151 -def _group(name, exp, attrs):
152 if name is None: 153 assert not attrs, "Attributes (%s) require a group name" % (attrs,) 154 return exp 155 return Group(name, exp, attrs)
156
def Digits(name = None, attrs = None):
    r"""match one or more decimal digits

    This is the same as (?P<name?attrs>\d+).

    If 'name' is not None, the matching text will be put inside a
    group of the given name.  You can optionally include group
    attributes.
    """
    digits = Re(r"\d+")
    return _group(name, digits, attrs)
167
def Integer(name = None, attrs = None):
    """match an integer (digits with an optional leading + or - sign)

    If 'name' is not None, the matching text will be put inside a
    group of the given name.  You can optionally include group
    attributes.
    """
    return _group(name, Re(r"[+-]?\d+"), attrs)
177
def Float(name = None, attrs = None):
    """match floating point numbers like 6, 6., -.1, 2.3, +4E-5, ...

    If 'name' is not None, the matching text will be put inside of a
    group of the given name.  You can optionally include group
    attributes.
    """
    # optional sign, then digits with an optional fraction or a bare
    # fraction, then an optional exponent
    number = Re(r"[+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)?")
    return _group(name, number, attrs)
187
def Word(name = None, attrs = None):
    r"""match a 'word'

    A 'word' is defined as '\w+', and \w is [a-zA-Z0-9_].

    If 'name' is not None, the matching text will be put inside of a
    group of the given name.  You can optionally include group
    attributes.

    In other words, this is the short way to write (?P<name>\w+).
    """
    return _group(name, Re(r"\w+"), attrs)
201
def Spaces(name = None, attrs = None):
    r"""match one or more whitespace characters (except newlines)

    "Spaces" is defined as [\t\v\f ]+, which is *not* the same as
    '\s+'.  It leaves out '\n' and '\r', which is useful since you
    almost never mean for whitespace to go beyond the newline.

    (Earlier documentation listed '\r' as part of the character class;
    the pattern used here does not include it.)

    If 'name' is not None, the matching text will be put inside of a
    group of the given name.  You can optionally include group
    attributes.
    """
    exp = Re(r"[\t\v\f ]+")
    return _group(name, exp, attrs)
215
def Unprintable(name = None, attrs = None):
    """match an unprintable character (characters not in string.printable)

    If 'name' is not None, the matching text will be put inside of a
    group of the given name.  You can optionally include group
    attributes.
    """
    not_printable = AnyBut(string.printable)
    return _group(name, not_printable, attrs)
224
def Punctuation(name = None, attrs = None):
    """match a punctuation character (characters in string.punctuation)

    If 'name' is not None, the matching text will be put inside of a
    group of the given name.  You can optionally include group
    attributes.
    """
    punctuation = Any(string.punctuation)
    return _group(name, punctuation, attrs)
233 234
def ToEol(name = None, attrs = None):
    """match everything up to and including the end of line

    If 'name' is not None, the matching text, except for the newline,
    will be put inside a group of the given name.  You can optionally
    include group attributes.
    """
    if name is not None:
        # Only the text before the newline goes inside the group.
        return Group(name, Re(r"[^\R]*"), attrs) + AnyEol()
    assert not attrs, "Attributes (%s) require a group name" % (attrs,)
    return Re(r"[^\R]*\R")
247
def UntilEol(name = None, attrs = None):
    """match everything up to but not including the end of line

    If 'name' is not None, the matching text will be put inside a
    group of the given name.  You can optionally include group
    attributes.
    """
    if name is not None:
        return Group(name, Re(r"[^\R]*"), attrs)
    assert not attrs, "Attributes (%s) require a group name" % (attrs,)
    return Re(r"[^\R]*")
260
def SkipLinesUntil(expr):
    """read and ignore lines up to, but excluding, the line matching expr"""
    # One skipped line: 'expr' must not match here, then take the whole line.
    skipped_line = AssertNot(expr) + ToEol()
    return Rep(skipped_line)
264
def SkipLinesTo(expr):
    """read and ignore lines up to and including the line matching expr"""
    # Skip the non-matching lines, then take the matching line and the
    # rest of it through the end of line.
    skipped_line = AssertNot(expr) + ToEol()
    leading_lines = Rep(skipped_line)
    return leading_lines + expr + ToEol()
268 269
def ToSep(name = None, sep = None, attrs = None):
    """match all characters up to the given separator(s)

    This is useful for parsing space, tab, colon, or other character
    delimited fields.  There is no default separator character.

    If 'name' is not None, the matching text, except for the separator,
    will be put inside a group of the given name.  You can optionally
    include group attributes.  The separator will also be consumed.

    The field text stops at the first character found in 'sep' (or at
    a newline character); the separator is then matched with Str(sep).

    Neither "\\r" nor "\\n" may be used as a separator.

    Raises TypeError if 'sep' is not given.
    """
    if sep is None:
        # I found it was too easy to make a mistake with a default
        raise TypeError("Must specify a seperator (the 'sep' parameter)")

    # The message must format 'sep'.  It used to reference an undefined
    # name, which turned this assertion failure into a NameError.
    assert "\r" not in sep and "\n" not in sep, \
           "cannot use %s as a seperator" % (repr(sep),)

    exp = Rep(AnyBut(sep + "\r\n"))
    return _group(name, exp, attrs) + Str(sep)
292
def UntilSep(name = None, sep = None, attrs = None):
    """match all characters up to the given separator(s)

    This is useful for parsing space, tab, colon, or other character
    delimited fields.  There is no default separator.

    If 'name' is not None, the matching text, except for the separator,
    will be put inside a group of the given name.  You can optionally
    include group attributes.  The separator character will not be
    consumed.

    Neither "\\r" nor "\\n" may be used as a separator.

    Raises TypeError if 'sep' is not given.
    """
    # A default separator proved too easy to get wrong, so one is required.
    if sep is None:
        raise TypeError("Must specify a seperator (the 'sep' parameter)")

    has_newline = "\r" in sep or "\n" in sep
    assert not has_newline, \
           "cannot use %s as a seperator" % (repr(sep),)

    field = Rep(AnyBut(sep + "\r\n"))
    return _group(name, field, attrs)
315 316
def DelimitedFields(name = None, sep = None, attrs = None):
    """match 0 or more fields separated by the given separator(s)

    This is useful for parsing space, tab, colon, or other character
    delimited fields.  There is no default separator.

    If 'name' is not None, the delimited text, excluding the separator,
    will be put inside groups of the given name.  You can optionally
    include group attributes.  The separator character is consumed,
    but not accessible using a group.

    Neither "\\r" nor "\\n" may be used as a separator.
    The line as a whole is not included in a group.

    Raises TypeError if 'sep' is not given.
    """
    # A default separator proved too easy to get wrong, so one is required.
    if sep is None:
        raise TypeError("Must specify a sep (via the 'sep' parameter)")

    has_newline = "\r" in sep or "\n" in sep
    assert not has_newline, \
           "cannot use %s as a seperator" % (repr(sep),)

    # One field, then any number of (separator, field) pairs, then the
    # end of line.
    field = _group(name, Rep(AnyBut(sep + "\r\n")), attrs)
    more_fields = Rep(Any(sep) + field)
    return field + more_fields + AnyEol()

# Allows some optimizations
FastFeature = Expression.FastFeature


# Used when making parsers which read a record at a time
ParseRecords = Expression.ParseRecords
HeaderFooter = Expression.HeaderFooter

# Use this to prune out group names you aren't
# interested in seeing, which reduces the number of method
# calls back to the parser.
def select_names(expression, names):
    """Return a copy of 'expression' pruned by 'names'.

    The input expression is not modified: the work is done on a copy,
    so no subexpressions are shared with other expressions.
    """
    pruned = expression.copy()

    # Relies on the expression's internal _select_names method.
    pruned._select_names(names)

    # Get rid of unnamed groups
    import optimize
    return optimize.optimize_unnamed_groups(pruned)
364
def replace_groups(expr, replacements):
    """Return a copy of 'expr' with the listed groups' expressions replaced.

    'replacements' is an iterable of (tagname, replacement_expr) pairs.
    For each pair, every group returned by _find_groups(tagname) on the
    copy has its 'expression' attribute set to replacement_expr.
    """
    result = expr.copy()
    for tagname, replacement_expr in replacements:
        for group in result._find_groups(tagname):
            group.expression = replacement_expr
    return result
372
def SimpleRecordFilter(expr, make_reader, reader_args = ()):
    """Build a ParseRecords expression whose records start with 'expr'.

    Each record is a "record" group made of 'expr' followed by any
    number of ToEol() lines.  The result is a ParseRecords named
    "dataset" with the attribute format = "*filter*"; 'make_reader'
    and 'reader_args' are passed through to ParseRecords unchanged.
    """
    record = Group("record", expr + Rep(ToEol()))
    return ParseRecords("dataset", {"format": "*filter*"},
                        record,
                        make_reader, reader_args)
377