Package Bio :: Module Std
[hide private]
[frames] | no frames]

Source Code for Module Bio.Std

  1  # Standard Bioformats definitions 
  2   
  3  import Martel 
  4  Group = Martel.Group 
  5   
  6  namespace = "bioformat" 
  7  NS = namespace + ":" 
  8  XMLNS = "http://biopython.org/bioformat" 
  9   
def _set_if_given(attrs, field, d, valid=None, convert=None):
    """Copy attrs[field] into d when a non-None value is present.

    attrs   -- mapping to read from (via .get)
    field   -- key to look up in attrs and to store under in d
    d       -- dictionary updated in place
    valid   -- optional container of permitted values; a value outside
               it raises TypeError
    convert -- optional callable applied to the value before storing

    Nothing is stored when the field is missing or None.
    """
    value = attrs.get(field)
    if value is None:
        return
    if valid is not None and value not in valid:
        message = "%s (%r) must be one of %s" % (field, value, valid)
        raise TypeError(message)
    if convert is not None:
        value = convert(value)
    d[field] = value
21
def _complain_if_given(attrs, name):
    """Raise NotImplementedError if attrs holds a non-None value for name.

    A missing key and a key explicitly set to None are both treated as
    "not given" and return silently.
    """
    # dict.has_key() no longer exists on Python 3; .get() handles both the
    # missing and the present-but-None case on every Python version.
    if attrs.get(name) is not None:
        raise NotImplementedError("Don't yet handle %r" % (name,))
25
def _must_have(expr, f):
    """Require that expr contains the group whose tag is f.tag.

    expr -- object providing group_names()
    f    -- object carrying a .tag attribute

    Raises TypeError when f.tag is not among expr.group_names().
    """
    required = f.tag
    if required in expr.group_names():
        return
    raise TypeError(
        "group %r not present in the expression but is required" %
        (required,))
32
def _must_have_set(expr, sets):
    """Require that expr contains every tag of at least one set in sets.

    expr -- object providing group_names()
    sets -- sequence of sequences; each inner item carries a .tag

    Returns None as soon as one inner sequence is fully present.
    Raises TypeError listing the acceptable tag sets otherwise.
    """
    names = expr.group_names()
    for candidate in sets:
        for f in candidate:
            if f.tag not in names:
                break
        else:
            # No break: every tag of this candidate set was found.
            return
    if len(sets) == 1:
        raise TypeError("missing required tags (need %s) in expression" %
                        ([f.tag for f in sets[0]],))
    lines = ["missing required tags in expression; must have one set from:"]
    for candidate in sets:
        # The original comprehension referenced an undefined name here,
        # so this error path raised NameError instead of TypeError.
        lines.append(str([f.tag for f in candidate]))
    raise TypeError("\n".join(lines))
50
def _must_not_have(expr, f):
    """Require that expr does NOT contain the group whose tag is f.tag.

    expr -- object providing group_names()
    f    -- object carrying a .tag attribute

    Raises TypeError when f.tag is among expr.group_names().
    """
    # The original evaluated f.tag without assigning it, so the check
    # below always failed with NameError.
    tag = f.tag
    if tag in expr.group_names():
        raise TypeError(
            "group %r present in the expression but is not allowed" %
            (tag,))
57 58 59 # pre- Python 2.2 functions didn't allow attributes
def _f():
    pass

# Probe whether plain functions accept attribute assignment.  _use_hack
# ends up 0 when they do and 1 when the assignment raises AttributeError.
try:
    _f.x = 1
except AttributeError:
    _use_hack = 1
else:
    _use_hack = 0
# The probe function is only needed for the test above.
del _f

def _check_name(f, text):
    """Sanity-check that tag text matches NS + the name of function f.

    The bare tag "record" is exempt from the check.  A mismatch raises
    AssertionError whose argument is the (expected, actual) pair.
    """
    if text == "record": # XXX FIXME
        return
    # __name__ is available on functions in both Python 2 and Python 3,
    # whereas func_name is Python 2 only.
    expected = NS + f.__name__
    # NS already ends with ":", so the message must not add another one.
    assert expected == text, (expected, text)
73
def _check_attrs(attrs, names):
    """Validate attrs against names and return a normalised copy.

    attrs -- dictionary of attributes supplied by the caller
    names -- sequence of permitted attribute names

    Raises TypeError if attrs has a key that is not in names.  Returns a
    copy of attrs in which every name from names is present, with
    missing ones set to None.  The input dictionary is not modified.
    """
    # A bare string is almost certainly a 1-tuple written without its
    # trailing comma; treat it as a single name rather than iterating
    # over its characters.
    if isinstance(names, str):
        names = (names,)
    for name in attrs:
        if name not in names:
            raise TypeError("attr %r is not allowed here (valid terms: %s)" %
                            (name, names))
    d = attrs.copy()
    for name in names:
        # "in" replaces dict.has_key(), which Python 3 removed.
        if name not in d:
            d[name] = None
    return d

if _use_hack:
    # Functions cannot carry attributes here, so each tagged function is
    # replaced in the module namespace by a callable wrapper object.
    class StdTerm:
        """Callable wrapper that forwards every call to the wrapped function."""
        def __init__(self, func):
            self._func = func
        def __call__(self, *args, **kwargs):
            return self._func(*args, **kwargs)

    def _settag(f, tag):
        """Validate tag, rebind f's global name to a StdTerm and tag it."""
        _check_name(f, tag)
        wrapper = StdTerm(f)
        globals()[f.func_name] = wrapper
        wrapper.tag = tag
else:
    def _settag(f, tag):
        """Validate tag against f's name and store it as f.tag."""
        _check_name(f, tag)
        f.tag = tag
101 102 ################ identifier, description, and cross-references
def record(expr, attrs={}):
    """Wrap expr in a "record" group.

    The group always carries "xmlns:bioformat" set to XMLNS.  The only
    accepted attribute is "format", copied through when given; any other
    attribute raises TypeError.
    """
    checked = _check_attrs(attrs, ("format",))
    group_attrs = {"xmlns:bioformat": XMLNS}
    _set_if_given(checked, "format", group_attrs)
    # XXX FIXME: this tag is the bare "record", without the NS prefix.
    return Group("record", expr, group_attrs)
_settag(record, "record")  # XXX AND FIXME


def dbid(expr, attrs={}):
    """Wrap expr in a "bioformat:dbid" group.

    Accepted attributes: "type" (one of "primary", "accession",
    "secondary"), "dbname" and "style".  Note that "style" passes
    validation but is not copied onto the group.
    """
    checked = _check_attrs(attrs, ("type", "style", "dbname"))
    group_attrs = {}
    allowed_types = ("primary", "accession", "secondary")
    _set_if_given(checked, "type", group_attrs, allowed_types)
    _set_if_given(checked, "dbname", group_attrs)
    return Group(NS + "dbid", expr, group_attrs)
_settag(dbid, NS + "dbid")

def description_block(expr, attrs={}):
    """Wrap expr in a "bioformat:description_block" group.

    expr must contain a description group (TypeError otherwise).  The
    only accepted attribute is "join", which must be one of "english",
    "concat", "space" or "newline".
    """
    checked = _check_attrs(attrs, ("join",))
    _must_have(expr, description)
    join_styles = ("english", "concat", "space", "newline")
    group_attrs = {}
    _set_if_given(checked, "join", group_attrs, join_styles)
    return Group(NS + "description_block", expr, group_attrs)
_settag(description_block, NS + "description_block")

def description(expr, attrs={}):
    """Wrap expr in a "bioformat:description" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "description"
    return Group(tag, expr)
_settag(description, NS + "description")

def description_line(expr, attrs={}):
    """Wrap expr in a description group nested inside a description_block.

    attrs is handed to description(); description_block() is called with
    its default attributes.
    """
    inner = description(expr, attrs)
    return description_block(inner)
134
def fast_dbxref(expr, attrs={}):
    """Wrap expr in a "bioformat:fast_dbxref" group.

    The only accepted attribute is "style", which must be one of
    "sp-general", "sp-prosite" or "sp-embl".
    """
    checked = _check_attrs(attrs, ("style",))
    styles = ("sp-general", "sp-prosite", "sp-embl")
    group_attrs = {}
    _set_if_given(checked, "style", group_attrs, styles)
    return Group(NS + "fast_dbxref", expr, group_attrs)
140
def dbxref(expr, attrs={}):
    """Wrap expr in a "bioformat:dbxref" group.

    expr must contain a dbxref_dbid group (TypeError otherwise).
    "style" is the only recognised attribute, and giving it a non-None
    value raises NotImplementedError.
    """
    checked = _check_attrs(attrs, ("style",))
    _must_have(expr, dbxref_dbid)
    _complain_if_given(checked, "style")
    # No attribute is ever forwarded, so the group gets an empty dict.
    return Group(NS + "dbxref", expr, {})
_settag(dbxref, NS + "dbxref")

def dbxref_dbname(expr, attrs={}):
    """Wrap expr in a "bioformat:dbxref_dbname" group.

    The only accepted attribute is "style", copied through unchecked
    when given.
    """
    checked = _check_attrs(attrs, ("style",))
    group_attrs = {}
    _set_if_given(checked, "style", group_attrs)
    tag = NS + "dbxref_dbname"
    return Group(tag, expr, group_attrs)
_settag(dbxref_dbname, NS + "dbxref_dbname")

def dbxref_dbid(expr, attrs={}):
    """Wrap expr in a "bioformat:dbxref_dbid" group.

    Accepted attributes:
      "dbname" -- copied through unchecked
      "type"   -- one of "primary", "accession", "secondary"
      "style"  -- recognised, but a non-None value raises
                  NotImplementedError
      "negate" -- 0 or 1, stored as its str() form
    """
    checked = _check_attrs(attrs, ("dbname", "type", "style", "negate"))
    group_attrs = {}
    _set_if_given(checked, "dbname", group_attrs)
    id_types = ("primary", "accession", "secondary")
    _set_if_given(checked, "type", group_attrs, id_types)
    _complain_if_given(checked, "style")
    _set_if_given(checked, "negate", group_attrs, (0, 1), str)

    tag = NS + "dbxref_dbid"
    return Group(tag, expr, group_attrs)
_settag(dbxref_dbid, NS + "dbxref_dbid")

def dbxref_negate(expr, attrs={}):
    """Wrap expr in a "bioformat:dbxref_negate" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "dbxref_negate"
    return Group(tag, expr)
_settag(dbxref_negate, NS + "dbxref_negate")

##################### sequences

def _check_gapchar(s):
    """Return s unchanged if it is an acceptable gap character.

    A character is accepted when its ordinal lies between ord(" ") and
    126 inclusive; anything else raises TypeError.
    """
    code = ord(s)
    if code < ord(" ") or code > 126:
        raise TypeError("%r not allowed as a gap character" % (s,))
    return s
178 179 # What about three letter codes?
def sequence_block(expr, attrs={}):
    """Wrap expr in a "bioformat:sequence_block" group.

    expr must contain a sequence group (TypeError otherwise).

    Accepted attributes:
      "alphabet"      -- one of the alphabet names listed below
      "gapchar"       -- validated through _check_gapchar
      "remove_spaces" -- 0 or 1, stored as its str() form
    """
    checked = _check_attrs(attrs, ("alphabet", "gapchar", "remove_spaces"))
    _must_have(expr, sequence)
    alphabets = (
        "iupac-protein", "iupac-dna", "iupac-rna",
        "iupac-ambiguous-protein",
        "iupac-ambiguous-dna",
        "iupac-ambiguous-rna",
        "protein", "dna", "rna", "unknown",
    )
    group_attrs = {}
    _set_if_given(checked, "alphabet", group_attrs, alphabets)
    _set_if_given(checked, "gapchar", group_attrs, convert=_check_gapchar)
    _set_if_given(checked, "remove_spaces", group_attrs, (0, 1), str)
    return Group(NS + "sequence_block", expr, group_attrs)
_settag(sequence_block, NS + "sequence_block")

def sequence(expr, attrs={}):
    """Wrap expr in a "bioformat:sequence" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "sequence"
    return Group(tag, expr)
_settag(sequence, NS + "sequence")

def alphabet(expr, attrs={}):
    """Wrap expr in a "bioformat:alphabet" group.

    The only accepted attribute is "alphabet", which must be one of the
    alphabet names listed below.
    """
    checked = _check_attrs(attrs, ("alphabet",))
    alphabets = (
        "iupac-protein", "iupac-dna", "iupac-rna",
        "iupac-ambiguous-protein",
        "iupac-ambiguous-dna",
        "iupac-ambiguous-rna",
        "protein", "dna", "rna", "nucleotide", "unknown",
    )
    group_attrs = {}
    _set_if_given(checked, "alphabet", group_attrs, alphabets)
    return Group(NS + "alphabet", expr, group_attrs)
_settag(alphabet, NS + "alphabet")



############################## features

# In PIR

# FEATURE
#    1-25                #domain signal sequence #status predicted #label SIG\
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\
#    63,209,297,365,522,
#    725                 #binding_site carbohydrate (Asn) (covalent) #status
#                        predicted

# The whole thing is a 'feature_block'

# One 'feature' is
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\

# One 'feature_name' is "binding_site".

# An example of the feature_location_block and feature_location, which I
# will abbreviate as 'flb' and 'fl', is:
#   <flb> <fl>63,209,297,365,522,</fl>
#         <fl>725</fl> #binding_site carbohydrate ...

# PIR doesn't have a 'feature_description'

# Let:
#   fq  = feature_qualifier
#   fqb = feature_qualifier_block
#   fqn = feature_qualifier_name
#   fqd = feature_qualifier_description
# then the text
#
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\
#
# can be represented as (the rather tedious)
#
#    26-737              <fqb><fq>#<fqn>product</fqn> <fqd>procollagen-\
# lysine 5-dioxygenase 2</fqd></fq> #<fq><fqn>status</fqn>
#                        <fqd>predicted</fqd> #<fq><fqn>label\
# </fqn> <fqd>MAT</fqd></fq>\</fqb>
#

# 'style' determines the namespace for the feature name
def feature_block(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_block" group.

    Accepted attributes are "style" and "location-style", both copied
    through unchecked when given.  expr must contain a feature group
    (TypeError otherwise).
    """
    checked = _check_attrs(attrs, ("style", "location-style"))
    group_attrs = {}
    for key in ("style", "location-style"):
        _set_if_given(checked, key, group_attrs)
    _must_have(expr, feature)
    return Group(NS + "feature_block", expr, group_attrs)
_settag(feature_block, NS + "feature_block")

def feature(expr, attrs={}):
    """Wrap expr in a "bioformat:feature" group.

    The only accepted attribute is "location-style".  expr must contain
    a feature_name group and either a feature_location group or both
    feature_location_start and feature_location_end (TypeError
    otherwise).
    """
    checked = _check_attrs(attrs, ("location-style",))
    group_attrs = {}
    _set_if_given(checked, "location-style", group_attrs)
    _must_have(expr, feature_name)
    # Either a single location or an explicit start/end pair will do.
    location_choices = [
        [feature_location],
        [feature_location_start, feature_location_end],
    ]
    _must_have_set(expr, location_choices)
    return Group(NS + "feature", expr, group_attrs)
_settag(feature, NS + "feature")

def feature_name(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_name" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "feature_name"
    return Group(tag, expr)
_settag(feature_name, NS + "feature_name")

def feature_location(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_location" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "feature_location"
    return Group(tag, expr)
_settag(feature_location, NS + "feature_location")

def feature_location_start(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_location_start" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "feature_location_start"
    return Group(tag, expr)
_settag(feature_location_start, NS + "feature_location_start")

def feature_location_end(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_location_end" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "feature_location_end"
    return Group(tag, expr)
_settag(feature_location_end, NS + "feature_location_end")

def feature_description(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_description" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "feature_description"
    return Group(tag, expr)
_settag(feature_description, NS + "feature_description")


##def feature_qualifier_block(expr, attrs = {}):
##    attrs = _check_attrs(attrs, ())
##    _must_have(expr, feature_qualifier)
##    return Group(NS + "feature_qualifier_block", expr)
##_settag(feature_qualifier_block, NS + "feature_qualifier_block")

def feature_qualifier(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_qualifier" group.

    Takes no attributes.  expr must contain a feature_qualifier_name
    group (TypeError otherwise).
    """
    _check_attrs(attrs, ())
    _must_have(expr, feature_qualifier_name)
    tag = NS + "feature_qualifier"
    return Group(tag, expr)
_settag(feature_qualifier, NS + "feature_qualifier")

def feature_qualifier_name(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_qualifier_name" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "feature_qualifier_name"
    return Group(tag, expr)
_settag(feature_qualifier_name, NS + "feature_qualifier_name")

def feature_qualifier_description(expr, attrs={}):
    """Wrap expr in a "bioformat:feature_qualifier_description" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = NS + "feature_qualifier_description"
    return Group(tag, expr)
_settag(feature_qualifier_description, NS + "feature_qualifier_description")


############ For homology searches

# "BLASTN", "BLASTP"
def application_name(expr, attrs={}):
    """Wrap expr in a "bioformat:application_name" group.

    The only accepted attribute is "app".  The validated attribute
    dictionary is passed to Group as-is, so a missing "app" arrives
    there as None.
    """
    checked = _check_attrs(attrs, ("app",))
    return Group("bioformat:application_name", expr, checked)
334 335 # "2.0.11", "2.0a19MP-WashU"
def application_version(expr, attrs={}):
    """Wrap expr in a "bioformat:application_version" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:application_version", expr, no_attrs)
339
def search_header(expr, attrs={}):
    """Wrap expr in a "bioformat:search_header" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:search_header", expr, no_attrs)
343
def search_table(expr, attrs={}):
    """Wrap expr in a "bioformat:search_table" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:search_table", expr, no_attrs)
347
def search_table_description(expr, attrs={}):
    """Wrap expr in a "bioformat:search_table_description" group.

    The only accepted attribute is "bioformat:decode"; it defaults to
    "strip" and is overridden when the caller gives a value.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    group_attrs = {"bioformat:decode": "strip"}
    _set_if_given(checked, "bioformat:decode", group_attrs)
    return Group("bioformat:search_table_description", expr, group_attrs)
353
def search_table_value(expr, attrs={}):
    """Wrap expr in a "bioformat:search_table_value" group.

    Accepted attributes are "name" and "bioformat:decode".  The
    validated dictionary is passed to Group as-is, so attributes that
    were not supplied arrive there as None.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:search_table_value", expr, checked)
357
def search_table_entry(expr, attrs={}):
    """Wrap expr in a "bioformat:search_table_entry" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:search_table_entry", expr, no_attrs)
361
def query_description_block(expr, attrs={}):
    """Wrap expr in a "bioformat:query_description_block" group.

    The only accepted attribute is "join-query"; it defaults to
    "join|fixspaces" and is overridden when the caller gives a value.
    """
    checked = _check_attrs(attrs, ("join-query",))
    group_attrs = {"join-query": "join|fixspaces"}
    _set_if_given(checked, "join-query", group_attrs)
    return Group("bioformat:query_description_block", expr, group_attrs)
367
def query_description(expr, attrs = {}):
    """Wrap expr in a "bioformat:query_description" group.

    The only accepted attribute is "bioformat:decode", copied through
    when given; any other attribute raises TypeError.
    """
    # The trailing comma matters: without it the parentheses yield a
    # plain string, and attribute validation turns into a substring test
    # against "bioformat:decode".
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:query_description", expr, d)
373
def query_size(expr, attrs={}):
    """Wrap expr in a "bioformat:query_size" group; takes no attributes."""
    # Called for validation only: a non-empty attrs raises TypeError.
    _check_attrs(attrs, ())
    tag = "bioformat:query_size"
    return Group(tag, expr)
377
def database_name(expr, attrs={}):
    """Wrap expr in a "bioformat:database_name" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:database_name", expr, no_attrs)
381
def database_num_sequences(expr, attrs={}):
    """Wrap expr in a "bioformat:database_num_sequences" group.

    The only accepted attribute is "bioformat:decode".  The validated
    dictionary is passed to Group as-is, so a missing value arrives
    there as None.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_sequences", expr, checked)
385
def database_num_letters(expr, attrs={}):
    """Wrap expr in a "bioformat:database_num_letters" group.

    The only accepted attribute is "bioformat:decode".  The validated
    dictionary is passed to Group as-is, so a missing value arrives
    there as None.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_letters", expr, checked)
389
def hit(expr, attrs={}):
    """Wrap expr in a "bioformat:hit" group.

    The only accepted attribute is "join-description"; it defaults to
    "join|fixspaces" and is overridden when the caller gives a value.
    """
    checked = _check_attrs(attrs, ("join-description",))
    group_attrs = {"join-description": "join|fixspaces"}
    _set_if_given(checked, "join-description", group_attrs)
    return Group("bioformat:hit", expr, group_attrs)
395
def hit_length(expr, attrs={}):
    """Wrap expr in a "bioformat:hit_length" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hit_length", expr, no_attrs)
399
def hit_description(expr, attrs = {}):
    """Wrap expr in a "bioformat:hit_description" group.

    The only accepted attribute is "bioformat:decode", copied through
    when given; any other attribute raises TypeError.
    """
    # The trailing comma matters: without it the parentheses yield a
    # plain string, and attribute validation turns into a substring test
    # against "bioformat:decode".
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:hit_description", expr, d)
405
def hsp(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp", expr, no_attrs)
409
def hsp_value(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_value" group.

    Accepted attributes are "name" and "bioformat:decode".  The
    validated dictionary is passed to Group as-is, so attributes that
    were not supplied arrive there as None.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:hsp_value", expr, checked)
413
def hsp_frame(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_frame" group.

    The only accepted attribute is "which", which must be one of
    "query", "homology" or "subject".
    """
    checked = _check_attrs(attrs, ("which",))
    sides = ("query", "homology", "subject")
    group_attrs = {}
    _set_if_given(checked, "which", group_attrs, valid=sides)
    return Group("bioformat:hsp_frame", expr, group_attrs)
419
def hsp_strand(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_strand" group.

    Accepted attributes:
      "which"  -- one of "query", "homology", "subject"
      "strand" -- one of "+1", "0", "-1", ""
    """
    checked = _check_attrs(attrs, ("strand", "which"))
    sides = ("query", "homology", "subject")
    strands = ("+1", "0", "-1", "")
    group_attrs = {}
    # "which" is validated before "strand", as in the original order.
    _set_if_given(checked, "which", group_attrs, valid=sides)
    _set_if_given(checked, "strand", group_attrs, valid=strands)
    return Group("bioformat:hsp_strand", expr, group_attrs)
426
def hsp_seqalign_query_seq(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_seq" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_seq", expr, no_attrs)
430
def hsp_seqalign_homology_seq(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_homology_seq" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_homology_seq", expr, no_attrs)
434
def hsp_seqalign_subject_seq(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_seq" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_seq", expr, no_attrs)
438
def hsp_seqalign_query_leader(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_leader" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_leader", expr, no_attrs)
442 443
def hsp_seqalign_query_name(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_name" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_name", expr, no_attrs)
447
def hsp_seqalign_subject_name(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_name" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_name", expr, no_attrs)
451
def hsp_seqalign(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign", expr, no_attrs)
455
def hsp_seqalign_query_start(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_start" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_start", expr, no_attrs)
459
def hsp_seqalign_query_end(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_end" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_end", expr, no_attrs)
463
def hsp_seqalign_subject_start(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_start" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_start", expr, no_attrs)
467
def hsp_seqalign_subject_end(expr, attrs={}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_end" group; takes no attributes."""
    # With no permitted names the checked copy is always an empty dict.
    no_attrs = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_end", expr, no_attrs)
471
def search_parameter(expr, attrs={}):
    """Wrap expr in a "bioformat:search_parameter" group.

    Accepted attributes are "name" and "bioformat:decode"; each is
    copied onto the group only when given a non-None value.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    group_attrs = {}
    for key in ("name", "bioformat:decode"):
        _set_if_given(checked, key, group_attrs)
    return Group("bioformat:search_parameter", expr, group_attrs)
478
def search_statistic(expr, attrs={}):
    """Wrap expr in a "bioformat:search_statistic" group.

    Accepted attributes are "name" and "bioformat:decode"; each is
    copied onto the group only when given a non-None value.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    group_attrs = {}
    for key in ("name", "bioformat:decode"):
        _set_if_given(checked, key, group_attrs)
    return Group("bioformat:search_statistic", expr, group_attrs)
485