Trees | Indices | Help |
---|
|
1 # Standard Bioformats definitions 2 3 import Martel 4 Group = Martel.Group 5 6 namespace = "bioformat" 7 NS = namespace + ":" 8 XMLNS = "http://biopython.org/bioformat" 911 value = attrs.get(field) 12 if value is not None: 13 if valid is not None: 14 if value not in valid: 15 raise TypeError("%s (%r) must be one of %s" % \ 16 (field, value, valid)) 17 if convert is None: 18 d[field] = value 19 else: 20 d[field] = convert(value)2123 if attrs.has_key(name) and attrs[name] is not None: 24 raise NotImplementedError("Don't yet handle %r" % (name,))2527 tag = f.tag 28 if tag not in expr.group_names(): 29 raise TypeError( 30 "group %r not present in the expression but is required" % \ 31 (tag,))3234 names = expr.group_names() 35 for set in sets: 36 for f in set: 37 tag = f.tag 38 if tag not in names: 39 break 40 else: 41 return 42 if len(sets) == 1: 43 raise TypeError("missing required tags (need %s) in expression" % 44 [f.tag for f in sets[0]]) 45 lines = ["missing required tags in expression; must have one set from:"] 46 for set in sets: 47 lines.append( str( [t.tag for f in set] ) ) 48 s = "\n".join(lines) 49 raise TypeError(s)5052 f.tag 53 if tag in expr.group_names(): 54 raise TypeError( 55 "group %r present in the expression but is not allowed" % \ 56 (tag,))57 58 59 # pre- Python 2.2 functions didn't allow attributes 62 try: 63 _f.x = 1 64 _use_hack = 0 65 except AttributeError: 66 _use_hack = 1 67 del _f 6870 if text == "record": # XXX FIXME 71 return 72 assert NS + f.func_name == text, (NS + ":" + f.func_name, text)7375 for name in attrs.keys(): 76 if name not in names: 77 raise TypeError("attr %r is not allowed here (valid terms: %s)" % \ 78 (name, names)) 79 d = attrs.copy() 80 for name in names: 81 if not d.has_key(name): 82 d[name] = None 83 return d84 85 if not _use_hack: 89 else: 90 # Convert the functions into callable objects 96 101 102 ################ identifier, description, and cross-references104 attrs = _check_attrs(attrs, ("format",)) 105 d = {"xmlns:bioformat": XMLNS} 106 _set_if_given(attrs, "format", d) 107 return Group("record", expr, d) # XXX FIXME108 _settag(record, "record") # XXX AND FIXME 109 110112 attrs = _check_attrs(attrs, ("type", "style", "dbname")) 113 d = {} 114 _set_if_given(attrs, "type", d, ("primary", "accession", "secondary")) 115 _set_if_given(attrs, "dbname", d) 116 return Group(NS + "dbid", expr, d)117 _settag(dbid, NS + "dbid") 118120 attrs = _check_attrs(attrs, ("join",)) 121 _must_have(expr, description) 122 d = {} 123 _set_if_given(attrs, "join", d, ("english", "concat", "space", "newline")) 124 return Group(NS + "description_block", expr, d)125 _settag(description_block, NS + "description_block") 126 130 _settag(description, NS + "description") 131 134136 attrs = _check_attrs(attrs, ("style",)) 137 d = {} 138 _set_if_given(attrs, "style", d, ("sp-general", "sp-prosite", "sp-embl")) 139 return Group(NS + "fast_dbxref", expr, d)140142 attrs = _check_attrs(attrs, ("style",)) 143 _must_have(expr, dbxref_dbid) 144 d = {} 145 _complain_if_given(attrs, "style") 146 return Group(NS + "dbxref", expr, d)147 _settag(dbxref, NS + "dbxref") 148150 attrs = _check_attrs(attrs, ("style",)) 151 d = {} 152 _set_if_given(attrs, "style", d) 153 return Group(NS + "dbxref_dbname", expr, d)154 _settag(dbxref_dbname, NS + "dbxref_dbname") 155157 attrs = _check_attrs(attrs, ("dbname", "type", "style", "negate")) 158 d = {} 159 _set_if_given(attrs, "dbname", d) 160 _set_if_given(attrs, "type", d, ("primary", "accession", "secondary")) 161 _complain_if_given(attrs, "style") 162 _set_if_given(attrs, "negate", d, (0, 1), str) 163 164 return Group(NS + "dbxref_dbid", expr, d)165 _settag(dbxref_dbid, NS + "dbxref_dbid") 166 170 _settag(dbxref_negate, NS + "dbxref_negate") 171 172 ##################### sequences 173175 if not ( ord(" ") <= ord(s) <= 126 ): 176 raise TypeError("%r not allowed as a gap character" % (s,)) 177 return s178 179 # What about three letter codes?181 attrs = _check_attrs(attrs, ("alphabet", "gapchar", "remove_spaces")) 182 _must_have(expr, sequence) 183 d = {} 184 _set_if_given(attrs, "alphabet", d, 185 ("iupac-protein", "iupac-dna", "iupac-rna", 186 "iupac-ambiguous-protein", 187 "iupac-ambiguous-dna", 188 "iupac-ambiguous-rna", 189 "protein", "dna", "rna", "unknown")) 190 _set_if_given(attrs, "gapchar", d, convert = _check_gapchar) 191 _set_if_given(attrs, "remove_spaces", d, (0, 1), str) 192 return Group(NS + "sequence_block", expr, d)193 _settag(sequence_block, NS + "sequence_block") 194 198 _settag(sequence, NS + "sequence") 199201 attrs = _check_attrs(attrs, ("alphabet",)) 202 d = {} 203 _set_if_given(attrs, "alphabet", d, 204 ("iupac-protein", "iupac-dna", "iupac-rna", 205 "iupac-ambiguous-protein", 206 "iupac-ambiguous-dna", 207 "iupac-ambiguous-rna", 208 "protein", "dna", "rna", "nucleotide", "unknown")) 209 return Group(NS + "alphabet", expr, d)210 _settag(alphabet, NS + "alphabet") 211 212 213 214 ############################## features 215 216 # In PIR 217 218 # FEATURE 219 # 1-25 #domain signal sequence #status predicted #label SIG\ 220 # 26-737 #product procollagen-lysine 5-dioxygenase 2 #status 221 # predicted #label MAT\ 222 # 63,209,297,365,522, 223 # 725 #binding_site carbohydrate (Asn) (covalent) #status 224 # predicted 225 226 # The whole thing is a 'feature_block' 227 228 # One 'feature' is 229 # 26-737 #product procollagen-lysine 5-dioxygenase 2 #status 230 # predicted #label MAT\ 231 232 # One 'feature_name' is "binding_site". 233 234 # An example of the feature_location_block and feature_block, which I 235 # will abbreviate as 'flb' and 'fl', is: 236 # <flb> <fl>63,209,297,365,522,</fl> 237 # <fl>725</fl> #binding_site carbohydrate ... 238 239 # PIR doesn't have a 'feature_description' 240 241 # Let: 242 # fq = feature_qualifier 243 # fqb = feature_qualifier 244 # fqn = feature_qualifier_name 245 # fqd = feature_qualifier_description 246 # then the text 247 # 248 # 26-737 #product procollagen-lysine 5-dioxygenase 2 #status 249 # predicted #label MAT\ 250 # 251 # can be represented as (the rather tedious) 252 # 253 # 26-737 <fqb><fq>#<fqn>product</fqn> <fqd>procollagen-\ 254 # lysine 5-dioxygenase 2</fqd></fq> #<fq><fqn>status</fqn> 255 # <fqd>predicted</fqd> #<fq><fqn>label\ 256 # </fqn> <fqd>MAT</fqd></fq>\</fqb> 257 # 258 259 # 'style' determines the namespace for the feature name261 attrs = _check_attrs(attrs, ("style", "location-style")) 262 d = {} 263 _set_if_given(attrs, "style", d) 264 _set_if_given(attrs, "location-style", d) 265 _must_have(expr, feature) 266 return Group(NS + "feature_block", expr, d)267 _settag(feature_block, NS + "feature_block") 268270 attrs = _check_attrs(attrs, ("location-style",)) 271 d = {} 272 _set_if_given(attrs, "location-style", d) 273 _must_have(expr, feature_name) 274 _must_have_set(expr, [[feature_location], 275 [feature_location_start, feature_location_end]]) 276 return Group(NS + "feature", expr, d)277 _settag(feature, NS + "feature") 278 282 _settag(feature_name, NS + "feature_name") 283 287 _settag(feature_location, NS + "feature_location") 288 292 _settag(feature_location_start, NS + "feature_location_start") 293 297 _settag(feature_location_end, NS + "feature_location_end") 298 302 _settag(feature_description, NS + "feature_description") 303 304 305 ##def feature_qualifier_block(expr, attrs = {}): 306 ## attrs = _check_attrs(attrs, ()) 307 ## _must_have(expr, feature_qualifier) 308 ## return Group(NS + "feature_qualifier_block", expr) 309 ##_settag(feature_qualifier_block, NS + "feature_qualifier_block") 310312 attrs = _check_attrs(attrs, ()) 313 _must_have(expr, feature_qualifier_name) 314 return Group(NS + "feature_qualifier", expr)315 _settag(feature_qualifier, NS + "feature_qualifier") 316 320 _settag(feature_qualifier_name, NS + "feature_qualifier_name") 321 325 _settag(feature_qualifier_description, NS + "feature_qualifier_description") 326 327 328 ############ For homology searches 329 330 # "BLASTN", "BLASTP"332 attrs = _check_attrs(attrs, ("app",)) 333 return Group("bioformat:application_name", expr, attrs)334 335 # "2.0.11", "2.0a19MP-WashU" 339 343 347349 attrs = _check_attrs(attrs, ("bioformat:decode",)) 350 d = {"bioformat:decode": "strip"} 351 _set_if_given(attrs, "bioformat:decode", d) 352 return Group("bioformat:search_table_description", expr, d)353355 attrs = _check_attrs(attrs, ("name", "bioformat:decode")) 356 return Group("bioformat:search_table_value", expr, attrs)357 361363 attrs = _check_attrs(attrs, ("join-query",)) 364 d = {"join-query": "join|fixspaces"} 365 _set_if_given(attrs, "join-query", d) 366 return Group("bioformat:query_description_block", expr, d)367369 attrs = _check_attrs(attrs, ("bioformat:decode")) 370 d = {} 371 _set_if_given(attrs, "bioformat:decode", d) 372 return Group("bioformat:query_description", expr, d)373 377 381383 attrs = _check_attrs(attrs, ("bioformat:decode",)) 384 return Group("bioformat:database_num_sequences", expr, attrs)385387 attrs = _check_attrs(attrs, ("bioformat:decode",)) 388 return Group("bioformat:database_num_letters", expr, attrs)389391 attrs = _check_attrs(attrs, ("join-description",)) 392 d = {"join-description": "join|fixspaces"} 393 _set_if_given(attrs, "join-description", d) 394 return Group("bioformat:hit", expr, d)395 399401 attrs = _check_attrs(attrs, ("bioformat:decode")) 402 d = {} 403 _set_if_given(attrs, "bioformat:decode", d) 404 return Group("bioformat:hit_description", expr, d)405 409411 attrs = _check_attrs(attrs, ("name", "bioformat:decode")) 412 return Group("bioformat:hsp_value", expr, attrs)413415 attrs = _check_attrs(attrs, ("which",)) 416 d = {} 417 _set_if_given(attrs, "which", d, valid = ("query", "homology", "subject")) 418 return Group("bioformat:hsp_frame", expr, d)419421 attrs = _check_attrs(attrs, ("strand", "which")) 422 d = {} 423 _set_if_given(attrs, "which", d, valid = ("query", "homology", "subject")) 424 _set_if_given(attrs, "strand", d, valid = ("+1", "0", "-1", "")) 425 return Group("bioformat:hsp_strand", expr, d)426428 attrs = _check_attrs(attrs, ()) 429 return Group("bioformat:hsp_seqalign_query_seq", expr, attrs)430432 attrs = _check_attrs(attrs, ()) 433 return Group("bioformat:hsp_seqalign_homology_seq", expr, attrs)434436 attrs = _check_attrs(attrs, ()) 437 return Group("bioformat:hsp_seqalign_subject_seq", expr, attrs)438440 attrs = _check_attrs(attrs, ()) 441 return Group("bioformat:hsp_seqalign_query_leader", expr, attrs)442 443445 attrs = _check_attrs(attrs, ()) 446 return Group("bioformat:hsp_seqalign_query_name", expr, attrs)447449 attrs = _check_attrs(attrs, ()) 450 return Group("bioformat:hsp_seqalign_subject_name", expr, attrs)451 455457 attrs = _check_attrs(attrs, ()) 458 return Group("bioformat:hsp_seqalign_query_start", expr, attrs)459461 attrs = _check_attrs(attrs, ()) 462 return Group("bioformat:hsp_seqalign_query_end", expr, attrs)463465 attrs = _check_attrs(attrs, ()) 466 return Group("bioformat:hsp_seqalign_subject_start", expr, attrs)467469 attrs = _check_attrs(attrs, ()) 470 return Group("bioformat:hsp_seqalign_subject_end", expr, attrs)471473 attrs = _check_attrs(attrs, ("name", "bioformat:decode")) 474 d = {} 475 _set_if_given(attrs, "name", d) 476 _set_if_given(attrs, "bioformat:decode", d) 477 return Group("bioformat:search_parameter", expr, d)478480 attrs = _check_attrs(attrs, ("name", "bioformat:decode")) 481 d = {} 482 _set_if_given(attrs, "name", d) 483 _set_if_given(attrs, "bioformat:decode", d) 484 return Group("bioformat:search_statistic", expr, d)485
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Mon Sep 15 09:26:59 2008 | http://epydoc.sourceforge.net |