Package Bio :: Package config :: Module FormatRegistry
[hide private]
[frames | no frames]

Source Code for Module Bio.config.FormatRegistry

  1  # Copyright 2002 by Jeffrey Chang, Andrew Dalke.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  # This is based on some older code by Andrew Dalke. 
  7   
  8  """Implements a Registry to store Martel-type format expressions. 
  9   
 10  Classes: 
 11  FormatRegistry   Holds Biopython formats in a dictionary-like interface. 
 12  FormatObject     Describes a Biopython file format. 
 13  FormatGroup      Describes a group of Biopython file formats. 
 14   
 15  """ 
 16  # Private Functions: 
 17  # _parses_file     Return whether an expression can parse a file. 
 18  # _parses_string   Return whether an expression can parse a string. 
 19  # _normalize_expression   Turn an expression or path into an expression. 
 20  # _load_first_existing    Return the first format that loads successfully. 
 21  # _load_expression        Load a Martel expression. 
 22  # _load_object            Load a Python object. 
 23   
 24  from Bio.config.Registry import * 
 25   
 26  from Bio.config import _support 
 27   
 28   
class FormatRegistry(Registry):
    """This implements a dictionary-like interface to Biopython file
    formats.

    Methods:
    normalize      Turn a format name into the registered format.
    find_builder   Find a builder that converts from a format to an object.
    find_writer    Find a writer that can write an object to a format.

    """
    def __init__(self, name, load_path=None,
                 builder_path="Bio.builders", writer_path="Bio.writers"):
        """FormatRegistry(name[, load_path][, builder_path][, writer_path])

        name and load_path are handed to Registry.__init__ unchanged.
        builder_path and writer_path are the dotted prefixes under
        which find_builder and find_writer look for modules.

        """
        Registry.__init__(self, name, load_path=load_path)
        self._builder_path = builder_path
        self._writer_path = writer_path

    def normalize(self, name_or_format): # XXX appropriate?
        """S.normalize(name_or_format) -> format

        A string is treated as a name and looked up with
        self[name]; anything else is returned unchanged.

        """
        if isinstance(name_or_format, type("")):
            # It's a name
            return self[name_or_format]
        return name_or_format

    def _build_parent_path(self, format, visited=None):
        """Return format followed by its ancestors, depth first.

        Ancestors are found through each format's _parents list.
        visited maps the names of formats already emitted; it is
        shared by the whole recursion so that a cycle terminates and a
        format reachable by several routes is listed only once.

        """
        if visited is None:
            visited = {}
        if format.name in visited:
            return []
        # Record the format *before* recursing; without this the
        # membership test above can never succeed.
        visited[format.name] = 1
        format_list = [format]
        for parent in format._parents:
            format_list.extend(self._build_parent_path(parent, visited))
        return format_list

    def _build_child_path(self, format, visited=None):
        """Return format followed by its descendants, depth first.

        Descendants are found through the optional objs attribute
        (objects without it are treated as having no children).
        visited has the same role as in _build_parent_path.

        """
        if visited is None:
            visited = {}
        if format.name in visited:
            return []
        # Record the format *before* recursing (see _build_parent_path).
        visited[format.name] = 1
        format_list = [format]
        for child in getattr(format, 'objs', []):
            format_list.extend(self._build_child_path(child, visited))
        return format_list

    def find_builder(self, from_format, to_io):
        """S.find_builder(from_format, to_io) -> builder

        Tries the module <builder_path>.<to_io.abbrev>.<format.abbrev>
        for from_format and then for each of its ancestors, and returns
        make_builder() from the first module that loads.  Raises
        TypeError if none of them loads.

        """
        # The directory of the builders is organized according to:
        # builders/io/format
        basemodulename = "%s.%s" % (self._builder_path, to_io.abbrev)

        # Search through the formats in the order of most specific to
        # most general.
        all_formats = self._build_parent_path(from_format)
        for format in all_formats:
            name = basemodulename + "." + format.abbrev
            module = _support.safe_load_module(name)
            if module is not None:
                break
        else:
            # The loop ran to completion without finding a module.
            raise TypeError("Cannot find builder for %r" % to_io.abbrev)
        return module.make_builder()

    def find_writer(self, from_io, to_format, outfile):
        """S.find_writer(from_io, to_format, outfile) -> writer

        Tries the module <writer_path>.<from_io.abbrev>.<format.abbrev>
        for to_format and then for each of its descendants, and returns
        make_writer(outfile) from the first module that loads.  Raises
        TypeError if none of them loads.

        """
        # The directory of the writers is organized according to:
        # writers/io/format
        basemodulename = "%s.%s" % (self._writer_path, from_io.abbrev)

        # Search through the formats in the order of most general to
        # most specific.
        all_formats = self._build_child_path(to_format)
        for format in all_formats:
            name = basemodulename + "." + format.abbrev
            module = _support.safe_load_module(name)
            if module is not None:
                break
        else:
            # The loop ran to completion without finding a module.
            raise TypeError("Cannot find writer for %r" % from_io.abbrev)
        return module.make_writer(outfile)

# Module-level registry instance named "formats"; its load path is
# "Bio.formatdefs".
formats = FormatRegistry("formats", load_path="Bio.formatdefs")


class FormatObject(RegisterableObject):
    """A single Biopython file format together with the operations
    that can be performed with it.

    Methods:
    identify         Identify the format at a URL.
    identifyFile     Identify the format of a file.
    identifyString   Identify the format of a string.

    make_parser      Make a parser that can parse the format.
    make_iterator    Make an iterator over files of this format.

    """
    def __init__(self, name, expression, abbrev=None, doc=None,
                 filter=None, multirecord=1):
        """FormatObject(name, expression[, abbrev][, doc]
        [, filter][, multirecord])

        name, abbrev and doc are the object's name, an abbreviation
        of it, and its documentation; they are passed on to
        RegisterableObject.

        expression is a Martel.Expression that can parse this format,
        or a dotted path from which one is loaded.  filter is an
        optional expression (or path) giving a quick test of whether
        some input is parseable; expression is used when no filter is
        given.

        multirecord is a 0/1 flag telling whether this format can be
        used to parse multiple records.  It defaults to 1.

        """
        import operator
        RegisterableObject.__init__(self, name, abbrev, doc)
        self.expression = _normalize_expression(expression)
        quick_test = _normalize_expression(filter) or self.expression
        self.filter = _support.make_cached_expression(quick_test)
        self.multirecord = operator.truth(multirecord)
        # Both caches are keyed on (sorted select_names or None, debug_level).
        self._parser_cache = {}
        self._iterator_cache = {}
        # Groups append themselves here from FormatGroup.add.
        self._parents = []

    def identifyFile(self, infile, debug_level=0):
        """S.identifyFile(infile[, debug_level]) -> FormatObject or None"""
        recognized = _parses_file(self.filter, infile, debug_level)
        if not recognized:
            return None
        return self

    def identifyString(self, s, debug_level=0):
        """S.identifyString(s[, debug_level]) -> FormatObject or None"""
        recognized = _parses_string(self.filter, s, debug_level)
        if not recognized:
            return None
        return self

    def identify(self, source, debug_level=0):
        """S.identify(source[, debug_level]) -> FormatObject or None"""
        # NOTE(review): ReseekFile is not imported explicitly in the
        # visible part of this module; presumably it arrives through
        # the star import from Bio.config.Registry -- confirm.
        prepared = ReseekFile.prepare_input_source(source)
        stream = prepared.getCharacterStream() or prepared.getByteStream()
        return self.identifyFile(stream, debug_level)

    def make_parser(self, select_names=None, debug_level=0):
        """S.make_parser([select_names][, debug_level]) -> parser"""
        if select_names is None:
            key = None, debug_level
        else:
            # Sort so that the same names in any order share a cache entry.
            select_names = sorted(select_names)
            key = tuple(select_names), debug_level

        try:
            cached = self._parser_cache[key]
        except KeyError:
            import Martel
            expression = self.expression
            if select_names is not None:
                expression = Martel.select_names(expression, select_names)
            cached = expression.make_parser(debug_level = debug_level)
            self._parser_cache[key] = cached
        # Callers get a copy; the cached parser itself is never returned.
        return cached.copy()

    def make_iterator(self, tag="record", select_names=None, debug_level=0):
        """S.make_iterator([tag][, select_names][, debug_level]) -> iterator"""
        if select_names is None:
            key = None, debug_level
        else:
            # Sort so that the same names in any order share a cache entry.
            select_names = sorted(select_names)
            key = tuple(select_names), debug_level

        # Note that tag is not part of the cache key.
        try:
            cached = self._iterator_cache[key]
        except KeyError:
            import Martel
            expression = self.expression
            if select_names is not None:
                expression = Martel.select_names(expression, select_names)
            cached = expression.make_iterator(tag, debug_level = debug_level)
            self._iterator_cache[key] = cached
        # Callers get a copy; the cached iterator itself is never returned.
        return cached.copy()
200
class FormatGroup(RegisterableGroup):
    """A group of FormatObjects (or nested groups).

    Methods:
    identify         Identify the format at a URL.
    identifyFile     Identify the format of a file.
    identifyString   Identify the format of a string.

    """
    def __init__(self, name, abbrev=None, filter=None, multirecord=1):
        """FormatGroup(name[, abbrev][, filter][, multirecord])

        name is the name of the group and abbrev an abbreviation of
        it.

        filter is an optional Martel.Expression (or dotted path to
        one) giving a quick test of whether some input is parseable by
        this group.

        multirecord is a 0/1 flag telling whether this format can be
        used to parse multiple records.  It defaults to 1.

        """
        RegisterableGroup.__init__(self, name, abbrev, None)
        group_filter = _normalize_expression(filter)
        if filter is not None:
            group_filter = _support.make_cached_expression(group_filter)
        self.filter = group_filter
        self.multirecord = multirecord
        # Enclosing groups append themselves here from add().
        self._parents = []

    def identifyFile(self, infile, debug_level=0):
        """S.identifyFile(infile[, debug_level]) -> FormatObject or None"""
        # Let the group's own filter, if any, rule the input out early.
        if self.filter and not _parses_file(self.filter, infile, debug_level):
            return None
        # Otherwise the first member that recognizes the input wins.
        for member in self.objs:
            found = member.identifyFile(infile, debug_level=debug_level)
            if found is not None:
                return found
        return None

    def identifyString(self, s, debug_level=0):
        """S.identifyString(s[, debug_level]) -> FormatObject or None"""
        from StringIO import StringIO
        handle = StringIO(s)
        return self.identifyFile(handle, debug_level)

    def identify(self, source, debug_level=0):
        """S.identify(source[, debug_level]) -> FormatObject or None"""
        # NOTE(review): ReseekFile is not imported explicitly in the
        # visible part of this module; presumably it arrives through
        # the star import from Bio.config.Registry -- confirm.
        prepared = ReseekFile.prepare_input_source(source)
        stream = prepared.getCharacterStream() or prepared.getByteStream()
        return self.identifyFile(stream, debug_level)

    def add(self, obj, *args, **keywds):
        """S.add(obj, ...)

        Adds obj through RegisterableGroup.add and then records this
        group in obj._parents.

        """
        from weakref import proxy
        RegisterableGroup.add(self, obj, *args, **keywds)
        # A weak proxy is stored, so the back-reference does not add a
        # strong reference to the group.
        obj._parents.append(proxy(self))
258 259
def _parses_file(expression, infile, debug_level):
    """Return a boolean indicating whether expression can parse infile.

    The position of infile (as reported by tell()) is restored before
    returning, whether or not parsing succeeded.

    """
    from Bio import StdHandler
    from Martel import Parser

    recognizer = StdHandler.RecognizeHandler()
    parser = expression.make_parser(debug_level)
    # The same handler object receives both errors and content.
    parser.setErrorHandler(recognizer)
    parser.setContentHandler(recognizer)

    start = infile.tell()
    try:
        parser.parseFile(infile)
    except Parser.ParserException:
        # A parse failure is not an error here; the answer is taken
        # from the handler below.
        pass
    finally:
        infile.seek(start)
    return recognizer.recognized
278
def _parses_string(expression, s, debug_level):
    """Return a boolean indicating whether expression can parse s.

    The string is wrapped in a StringIO and handed to _parses_file.
    (This function used to call itself here, which recursed without
    end.)

    """
    from StringIO import StringIO
    return _parses_file(expression, StringIO(s), debug_level)
282
def _normalize_expression(expression_or_path):
    """Turn an expression or path into an expression.

    A plain string is taken to be a dotted path and is loaded with
    _load_expression.  Anything else, None included, is returned
    unchanged.

    """
    if type(expression_or_path) == type(""):
        return _load_expression(expression_or_path)
    return expression_or_path
289
def _load_expression(path):
    """Load the Martel expression found at the dotted path.

    Raises TypeError if _load_object returns None for the path, or if
    the object it returns is not a Martel Expression.

    """
    from Martel import Expression

    loaded = _load_object(path)
    if loaded is None:
        # Expression not found; make a useful error message
        msg = "Could not find %r\n" % (path,)
        msg = msg + "(You may need to add the top-level module to the PYTHONPATH)"
        raise TypeError(msg)

    if isinstance(loaded, Expression.Expression):
        return loaded

    # Wrong kind of object: report its class name when it has one,
    # otherwise its type.
    try:
        klass = loaded.__class__.__name__
    except AttributeError:
        klass = type(loaded)
    raise TypeError("%r should be a Martel Expression but " \
                    "is a %r" % (path, klass))
307
def _load_object(path):
    """Load the Python object named by a dotted path.

    path is something like "package.module.Class.attribute".  Every
    proper prefix of the path is imported for as long as the prefixes
    are importable modules; the remaining components are then
    resolved with getattr, starting from the top-level module.

    Raises TypeError if path is empty or has an empty component (a
    misplaced '.'), ImportError if a module fails to import for any
    reason other than not existing, and AttributeError if a component
    cannot be found on its parent.

    """
    terms = path.split(".")
    # Check every component up front, so that a leading '.', a
    # trailing '.' and '..' are all reported the same way instead of
    # some of them surfacing as errors from __import__ or getattr.
    if "" in terms:
        raise TypeError("There's a '.' in the wrong place: %r" % \
                        (path,))

    # Import all the needed modules
    # (Don't know which are modules and which are classes, so simply
    # stop when imports fail.)
    # The last element is never imported, since it cannot be a module.
    s = terms[0]
    x = __import__(s)
    for term in terms[1:-1]:
        s = s + "." + term
        try:
            __import__(s)
        except ImportError as exc:
            # Only "this module does not exist" means we have reached
            # a non-module component; any other import failure (for
            # instance a broken import inside the module) is re-raised.
            missing = getattr(exc, "name", None)
            if missing is not None:
                # Interpreters that record the missing module's name.
                not_found = (missing == s)
            else:
                # Otherwise the message text is the only way to tell
                # that the module could not be loaded because it
                # doesn't exist.
                not_found = ("No module named %s" % term) in str(exc)
            if not not_found:
                raise
            break

    # Get the requested object
    s = terms[0]
    for term in terms[1:]:
        try:
            x = getattr(x, term)
        except AttributeError:
            raise AttributeError("%s object (%r) has no attribute %r" % \
                                 (type(x).__name__, s, term))
        s = s + "." + term
    return x
348
def _load_first_existing(basemodulename, possible_formats):
    """Return the first format module that loads successfully.

    For each format in turn, tries to load the module
    basemodulename + "." + format.abbrev.  Formats whose module does
    not exist are skipped; any other ImportError is re-raised.
    Returns None if no module could be loaded.

    """
    for candidate in possible_formats:
        module_name = basemodulename + "." + candidate.abbrev
        try:
            return _support.load_module(module_name)
        except ImportError as exc:
            # This is the only way I know to tell if the module
            # could not be loaded because it doesn't exist.
            if ("No module named %s" % candidate.abbrev) not in str(exc):
                raise
    return None
362