Package Bio :: Module Index
[hide private]
[frames | no frames]

Source Code for Module Bio.Index

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Index.py 
  7   
  8  This module provides a way to create indexes to text files. 
  9   
 10  Classes: 
 11  Index     Dictionary-like class used to store index information. 
 12   
 13  _ShelveIndex    An Index class based on the shelve module. 
 14  _InMemoryIndex  An in-memory Index class. 
 15   
 16  """ 
 17  import os 
 18  import array 
 19  import string 
 20  import cPickle 
 21  import shelve 
 22   
class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    The constructor opens (or creates) a shelf and keeps it in
    ``self.data``; the shelf is closed when the object is finalised.

    NOTE(review): this class derives from dict and overrides none of the
    mapping methods, so item access on an instance goes to the inherited
    in-memory dict, not to the shelf in ``self.data``.  The only thing
    this class itself writes to the shelf is the format version.

    """
    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Open the shelf called indexname, creating it if necessary.

        Arguments:
        indexname -- base filename passed to shelve.open.
        truncate  -- if true, discard any existing index and start with
                     an empty one.

        Raises IOError if an existing shelf carries no version entry, or
        a version different from the one this class writes.

        """
        dict.__init__(self)
        if truncate:
            # In python 1.52 and before, dumbdbm (under shelve)
            # doesn't clear the old database.
            for suffix in ('.dir', '.dat', '.bak'):
                filename = indexname + suffix
                if os.path.exists(filename):
                    os.unlink(filename)
            self._create(indexname)
            return

        try:
            self.data = shelve.open(indexname, flag='r')
        except Exception:
            # No database exists (or it cannot be opened): start a new
            # one.  Unlike a bare "except:", this lets KeyboardInterrupt
            # and SystemExit propagate.
            self._create(indexname)
        else:
            # Check to make sure the database is the correct version.
            version = self.data.get(self.__version_key, None)
            if version is None:
                raise IOError("Unrecognized index format")
            elif version != self.__version:
                raise IOError("Version %s doesn't match my version %s"
                              % (version, self.__version))

    def _create(self, indexname):
        """Create a new, empty shelf and stamp it with the version."""
        self.data = shelve.open(indexname, flag='n')
        self.data[self.__version_key] = self.__version

    def __del__(self):
        """Close the shelf, if the constructor got as far as opening it."""
        if 'data' in self.__dict__:
            self.data.close()
66
class _InMemoryIndex(dict):
    """This creates an in-memory index file.

    The index lives in an ordinary dict.  It is read from the file
    ``indexname`` when the object is created and, if it was modified
    through update, item assignment, item deletion or clear, written
    back to that file when the object is finalised.

    """
    # File Format:
    # version
    # key value
    # [...]
    #
    # Every field is encoded with _tostr, so it contains no whitespace.

    __version = 3
    __version_key = '__version'

    # Class-level default so that __del__ is safe even if __init__ never
    # ran to the point of setting the per-instance flag.
    __changed = 0

    def __init__(self, indexname, truncate=None):
        """Load the index stored in indexname, if that file exists.

        Arguments:
        indexname -- path of the index file.
        truncate  -- if true, delete an existing index file first.

        Raises IOError if the file was written with a different version.

        """
        self._indexname = indexname
        dict.__init__(self)
        self.__changed = 0     # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            handle = open(indexname)
            try:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                for line in handle.readlines():
                    key, value = line.split()
                    # Store through dict directly: loading must not mark
                    # the index as changed, otherwise a failure half way
                    # through would make __del__ overwrite the file with
                    # a partial index.
                    dict.__setitem__(self, self._toobj(key),
                                     self._toobj(value))
            finally:
                handle.close()

    def update(self, dict):
        """Merge the entries of dict into the index."""
        self.__changed = 1
        # The parameter shadows the builtin dict, so the base class has
        # to be reached through super().
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        self.__changed = 1
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        self.__changed = 1
        dict.__delitem__(self, key)

    def clear(self):
        self.__changed = 1
        dict.clear(self)

    def __del__(self):
        """Write the index back to its file if it has been modified."""
        if self.__changed:
            handle = open(self._indexname, 'w')
            try:
                handle.write("%s\n" % self._tostr(self.__version))
                for key, value in self.items():
                    handle.write("%s %s\n" %
                                 (self._tostr(key), self._tostr(value)))
            finally:
                handle.close()
            # Saved: a second finalisation has nothing left to write.
            self.__changed = 0

    def _tostr(self, obj):
        """Encode obj as a string that contains no whitespace."""
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each character of
        # the string to its ASCII integer value.  Then, I'm going to convert
        # the integers into strings and join them together with commas.
        # It's not the most efficient way of storing things, but it's
        # relatively fast.
        s = cPickle.dumps(obj)
        intlist = array.array('b', s)
        return ','.join(map(str, intlist))

    def _toobj(self, str):
        """Decode a string produced by _tostr back into the object."""
        # SECURITY: the text is unpickled, so an index file must only be
        # loaded from a trusted source.
        intlist = array.array('b', [int(field) for field in str.split(',')])
        # Go straight from the array to a byte string.  chr() on each
        # value would fail for the negative numbers that typecode 'b'
        # produces for bytes >= 0x80.  The method is called tobytes() on
        # newer interpreters and tostring() on older ones.
        if hasattr(intlist, 'tobytes'):
            pickled = intlist.tobytes()
        else:
            pickled = intlist.tostring()
        return cPickle.loads(pickled)


# Public name for the default index implementation.
Index = _InMemoryIndex