1
2 import os, bisect
3 import BaseDB, Location
4 import Bio
5
6 _open = open
7 INDEX_TYPE = "flat/1"
8
10 name, filetag, startpos, length = s.rstrip().split("\t")
11 return name, filetag, long(startpos), long(length)
12
25
27 info = primary_table.items()
28 info.sort()
29 n = 1
30 for k, v in info:
31
32 s = "%s\t%s" % (k, v)
33 if len(s) > n:
34 n = len(s)
35 if n > 9999:
36 raise AssertionError(
37 "Primary index record too large for format spec! " +
38 " %s bytes in %r" % (n, s))
39 outfile = _open(filename, "wb")
40 outfile.write("%04d" % n)
41 for k, v in info:
42 s = "%s\t%s" % (k, v)
43 outfile.write(s.ljust(n))
44 outfile.close()
45
47 return s.rstrip().split("\t")
48
62
64 items = table.items()
65 items.sort()
66
67 n = 0
68 for k, v in items:
69 for x in v:
70 s = "%s\t%s" % (k, x)
71 if len(s) > n:
72 n = len(s)
73 if n > 9999:
74 raise AssertionError(
75 "Secondary index record too large for format spec! " +
76 " %s bytes in %r" % (n, s))
77
78 outfile = _open(filename, "wb")
79 outfile.write("%04d" % n)
80 for k, v in items:
81 for x in v:
82 s = "%s\t%s" % (k, x)
83 outfile.write(s.ljust(n))
84 outfile.close()
85
91
93 - def __init__(self, db, namespace, table):
107
109 - def __init__(self, db, namespace, table):
125
128 self.__in_constructor = 1
129 self._need_flush = 0
130 BaseFlatDB.__init__(self, dbname)
131
132 primary_filename = os.path.join(self.dbname,
133 "key_%s.key" % (self.primary_namespace,) )
134 self.primary_table = _read_primary_table(primary_filename)
135
136 self.secondary_tables = {}
137 for namespace in self.secondary_namespaces:
138 filename = os.path.join(self.dbname, "id_%s.index" % namespace)
139 self.secondary_tables[namespace] = _read_secondary_table(filename)
140
141 self.__in_constructor = 0
142
def add_record(self, filetag, startpos, length, table):
    """Register one record in the in-memory primary and secondary tables.

    ``filetag``, ``startpos`` and ``length`` are stored, tab-separated, as
    the value for the record's primary key.  ``table`` maps namespace
    names to lists of values and must hold exactly one value for the
    primary namespace.

    Raises TypeError if the primary namespace does not have exactly one
    value, or if that value is already present in the primary table.
    On success the database is marked as needing a flush.
    """
    key_list = table[self.primary_namespace]
    if len(key_list) != 1:
        # Name the offending namespace in the message so the caller can
        # tell which field violated the uniqueness requirement.
        raise TypeError(
            "Field %s has %d entries but must have only one "
            "(must be unique)" % (repr(self.primary_namespace),
                                  len(key_list)))
    key = key_list[0]
    # ``in`` works on both Python 2 and Python 3 dictionaries.
    if key in self.primary_table:
        raise TypeError("Field %r = %r already exists; must be unique" %
                        (self.primary_namespace, key))
    self.primary_table[key] = "%s\t%s\t%s" % (filetag,
                                              BaseDB._int_str(startpos),
                                              BaseDB._int_str(length))

    for namespace in self.secondary_namespaces:
        lookup = self.secondary_tables[namespace]
        # A namespace missing from ``table`` contributes no aliases.
        for val in table.get(namespace, ()):
            # Each secondary value maps to a list of primary keys.
            lookup.setdefault(val, []).append(key)
    self._need_flush = 1
164
166 if not self._need_flush:
167 return
168
169 config_filename = os.path.join(self.dbname, "config.dat")
170 BaseDB.write_config(config_filename = config_filename,
171 index_type = INDEX_TYPE,
172 primary_namespace = self.primary_namespace,
173 secondary_namespaces =
174 self.secondary_tables.keys(),
175 fileid_info = self.fileid_info,
176 formatname = self.formatname,
177 )
178
179 primary_filename = os.path.join(self.dbname,
180 "key_%s.key" % (self.primary_namespace,) )
181 _write_primary_table(filename = primary_filename,
182 primary_table = self.primary_table)
183
184
185
186 for namespace, table in self.secondary_tables.items():
187 filename = os.path.join(self.dbname, "id_%s.index" % namespace)
188 _write_secondary_table(filename = filename,
189 table = table)
190
191 self._need_flush = 0
192
194 self.flush()
195 self.primary_table = self.fileid_info = self.filename_map = \
196 self.secondary_tables = None
197
199 if not self.__in_constructor:
200 self.close()
201
202
208
209
212 self.infile = infile
213 self.size = size
214 infile.seek(0)
215 self.record_size = int(infile.read(4))
216 assert (size - 4) % self.record_size == 0, "record size is wrong"
218 if self.record_size == 0:
219 return 0
220 return int((self.size - 4) / self.record_size)
222 self.infile.seek(i * self.record_size + 4)
223 return self.infile.read(self.record_size).split("\t")[0]
def get_entry(self, i):
    """Return the complete raw record stored at index ``i``.

    The record is returned exactly as read from the file, including any
    padding; nothing is split or stripped.
    """
    # Records are fixed-width and begin after the 4-byte size header.
    width = self.record_size
    handle = self.infile
    handle.seek(4 + width * i)
    return handle.read(width)
227
def _find_entry(filename, wantword):
    """Return the raw index record whose key equals ``wantword``.

    The index file is binary-searched through a BisectFile wrapper.  The
    record's key is the text before its first tab.  Returns the full raw
    record (padding included) on a match, or None when no record has
    exactly that key.
    """
    size = os.path.getsize(filename)
    infile = _open(filename, "rb")
    try:
        bf = BisectFile(infile, size)
        left = bisect.bisect_left(bf, wantword)
        line = bf.get_entry(left)
    finally:
        # Always release the file handle, even if the lookup fails.
        infile.close()

    # Compare the whole key, not just a prefix: bisect_left lands on the
    # first key >= wantword, which may merely start with wantword
    # (e.g. looking up "AB" when only "ABC" is present).
    if line.split("\t")[0] != wantword:
        return None
    return line
238
240 size = os.path.getsize(filename)
241 infile = _open(filename, "rb")
242
243 bf = BisectFile(infile, size)
244 left = bisect.bisect_left(bf, wantword)
245 line = bf.get_entry(left)
246 if not line.startswith(wantword):
247 return None
248
249 right = bisect.bisect_right(bf, wantword)
250 data = []
251 for i in range(left, right):
252 x = bf.get_entry(i)
253 data.append(x)
254 return data
255
256
262
264 lines = _find_range(id_filename, word)
265 if not lines:
266 return None
267 primary_keys = []
268 for line in lines:
269 alias, primary_key = _parse_secondary_table_entry(line)
270 assert alias == word, (alias, word)
271 primary_keys.append(primary_key)
272 return primary_keys
273
def create(dbname, primary_namespace, secondary_namespaces,
           formatname = "unknown"):
    """Create a new, empty flat-file database directory and open it.

    Makes the directory ``dbname``, writes its ``config.dat``, an empty
    primary key table and one empty index table per secondary namespace,
    then returns the result of opening the database in "rw" mode.
    """
    os.mkdir(dbname)

    BaseDB.write_config(
        config_filename=os.path.join(dbname, "config.dat"),
        index_type=INDEX_TYPE,
        primary_namespace=primary_namespace,
        secondary_namespaces=secondary_namespaces,
        fileid_info={},
        formatname=formatname,
    )

    # Start with an empty primary key table.
    key_path = os.path.join(dbname, "key_%s.key" % (primary_namespace,))
    _write_primary_table(filename=key_path, primary_table={})

    # One empty index file for every secondary namespace.
    for ns in secondary_namespaces:
        index_path = os.path.join(dbname, "id_%s.index" % ns)
        _write_secondary_table(filename=index_path, table={})

    return open(dbname, "rw")
298
299
def open(dbname, mode = "r"):
    """Open an existing flat-file database.

    Mode "r" returns a DiskFlatDB and mode "rw" returns a MemoryFlatDB.
    Mode "a" is rejected with a TypeError pointing at FlatDB.create, and
    any other mode raises TypeError as unknown.
    """
    if mode == "a":
        raise TypeError("Must call FlatDB.create to create the database")
    if mode == "rw":
        return MemoryFlatDB(dbname)
    if mode == "r":
        return DiskFlatDB(dbname)
    raise TypeError("Unknown mode: %r" % (mode,))
309
323
342
343
371
372
385