1 """Connect with a BioSQL database and load Biopython like objects from it.
2
3 This provides interfaces for loading biological objects from a relational
4 database, and is compatible with the BioSQL standards.
5 """
6 import BioSeq
7 import Loader
8 import DBUtils
9
"""Main interface for loading an existing BioSQL-style database.
12
13 This function is the easiest way to retrieve a connection to a
14 database, doing something like:
15
16 >>> from BioSeq import BioSeqDatabase
17 >>> server = BioSeqDatabase.open_database(user = "root", db="minidb")
18
19 the various options are:
20 driver -> The name of the database driver to use for connecting. The
21 driver should implement the python DB API. By default, the MySQLdb
22 driver is used.
23 user -> the username to connect to the database with.
24 password, passwd -> the password to connect with
25 host -> the hostname of the database
26 database or db -> the name of the database
27 """
28 module = __import__(driver)
29 connect = getattr(module, "connect")
30
31
32 kw = kwargs.copy()
33 if driver == "MySQLdb":
34 if kw.has_key("database"):
35 kw["db"] = kw["database"]
36 del kw["database"]
37 if kw.has_key("password"):
38 kw["passwd"] = kw["password"]
39 del kw["password"]
40 else:
41
42 if kw.has_key("db"):
43 kw["database"] = kw["db"]
44 del kw["db"]
45 if kw.has_key("passwd"):
46 kw["password"] = kw["passwd"]
47 del kw["passwd"]
48 if driver == "psycopg" and not kw.get("database"):
49 kw["database"] = "template1"
50 try:
51 conn = connect(**kw)
52 except module.InterfaceError:
53
54
55 if kw.has_key("database"):
56 kw["dbname"] = kw["database"]
57 del kw["database"]
58 elif kw.has_key("db"):
59 kw["dbname"] = kw["db"]
60 del kw["db"]
61
62 dsn = ' '.join(['='.join(i) for i in kw.items()])
63 conn = connect(dsn)
64
65 return DBServer(conn, module)
66
def __init__(self, conn, module, module_name=None):
    """Remember the driver module and wrap the connection in an Adaptor.

    conn -> the open database connection, handed on to Adaptor.
    module -> the database driver module; stored as self.module.
    module_name -> name used to pick the DBUtils helper.  When it is
    None, the driver module's own __name__ is used instead.
    """
    self.module = module
    resolved_name = module_name
    if resolved_name is None:
        # Fall back to the name the driver module reports for itself.
        resolved_name = module.__name__
    dbutils = DBUtils.get_dbutils(resolved_name)
    self.adaptor = Adaptor(conn, dbutils)
    self.module_name = resolved_name
74
76 return self.__class__.__name__ + "(%r)" % self.adaptor.conn
85
92
def new_database(self, db_name, authority=None, description=None):
    """Add a new database to the server and return it.

    A (name, authority, description) row is inserted into the
    biodatabase table through the adaptor, and a BioSeqDatabase built
    from the same adaptor and db_name is returned.
    """
    insert_sql = (r"INSERT INTO biodatabase (name, authority, description)"
                  r" VALUES (%s, %s, %s)")
    row_values = (db_name, authority, description)
    self.adaptor.execute(insert_sql, row_values)
    return BioSeqDatabase(self.adaptor, db_name)
101
103 """Load a database schema into the given database.
104
105 This is used to create tables, etc when a database is first created.
106 sql_file should specify the complete path to a file containing
107 SQL entries for building the tables.
108 """
109
110
111
112
113
114 sql_handle = open(sql_file, "rb")
115 sql = r""
116 for line in sql_handle.xreadlines():
117 if line.find("--") == 0:
118 pass
119 elif line.find("#") == 0:
120 pass
121 elif line.strip():
122 sql += line.strip()
123 sql += ' '
124
125
126
127
128
129 if self.module_name in ["psycopg"]:
130 self.adaptor.cursor.execute(sql)
131
132
133 elif self.module_name in ["MySQLdb"]:
134 sql_parts = sql.split(";")
135 for sql_line in sql_parts[:-1]:
136 self.adaptor.cursor.execute(sql_line)
137 else:
138 raise ValueError("Module %s not supported by the loader." %
139 (self.module_name))
140
143 self.conn = conn
144 self.cursor = conn.cursor()
145 self.dbutils = dbutils
146
149
152
155
158
160 return self.conn.close()
161
163 self.cursor.execute(
164 r"select biodatabase_id from biodatabase where name = %s",
165 (dbname,))
166 rv = self.cursor.fetchall()
167 if not rv:
168 raise KeyError("Cannot find biodatabase with name %r" % dbname)
169
170
171 return rv[0][0]
172
174 sql = r"select bioentry_id from bioentry where name = %s"
175 fields = [name]
176 if dbid:
177 sql += " and biodatabase_id = %s"
178 fields.append(dbid)
179 self.cursor.execute(sql, fields)
180 rv = self.cursor.fetchall()
181 if not rv:
182 raise IndexError("Cannot find display id %r" % name)
183 if len(rv) > 1:
184 raise IndexError("More than one entry with display id %r" % name)
185 return rv[0][0]
186
188 sql = r"select bioentry_id from bioentry where accession = %s"
189 fields = [name]
190 if dbid:
191 sql += " and biodatabase_id = %s"
192 fields.append(dbid)
193 self.cursor.execute(sql, fields)
194 rv = self.cursor.fetchall()
195 if not rv:
196 raise IndexError("Cannot find accession %r" % name)
197 if len(rv) > 1:
198 raise IndexError("More than one entry with accession %r" % name)
199 return rv[0][0]
200
202 sql = r"select bioentry_id from bioentry where accession = %s"
203 fields = [name]
204 if dbid:
205 sql += " and biodatabase_id = %s"
206 fields.append(dbid)
207 return self.execute_and_fetch_col0(sql, fields)
208
210 acc_version = name.split(".")
211 if len(acc_version) > 2:
212 raise IndexError("Bad version %r" % name)
213 acc = acc_version[0]
214 if len(acc_version) == 2:
215 version = acc_version[1]
216 else:
217 version = "0"
218 sql = r"SELECT bioentry_id FROM bioentry WHERE accession = %s" \
219 r" AND version = %s"
220 fields = [acc, version]
221 if dbid:
222 sql += " and biodatabase_id = %s"
223 fields.append(dbid)
224 self.cursor.execute(sql, fields)
225 rv = self.cursor.fetchall()
226 if not rv:
227 raise IndexError("Cannot find version %r" % name)
228 if len(rv) > 1:
229 raise IndexError("More than one entry with version %r" % name)
230 return rv[0][0]
231
233
234 sql = "SELECT bioentry_id FROM bioentry WHERE identifier = %s"
235 fields = [identifier]
236 if dbid:
237 sql += " and biodatabase_id = %s"
238 fields.append(dbid)
239 self.cursor.execute(sql, fields)
240 rv = self.cursor.fetchall()
241 if not rv:
242 raise IndexError("Cannot find display id %r" % identifier)
243 return rv[0][0]
244
248
def list_bioentry_ids(self, dbid):
    """Select the bioentry_id of every bioentry row belonging to dbid.

    dbid -> value matched against the biodatabase_id column.

    The query is run through self.execute_and_fetch_col0 and its result
    is returned unchanged.
    """
    query = "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s"
    params = (dbid,)
    return self.execute_and_fetch_col0(query, params)
253
255 return self.execute_and_fetch_col0(
256 "SELECT name FROM bioentry WHERE biodatabase_id = %s",
257 (dbid,))
258
260 """Return ids given a SQL statement to select for them.
261
262 This assumes that the given SQL does a SELECT statement that
263 returns a list of items. This parses them out of the 2D list
264 they come as and just returns them in a list.
265 """
266 return self.cursor.execute_and_fetch_col0(sql, args)
267
269 self.cursor.execute(sql, args or ())
270 rv = self.cursor.fetchall()
271 assert len(rv) == 1, "Expected 1 response, got %d" % len(rv)
272 return rv[0]
273
def execute(self, sql, args=None):
    """Run a single SQL statement on this object's cursor.

    sql -> the statement text.
    args -> parameters for the statement; any false value (None, an
    empty sequence) is sent to the cursor as an empty tuple.

    Nothing is fetched and nothing is returned.
    """
    params = args or ()
    self.cursor.execute(sql, params)
278
285
287 self.cursor.execute(sql, args or ())
288 return [field[0] for field in self.cursor.fetchall()]
289
293
# Keyword accepted by lookup() -> name of the method fetched from the
# adaptor (via getattr) to turn the supplied value into a sequence id.
# Several keywords are aliases that share one method.
_allowed_lookups = dict([
    ('primary_id', "fetch_seqid_by_identifier"),
    ('gi', "fetch_seqid_by_identifier"),
    ('display_id', "fetch_seqid_by_display_id"),
    ('name', "fetch_seqid_by_display_id"),
    ('accession', "fetch_seqid_by_accession"),
    ('version', "fetch_seqid_by_version"),
])
303
310 return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)
311
320
329
338
340 """Gets a *list* of Bio::Seq objects by accession number
341
342 Example: seqs = db.get_Seq_by_acc('X77802')
343
344 """
345 seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
346 return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]
347
349
350
351
352
353 raise NotImplementedError("waiting for Python 2.2's iter")
354
356 """Array of all the primary_ids of the sequences in the database.
357
These may be ids (display style) or accession numbers or
359 something else completely different - they *are not*
360 meaningful outside of this database implementation.
361 """
362 return self.adaptor.list_bioentry_ids(self.dbid)
363
372
374 if len(kwargs) != 1:
375 raise TypeError("single key/value parameter expected")
376 k, v = kwargs.items()[0]
377 if not _allowed_lookups.has_key(k):
378 raise TypeError("lookup() expects one of %s, not %r" % \
379 (repr(_allowed_lookups.keys())[1:-1], repr(k)))
380 lookup_name = _allowed_lookups[k]
381 lookup_func = getattr(self.adaptor, lookup_name)
382 seqid = lookup_func(self.dbid, v)
383 return BioSeq.DBSeqRecord(self.adaptor, seqid)
384
386 """Gets a Bio::Seq object by the primary (internal) id.
387
388 The primary id in these cases has to come from
389 $db->get_all_primary_ids. There is no other way to get (or
390 guess) the primary_ids in a database.
391 """
392 return self[seqid]
393
def load(self, record_iterator):
    """Load a set of SeqRecords into the BioSQL database.

    record_iterator must provide a next() method that hands back one
    SeqRecord per call and signals exhaustion either by returning None
    or by raising StopIteration.  Every record is passed to a
    Loader.DatabaseLoader built from this object's adaptor and dbid.

    Returns the number of records loaded.
    """
    loader = Loader.DatabaseLoader(self.adaptor, self.dbid)
    loaded = 0
    while 1:
        try:
            record = record_iterator.next()
        except StopIteration:
            # Exhaustion by exception is handled like a None record.
            record = None
        if record is None:
            return loaded
        # The record is counted before the insert is attempted.
        loaded += 1
        loader.load_seqrecord(record)
417