1
2
3
4
5
6
7
8
9 """Implementations of Biopython-like Seq objects on top of BioSQL.
10
11 This allows retrival of items stored in a BioSQL database using
12 a biopython-like Seq interface.
13 """
14
15 from Bio.Seq import Seq
16 from Bio.SeqRecord import SeqRecord
17 from Bio import SeqFeature
18
20 - def __init__(self, primary_id, adaptor, alphabet, start, length):
26
29
31
32
33
34 if isinstance(index, int) :
35
36 i = index
37 if i < 0:
38 if -i > self._length:
39 raise IndexError(i)
40 i = i + self._length
41 elif i >= self._length:
42 raise IndexError(i)
43 return self.adaptor.get_subseq_as_string(self.primary_id,
44 self.start + i,
45 self.start + i + 1)
46 if not isinstance(index, slice) :
47 raise ValueError, "Unexpected index type"
48
49
50
51 if index.start is None :
52 i=0
53 else :
54 i = index.start
55 if i < 0 :
56
57 if -i > self._length:
58 raise IndexError(i)
59 i = i + self._length
60 elif i >= self._length:
61
62 i = self._length
63
64 if index.stop is None :
65 j = self._length
66 else :
67 j = index.stop
68 if j < 0 :
69
70 if -j > self._length:
71 raise IndexError(j)
72 j = j + self._length
73 elif j >= self._length:
74 j = self._length
75
76 if i >= j:
77
78 return Seq("", self.alphabet)
79 elif index.step is None or index.step == 1 :
80
81 return self.__class__(self.primary_id, self.adaptor, self.alphabet,
82 self.start + i, j - i)
83 else :
84
85 full = self.adaptor.get_subseq_as_string(self.primary_id,
86 self.start + i,
87 self.start + j)
88 return Seq(full[::index.step], self.alphabet)
89
91 """Returns the full sequence as a python string.
92
93 Although not formally deprecated, you are now encouraged to use
94 str(my_seq) instead of my_seq.tostring()."""
95 return self.adaptor.get_subseq_as_string(self.primary_id,
96 self.start,
97 self.start + self._length)
103
104
105 data = property(tostring, doc="Sequence as string (DEPRECATED)")
106
127
143
145 sql = "SELECT seqfeature_id, type.name, rank" \
146 " FROM seqfeature join term type on (type_term_id = type.term_id)" \
147 " WHERE bioentry_id = %s" \
148 " ORDER BY rank"
149 results = adaptor.execute_and_fetchall(sql, (primary_id,))
150 seq_feature_list = []
151 for seqfeature_id, seqfeature_type, seqfeature_rank in results:
152
153 qvs = adaptor.execute_and_fetchall(
154 "SELECT name, value" \
155 " FROM seqfeature_qualifier_value join term using (term_id)" \
156 " WHERE seqfeature_id = %s", (seqfeature_id,))
157 qualifiers = {}
158 for qv_name, qv_value in qvs:
159 qualifiers.setdefault(qv_name, []).append(qv_value)
160
161 qvs = adaptor.execute_and_fetchall(
162 "SELECT dbxref.dbname, dbxref.accession" \
163 " FROM dbxref join seqfeature_dbxref using (dbxref_id)" \
164 " WHERE seqfeature_dbxref.seqfeature_id = %s", (seqfeature_id,))
165 for qv_name, qv_value in qvs:
166 value = "%s:%s" % (qv_name, qv_value)
167 qualifiers.setdefault("db_xref", []).append(value)
168
169 results = adaptor.execute_and_fetchall(
170 "SELECT location_id, start_pos, end_pos, strand" \
171 " FROM location" \
172 " WHERE seqfeature_id = %s" \
173 " ORDER BY rank", (seqfeature_id,))
174 locations = []
175
176 for location_id, start, end, strand in results:
177 if start:
178 start -= 1
179 locations.append( (location_id, start, end, strand) )
180
181 remote_results = adaptor.execute_and_fetchall(
182 "SELECT location_id, dbname, accession, version" \
183 " FROM location join dbxref using (dbxref_id)" \
184 " WHERE seqfeature_id = %s", (seqfeature_id,))
185 lookup = {}
186 for location_id, dbname, accession, version in remote_results:
187 if version and version != "0":
188 v = "%s.%s" % (accession, version)
189 else:
190 v = accession
191 lookup[location_id] = (dbname, v)
192
193 feature = SeqFeature.SeqFeature(type = seqfeature_type)
194 feature.qualifiers = qualifiers
195 if len(locations) == 0:
196 pass
197 elif len(locations) == 1:
198 location_id, start, end, strand = locations[0]
199 dbname, version = lookup.get(location_id, (None, None))
200
201 feature.location = SeqFeature.FeatureLocation(start, end)
202 feature.strand = strand
203 feature.ref_db = dbname
204 feature.ref = version
205 else:
206 min_start = locations[0][1]
207 max_end = locations[0][2]
208 sub_feature_list = []
209 for location in locations:
210 location_id, start, end, strand = location
211 dbname, version = lookup.get(location_id, (None, None))
212 min_start = min(min_start, start)
213 max_end = max(max_end, end)
214
215 subfeature = SeqFeature.SeqFeature()
216 subfeature.type = seqfeature_type
217 subfeature.location_operator = "join"
218 subfeature.location = SeqFeature.FeatureLocation(start, end)
219 subfeature.strand = strand
220 subfeature.ref_db = dbname
221 subfeature.ref = version
222 sub_feature_list.append((start, subfeature))
223 sub_feature_list.sort()
224 feature.sub_features = [sub_feature[1]
225 for sub_feature in sub_feature_list]
226 feature.location = SeqFeature.FeatureLocation(min_start, max_end)
227 feature.strand = feature.sub_features[0].strand
228
229 seq_feature_list.append(
230 (seqfeature_rank, feature.location.start.position, feature) )
231
232
233
234
235 seq_feature_list.sort()
236
237
238 return [x[2] for x in seq_feature_list]
239
246
248 qvs = adaptor.execute_and_fetchall(
249 "SELECT name, value" \
250 " FROM bioentry_qualifier_value JOIN term USING (term_id)" \
251 " WHERE bioentry_id = %s" \
252 " ORDER BY rank", (primary_id,))
253 qualifiers = {}
254 for name, value in qvs:
255 if name == "keyword": name = "keywords"
256 elif name == "date_changed": name = "dates"
257 elif name == "secondary_accession": name = "accessions"
258 qualifiers.setdefault(name, []).append(value)
259 return qualifiers
260
262
263
264 refs = adaptor.execute_and_fetchall(
265 "SELECT start_pos, end_pos, " \
266 " location, title, authors," \
267 " dbname, accession" \
268 " FROM bioentry_reference" \
269 " JOIN reference USING (reference_id)" \
270 " LEFT JOIN dbxref USING (dbxref_id)" \
271 " WHERE bioentry_id = %s" \
272 " ORDER BY rank", (primary_id,))
273 references = []
274 for start, end, location, title, authors, dbname, accession in refs:
275 reference = SeqFeature.Reference()
276 if start: start -= 1
277 reference.location = [SeqFeature.FeatureLocation(start, end)]
278 reference.authors = authors
279
280 if title : reference.title = title
281 reference.journal = location
282 if dbname == 'PUBMED':
283 reference.pubmed_id = accession
284 elif dbname == 'MEDLINE':
285 reference.medline_id = accession
286 references.append(reference)
287 return {'references': references}
288
290 a = {}
291 common_names = adaptor.execute_and_fetch_col0(
292 "SELECT name FROM taxon_name WHERE taxon_id = %s" \
293 " AND name_class = 'genbank common name'", (taxon_id,))
294 if common_names:
295 a['source'] = common_names[0]
296 scientific_names = adaptor.execute_and_fetch_col0(
297 "SELECT name FROM taxon_name WHERE taxon_id = %s" \
298 " AND name_class = 'scientific name'", (taxon_id,))
299 if scientific_names:
300 a['organism'] = scientific_names[0]
301 ncbi_taxids = adaptor.execute_and_fetch_col0(
302 "SELECT ncbi_taxon_id FROM taxon WHERE taxon_id = %s", (taxon_id,))
303 if ncbi_taxids and ncbi_taxids[0] and ncbi_taxids[0] != "0":
304 a['ncbi_taxid'] = ncbi_taxids[0]
305
306
307
308
309
310
311
312
313
314
315 taxonomy = []
316 while taxon_id :
317 name, rank, parent_taxon_id = adaptor.execute_one(
318 "SELECT taxon_name.name, taxon.node_rank, taxon.parent_taxon_id" \
319 " FROM taxon, taxon_name" \
320 " WHERE taxon.taxon_id=taxon_name.taxon_id" \
321 " AND taxon_name.name_class='scientific name'" \
322 " AND taxon.taxon_id = %s", (taxon_id,))
323 if taxon_id == parent_taxon_id :
324
325
326
327 break
328 if rank <> "no rank" :
329
330
331
332 taxonomy.insert(0, name)
333 taxon_id = parent_taxon_id
334
335 if taxonomy:
336 a['taxonomy'] = taxonomy
337 return a
338
340 """BioSQL equivalent of the biopython SeqRecord object.
341 """
342
343 - def __init__(self, adaptor, primary_id):
344 self._adaptor = adaptor
345 self._primary_id = primary_id
346
347 (self._biodatabase_id, self._taxon_id, self.name,
348 accession, version, self._identifier,
349 self._division, self.description) = self._adaptor.execute_one(
350 "SELECT biodatabase_id, taxon_id, name, accession, version," \
351 " identifier, division, description" \
352 " FROM bioentry" \
353 " WHERE bioentry_id = %s", (self._primary_id,))
354 if version and version != "0":
355 self.id = "%s.%s" % (accession, version)
356 else:
357 self.id = accession
358
360 if not hasattr(self, "_seq"):
361 self._seq = _retrieve_seq(self._adaptor, self._primary_id)
362 return self._seq
365 seq = property(__get_seq, __set_seq, __del_seq, "Seq object")
366
368 if not hasattr(self,"_dbxrefs"):
369 self._dbxrefs = _retrieve_dbxrefs(self._adaptor, self._primary_id)
370 return self._dbxrefs
373 dbxrefs = property(__get_dbxrefs, __set_dbxrefs, __del_dbxrefs,
374 "Database cross references")
375
377 if not hasattr(self, "_features"):
378 self._features = _retrieve_features(self._adaptor,
379 self._primary_id)
380 return self._features
383 features = property(__get_features, __set_features, __del_features,
384 "Features")
385
387 if not hasattr(self, "_annotations"):
388 self._annotations = _retrieve_annotations(self._adaptor,
389 self._primary_id,
390 self._taxon_id)
391 if self._identifier:
392 self._annotations["gi"] = self._identifier
393 if self._division:
394 self._annotations["data_file_division"] = self._division
395 return self._annotations
398 annotations = property(__get_annotations, __set_annotations,
399 __del_annotations, "Annotations")
400