1 """Implementations of Biopython-like Seq objects on top of BioSQL.
2
3 This allows retrival of items stored in a BioSQL database using
4 a biopython-like Seq interface.
5 """
6
7 from Bio import SeqFeature
8
10 - def __init__(self, primary_id, adaptor, alphabet, start, length):
16
19
32 i = max(i, 0)
33 i = min(i, self._length)
34 j = max(j, 0)
35 j = min(j, self._length)
36 if i >= j:
37 return self.__class__(self.primary_id, self.adaptor, self.alphabet,
38 self.start, 0)
39 return self.__class__(self.primary_id, self.adaptor, self.alphabet,
40 self.start + i, j - i)
45
46 data = property(tostring)
47
67
82
84 sql = "SELECT seqfeature_id, type.name, rank" \
85 " FROM seqfeature join term type on (type_term_id = type.term_id)" \
86 " WHERE bioentry_id = %s" \
87 " ORDER BY rank"
88 results = adaptor.execute_and_fetchall(sql, (primary_id,))
89 seq_feature_list = []
90 for seqfeature_id, seqfeature_type, seqfeature_rank in results:
91
92 qvs = adaptor.execute_and_fetchall(
93 "SELECT name, value" \
94 " FROM seqfeature_qualifier_value join term using (term_id)" \
95 " WHERE seqfeature_id = %s", (seqfeature_id,))
96 qualifiers = {}
97 for qv_name, qv_value in qvs:
98 qualifiers.setdefault(qv_name, []).append(qv_value)
99
100 results = adaptor.execute_and_fetchall(
101 "SELECT location_id, start_pos, end_pos, strand" \
102 " FROM location" \
103 " WHERE seqfeature_id = %s" \
104 " ORDER BY rank", (seqfeature_id,))
105 locations = []
106
107 for location_id, start, end, strand in results:
108 if start:
109 start -= 1
110 locations.append( (location_id, start, end, strand) )
111
112 remote_results = adaptor.execute_and_fetchall(
113 "SELECT location_id, dbname, accession, version" \
114 " FROM location join dbxref using (dbxref_id)" \
115 " WHERE seqfeature_id = %s", (seqfeature_id,))
116 lookup = {}
117 for location_id, dbname, accession, version in remote_results:
118 if version and version != "0":
119 v = "%s.%s" % (accession, version)
120 else:
121 v = accession
122 lookup[location_id] = (dbname, v)
123
124 feature = SeqFeature.SeqFeature(type = seqfeature_type)
125 feature.qualifiers = qualifiers
126 if len(locations) == 0:
127 pass
128 elif len(locations) == 1:
129 location_id, start, end, strand = locations[0]
130 dbname, version = lookup.get(location_id, (None, None))
131
132 feature.location = SeqFeature.FeatureLocation(start, end)
133 feature.strand = strand
134 feature.ref_db = dbname
135 feature.ref = version
136 else:
137 min_start = locations[0][1]
138 max_end = locations[0][2]
139 sub_feature_list = []
140 for location in locations:
141 location_id, start, end, strand = location
142 dbname, version = lookup.get(location_id, (None, None))
143 min_start = min(min_start, start)
144 max_end = max(max_end, end)
145
146 subfeature = SeqFeature.SeqFeature()
147 subfeature.type = seqfeature_type
148 subfeature.location_operator = "join"
149 subfeature.location = SeqFeature.FeatureLocation(start, end)
150 subfeature.strand = strand
151 subfeature.ref_db = dbname
152 subfeature.ref = version
153 sub_feature_list.append((start, subfeature))
154 sub_feature_list.sort()
155 feature.sub_features = [sub_feature[1]
156 for sub_feature in sub_feature_list]
157 feature.location = SeqFeature.FeatureLocation(min_start, max_end)
158 feature.strand = feature.sub_features[0].strand
159
160 seq_feature_list.append(
161 (seqfeature_rank, feature.location.start.position, feature) )
162
163
164
165
166 seq_feature_list.sort()
167
168
169 return [x[2] for x in seq_feature_list]
170
178
180 qvs = adaptor.execute_and_fetchall(
181 "SELECT name, value" \
182 " FROM bioentry_qualifier_value JOIN term USING (term_id)" \
183 " WHERE bioentry_id = %s" \
184 " ORDER BY rank", (primary_id,))
185 qualifiers = {}
186 for name, value in qvs:
187 if name == "keyword": name = "keywords"
188 elif name == "date_changed": name = "dates"
189 elif name == "secondary_accession": name = "accessions"
190 qualifiers.setdefault(name, []).append(value)
191 return qualifiers
192
194
195
196 refs = adaptor.execute_and_fetchall(
197 "SELECT start_pos, end_pos, " \
198 " location, title, authors," \
199 " dbname, accession" \
200 " FROM bioentry_reference" \
201 " JOIN reference USING (reference_id)" \
202 " LEFT JOIN dbxref USING (dbxref_id)" \
203 " WHERE bioentry_id = %s" \
204 " ORDER BY rank", (primary_id,))
205 references = []
206 for start, end, location, title, authors, dbname, accession in refs:
207 reference = SeqFeature.Reference()
208 if start: start -= 1
209 reference.location = [SeqFeature.FeatureLocation(start, end)]
210 reference.authors = authors
211 reference.title = title
212 reference.journal = location
213 if dbname == 'PUBMED':
214 reference.pubmed_id = accession
215 elif dbname == 'MEDLINE':
216 reference.medline_id = accession
217 references.append(reference)
218 return {'references': references}
219
236
238 a = {}
239 common_names = adaptor.execute_and_fetch_col0(
240 "SELECT name FROM taxon_name WHERE taxon_id = %s" \
241 " AND name_class = 'genbank common name'", (taxon_id,))
242 if common_names:
243 a['source'] = common_names[0]
244 scientific_names = adaptor.execute_and_fetch_col0(
245 "SELECT name FROM taxon_name WHERE taxon_id = %s" \
246 " AND name_class = 'scientific name'", (taxon_id,))
247 if scientific_names:
248 a['organism'] = scientific_names[0]
249 ncbi_taxids = adaptor.execute_and_fetch_col0(
250 "SELECT ncbi_taxon_id FROM taxon WHERE taxon_id = %s", (taxon_id,))
251 if ncbi_taxids and ncbi_taxids[0] and ncbi_taxids[0] != "0":
252 a['ncbi_taxid'] = ncbi_taxids[0]
253
254
255
256 taxonomy = adaptor.execute_and_fetch_col0(
257 "SELECT taxon_name.name" \
258 " FROM taxon t, taxon parent, taxon_name" \
259 " WHERE parent.taxon_id=taxon_name.taxon_id" \
260 " AND t.left_value BETWEEN parent.left_value AND parent.right_value" \
261 " AND name_class='scientific name'" \
262 " AND parent.node_rank <> 'no rank'" \
263 " AND t.taxon_id = %s" \
264 " ORDER BY parent.left_value", (taxon_id,))
265 if taxonomy:
266 a['taxonomy'] = taxonomy
267 return a
268
270 """BioSQL equivalent of the biopython SeqRecord object.
271 """
272
273 - def __init__(self, adaptor, primary_id):
274 self._adaptor = adaptor
275 self._primary_id = primary_id
276
277 (self._biodatabase_id, self._taxon_id, self.name,
278 accession, version, self._identifier,
279 self._division, self.description) = self._adaptor.execute_one(
280 "SELECT biodatabase_id, taxon_id, name, accession, version," \
281 " identifier, division, description" \
282 " FROM bioentry" \
283 " WHERE bioentry_id = %s", (self._primary_id,))
284 if version and version != "0":
285 self.id = "%s.%s" % (accession, version)
286 else:
287 self.id = accession
288
290 if not hasattr(self, "_seq"):
291 self._seq = _retrieve_seq(self._adaptor, self._primary_id)
292 return self._seq
295 seq = property(__get_seq, __set_seq, __del_seq, "Seq object")
296
298 if not hasattr(self,"_dbxrefs"):
299 self._dbxrefs = _retrieve_dbxrefs(self._adaptor, self._primary_id)
300 return self._dbxrefs
303 dbxrefs = property(__get_dbxrefs, __set_dbxrefs, __del_dbxrefs, "Dbxrefs")
304
306 if not hasattr(self, "_features"):
307 self._features = _retrieve_features(self._adaptor,
308 self._primary_id)
309 return self._features
312 features = property(__get_features, __set_features, __del_features,
313 "Features")
314
316 if not hasattr(self, "_annotations"):
317 self._annotations = _retrieve_annotations(self._adaptor,
318 self._primary_id,
319 self._taxon_id)
320 if self._identifier:
321 self._annotations["gi"] = self._identifier
322 if self._division:
323 self._annotations["data_file_division"] = self._division
324 return self._annotations
327 annotations = property(__get_annotations, __set_annotations,
328 __del_annotations, "Annotations")
329