1
2
3
4
5
6 """Record classes to hold BLAST output.
7
8 Classes:
9 Blast Holds all the information from a blast search.
10 PSIBlast Holds all the information from a psi-blast search.
11
12 Header Holds information from the header.
13 Description Holds information about one hit description.
14 Alignment Holds information about one alignment hit.
15 HSP Holds information about one HSP.
16 MultipleAlignment Holds information about a multiple alignment.
17 DatabaseReport Holds information from the database report.
18 Parameters Holds information from the parameters.
19
20 """
21
22
23 import string
24
25 from Bio.Align import Generic
26
28 """Saves information from a blast header.
29
30 Members:
31 application The name of the BLAST flavor that generated this data.
32 version Version of blast used.
33 date Date this data was generated.
34 reference Reference for blast.
35
36 query Name of query sequence.
37 query_letters Number of letters in the query sequence. (int)
38
39 database Name of the database.
40 database_sequences Number of sequences in the database. (int)
41 database_letters Number of letters in the database. (int)
42
43 """
56
58 """Stores information about one hit in the descriptions section.
59
60 Members:
61 title Title of the hit.
62 score Number of bits. (int)
63 e E value. (float)
64 num_alignments Number of alignments for the same subject. (int)
65
66 """
68 self.title = ''
69 self.score = None
70 self.e = None
71 self.num_alignments = None
73 return "%-66s %5s %s" % (self.title, self.score, self.e)
74
76 """Stores information about one hit in the alignments section.
77
78 Members:
79 title Name.
80 hit_id Hit identifier. (str)
81 hit_def Hit definition. (str)
82 length Length. (int)
83 hsps A list of HSP objects.
84
85 """
87 self.title = ''
88 self.hit_id = ''
89 self.hit_def = ''
90 self.length = None
91 self.hsps = []
93 lines = []
94 titles = string.split(self.title, '\n')
95 for i in range(len(titles)):
96 if i:
97 lines.append(" ")
98 lines.append("%s\n" % titles[i])
99 lines.append(" Length = %s\n" % self.length)
100 return string.join(lines, '')
101
103 """Stores information about one hsp in an alignment hit.
104
105 Members:
106 score BLAST score of hit. (float)
107 bits Number of bits for that score. (float)
108 expect Expect value. (float)
109 num_alignments Number of alignments for same subject. (int)
110 identities Number of identities/total aligned. tuple of (int, int)
111 positives Number of positives/total aligned. tuple of (int, int)
112 gaps Number of gaps/total aligned. tuple of (int, int)
113 align_length Length of the alignment. (int)
114 strand Tuple of (query, target) strand.
115 frame Tuple of 1 or 2 frame shifts, depending on the flavor.
116
117 query The query sequence.
118 query_start The start residue for the query sequence. (1-based)
119 query_end The end residue for the query sequence. (1-based)
120 match The match sequence.
121 sbjct The sbjct sequence.
122 sbjct_start The start residue for the sbjct sequence. (1-based)
123 sbjct_end The end residue for the sbjct sequence. (1-based)
124
125 Not all flavors of BLAST return values for every attribute:
126 score expect identities positives strand frame
127 BLASTP X X X X
128 BLASTN X X X X X
129 BLASTX X X X X X
130 TBLASTN X X X X X
131 TBLASTX X X X X X/X
132
133 Note: for BLASTX, the query sequence is shown as a protein sequence,
134 but the numbering is based on the nucleotides. Thus, the numbering
135 is 3x larger than the number of amino acid residues. A similar effect
136 can be seen for the sbjct sequence in TBLASTN, and for both sequences
137 in TBLASTX.
138
139 Also, for negative frames, the sequence numbering starts from
140 query_start and counts down.
141
142 """
144 self.score = None
145 self.bits = None
146 self.expect = None
147 self.num_alignments = None
148 self.identities = (None, None)
149 self.positives = (None, None)
150 self.gaps = (None, None)
151 self.align_length = None
152 self.strand = (None, None)
153 self.frame = ()
154
155 self.query = ''
156 self.query_start = None
157 self.query_end = None
158 self.match = ''
159 self.sbjct = ''
160 self.sbjct_start = None
161 self.sbjct_end = None
162
164 """Holds information about a multiple alignment.
165
166 Members:
167 alignment A list of tuples (name, start residue, sequence, end residue).
168
169 The start residue is 1-based. It may be blank, if that sequence is
170 not aligned in the multiple alignment.
171
172 """
175
177 """Retrieve generic alignment object for the given alignment.
178
179 Instead of the tuples, this returns an Alignment object from
180 Bio.Align.Generic, through which you can manipulate and query
181 the object.
182
183 alphabet is the specified alphabet for the sequences in the code (for
184 example IUPAC.IUPACProtein).
185
186 Thanks to James Casbon for the code.
187 """
188 seq_parts = []
189 seq_names = []
190 parse_number = 0
191 n = 0
192 for name, start, seq, end in self.alignment:
193 if name == 'QUERY':
194 parse_number = parse_number + 1
195 n = 0
196
197 if parse_number == 1:
198 seq_parts.append(seq)
199 seq_names.append(name)
200 else:
201 seq_parts[n] = seq_parts[n] + seq
202 n = n + 1
203
204 generic = Generic.Alignment(alphabet)
205 for (name,seq) in zip(seq_names,seq_parts):
206 generic.add_sequence(name, seq)
207
208 return generic
209
211 """Holds information from a PSI-BLAST round.
212
213 Members:
214 number Round number. (int)
215 reused_seqs Sequences in model, found again. List of Description objects.
216 new_seqs Sequences not found, or below threshold. List of Description objects.
217 alignments A list of Alignment objects.
218 multiple_alignment A MultipleAlignment object.
219
220 """
222 self.number = None
223 self.reused_seqs = []
224 self.new_seqs = []
225 self.alignments = []
226 self.multiple_alignment = None
227
229 """Holds information about a database report.
230
231 Members:
232 database_name List of database names. (can have multiple dbs)
233 num_letters_in_database Number of letters in the database. (int)
234 num_sequences_in_database List of number of sequences in the database.
235 posted_date List of the dates the databases were posted.
236 ka_params A tuple of (lambda, k, h) values. (floats)
237 gapped # XXX this isn't set right!
238 ka_params_gap A tuple of (lambda, k, h) values. (floats)
239
240 """
249
251 """Holds information about the parameters.
252
253 Members:
254 matrix Name of the matrix.
255 gap_penalties Tuple of (open, extend) penalties. (floats)
256 sc_match Match score for nucleotide-nucleotide comparison
257 sc_mismatch Mismatch penalty for nucleotide-nucleotide comparison
258 num_hits Number of hits to the database. (int)
259 num_sequences Number of sequences. (int)
260 num_good_extends Number of extensions. (int)
261 num_seqs_better_e Number of sequences better than e-value. (int)
262 hsps_no_gap Number of HSP's better, without gapping. (int)
263 hsps_prelim_gapped Number of HSP's gapped in prelim test. (int)
264 hsps_prelim_gapped_attemped Number of HSP's attempted in prelim. (int)
265 hsps_gapped Total number of HSP's gapped. (int)
266 query_length Length of the query. (int)
267 query_id Identifier of the query sequence. (str)
268 database_length Number of letters in the database. (int)
269 effective_hsp_length Effective HSP length. (int)
270 effective_query_length Effective length of query. (int)
271 effective_database_length Effective length of database. (int)
272 effective_search_space Effective search space. (int)
273 effective_search_space_used Effective search space used. (int)
274 frameshift Frameshift window. Tuple of (int, float)
275 threshold Threshold. (int)
276 window_size Window size. (int)
277 dropoff_1st_pass Tuple of (score, bits). (int, float)
278 gap_x_dropoff Tuple of (score, bits). (int, float)
279 gap_x_dropoff_final Tuple of (score, bits). (int, float)
280 gap_trigger Tuple of (score, bits). (int, float)
281 blast_cutoff Tuple of (score, bits). (int, float)
282 """
312
313 -class Blast(Header, DatabaseReport, Parameters):
314 """Saves the results from a blast search.
315
316 Members:
317 descriptions A list of Description objects.
318 alignments A list of Alignment objects.
319 multiple_alignment A MultipleAlignment object.
320 + members inherited from base classes
321
322 """
330
331 -class PSIBlast(Header, DatabaseReport, Parameters):
332 """Saves the results from a blastpgp search.
333
334 Members:
335 rounds A list of Round objects.
336 converged Whether the search converged.
337 + members inherited from base classes
338
339 """
346