1
2
3
4
5
6
7
8
9
10 """Bio.SeqIO support for the "fasta" (aka FastA or Pearson) file format.
11
12 You are expected to use this module via the Bio.SeqIO functions."""
13
14 from Bio.Alphabet import single_letter_alphabet
15 from Bio.Seq import Seq
16 from Bio.SeqRecord import SeqRecord
17 from Interfaces import SequentialSequenceWriter
18
19
# NOTE(review): the "def" line of this generator was absent from the text
# under review.  The signature below is reconstructed from the argument list
# in the docstring and from the calls in the self test (handle is passed
# positionally; alphabet and title2ids are passed by keyword or omitted).
# The alphabet default is presumed to be the otherwise unused
# single_letter_alphabet import -- confirm against version control.
def FastaIterator(handle, alphabet=single_letter_alphabet, title2ids=None):
    """Generator function to iterate over Fasta records (as SeqRecord objects).

    handle - input file
    alphabet - optional alphabet
    title2ids - A function that, when given the title of the FASTA
                file (without the beginning >), will return the id, name
                and description (in that order) for the record as a tuple
                of strings.

                If this is not given, then the entire title line will be
                used as the description, and the first word as the id and
                name.  A title line with no text at all gives an empty
                id and name.

    Any lines before the first line starting with ">" are ignored, and an
    input with no such line yields no records.  Trailing whitespace and
    embedded spaces are removed from each sequence line before the lines
    of a record are joined together.

    Note that use of title2ids matches that of Bio.Fasta.SequenceParser
    but the defaults are slightly different.
    """
    # Skip anything before the first record.  Hitting the end of the input
    # here just means there is nothing to yield.
    while True:
        line = handle.readline()
        if line == "":
            return
        if line[0] == ">":
            break

    # Invariant: whenever this loop is (re)entered, line is a title line.
    # The inner loop below only stops at the end of the input (empty line,
    # which ends this loop too) or at the next line starting with ">".
    while line:
        title = line[1:].rstrip()
        if title2ids:
            record_id, name, descr = title2ids(title)
        else:
            descr = title
            words = descr.split()
            if words:
                record_id = words[0]
            else:
                # A bare ">" title has no first word; indexing the empty
                # split result used to raise IndexError.
                record_id = ""
            name = record_id

        # Collect the sequence lines belonging to this record.
        seq_lines = []
        line = handle.readline()
        while line and line[0] != ">":
            seq_lines.append(line.rstrip().replace(" ", ""))
            line = handle.readline()

        yield SeqRecord(Seq("".join(seq_lines), alphabet),
                        id=record_id, name=name, description=descr)
67
69 """Class to write Fasta format files."""
def __init__(self, handle, wrap=60, record2title=None):
    """Create a Fasta writer.

    handle - Handle to an output file, e.g. as returned
             by open(filename, "w")
    wrap - Optional line length used to wrap sequence lines.
           Defaults to wrapping the sequence at 60 characters.
           Use zero (or None) for no wrapping, giving a single
           long line for the sequence.
    record2title - Optional function to return the text to be
           used for the title line of each record.  By default
           a combination of the record.id and record.description
           is used.  If the record.description starts with the
           record.id, then just the record.description is used.

    Raises ValueError if wrap is given (non-zero) but is less than one.

    You can either use:

    myWriter = FastaWriter(open(filename,"w"))
    myWriter.write_file(myRecords)

    Or, follow the sequential file writer system, for example:

    myWriter = FastaWriter(open(filename,"w"))
    myWriter.write_header() # does nothing for Fasta files
    ...
    Multiple calls to myWriter.write_record() and/or myWriter.write_records()
    ...
    myWriter.write_footer() # does nothing for Fasta files
    myWriter.close()
    """
    # The base class initialiser is handed the output handle.
    SequentialSequenceWriter.__init__(self, handle)

    # Any false value (zero or None) means "no wrapping" and is stored
    # as None; otherwise the requested line length is kept as given.
    self.wrap = None
    if wrap:
        if wrap < 1:
            raise ValueError("wrap should be a positive line length, or "
                             "zero/None for no wrapping (got %r)" % (wrap,))
        self.wrap = wrap
    self.record2title = record2title
108
141
# Quick self test, run only when this file is executed as a script.
# It uses Python 2 only syntax (print statements, cStringIO).
if __name__ == "__main__" :
    print "Running quick self test"

    import os
    from Bio.Alphabet import generic_protein, generic_nucleotide

    # Example input files, looked for relative to the current directory.
    # Each file based test below is skipped without any message when its
    # file is not present.
    fna_filename = "NC_005213.fna"
    faa_filename = "NC_005213.faa"

    # NOTE(review): genbank_name_function and print_record are used below
    # but are not defined in the text under review -- presumably they are
    # defined in lines missing from this view; confirm.

    if os.path.isfile(fna_filename) :
        print "--------"
        print "FastaIterator (single sequence)"
        # The handle from open() is never closed explicitly.
        iterator = FastaIterator(open(fna_filename, "r"), alphabet=generic_nucleotide, title2ids=genbank_name_function)
        count=0
        for record in iterator :
            count=count+1
            print_record(record)
        # This file is expected to hold exactly one record.
        assert count == 1
        # record is the last value bound by the loop above.
        print str(record.__class__)

    if os.path.isfile(faa_filename) :
        print "--------"
        print "FastaIterator (multiple sequences)"
        # The handle from open() is never closed explicitly.
        iterator = FastaIterator(open(faa_filename, "r"), alphabet=generic_protein, title2ids=genbank_name_function)
        count=0
        for record in iterator :
            count=count+1
            print_record(record)
            # Only the first record is looked at.
            break
        # At least one record must have been read.
        assert count>0
        print str(record.__class__)

    from cStringIO import StringIO
    print "--------"
    print "FastaIterator (empty input file)"

    # An empty in-memory handle must give no records (and no errors).
    iterator = FastaIterator(StringIO(""))
    count = 0
    for record in iterator :
        count = count+1
    assert count==0

    print "Done"
206