1
2
3
4
5
6
7 """Bio.SeqIO support for the "swiss" (aka SwissProt/UniProt) file format.
8
9 You are expected to use this module via the Bio.SeqIO functions.
10 See also the Bio.SwissProt module which offers more than just accessing
11 the sequences as SeqRecord objects."""
12
13 from Bio.SwissProt import SProt
14 import cStringIO
15
16
def SwissIterator(handle):
    """Break up a Swiss-Prot/UniProt file into SeqRecord objects.

    handle - iterable yielding the lines of the input file (for example
             an open file object).

    Every section from the ID line to the terminating // becomes
    a single SeqRecord with associated annotation and features.
    This is a generator: one record is parsed and yielded each time a
    line starting with // is reached.  Any lines following the last //
    terminator are never parsed or yielded.

    This parser is for the flat file "swiss" format as used by:
     * Swiss-Prot aka SwissProt
     * TrEMBL
     * UniProtKB aka UniProt Knowledgebase

    It does NOT read their new XML file format.
    http://www.expasy.org/sprot/

    For consistency with BioPerl and EMBOSS we call this the "swiss"
    format.
    """
    parser = SProt.SequenceParser()
    record_lines = []
    for line in handle:
        record_lines.append(line)
        if line[:2] == '//':
            # The parser is given a file-like object, so the buffered
            # lines of this one record are wrapped in an in-memory
            # handle.  A separate name is used so the caller's
            # ``handle`` argument is not rebound mid-iteration.
            record_handle = cStringIO.StringIO("".join(record_lines))
            record = parser.parse(record_handle)
            # Start a fresh buffer for the next record.
            record_lines = []
            yield record
43
44
45
46
47 if __name__ == "__main__" :
48 print "Quick self test..."
49
50 example_filename = "../../Tests/SwissProt/sp008"
51
52 import os
53 if not os.path.isfile(example_filename):
54 print "Missing test file %s" % example_filename
55 else :
56
57 handle = open(example_filename)
58 records = SwissIterator(handle)
59 for record in records:
60 print record.name
61 print record.id
62 print record.annotations['keywords']
63 print repr(record.annotations['organism'])
64 print record.seq.tostring()[:20] + "..."
65 handle.close()
66