1
2
3
4
5
6 """Bio.AlignIO support for the "nexus" file format.
7
8 You are expected to use this module via the Bio.AlignIO functions
9 (or the Bio.SeqIO functions).
10
11 See also the Bio.Nexus module (which this code calls internally),
12 as this offers more than just accessing the alignment or its
13 sequences as SeqRecord objects.
14 """
15
16 from Bio.Nexus import Nexus
17 from Bio.Align.Generic import Alignment
18 from Bio.SeqRecord import SeqRecord
19
20
21
22
23
def NexusIterator(handle, seq_count=None):
    """Yield the alignment held in a Nexus file as an Alignment object.

    This uses the Bio.Nexus module to do the hard work.

    Arguments:
     - handle - input handle, passed unchanged to Nexus.Nexus(...).
     - seq_count - optional expected number of sequences.  When given
       (and non-zero) it is compared with the number of taxon labels
       found in the file.

    Each row of the matrix becomes a SeqRecord in the alignment, with the
    (possibly altered) taxon label as its id and the unaltered taxon label
    as its name.

    Raises ValueError if seq_count is given and does not match the number
    of sequences found.

    NOTE - We only expect ONE alignment matrix per Nexus file,
    meaning this iterator will only yield one Alignment.  A file with
    no matrix yields nothing at all.
    """
    n = Nexus.Nexus(handle)
    if not n.matrix:
        # No alignment found.  End the generator with a plain return: an
        # explicit "raise StopIteration" inside a generator is turned into
        # a RuntimeError on Python 3.7+ (PEP 479), while return behaves the
        # same on every Python version.
        return
    alignment = Alignment(n.alphabet)

    # Bio.Nexus keeps the labels as found in the file and the labels it
    # actually uses as matrix keys in two parallel lists.
    # NOTE(review): presumably duplicated names get a suffix appended by
    # Bio.Nexus, which is what the startswith check below relies on.
    assert len(n.unaltered_taxlabels) == len(n.taxlabels)

    # seq_count comes from the caller, so validate it with a real exception
    # rather than an assert (asserts are stripped when running with -O).
    if seq_count and seq_count != len(n.unaltered_taxlabels):
        raise ValueError("Found %i sequences, but seq_count=%i"
                         % (len(n.unaltered_taxlabels), seq_count))

    for old_name, new_name in zip(n.unaltered_taxlabels, n.taxlabels):
        assert new_name.startswith(old_name)
        seq = n.matrix[new_name]
        # The Alignment is filled in directly through its record list.
        alignment._records.append(SeqRecord(seq,
                                            id=new_name,
                                            name=old_name,
                                            description=""))
    # All done
    yield alignment
55
if __name__ == "__main__":
    # Quick self test: run this file directly to parse three small in-memory
    # Nexus files and print what NexusIterator makes of them.
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    def _print_alignments(handle):
        """Print every alignment parsed from handle, then each record in it."""
        for alignment in NexusIterator(handle):
            print(alignment)
            for record in alignment:
                # One space-separated line per record: repr(seq), name, id.
                print("%s %s %s" % (repr(record.seq), record.name, record.id))
        print("Done")

    # Only single-argument print(...) calls are used, so the output is the
    # same whether print is a statement (Python 2) or a function (Python 3).
    print("Quick self test")
    print("")
    print("Repeated names without a TAXA block")
    _print_alignments(StringIO("""#NEXUS
[TITLE: NoName]

begin data;
dimensions ntax=4 nchar=50;
format interleave datatype=protein gap=- symbols="FSTNKEYVQMCLAWPHDRIG";

matrix
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ----
ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG
CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
;
end;
"""))

    print("")
    print("Repeated names with a TAXA block")
    _print_alignments(StringIO("""#NEXUS
[TITLE: NoName]

begin taxa
CYS1_DICDI
ALEU_HORVU
CATH_HUMAN
CYS1_DICDI;
end;

begin data;
dimensions ntax=4 nchar=50;
format interleave datatype=protein gap=- symbols="FSTNKEYVQMCLAWPHDRIG";

matrix
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ----
ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG
CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
;
end;
"""))

    print("")
    print("Reading an empty file")
    # An empty handle has no matrix, so the iterator must yield nothing.
    assert 0 == len(list(NexusIterator(StringIO())))
    print("Done")
115