1 """
2 A Martel grammar to read information from a clustal formatted file (*.aln).
3
4 This uses Andrew Dalke's Martel to do the parsing dirty work for me.
5 So all we need to do here is set up a big ol' regular expression to
6 let Martel know what the file looks like.
7 """
8
9 import sys
10
11 import Martel
12
13
14
15
# A program version number: one digit, a literal dot, then one or two
# digits (e.g. "1.8" or "1.83").
# The pattern is a raw string so the regex escapes are not treated as
# (invalid) Python string escapes, and the dot is escaped so that it only
# matches a real "." instead of any character.
version = Martel.Group(
    "version",
    Martel.Re(r"\d\.\d\d?"),
)
18
# The title portion of the file: the literal text "CLUSTAL ", whatever
# follows it as matched by ".+", and then up to three end-of-line markers
# (the MaxRepeat bounds are 0 and 3).
header = Martel.Group(
    "header",
    Martel.Str("CLUSTAL ")
    + Martel.Re(".+")
    + Martel.MaxRepeat(Martel.AnyEol(), 0, 3),
)
23
# A sequence identifier: one or more characters drawn from letters, digits
# and the punctuation listed in the character class below.
# Written as a triple-quoted raw string so the pattern can contain both
# quote characters and the regex backslash escapes verbatim; the pattern
# handed to Martel is unchanged, but it no longer relies on Python passing
# unrecognised string escapes (e.g. "\d", "\+") through untouched.
seq_id = Martel.Group(
    "seq_id",
    Martel.Re(r'''[-a-zA-Z:;^_'",\+\#\|\[\]\(\)\/\.\d\?]+'''),
)
26
27
# The run of one or more space characters that separates a sequence
# identifier from the sequence data on the same line.
seq_space = Martel.Group(
    "seq_space",
    Martel.Re("[ ]+"),
)
30
# The sequence data on a line: one or more letters, "-" or "." characters.
seq_info = Martel.Group(
    "seq_info",
    Martel.Re("[-a-zA-Z.]+"),
)
33
34
35
# A number that may trail the sequence data on a line: one or more spaces
# followed by one or more digits.
# The digit pattern is a raw string so that "\d" reaches the regex engine
# without depending on Python's handling of an invalid string escape; the
# resulting pattern is unchanged.
seq_num = Martel.Group(
    "seq_num",
    Martel.Re("[ ]+")
    + Martel.Re(r"[\d]+"),
)
39
# One line of alignment data: identifier, separating spaces, sequence
# data, an optional trailing number, and the line terminator.
# The terminator uses Martel.AnyEol() -- the same end-of-line expression
# the rest of this grammar uses -- rather than a hard-coded "\n", so that
# sequence lines are not restricted to a single newline convention.
seq_line = Martel.Group(
    "seq_line",
    seq_id
    + seq_space
    + seq_info
    + Martel.Opt(seq_num)
    + Martel.AnyEol(),
)
43
# The annotation line of a block: one or more of space, ":", "." and "*",
# optionally followed by an end-of-line marker.
# The pattern is a raw string so the regex escapes "\." and "\*" are not
# interpreted as (invalid) Python string escapes; the pattern handed to
# Martel is unchanged.
match_stars = Martel.Group(
    "match_stars",
    Martel.Re(r"[ :\.\*]+")
    + Martel.Opt(Martel.AnyEol()),
)
47
48
# A single end-of-line marker on its own; block_info repeats this to
# consume the lines that separate one block of sequences from the next.
new_block = Martel.Group(
    "new_block",
    Martel.AnyEol(),
)
51
# One block of the alignment: at least one sequence line, then an optional
# match_stars line, then any number of new_block separators.
block_info = Martel.Group(
    "block_info",
    Martel.Rep1(seq_line)
    + Martel.Opt(match_stars)
    + Martel.Rep(new_block),
)
56
57
58
59
# The complete grammar, tagged "clustalx": the header followed by one or
# more alignment blocks.
# NOTE: the name shadows the builtin ``format``; it is kept because it is
# the name this module exposes for the grammar.
format = Martel.Group(
    "clustalx",
    header
    + Martel.Rep1(block_info),
)
63