Package Bio :: Package Clustalw :: Module clustal_format
[hide private]
[frames | no frames]

Source Code for Module Bio.Clustalw.clustal_format

 1  """ 
 2  A Martel grammar to read information from a clustal formatted file (*.aln). 
 3   
 4  This uses Andrew Dalke's Martel to do the parsing dirty work for me. 
 5  So all we need to do here is set up a big ol' regular expression to 
 6  let Martel know what the file looks like. 
 7  """ 
 8  # standard library 
 9  import sys 
10   
11  import Martel 
12       
13   
14  # define everything we will parse as a ton of regular expressions with
15  # specific callbacks
# Version number pattern: one digit, a literal dot, then one or two digits
# (matches e.g. "1.8" or "1.83").
# The pattern is a raw string so the backslashes reach the regex parser
# untouched instead of relying on Python passing unknown escapes through,
# and the dot is escaped so it only matches a real decimal point rather
# than any character.
version = Martel.Group("version",
                       Martel.Re(r"\d\.\d\d?"))
18   
# Header: the literal text "CLUSTAL " followed by one or more arbitrary
# characters, then between zero and three line endings.
header = Martel.Group(
    "header",
    Martel.Str("CLUSTAL ")
    + Martel.Re(".+")
    + Martel.MaxRepeat(Martel.AnyEol(), 0, 3),
)
23   
# Sequence identifier that opens every alignment line: one or more letters,
# digits, or the punctuation characters listed in the class (no whitespace).
# Written as a raw triple-quoted string so that both quote characters can
# appear unescaped and no backslash depends on Python leaving unknown escape
# sequences alone; the resulting pattern text is exactly what it was before.
seq_id = Martel.Group("seq_id",
                      Martel.Re(r"""[-a-zA-Z:;^_'",\+\#\|\[\]\(\)\/\.\d\?]+"""))
26   
# one or more blanks separating the sequence id from the sequence itself
seq_space = Martel.Group(
    "seq_space",
    Martel.Re("[ ]+"),
)
30   
# the sequence portion of an alignment line: a run of letters, '-' and '.'
seq_info = Martel.Group(
    "seq_info",
    Martel.Re("[-a-zA-Z.]+"),
)
33   
# Clustal can optionally append a number to each line telling you where you
# are in the sequence; when it is present it has to be swallowed here.
# The pattern is one or more spaces followed by one or more digits. Raw
# strings are used so "\d" is handed to the regex parser verbatim rather
# than being treated as an (invalid) Python string escape.
seq_num = Martel.Group("seq_num",
                       Martel.Re(r"[ ]+") +
                       Martel.Re(r"[\d]+"))
39   
# One full alignment line: id, separating blanks, sequence data, an optional
# trailing position number, and the terminating newline.
# NOTE(review): only "\n" is accepted here while the other groups use
# Martel.AnyEol() -- confirm whether other line endings need to be supported.
seq_line = Martel.Group(
    "seq_line",
    seq_id
    + seq_space
    + seq_info
    + Martel.Opt(seq_num)
    + Martel.Str("\n"),
)
43   
# Line made up only of spaces, ':', '.' and '*' characters, optionally
# followed by a line ending.
# Raw string so the escaped '.' and '*' reach the regex parser unchanged
# instead of depending on Python preserving unknown string escapes.
match_stars = Martel.Group("match_stars",
                           Martel.Re(r"[ :\.\*]+") +
                           Martel.Opt(Martel.AnyEol()))
47   
# a single line ending, used as the separator between alignment blocks
new_block = Martel.Group(
    "new_block",
    Martel.AnyEol(),
)
51   
# One alignment block: at least one sequence line, an optional match line,
# and then any number of block separators.
block_info = Martel.Group(
    "block_info",
    Martel.Rep1(seq_line)
    + Martel.Opt(match_stars)
    + Martel.Rep(new_block),
)
56   
57   
# The complete expression to import for parsing clustal files: a single
# header followed by one or more alignment blocks.
# (The name shadows the builtin ``format`` but is this module's public
# entry point, so it is kept as is.)
format = Martel.Group(
    "clustalx",
    header + Martel.Rep1(block_info),
)
63