Package Bio :: Package Saf :: Module saf_format
[hide private]
[frames | no frames]

Source Code for Module Bio.Saf.saf_format

 1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
 2  # This code is part of the Biopython distribution and governed by its 
 3  # license.  Please see the LICENSE file that should have been included 
 4  # as part of this package. 
 5   
 6  """Martel based parser to read SAF formatted files. 
 7   
 8  This is a huge regular expression for SAF, built using
 9  the 'regular expressions on steroids' capabilities of Martel.
10   
11  http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html 
12   
13   
14  Notes: 
15  Just so I remember -- the new end of line syntax is: 
16    New regexp syntax - \R 
17       \R    means "\n|\r\n?" 
18       [\R]  means "[\n\r]" 
19   
20  This helps us have endlines be consistent across platforms. 
21   
22  """ 
23  #http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html 
24   
25   
26  # Martel 
27  import Martel 
28  from Martel import Str 
29  from Martel import AnyEol 
30  from Martel import ToEol 
31  from Martel import Group 
32  from Martel import Alt 
33  from Martel import Rep 
34  from Martel import Rep1 
35  from Martel import Any 
36  from Martel import Opt 
37  from Martel import ToSep 
38  from Martel.Expression import Assert 
39   
40   
41   
42  # --- first set up some helper constants and functions 
43  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
44  # This code is part of the Biopython distribution and governed by its 
45  # license.  Please see the LICENSE file that should have been included 
46  # as part of this package. 
47   
48   
# Character sets used to build the Martel line expressions in this module.
digits = "0123456789"

# Lower- and upper-case ASCII letters plus '-', '.', space and tab.
valid_sequence_characters = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '-. \t'
)

# Tab and space only; end-of-line characters are deliberately not included.
white_space = "\t "

# Digits, tab, space and the period (character code 0x2e).
valid_residue_characters = digits + white_space + "."
# One or more characters drawn from valid_residue_characters (digits, tab,
# space, period), terminated by an end of line.
residue_number_line = Group(
    "residue_number_line",
    Rep1(Any(valid_residue_characters)) + AnyEol()
)
# A "#" followed by everything up to the end of the line.
comment_line = Group("comment_line", Str("#") + ToEol())
# Grouped as "ignored_line": either a comment line or a residue number line.
ignored_line = Group(
    "ignored_line",
    Alt(comment_line, residue_number_line)
)
# A candidate line is built from, in order:
#   1. two Assert checks against a leading "#" and against a leading
#      character from valid_residue_characters
#      NOTE(review): the second Assert argument (1) looks like an "invert"
#      flag, i.e. a negative lookahead -- confirm against Martel.Expression;
#   2. text up to and including the first space separator;
#   3. zero or more characters from valid_sequence_characters;
#   4. whatever remains up to the end of the line.
candidate_line = Group(
    "candidate_line",
    Assert(Str("#"), 1)
    + Assert(Any(valid_residue_characters), 1)
    + ToSep(sep=' ')
    + Rep(Any(valid_sequence_characters))
    + ToEol()
)
# A whole record: one candidate line, then any number of candidate or
# ignored lines in any order, optionally followed by a single "#".
saf_record = Group(
    "saf_record",
    candidate_line
    + Rep(Alt(candidate_line, ignored_line))
    + Opt(Str("#"))
)
69