Package Bio :: Package Saf :: Module saf_format
[hide private]
[frames] | no frames]

Source Code for Module Bio.Saf.saf_format

 1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
 2  # This code is part of the Biopython distribution and governed by its 
 3  # license.  Please see the LICENSE file that should have been included 
 4  # as part of this package. 
 5   
 6  """Martel based parser to read SAF formatted files. 
 7   
 8  This is a huge regular expression for SAF, built using 
 9  the 'regular expressions on steroids' capabilities of Martel. 
10   
11  http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html 
12   
13   
14  Notes: 
15  Just so I remember -- the new end of line syntax is: 
16    New regexp syntax - \R 
17       \R    means "\n|\r\n?" 
18       [\R]  means "[\n\r]" 
19   
20  This helps us have endlines be consistent across platforms. 
21   
22  """ 
23  # standard library 
24  #http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html 
25   
26  import string 
27   
28  # Martel 
29  import Martel 
30  from Martel import RecordReader 
31  from Martel import Str 
32  from Martel import AnyEol 
33  from Martel import ToEol 
34  from Martel import Group 
35  from Martel import Alt 
36  from Martel import Rep 
37  from Martel import Rep1 
38  from Martel import Any 
39  from Martel import AnyBut 
40  from Martel import RepN 
41  from Martel import Opt 
42  from Martel import ToSep 
43  from Martel.Expression import Assert 
44   
45   
46   
47  # --- first set up some helper constants and functions 
48  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
49  # This code is part of the Biopython distribution and governed by its 
50  # license.  Please see the LICENSE file that should have been included 
51  # as part of this package. 
52   
53   
# ---------------------------------------------------------------------------
# Character classes used by the line-level expressions below.
# ---------------------------------------------------------------------------
digits = "0123456789"
white_space = "\t "

# Characters accepted in the sequence part of an alignment line: ASCII
# letters, '-', '.', blank and tab.
valid_sequence_characters = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "-. \t"
)

# Characters that make up a residue-numbering ruler line: digits, blanks,
# tabs and the period (0x2e).
valid_residue_characters = digits + white_space + "."

# ---------------------------------------------------------------------------
# Line-level expressions.
# ---------------------------------------------------------------------------

# A line consisting of one or more ruler characters followed by an
# end-of-line.
residue_number_line = Group(
    "residue_number_line",
    Rep1(Any(valid_residue_characters)) + AnyEol(),
)

# A line that starts with '#'; ToEol() takes the remainder of the line.
comment_line = Group(
    "comment_line",
    Str("#") + ToEol(),
)

# Either of the two line kinds above, tagged as "ignored_line".
ignored_line = Group(
    "ignored_line",
    Alt(comment_line, residue_number_line),
)

# A line carrying alignment data.
# NOTE(review): the second argument (1) to Assert is presumably Martel's
# "invert" flag, making both asserts negative lookaheads -- i.e. the line
# must not begin with '#' nor with a ruler character.  Confirm against
# Martel.Expression.Assert.
# After the asserts: text up to the first blank (ToSep), then any run of
# sequence characters, then the rest of the line.
candidate_line = Group(
    "candidate_line",
    Assert(Str("#"), 1)
    + Assert(Any(valid_residue_characters), 1)
    + ToSep(sep=" ")
    + Rep(Any(valid_sequence_characters))
    + ToEol(),
)

# ---------------------------------------------------------------------------
# Record-level expression.
# ---------------------------------------------------------------------------

# A record opens with a candidate line, continues with any mix of candidate
# and ignored lines, and may end with a single trailing '#'.
saf_record = Group(
    "saf_record",
    candidate_line
    + Rep(Alt(candidate_line, ignored_line))
    + Opt(Str("#")),
)
74