1
2
3
4
5
6 """Martel based parser to read SAF formatted files.
7
8 This is a huge regular expression for SAF, built using
9 the 'regular expressions on steroids' capabilities of Martel.
10
11 http://www.embl-heidelberg.de/predictprotein/Dexa/optin_safDes.html
12
13
14 Notes:
15 Just so I remember -- the new end of line syntax is:
16 New regexp syntax - \R
17 \R means "\n|\r\n?"
18 [\R] means "[\n\r]"
19
20 This helps us have endlines be consistent across platforms.
21
22 """
23
24
25
26 import string
27
28
29 import Martel
30 from Martel import RecordReader
31 from Martel import Str
32 from Martel import AnyEol
33 from Martel import ToEol
34 from Martel import Group
35 from Martel import Alt
36 from Martel import Rep
37 from Martel import Rep1
38 from Martel import Any
39 from Martel import AnyBut
40 from Martel import RepN
41 from Martel import Opt
42 from Martel import ToSep
43 from Martel.Expression import Assert
44
45
46
47
48
49
50
51
52
53
# --- Character classes used by the line patterns below.
digits = "0123456789"
valid_sequence_characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-. \t"
white_space = "\t "
# Digits, blanks and the period character (0x2e).
valid_residue_characters = digits + white_space + "."

# A line made up solely of residue-number characters (at least one),
# terminated by an end of line.
residue_number_line = Group(
    "residue_number_line",
    Rep1(Any(valid_residue_characters)) + AnyEol(),
)

# A "#" followed by everything up to the end of the line.
comment_line = Group(
    "comment_line",
    Str("#") + ToEol(),
)

# Lines that carry no alignment data: comments or residue-number rulers.
ignored_line = Group(
    "ignored_line",
    Alt(comment_line, residue_number_line),
)

# A line holding a name, a space separator and sequence characters.
# NOTE(review): the second argument (1) to Assert is presumably the
# "invert" flag, making both assertions negative lookaheads, i.e. the line
# must not start with "#" nor with a residue-number character -- confirm
# against Martel.Expression.Assert.
candidate_line = Group(
    "candidate_line",
    Assert(Str("#"), 1)
    + Assert(Any(valid_residue_characters), 1)
    + ToSep(sep=" ")
    + Rep(Any(valid_sequence_characters))
    + ToEol(),
)

# A record: one candidate line, then any mix of candidate and ignored
# lines, optionally followed by a single "#".
saf_record = Group(
    "saf_record",
    candidate_line
    + Rep(Alt(candidate_line, ignored_line))
    + Opt(Str("#")),
)
74