1
2
3
4
5
6 """Martel based parser to read NBRF formatted files.
7
This is a huge regular expression for NBRF, built using
the 'regular expressions on steroids' capabilities of Martel.
10
11 http://www-nbrf.georgetown.edu/pirwww/pirhome.shtml
12 """
13
14
15 import string
16
17
18 import Martel
19 from Martel import RecordReader
20 from Martel import Str
21 from Martel import AnyEol, UntilEol
22 from Martel import ToEol
23 from Martel import Group
24 from Martel import Alt, Opt
25 from Martel import Rep
26 from Martel import Rep1
27 from Martel import Any
28 from Martel import AnyBut
29 from Martel import UntilSep
30
31 from Bio.NBRF.ValSeq import valid_sequence_dict
32
# One literal-string pattern per key of valid_sequence_dict.
# A list comprehension is used instead of map(): on Python 3 map() returns a
# one-shot iterator, which the Alt(*...) call below would consume and leave
# the module-level name `sequence_types` empty for any later reader.  On
# Python 2 the result is the same list that map() produced.
sequence_types = [Str(type_code) for type_code in valid_sequence_dict.keys()]

# Group tagged "sequence_type": an alternation over all the patterns above.
sequence_type = Group("sequence_type", Alt(*sequence_types))
# First line of a record, tagged "name_line":
#     ">" <sequence_type> ";" <rest of line, tagged "sequence_name"> <eol>
name_line = Group(
    "name_line",
    Str(">")
    + sequence_type
    + Str(";")
    + UntilEol("sequence_name")
    + AnyEol(),
)
41
# Second line of a record: everything up to the line ending is tagged
# "comment", followed by the line ending itself.
comment_line = (
    UntilEol("comment")
    + AnyEol()
)
43
44
45
# Characters that may not appear in sequence text: the "*" character
# (0x2a), line feed (10) and carriage return (13).
excluded_chars = "*\n\r"
47
# One or more characters, none of which is in excluded_chars,
# tagged "sequence_text".
sequence_text = Group(
    "sequence_text",
    Rep1(AnyBut(excluded_chars)),
)

# A sequence line, tagged "sequence_line": sequence text followed by a
# line ending.
sequence_line = Group(
    "sequence_line",
    sequence_text + AnyEol(),
)
52
# Final line of a record, tagged "sequence_final_line": text up to the "*"
# separator (tagged "sequence_final_text"), the "*" itself, then one or
# more line endings.
sequence_final_line = Group(
    "sequence_final_line",
    UntilSep("sequence_final_text", "*")
    + Str("*")
    + Rep1(AnyEol()),
)
56
# Zero or more sequence lines, tagged "sequence_block".
sequence_block = Group(
    "sequence_block",
    Rep(sequence_line),
)

# A complete record: name line, comment line, the block of sequence lines,
# and the final line that ends with "*".
nbrf_record = (
    name_line
    + comment_line
    + sequence_block
    + sequence_final_line
)
59