1
2
3
4 """Martel based parser to read LocusLink flat files.
5
6 This is a huge regular expression for LocusLink,
7 built using the 'regular expressions on steroids' capabilities of
8 Martel.
9
10 A description of the format can be found in the 'ligand.doc' file
11 from the Ligand distribution, available from:
12
13 http://www.ncbi.nih.gov/LocusLink
14
15
16 """
17
18
19 from Martel import Str
20 from Martel import Str1
21 from Martel import Alt
22 from Martel import Rep
23 from Martel import Group
24 from Martel import ToEol
25 from Martel import AnyEol
26 from Martel import Any
27 from Martel import Word
28 from Martel import Opt
29 from Martel import AssertNot
30
31 from Martel import RecordReader
32
33
# Numeric layout constant; nothing in the visible part of this module
# reads it.
INDENT = 12

# Two spellings of "a repeated space character": one built from a
# one-character class, one from a one-character literal string.
white_space = Rep(Any(" "))
blank_spaces = Rep(Str1(" "))

# A literal period.
point = Str1(".")
# Line tags recognised in a LocusLink record, in the order the original
# author listed them.  The order is preserved because the list is later
# expanded positionally into Str(...).
locus_keys = [
    'LOCUSID', 'LOCUS_CONFIRMED', 'LOCUS_TYPE', 'ORGANISM', 'STATUS',
    'NM', 'NP', 'CDD', 'PRODUCT', 'ASSEMBLY',
    'CONTIG', 'EVID', 'XM', 'XP',
    'ACCNUM', 'TYPE', 'PROT',
    'OFFICIAL_SYMBOL', 'OFFICIAL_GENE_NAME', 'PREFERRED_PRODUCT',
    'ALIAS_SYMBOL', 'SUMMARY', 'CHR', 'STS', 'COMP',
    'ALIAS_PROT', 'UNIGENE', 'BUTTON', 'LINK', 'OMIM',
    'MAP', 'MAPLINK', 'ECNUM', 'PROTOTYPE',
    'DB_DESCR', 'DB_LINK',
    'PMID', 'GRIF', 'SUBFUNC', 'GO', 'EXTANNOT',
]
84
# Tags of the lines that are grouped into an accession-number block
# rather than being matched as free-standing lines.
accnum_block_keys = ['ACCNUM', 'TYPE', 'PROT']
# Key matchers built from the tag lists above: each list is expanded
# into the positional arguments of Str.
accnum_block_key = Str(*accnum_block_keys)
valid_locus_key = Str(*locus_keys)

# Literal tag prefixes that introduce the phenotype and database blocks.
phenotype = Str1('PHENOTYPE')
db = Str1('DB')
105
107 return Group( entry_name, \
108 define_locus_line( entry_tag ))
109
# Accession block, tagged 'accnum_block': an ACCNUM line, then a TYPE
# line, then a PROT line wrapped in Opt.
accnum_block = Group(
    'accnum_block',
    define_locus_line('ACCNUM')
    + define_locus_line('TYPE')
    + Opt(define_locus_line('PROT')))
114
# Phenotype block, tagged 'phenotype_block': a PHENOTYPE line followed
# by a PHENOTYPE_ID line wrapped in Opt.
phenotype_block = Group(
    'phenotype_block',
    define_locus_line('PHENOTYPE')
    + Opt(define_locus_line('PHENOTYPE_ID')))
118
# Database block, tagged 'db_block': a DB_DESCR line immediately
# followed by a DB_LINK line; both are required.
db_block = Group(
    'db_block',
    define_locus_line('DB_DESCR') + define_locus_line('DB_LINK'))
122
# A record opens with a line that starts with '>>'; the rest of that
# line is consumed by ToEol().
begin_record_line = Str1('>>') + ToEol()

# A generic "KEY: value" line, tagged 'locus_line'.
#
# Lines that belong to one of the structured blocks are excluded with
# negative lookaheads so that they can only be consumed by
# accnum_block, phenotype_block or db_block.
#
# The accession-block lookahead requires the tag to be followed by the
# ':' delimiter (with optional blanks in between).  A bare prefix test
# on 'ACCNUM' / 'TYPE' / 'PROT' would also reject the 'PROTOTYPE' key,
# which is listed in locus_keys and is matched by no other alternative
# of locus_record.
#
# The 'PHENOTYPE' and 'DB' lookaheads are left as prefix tests, which
# keeps every PHENOTYPE* and DB* line out of this rule.
locus_line = Group(
    'locus_line',
    white_space
    + AssertNot(accnum_block_key + white_space + Str1(':'))
    + AssertNot(phenotype)
    + AssertNot(db)
    + Word()
    + white_space
    + Str1(':')
    + ToEol())

# A whole record: the '>>' header line, then any mix of plain lines and
# structured blocks.  Alt lists locus_line first.
locus_record = begin_record_line + Rep(
    Alt(locus_line, accnum_block, phenotype_block, db_block))
128
129