1 """Format from EMBL Nucleotide Sequence Database Release 65, December 2000
2
3 """
4
5 import warnings
# Import-time deprecation notice for the whole Bio.expressions package.
# The message is split across adjacent string literals only to keep the
# source lines short; the resulting text is unchanged.
warnings.warn(
    "Bio.expressions was deprecated, as it does not work with recent "
    "versions of mxTextTools. If you want to continue to use this module, "
    "please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this module "
    "from Biopython",
    DeprecationWarning)
7
8
9
10 import Martel
11 from Martel import RecordReader, Time
12 from Bio import Std
13
14 from Bio.expressions.swissprot import sprot38
15
# Separator expression shared by several line definitions below.
whitespace = Martel.Spaces()

# Pattern for the division field of the ID line: the explicitly listed
# three-letter codes, followed by a catch-all for any three capital letters.
# The alternatives are joined with "|" in the same order as before.
divisions = Martel.Re("|".join((
    "EST", "PHG", "FUN", "GSS", "HTC", "HTG", "HUM", "INV", "ORG", "MAM",
    "VRT", "PLN", "PRO", "ROD", "SYN", "STS", "UNC", "VRL",
    "[A-Z]{3}",
)))
23
24
# ID line: entry name, data class, molecule type, division and length in BP.
ID_line = (
    Martel.Str("ID ")
    # The entry name is tagged as the primary EMBL identifier.
    + Std.dbid(
        Martel.UntilSep("entry_name", " "),
        {"type": "primary", "dbname": "embl"})
    + whitespace
    + Martel.ToSep("dataclass", ";")
    + whitespace
    # The molecule keyword determines which alphabet attribute is attached.
    + Martel.Group(
        "molecule",
        Std.alphabet(Martel.Str("DNA", "circular DNA"),
                     {"alphabet": "iupac-ambiguous-dna"})
        | Std.alphabet(Martel.Str("RNA", "circular RNA"),
                       {"alphabet": "iupac-ambiguous-rna"})
        | Std.alphabet(Martel.Str("XXX"),
                       {"alphabet": "nucleotide"}))
    + Martel.Str("; ")
    + Martel.Group("division", divisions)
    + Martel.Str("; ")
    + Martel.Digits("length")
    + Martel.Str(" BP.")
    + Martel.AnyEol()
)
44
45
46
# A single accession number (tagged as an EMBL accession id) together with
# the ";" that terminates it.
accession = (
    Std.dbid(
        Martel.UntilSep("accession", ";"),
        {"type": "accession", "dbname": "embl"})
    + Martel.Str(";")
)

# AC line: one accession, then any number of further accessions each
# preceded by a single space.
AC_line = (
    Martel.Str("AC ")
    + accession
    + Martel.Rep(Martel.Str(" ") + accession)
    + Martel.AnyEol()
)

# One or more consecutive AC lines.
AC_block = Martel.Rep1(AC_line)
55
56
# SV line: "<accession>.<version>", captured together as "sequence_version".
SV_line = (
    Martel.Str("SV ")
    + Martel.Group(
        "sequence_version",
        Martel.ToSep("accession", ".") + Martel.Digits("version"))
    + Martel.AnyEol()
)
62
63
64
# Date expression built from the day / month-name / year template.
date = Time.make_expression("%(day)-%(Jan)-%(year)")

# First DT line: creation date and the release it was created in.
DT_created_line = (
    Martel.Str("DT ")
    + Martel.Group("date_created", date)
    + Martel.Str(" (Rel. ")
    + Martel.Digits("release_created")
    + Martel.Str(", Created)")
    + Martel.AnyEol()
)

# Second DT line: last-update date, its release, and the version number.
DT_updated_line = (
    Martel.Str("DT ")
    + Martel.Group("date_updated", date)
    + Martel.Str(" (Rel. ")
    + Martel.Digits("release_updated")
    + Martel.Str(", Last updated, Version ")
    + Martel.Digits("version_number")
    + Martel.Str(")")
    + Martel.AnyEol()
)

# The DT block is exactly the "created" line followed by the "updated" line.
DT_block = DT_created_line + DT_updated_line
84
85
# DE line: free-text description up to the end of the line.
DE_line = (
    Martel.Str("DE ")
    + Std.description(Martel.UntilEol("description"))
    + Martel.AnyEol()
)

# One or more DE lines, grouped as "description_block".
DE_block = Std.description_block(
    Martel.Group("description_block", Martel.Rep1(DE_line)))
92
93
# KW line: the remainder of the line is captured as "keyword_data".
KW_line = Martel.Str("KW ") + Martel.ToEol("keyword_data")

# One or more consecutive KW lines.
KW_block = Martel.Rep1(KW_line)
97
98
# The OS, OC and OG blocks are taken unchanged from the sprot38 definitions.
OS_block = sprot38.OS_block
OC_block = sprot38.OC_block
OG_block = sprot38.OG_block

# An organism entry: OS block, OC block, and an optional OG block.
organism = Martel.Group(
    "organism",
    OS_block + OC_block + Martel.Opt(OG_block))
111
112
113
114
115
116
117
118
# Reference line and block definitions are reused from sprot38; only the
# RX block is assembled locally from sprot38's single RX line.
RN_line = sprot38.RN
RC_block = sprot38.RC_block
RP_line = sprot38.RP

RX_line = sprot38.RX
RX_block = Martel.Group("RX_block", Martel.Rep1(RX_line))

RA_block = sprot38.RA_block
RT_block = sprot38.RT_block
RL_block = sprot38.RL_block

# A reference: RN is required; RC, RP and RX are optional; RA, RT and RL
# are required, in that order.
reference = Martel.Group(
    "reference",
    RN_line
    + Martel.Opt(RC_block)
    + Martel.Opt(RP_line)
    + Martel.Opt(RX_block)
    + RA_block
    + RT_block
    + RL_block)
138
139
# Database cross-reference block, reused from sprot38.
DR_block = sprot38.DR_block

# Feature table header: the column-title line followed by a bare "FH" line.
FH_block = (
    Martel.Str("FH Key Location/Qualifiers")
    + Martel.AnyEol()
    + Martel.Str("FH")
    + Martel.AnyEol()
)
147
148
149
150
151
152
# db_xref qualifier: db_xref="<dbname>:<dbid>".  The quoted value is tagged
# as the qualifier description and, inside it, as a dbxref with separate
# database-name and database-id parts.
fq_dbxref = (
    Std.feature_qualifier_name(Martel.Str("db_xref"))
    + Martel.Str('=')
    + Std.feature_qualifier_description(
        Martel.Str('"')
        + Std.dbxref(
            Std.dbxref_dbname(Martel.UntilSep(None, ":"))
            + Martel.Str(":")
            + Std.dbxref_dbid(Martel.UntilSep(None, '"')))
        + Martel.Str('"'))
    + Martel.AnyEol()
)
162
163
# Any other qualifier of the form name=value.  The leading Assert checks for
# "word=" before the name and "=" are matched for real.
fq_generic = (
    Martel.Assert(Martel.Word() + Martel.Str("="))
    + Std.feature_qualifier_name(Martel.Word())
    + Martel.Str("=")
    + Std.feature_qualifier_description(Martel.UntilEol())
    + Martel.AnyEol()
    # Continuation lines: further FT lines are folded into the description
    # as long as they pass the AssertNot checks below (not starting with
    # "/", or not looking like "/name=").
    + Martel.Rep(
        Martel.Str("FT ")
        + (Martel.AssertNot(Martel.Str("/"))
           | Martel.AssertNot(Martel.Re(r"/\w+=")))
        + Std.feature_qualifier_description(Martel.UntilEol())
        + Martel.AnyEol())
)
176
# A qualifier line starts with "FT /" and is either a db_xref qualifier or a
# generic name=value qualifier (db_xref is tried first).
feature_qualifier = Std.feature_qualifier(
    Martel.Str("FT /") + (fq_dbxref | fq_generic))

# One feature: the key and location on the first line, optional location
# continuation lines (FT lines that do not start with "/"), and then any
# number of qualifiers.
feature = Std.feature(
    Martel.Str("FT ")
    + Std.feature_name(Martel.UntilSep(sep=" "))
    + whitespace
    + Std.feature_location(Martel.UntilEol())
    + Martel.AnyEol()
    + Martel.Rep(
        Martel.Str("FT ")
        + Martel.AssertNot(Martel.Str("/"))
        + Std.feature_location(Martel.UntilEol())
        + Martel.AnyEol())
    + Martel.Rep(feature_qualifier)
)

# The feature table: zero or more features, flagged as using the "genbank"
# location style.
FT_block = Std.feature_block(
    Martel.Rep(feature),
    {"location-style": "genbank"})
197
198
199
200
201
# CC line: the remainder of the line is captured as "comment".
CC_line = Martel.Str("CC ") + Martel.ToEol("comment")

# One or more consecutive CC lines.
CC_block = Martel.Rep1(CC_line)

# Spacer line consisting of just "XX".
XX = Martel.Str("XX") + Martel.AnyEol()
208
209
# SQ header line: total length in BP followed by the A, C, G, T and "other"
# counts, each captured under its own name.
SQ_line = (
    Martel.Str("SQ Sequence ")
    + Martel.Digits("num_BP")
    + Martel.Str(" BP; ")
    + Martel.Digits("num_A")
    + Martel.Str(" A; ")
    + Martel.Digits("num_C")
    + Martel.Str(" C; ")
    + Martel.Digits("num_G")
    + Martel.Str(" G; ")
    + Martel.Digits("num_T")
    + Martel.Str(" T; ")
    + Martel.Digits("num_other")
    + Martel.Str(" other;")
    + Martel.AnyEol()
)
224
225
# One sequence data line: a fixed 65-character sequence field, then
# whitespace and the end position of the line.
SQ_data = (
    Martel.Str(" ")
    + Std.sequence(Martel.Re(".{65}"))
    + whitespace
    + Martel.Digits("end_position")
    + Martel.AnyEol()
)

# The sequence block: the SQ header followed by one or more data lines.
SQ_block = Std.sequence_block(SQ_line + Martel.Rep1(SQ_data))
233
234
# Record terminator line.
end = Martel.Str("//") + Martel.AnyEol()

# A complete entry.  Sections appear in the fixed order below, and an "XX"
# spacer line is accepted (but not required) after most of them.
record = Martel.Group(
    "record",
    ID_line
    + Martel.Opt(XX)
    + AC_block
    + Martel.Opt(XX)
    + SV_line
    + Martel.Opt(XX)
    + DT_block
    + Martel.Opt(XX)
    + DE_block
    + Martel.Opt(XX)
    + KW_block
    + Martel.Opt(XX)
    # At least one organism; references, DR and CC sections may be absent.
    + Martel.Rep1(organism + Martel.Opt(XX))
    + Martel.Rep(reference + Martel.Opt(XX))
    + Martel.Opt(DR_block + Martel.Opt(XX))
    + Martel.Rep(CC_block + Martel.Opt(XX))
    + FH_block
    + FT_block
    + Martel.Opt(XX)
    + SQ_block
    + end,
    {"format": "embl/65"})
262
# Whole-file expression: a "dataset" made of one or more records.
format_expression = Martel.Group(
    "dataset",
    Martel.Rep1(record),
    {"format": "embl/65"})

# Record-at-a-time variant of the same dataset: built with
# RecordReader.EndsWith and the "//\n" terminator, then each chunk is
# handled by the `record` expression.
format = Martel.ParseRecords(
    "dataset",
    {"format": "embl/65"},
    record,
    RecordReader.EndsWith,
    ("//\n",))
268