1 """
2 A Martel format to parse the NLM's XML format for Medline.
3
4 http://www.nlm.nih.gov/databases/dtd/nlmmedline_010319.dtd
5 http://www.nlm.nih.gov/databases/dtd/nlmmedlinecitation_010319.dtd
6 http://www.nlm.nih.gov/databases/dtd/nlmcommon_010319.dtd
7
8 Formats:
9 citation_format Format for one MedlineCitation.
10 format Format for a whole file.
11
12 """
13
14 import warnings
# Emit a DeprecationWarning as soon as the module is imported.
warnings.warn(
    "Bio.Medline.NLMMedlineXML was deprecated, as it does not seem to be "
    "able to parse recent Medline XML files. If you want to continue to "
    "use this module, please get in contact with the Biopython developers "
    "at biopython-dev@biopython.org to avoid permanent removal of this "
    "module from Biopython",
    DeprecationWarning,
)
16
17 import sys
18
19 from Martel import *
20 from Martel import RecordReader
21
# Handle on this very module object.  The element helpers in this file
# setattr() every expression they build onto it, which is how names such
# as LastName or Abstract become module-level globals.
self = sys.modules[__name__]
23
37
40
def simple_elem(element, *attrs):
    """simple_elem(element, *attrs)

    Create a Martel Expression in this module's namespace that will
    recognize an XML element in the form of:
        <element>data</element>

    The whole element must be on a single line.  The Expression will
    be created in the module's namespace with the same name as the
    element.

    Arguments:
    element -- tag name; also used as the name of the Group wrapping the
               element's text and as the module attribute that receives
               the finished expression.
    attrs   -- optional attribute names, forwarded unchanged to
               _start_elem when the opening tag is built.

    Returns None; the expression is published with setattr() on the
    module object.

    """
    # NOTE(review): _start_elem and _end_elem are module-level helpers that
    # are expected to be defined before this function is first called.
    start, end = _start_elem(element, *attrs), _end_elem(element)

    # The element's text: one or more characters that are not "<".
    group_name = element
    group_expression = Re(r"[^<]+")

    # opening tag, named text, closing tag, then an end-of-line
    expr = (
        start
        + Group(group_name, group_expression)
        + end
        + AnyEol()
    )
    setattr(self, element, expr)
62
63
64
65
66
def group_elem(element, expr, *attrs):
    """group_elem(element, expr, *attrs)

    Create a Martel Expression in this module's namespace for an XML
    element that wraps other content:
        <element>
        ...whatever expr matches...
        </element>

    Three names are involved, all stored on the module object:
    NAME_start -- opening tag followed by AnyEol()
    NAME_end   -- closing tag followed by AnyEol()
    NAME       -- Group(NAME, NAME_start + expr + NAME_end)

    If NAME_start or NAME_end already exists in the module it is reused
    as-is instead of being generated, so a caller can define a custom
    start or end expression before calling group_elem.  When an existing
    NAME_start is reused, attrs is not consulted.

    Arguments:
    element -- tag name (NAME above).
    expr    -- Martel Expression for the content between the tags.
    attrs   -- optional attribute names, forwarded unchanged to
               _start_elem when the default opening tag is built.

    Returns None.

    """
    start_name, end_name = "%s_start" % element, "%s_end" % element

    # Prefer a start expression already present in the module; otherwise
    # build the default one and publish it under NAME_start.
    start_expr = getattr(self, start_name, None)
    if start_expr is None:
        start_expr = _start_elem(element, *attrs) + AnyEol()
        setattr(self, start_name, start_expr)

    # Same policy for the end expression.
    end_expr = getattr(self, end_name, None)
    if end_expr is None:
        end_expr = _end_elem(element) + AnyEol()
        setattr(self, end_name, end_expr)

    group_expr = Group(element, start_expr + expr + end_expr)
    setattr(self, element, group_expr)
81
82
83
84
85
86
87
88
89
# Elements that make up a person's or a group's name.  Each one becomes a
# module-level expression of the same name via simple_elem.
elements = [
    "FirstName", "MiddleName", "LastName",
    "Initials", "Suffix",
    "CollectiveName",
]
# Plain loop: simple_elem is called only for its side effect, so there is
# no point in building a throwaway list of None values.
for e in elements:
    simple_elem(e)

# LastName is required; FirstName (optionally followed by MiddleName),
# Initials and Suffix are each optional and must appear in this order.
personal_name = (
    LastName
    + Opt(FirstName + Opt(MiddleName))
    + Opt(Initials)
    + Opt(Suffix)
)

# An author is either a personal name or a CollectiveName.
author_name = Alt(personal_name, CollectiveName)
101
102
103
104
# Elements used to express dates and times.  Each one becomes a
# module-level expression of the same name via simple_elem.
elements = [
    "Year", "Month", "Day",
    "Season", "MedlineDate",
    "Hour", "Minute", "Second",
]
# Plain loop: simple_elem is called only for its side effect, so there is
# no point in building a throwaway list of None values.
for e in elements:
    simple_elem(e)

# Year, Month and Day are required.  A time may follow: Hour, then
# optionally Minute, then optionally Second.
normal_date = (
    Year + Month + Day
    + Opt(Hour + Opt(Minute + Opt(Second)))
)

# Either a Year optionally followed by (Month with an optional Day, or a
# Season), or else a single MedlineDate element.
pub_date = Alt(
    Year + Opt(Alt(Month + Opt(Day), Season)),
    MedlineDate,
)
114
115
# Abstract: the AbstractText, optionally followed by CopyrightInformation.
simple_elem("AbstractText")
simple_elem("CopyrightInformation")
group_elem(
    "Abstract",
    AbstractText + Opt(CopyrightInformation),
)
119
120
121
122
# Identifier elements.
simple_elem("PMID")
simple_elem("NlmUniqueID")

# MeSH headings.  "MajorTopicYN" is passed as an attribute name for both
# Descriptor and SubHeading.  A MeshHeading is one Descriptor followed by
# any number of SubHeadings; the list needs at least one MeshHeading.
simple_elem("Descriptor", "MajorTopicYN")
simple_elem("SubHeading", "MajorTopicYN")
group_elem("MeshHeading", Descriptor + Rep(SubHeading))
group_elem("MeshHeadingList", Rep1(MeshHeading))

# Pagination: either StartPage with optional EndPage and optional
# MedlinePgn, or MedlinePgn on its own.
simple_elem("StartPage")
simple_elem("EndPage")
simple_elem("MedlinePgn")
group_elem(
    "Pagination",
    Alt(
        StartPage + Opt(EndPage) + Opt(MedlinePgn),
        MedlinePgn,
    ),
)
134
# Leaf elements used by the groups below.
simple_elem("Affiliation")
simple_elem("Language")
simple_elem("PublicationType")

# An Author is an author_name with an optional Affiliation.  AuthorList
# needs at least one Author; "CompleteYN" is passed as an attribute name.
group_elem("Author", author_name + Opt(Affiliation))
group_elem("AuthorList", Rep1(Author), "CompleteYN")

# PublicationTypeList needs at least one PublicationType.
group_elem("PublicationTypeList", Rep1(PublicationType))
# Title-like and bibliographic leaf elements.
simple_elem("Title")
simple_elem("ArticleTitle")
simple_elem("VernacularTitle")
simple_elem("CollectionTitle")
simple_elem("Volume")
simple_elem("Publisher")

# PubDate wraps the pub_date expression in <PubDate> tags.
group_elem("PubDate", pub_date)

# Book: PubDate, Publisher and Title are required, in that order; then an
# optional AuthorList, CollectionTitle and Volume.
group_elem(
    "Book",
    (
        PubDate
        + Publisher
        + Title
        + Opt(AuthorList)
        + Opt(CollectionTitle)
        + Opt(Volume)
    ),
)
# MedlineJournalInfo: only MedlineTA is required.  Country may precede it;
# MedlineCode and NlmUniqueID may follow it.
simple_elem("Country")
simple_elem("MedlineTA")
simple_elem("MedlineCode")
group_elem(
    "MedlineJournalInfo",
    (
        Opt(Country)
        + MedlineTA
        + Opt(MedlineCode)
        + Opt(NlmUniqueID)
    ),
)
# Leaf elements for journal metadata.
simple_elem("DateOfElectronicPublication")
simple_elem("ISSN")
simple_elem("Coden")
simple_elem("ISOAbbreviation")
simple_elem("Issue")

# JournalIssue: optional Volume and Issue, then a required PubDate.
group_elem("JournalIssue", Opt(Volume) + Opt(Issue) + PubDate)

# Journal: only JournalIssue is required; ISSN may precede it, and Coden,
# Title and ISOAbbreviation may follow it in that order.
group_elem(
    "Journal",
    (
        Opt(ISSN)
        + JournalIssue
        + Opt(Coden)
        + Opt(Title)
        + Opt(ISOAbbreviation)
    ),
)
168
# Grants: every child of Grant is optional.  GrantList needs at least one
# Grant; "CompleteYN" is passed as an attribute name.
simple_elem("GrantID")
simple_elem("Acronym")
simple_elem("Agency")
group_elem(
    "Grant",
    Opt(GrantID) + Opt(Acronym) + Opt(Agency),
)
group_elem("GrantList", Rep1(Grant), "CompleteYN")

# Data banks: a DataBank is a DataBankName with an optional
# AccessionNumberList (one or more AccessionNumber).  DataBankList needs
# at least one DataBank; "CompleteYN" is passed as an attribute name.
simple_elem("AccessionNumber")
simple_elem("DataBankName")
group_elem("AccessionNumberList", Rep1(AccessionNumber))
group_elem(
    "DataBank",
    DataBankName + Opt(AccessionNumberList),
)
group_elem("DataBankList", Rep1(DataBank), "CompleteYN")
179
# Article: starts with either a Journal or a Book, followed by the
# children below in this fixed order.  Required ones are ArticleTitle,
# Pagination, at least one Language, and PublicationTypeList.
group_elem(
    "Article",
    (
        Alt(Journal, Book)
        + ArticleTitle
        + Pagination
        + Opt(Abstract)
        + Opt(Affiliation)
        + Opt(AuthorList)
        + Rep1(Language)
        + Opt(DataBankList)
        + Opt(GrantList)
        + PublicationTypeList
        + Opt(VernacularTitle)
        + Opt(DateOfElectronicPublication)
    ),
)

# NCBIArticle: PMID, then the Article, then optional MedlineJournalInfo.
group_elem(
    "NCBIArticle",
    PMID + Article + Opt(MedlineJournalInfo),
)
195
196
197
198
199
200
201
202
203
204
205
206
simple_elem("MedlineID")

# Shared body for the reference-style groups: a required RefSource, then
# an optional MedlineID and an optional Note.
simple_elem("RefSource")
simple_elem("Note")
Ref_template = (
    RefSource
    + Opt(MedlineID)
    + Opt(Note)
)
212
213
214
215
216
# Every child of CommentsCorrections has the same body, Ref_template, and
# differs only in its tag name.  They are declared here in the order in
# which CommentsCorrections expects them.
group_elem("CommentOn", Ref_template)
group_elem("CommentIn", Ref_template)
group_elem("ErratumIn", Ref_template)
group_elem("RepublishedFrom", Ref_template)
group_elem("RepublishedIn", Ref_template)
group_elem("RetractionOf", Ref_template)
group_elem("RetractionIn", Ref_template)
group_elem("UpdateIn", Ref_template)
group_elem("UpdateOf", Ref_template)
group_elem("SummaryForPatientsIn", Ref_template)
group_elem("OriginalReportIn", Ref_template)

# Each kind may occur any number of times (including zero), but the kinds
# must appear in this fixed order.
group_elem(
    "CommentsCorrections",
    (
        Rep(CommentOn)
        + Rep(CommentIn)
        + Rep(ErratumIn)
        + Rep(RepublishedFrom)
        + Rep(RepublishedIn)
        + Rep(RetractionOf)
        + Rep(RetractionIn)
        + Rep(UpdateIn)
        + Rep(UpdateOf)
        + Rep(SummaryForPatientsIn)
        + Rep(OriginalReportIn)
    ),
)
simple_elem("NumberOfReferences")

# People who are the subject of the citation: one or more personal names.
group_elem("PersonalNameSubject", personal_name)
group_elem("PersonalNameSubjectList", Rep1(PersonalNameSubject))

# Gene symbols: one or more GeneSymbol.
simple_elem("GeneSymbol")
group_elem("GeneSymbolList", Rep1(GeneSymbol))

# Chemicals: each Chemical is a CASRegistryNumber followed by a
# NameOfSubstance; the list needs at least one Chemical.
simple_elem("CASRegistryNumber")
simple_elem("NameOfSubstance")
group_elem("Chemical", CASRegistryNumber + NameOfSubstance)
group_elem("ChemicalList", Rep1(Chemical))
# Stand-alone leaf elements.  The second argument, where present, is
# passed to simple_elem as an attribute name.
simple_elem("CitationSubset")
simple_elem("SpaceFlightMission")
simple_elem("GeneralNote", "Owner")
simple_elem("OtherID", "Source")
simple_elem("Keyword", "MajorTopicYN")

# Investigators: a personal name with an optional Affiliation; the list
# needs at least one Investigator.
group_elem("Investigator", personal_name + Opt(Affiliation))
group_elem("InvestigatorList", Rep1(Investigator))

# KeywordList: one or more Keyword; "Owner" is passed as an attribute name.
group_elem("KeywordList", Rep1(Keyword), "Owner")

# OtherAbstract has the same body as Abstract; "Type" is passed as an
# attribute name.
group_elem(
    "OtherAbstract",
    AbstractText + Opt(CopyrightInformation),
    "Type",
)
# The three citation dates all share the normal_date body and differ only
# in their tag name.
group_elem("DateCreated", normal_date)
group_elem("DateCompleted", normal_date)
group_elem("DateRevised", normal_date)
# One complete citation record.  Children must appear in exactly this
# order.  Required: MedlineID, DateCreated, Article, MedlineJournalInfo.
# Opt() marks an optional child and Rep() one that may repeat any number
# of times.  "Owner" is passed as an attribute name for the opening tag.
group_elem(
    "MedlineCitation",
    (
        MedlineID
        + Opt(PMID)
        + DateCreated
        + Opt(DateCompleted)
        + Opt(DateRevised)
        + Article
        + MedlineJournalInfo
        + Opt(ChemicalList)
        + Rep(CitationSubset)
        + Opt(CommentsCorrections)
        + Opt(GeneSymbolList)
        + Opt(MeshHeadingList)
        + Opt(NumberOfReferences)
        + Opt(PersonalNameSubjectList)
        + Rep(OtherID)
        + Rep(OtherAbstract)
        + Rep(KeywordList)
        + Rep(SpaceFlightMission)
        + Opt(InvestigatorList)
        + Rep(GeneralNote)
    ),
    "Owner",
)
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# Custom start/end expressions.  They must exist before the group_elem
# calls below, because group_elem reuses a NAME_start / NAME_end that is
# already present in the module instead of generating the default one.

# Closing tag of the whole set: the end-of-line after it is optional.
MedlineCitationSet_end = Str("</MedlineCitationSet>") + Opt(AnyEol())

# DeleteCitation tags may be preceded by any run of spaces or tabs.
space = Any(" \t")
DeleteCitation_start = (
    Rep(space)
    + Str("<DeleteCitation>")
    + AnyEol()
)
DeleteCitation_end = (
    Rep(space)
    + Str("</DeleteCitation>")
    + AnyEol()
)

# DeleteCitation holds one or more MedlineIDs, or else one or more PMIDs.
group_elem(
    "DeleteCitation",
    Alt(Rep1(MedlineID), Rep1(PMID)),
)

# The whole set: any number of citations, then an optional DeleteCitation.
group_elem(
    "MedlineCitationSet",
    Rep(MedlineCitation) + Opt(DeleteCitation),
)
308
309
310
311
312
313
314
315
316
317
318
319
# The DOCTYPE declaration: the literal "<!DOCTYPE", one or more characters
# that are not ">", the closing ">", and an end-of-line.
DOCTYPE = (
    Str("<!DOCTYPE")
    + Re(r"[^>]+")
    + Str(">")
    + AnyEol()
)

# Format for a single record: exactly one MedlineCitation.
citation_format = MedlineCitation
323
324
325
326
327
328
# Header: the DOCTYPE declaration followed by the opening
# <MedlineCitationSet> tag (MedlineCitationSet_start is created by
# group_elem("MedlineCitationSet", ...)), wrapped in a Group named "header".
header_format = Group(
    "header",
    DOCTYPE + MedlineCitationSet_start,
)

# Footer: an optional DeleteCitation, then the closing set tag.
footer_format = Opt(DeleteCitation) + MedlineCitationSet_end

# Format for a whole file.  After the first two arguments, each row pairs
# an expression with a RecordReader class and the argument tuple for it.
# NOTE(review): presumably CountLines with (4,) treats the first four
# lines as the header, and EndsWith cuts a record at each line ending in
# "</MedlineCitation>" -- confirm against the Martel documentation.
format = HeaderFooter(
    None,
    {},
    header_format, RecordReader.CountLines, (4,),
    citation_format, RecordReader.EndsWith, ("</MedlineCitation>",),
    footer_format, RecordReader.Everything, (),
)
345