1
2
3 """
4 This file implements a SOAP interface into the Bibliographic database of
5 the European Bioinformatics Institute. It is a low-level interface and is
6 intended to be used by higher-level objects to build object representations
7 from data retrieved by this interface. This file needs a version of the
8 pywebsvcs package LATER than 0.97 available from www.sourceforge.net.
9 """
10
11 import sys
12 import SOAP
13 import copy
14
15
# SOAP.py builder switches. Presumably they make outgoing requests omit type
# information and namespace prefixes -- TODO confirm against the SOAP.py docs.
SOAP.Config.BuildWithNoType = 1
SOAP.Config.BuildWithNoNamespacePrefix = 1

# Namespace URI of the openBQS service (its use is not visible in this part
# of the file).
namespace = 'http://industry.ebi.ac.uk/openBQS'
20
22 """
This class provides low-level access to the EBI Bibliographic services exported
through SOAP. There exists an almost one-to-one mapping between its methods and
the RPCs available on the SOAP server.
26 """
27
36
38 if cid == -1:
39 return self.server.getBibRefCount()
40 else:
41 return self.server.getBibRefCount(cid)
42
43 - def find(self, cid, keywords, attrs, criteria):
48
50 if cid == -1:
51 raise 'no collection id'
52 self.server.resetRetrieval(cid)
53
55 if cid == -1:
56 raise 'no collection id'
57 return self.server.hasNext(cid)
58
60 if cid == -1:
61 raise 'no collection id'
62 return self.server.getNext(cid)
63
65 if cid == -1:
66 raise 'no collection id'
67 if cnt <= 0:
68 raise 'invalid count' + cnt
69 return self.server.getMore(cid, cnt)
70
72 if cid == -1:
73 raise 'no collection id (large result safeguard)'
74 return self.server.getAllIDs(cid)
75
77 if cid == -1:
78 raise 'no collection id (large result safeguard)'
79 return self.server.getAllBibRefs(cid)
80
82 return self.server.getById(id)
83
85 if cid == -1:
86 raise 'no collection id'
87 return self.server.exists(cid)
88
90 if cid == -1:
91 raise 'no collection id'
92 self.server.destroy(cid)
93
95 return self.server.getAllVocabularyNames()
96
98 return self.server.getAllValues(vocab)
99
def get_entry_description(self, vocab, entry):
    """Return the description of `entry` in the vocabulary `vocab`.

    The request is forwarded unchanged to `self.server.getEntryDescription`
    and its result is returned as-is.
    """
    server = self.server
    return server.getEntryDescription(vocab, entry)
102
104 return self.server.contains(vocab, entry)
105
106
108 """
This class attempts to hide the concept of a collection id from users. The
results of each find action are grouped on the server under a unique collection
id. That id can be used in subsequent calls to refine the collection's content
by entering more specific search criteria. A new collection can also be
obtained by using the subcollection method. Each collection keeps its own
current collection id, as returned by the SOAP server through the lower-level
Biblio class's services, and it takes care of freeing this collection on the
server upon destruction.
117 """
118
122
125
127 if self.cid == -1:
128 raise 'no collection id (use find)'
129 return self.cid
130
133
134 - def refine(self, keywords, attrs, criteria):
139
144
147
150
152 if self.cid == -1:
153 return 0
154 return self.biblio.exists(self.cid)
155
157 if self.cid == -1:
158 return
159 self.biblio.destroy(self.cid)
160
def checkargv(idx, msg):
    """Ensure that sys.argv holds an element at position idx.

    idx -- position in sys.argv where an option's argument is expected
    msg -- option name used in the error message

    Raises ValueError when the command line is too short.
    """
    # Valid positions are 0 .. len(sys.argv) - 1.
    if idx >= len(sys.argv):
        raise ValueError(
            'argument expected at position %d for option %s' % (idx, msg))


def _option_args(flag, labels):
    """Return the command-line arguments that follow flag.

    labels holds one checkargv() label per expected argument. Returns None
    when flag is absent, otherwise a list with one value per label.
    """
    if flag not in sys.argv:
        return None
    idx = sys.argv.index(flag)
    values = []
    for label in labels:
        idx = idx + 1
        checkargv(idx, label)
        values.append(sys.argv[idx])
    return values


def _save(filename, fetch_text):
    """Write the text returned by fetch_text() to filename.

    fetch_text is only called once the file is open, so nothing is
    retrieved when the file cannot be created. A failure to open the file
    is reported on stdout and otherwise ignored.
    """
    try:
        out = open(filename, 'w')
    except IOError:
        print('failed to open %s.' % filename)
        return
    try:
        out.write(fetch_text())
    finally:
        # Close the file even when retrieving or writing the text fails.
        out.close()


# Help text printed for -h; the single %s receives the default server URL.
_USAGE = """
usage: biblio.py [options] [- [finds]]
  where options may be:
    -l <server URL> to change the server URL
       (the default URL is %s)
    -g <citation ID> to get the XML version of a citation
    -c to obtain the size of a citation collection with each refinement
    -a to retrieve the citations in a collection instead of showing only
       their citation id's
    -f <prefix> to specify the location whereto dump citations (implies -a)
       found in a collection
    -o get citations one-by-one i.e. each will end up in its own file if used
       in conjunction with -f
    -Vn to get the vocabulary names in the database
    -Vv <vocabulary> to get all antries for vocabulary
    -Vd <vocabulary> <entry> to get description for vocabulary entry
    -Ve <vocabulary> <entry> to determine whether vocabulary entry exist
  and finds are any number of successive occurrences of the following:
    -find <keyword> [-attr <attribute>]
  where each new find occurrence refines the result of the previous
  examples of using this script is:
    biblio.py -l http://192.168.0.163:8123 -g 21322295
    biblio.py -g 21322295
    biblio.py -a - -find study -find gene
    biblio.py -f genestudies - -find study -find gene
    biblio.py -f brazma - -find brazma -attr author
    biblio.py -Vn
    biblio.py -Vv MEDLINE/Person/properties
    biblio.py -Vd MEDLINE/Person/properties LAST_NAME
    biblio.py -Ve MEDLINE/Person/properties LAST_NAME
"""


def main():
    """Command-line utility that queries the BQS.

    Reads its options from sys.argv (see the -h text) and uses the Biblio
    and BiblioCollection classes of this file to talk to the server. Exits
    right after printing the help text for -h, and after processing the
    options when no '-' separator introduces a list of finds.

    Raises ValueError (through checkargv) when an option is missing its
    argument.
    """
    # Every print() below takes exactly one argument so that the code
    # behaves the same under the Python 2 print statement and Python 3.
    serverurl = 'http://industry.ebi.ac.uk/soap/openBQS'

    if '-h' in sys.argv:
        print(_USAGE % serverurl)
        sys.exit(0)

    args = _option_args('-l', ['-l'])
    if args is not None:
        serverurl = args[0]
    server = Biblio(serverurl)

    args = _option_args('-g', ['-g'])
    if args is not None:
        print(server.get_by_id(args[0]))

    showsize = '-c' in sys.argv
    if showsize:
        print('total number of citations -> %s' % (server.get_count(),))

    prefix = None
    args = _option_args('-f', ['-f'])
    if args is not None:
        prefix = args[0]
    # -f implies -a.
    fetch = '-a' in sys.argv or prefix is not None

    # -o and -Vn take no argument, so nothing has to follow them.
    indiv = '-o' in sys.argv

    if '-Vn' in sys.argv:
        vocab = server.get_vocabulary_names()
        if len(vocab) > 0:
            print('the vocabulary names are:')
        else:
            print('there is no names in the vocabulary.')
        for v in vocab:
            print(v)

    args = _option_args('-Vv', ['-Vv'])
    if args is not None:
        values = server.get_all_values(args[0])
        if len(values) > 0:
            print('the vocabulary entries for %s are:' % args[0])
        else:
            print('there is no entries in the vocabulary %s.' % args[0])
        for v in values:
            print(v)

    args = _option_args('-Vd', ['-Vd name', '-Vd entry'])
    if args is not None:
        print(server.get_entry_description(args[0], args[1]))

    args = _option_args('-Ve', ['-Ve name', '-Ve entry'])
    if args is not None:
        if server.contains(args[0], args[1]):
            print('entry %s::%s exists.' % (args[0], args[1]))
        else:
            print('entry %s::%s doesn\'t exists.' % (args[0], args[1]))

    # Finds are only looked for after the '-' separator; without it the
    # work is done.
    if '-' not in sys.argv:
        sys.exit(0)
    base = sys.argv.index('-')

    collection = BiblioCollection(server)
    while 1:
        try:
            pos = sys.argv.index('-find', base)
        except ValueError:
            break
        checkargv(pos + 1, '-find')
        keys = sys.argv[pos + 1]
        attrs = ''
        # An optional '-attr <attribute>' may directly follow the keyword.
        if len(sys.argv) > pos + 2 and sys.argv[pos + 2] == '-attr':
            checkargv(pos + 3, '-attr')
            attrs = sys.argv[pos + 3]
        if fetch:
            collection.refine(keys, attrs, '')
        else:
            # Without -a each refinement is reported by its citation ids.
            # The -c size report sits in this branch because the id listing
            # that follows it must not run when -a is given.
            print('search with: %s %s' % (keys, attrs))
            collection.refine(keys, attrs, '')
            print('collection -> %s' % (collection.get_collection_id(),))
            if showsize:
                print('collection size is -> %s' % (collection.get_count(),))
            ids = collection.get_all_ids()
            if len(ids) > 0:
                print('citations in collection ->')
            else:
                print('no citations in collection.')
            for cit_id in ids:
                print(cit_id)
        # Continue the search behind the '-find' just handled.
        base = pos + 1

    if fetch:
        if prefix is None:
            print(collection.get_all())
        elif indiv:
            for cit_id in collection.get_all_ids():
                print('saving %s ...' % (cit_id,))
                _save('%s-%s.xml' % (prefix, cit_id),
                      lambda: server.get_by_id(cit_id))
        else:
            _save(prefix + '.xml', collection.get_all)


if __name__ == "__main__":
    main()
398