Package Bio :: Module biblio
[hide private]
[frames] | [no frames]

Source Code for Module Bio.biblio

#!/usr/bin/env python
# Copyright 2002 by Tiaan Wessels.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
This file implements a SOAP interface into the Bibliographic database of
the European Bioinformatics Institute. It is a low-level interface and is
intended to be used by higher-level objects to build object representations
from data retrieved by this interface. This file needs a version of the
pywebsvcs package LATER than 0.97 available from www.sourceforge.net.
"""

# The deprecation warning is issued at import time and deliberately sits
# ahead of the remaining imports, so it runs before ``import SOAP`` is
# attempted.
import warnings
warnings.warn("Bio.biblio is deprecated because it seems to be out of date, "
              "and no one came forward saying they use this module. If you "
              "use Bio.biblio, please join the biopython mailing list and "
              "email us.",
              DeprecationWarning)

import sys
import SOAP
import copy

# Uncomment the next line to set the SOAP library's debug flag.
#SOAP.Config.debug = 1
# NOTE(review): these two flags presumably make the SOAP library build
# requests without type attributes and without namespace prefixes, as the
# EBI server expects -- confirm against the SOAP library documentation.
SOAP.Config.BuildWithNoType = 1
SOAP.Config.BuildWithNoNamespacePrefix = 1

# SOAP namespace of the bibliographic query service; Biblio.__init__ copies
# it onto each instance and hands it to SOAP.SOAPProxy.
namespace = 'http://industry.ebi.ac.uk/openBQS'
 31   
class Biblio:
    """
    Low-level access to the EBI Bibliographic services exported through SOAP.

    There is an almost 1-to-1 mapping between the methods of this class and
    the RPCs available on the SOAP server: each method forwards its
    arguments to the identically purposed remote call on ``self.server`` and
    returns whatever that call returns.

    A collection id (``cid``) of -1 means "no collection". Methods that
    cannot work without a collection raise ``ValueError`` in that case.
    """

    def __init__(self, serverurl):
        """Create a SOAP proxy for the service located at ``serverurl``."""
        self.serverurl = serverurl
        self.namespace = namespace
        self.server = SOAP.SOAPProxy(self.serverurl,
                                     namespace=self.namespace)

    def _require_cid(self, cid, message='no collection id'):
        """Raise ``ValueError(message)`` when ``cid`` is the -1 sentinel."""
        if cid == -1:
            raise ValueError(message)

    def get_count(self, cid):
        """Return the citation count of collection ``cid``.

        With ``cid == -1`` the remote call is made without a collection id.
        """
        if cid == -1:
            return self.server.getBibRefCount()
        return self.server.getBibRefCount(cid)

    def find(self, cid, keywords, attrs, criteria):
        """Run a find and return the server's result.

        With ``cid == -1`` the search is not restricted to an existing
        collection; otherwise ``cid`` is passed along with the query.
        """
        query = [keywords, attrs, criteria]
        if cid == -1:
            return self.server.find(query)
        return self.server.find(cid, query)

    def reset_retrieval(self, cid):
        """Call the server's ``resetRetrieval`` for collection ``cid``.

        Raises ``ValueError`` when ``cid`` is -1.
        """
        self._require_cid(cid)
        self.server.resetRetrieval(cid)

    def has_next(self, cid):
        """Return the server's ``hasNext`` answer for collection ``cid``.

        Raises ``ValueError`` when ``cid`` is -1.
        """
        self._require_cid(cid)
        return self.server.hasNext(cid)

    def get_next(self, cid):
        """Return the server's ``getNext`` result for collection ``cid``.

        Raises ``ValueError`` when ``cid`` is -1.
        """
        self._require_cid(cid)
        return self.server.getNext(cid)

    def get_more(self, cid, cnt):
        """Return the server's ``getMore`` result for ``cnt`` entries.

        Raises ``ValueError`` when ``cid`` is -1 or ``cnt`` is not positive.
        """
        self._require_cid(cid)
        if cnt <= 0:
            # Format the count instead of concatenating it: ``cnt`` is a
            # number, so ``'...' + cnt`` would itself fail with a TypeError.
            raise ValueError('invalid count %s' % (cnt,))
        return self.server.getMore(cid, cnt)

    def get_all_ids(self, cid):
        """Return the server's ``getAllIDs`` result for collection ``cid``.

        Raises ``ValueError`` when ``cid`` is -1 (large result safeguard).
        """
        self._require_cid(cid, 'no collection id (large result safeguard)')
        return self.server.getAllIDs(cid)

    def get_all(self, cid):
        """Return the server's ``getAllBibRefs`` result for ``cid``.

        Raises ``ValueError`` when ``cid`` is -1 (large result safeguard).
        """
        self._require_cid(cid, 'no collection id (large result safeguard)')
        return self.server.getAllBibRefs(cid)

    def get_by_id(self, id):
        """Return the server's ``getById`` result for citation ``id``."""
        return self.server.getById(id)

    def exists(self, cid):
        """Return the server's ``exists`` answer for collection ``cid``.

        Raises ``ValueError`` when ``cid`` is -1.
        """
        self._require_cid(cid)
        return self.server.exists(cid)

    def destroy(self, cid):
        """Ask the server to destroy collection ``cid``.

        Raises ``ValueError`` when ``cid`` is -1.
        """
        self._require_cid(cid)
        self.server.destroy(cid)

    def get_vocabulary_names(self):
        """Return the server's ``getAllVocabularyNames`` result."""
        return self.server.getAllVocabularyNames()

    def get_all_values(self, vocab):
        """Return the server's ``getAllValues`` result for ``vocab``."""
        return self.server.getAllValues(vocab)

    def get_entry_description(self, vocab, entry):
        """Return the server's description of ``entry`` in ``vocab``."""
        return self.server.getEntryDescription(vocab, entry)

    def contains(self, vocab, entry):
        """Return the server's ``contains`` answer for ``entry`` in ``vocab``."""
        return self.server.contains(vocab, entry)
116 117
class BiblioCollection:
    """
    Hide the concept of a collection id from users.

    Each find action's results are grouped in the server under a unique
    collection id. This id can be used in subsequent calls to refine the
    content further by entering more specific search criteria (``refine``),
    or to produce a new, independent collection (``subcollection``). Every
    instance tracks its own current collection id as returned by the SOAP
    server through the lower-level Biblio class, and frees that collection
    in the server upon destruction.

    A collection id of -1 means that no find has been performed yet.
    """

    def __init__(self, biblio, cid = -1):
        """Wrap collection ``cid`` (default: none) served by ``biblio``."""
        self.biblio = biblio
        self.cid = cid

    def __del__(self):
        """Free the server-side collection when the object is discarded."""
        self.destroy()

    def get_collection_id(self):
        """Return the current collection id.

        Raises ``ValueError`` when no find has produced a collection yet.
        """
        if self.cid == -1:
            raise ValueError('no collection id (use find)')
        return self.cid

    def get_count(self):
        """Return the citation count reported for the current collection."""
        return self.biblio.get_count(self.cid)

    def refine(self, keywords, attrs, criteria):
        """Narrow this collection in place with a further find.

        The collection returned by the find becomes the current one and the
        previous collection, if any, is destroyed on the server.
        """
        previous = self.cid
        self.cid = self.biblio.find(self.cid, keywords, attrs, criteria)
        if previous != -1:
            self.biblio.destroy(previous)

    def subcollection(self, keywords, attrs, criteria):
        """Return a new BiblioCollection for a find within this one.

        This collection itself is left untouched.
        """
        new_cid = self.biblio.find(self.cid, keywords, attrs, criteria)
        return BiblioCollection(self.biblio, new_cid)

    def get_all_ids(self):
        """Return the ids of all citations in the current collection."""
        return self.biblio.get_all_ids(self.cid)

    def get_all(self):
        """Return all citations in the current collection."""
        return self.biblio.get_all(self.cid)

    def exists(self):
        """Return 0 without a collection, else the server's answer."""
        if self.cid == -1:
            return 0
        return self.biblio.exists(self.cid)

    def destroy(self):
        """Free the server-side collection, if there is one.

        Safe to call repeatedly: the id is forgotten before the remote call,
        so neither a second call nor ``__del__`` frees the collection twice.
        """
        if self.cid == -1:
            return
        doomed, self.cid = self.cid, -1
        self.biblio.destroy(doomed)
171
def checkargv(idx, msg):
    """Ensure the command line has an argument at position ``idx``.

    ``idx`` is the ``sys.argv`` index at which the value of an option is
    expected and ``msg`` names that option for the error message.

    Raises ``ValueError`` when ``sys.argv`` is too short to hold it.
    """
    if idx >= len(sys.argv):
        raise ValueError('argument expected at position %d for option %s'
                         % (idx, msg))


# Help text printed for -h; the single %s receives the default server URL.
_USAGE = """
usage: biblio.py [options] [- [finds]]
where options may be:
  -l <server URL> to change the server URL
     (the default URL is %s)
  -g <citation ID> to get the XML version of a citation
  -c to obtain the size of a citation collection with each refinement
  -a to retrieve the citations in a collection instead of showing only
     their citation id's
  -f <prefix> to specify the location whereto dump citations (implies -a)
     found in a collection
  -o get citations one-by-one i.e. each will end up in its own file if used
     in conjunction with -f
  -Vn to get the vocabulary names in the database
  -Vv <vocabulary> to get all antries for vocabulary
  -Vd <vocabulary> <entry> to get description for vocabulary entry
  -Ve <vocabulary> <entry> to determine whether vocabulary entry exist
and finds are any number of successive occurrences of the following:
  -find <keyword> [-attr <attribute>]
where each new find occurrence refines the result of the previous
examples of using this script is:
  biblio.py -l http://192.168.0.163:8123 -g 21322295
  biblio.py -g 21322295
  biblio.py -a - -find study -find gene
  biblio.py -f genestudies - -find study -find gene
  biblio.py -f brazma - -find brazma -attr author
  biblio.py -Vn
  biblio.py -Vv MEDLINE/Person/properties
  biblio.py -Vd MEDLINE/Person/properties LAST_NAME
  biblio.py -Ve MEDLINE/Person/properties LAST_NAME
"""


def _option_index(flag, start=0):
    """Return the index of ``flag`` in ``sys.argv`` at or after ``start``, or -1."""
    try:
        return sys.argv.index(flag, start)
    except ValueError:
        return -1


def _save_text(filename, text):
    """Write ``text`` to ``filename``; report (do not raise) a failed open."""
    try:
        handle = open(filename, 'w')
    except IOError:
        print('failed to open %s.' % filename)
        return
    try:
        handle.write(text)
    finally:
        # Close the file even when the write itself fails.
        handle.close()


def main():
    """
    this function implements a command-line utility using the classes implemented
    in this file above and serves as base-line access software to query the BQS.

    Options are looked up anywhere in ``sys.argv``; everything after a lone
    ``-`` is interpreted as a sequence of ``-find`` refinements. The function
    exits through ``sys.exit()`` after printing the help text for ``-h`` and
    when no ``-`` separator is present.
    """

    # the default server location at EBI
    serverurl = 'http://industry.ebi.ac.uk/soap/openBQS'

    # is help requested
    if _option_index('-h') != -1:
        print(_USAGE % serverurl)
        sys.exit()

    # make server. (see if different server URL specified with -l)
    idx = _option_index('-l')
    if idx != -1:
        checkargv(idx + 1, '-l')
        serverurl = sys.argv[idx + 1]
    server = Biblio(serverurl)

    # handle all the possible command-line options:

    # get a citation by its id
    idx = _option_index('-g')
    if idx != -1:
        checkargv(idx + 1, '-g')
        print(server.get_by_id(sys.argv[idx + 1]))

    # size of citation collection
    showsize = 0
    if _option_index('-c') != -1:
        # -1 selects the count that is not tied to any collection.
        print('total number of citations -> %s' % (server.get_count(-1),))
        showsize = 1

    # get all citations in collection
    fetch = 0
    if _option_index('-a') != -1:
        fetch = 1

    # dump to a file with prefix
    prefix = None
    idx = _option_index('-f')
    if idx != -1:
        checkargv(idx + 1, '-f')
        prefix = sys.argv[idx + 1]
        fetch = 1

    # get individually ? (-o is a plain switch and takes no argument)
    indiv = 0
    if _option_index('-o') != -1:
        indiv = 1

    # get vocabulary names (-Vn is a plain switch and takes no argument)
    if _option_index('-Vn') != -1:
        vocab = server.get_vocabulary_names()
        if len(vocab) > 0:
            print('the vocabulary names are:')
        else:
            print('there is no names in the vocabulary.')
        for name in vocab:
            print(name)

    # get entries for vocabulary name
    idx = _option_index('-Vv')
    if idx != -1:
        checkargv(idx + 1, '-Vv')
        values = server.get_all_values(sys.argv[idx + 1])
        if len(values) > 0:
            print('the vocabulary entries for %s are:' % sys.argv[idx + 1])
        else:
            print('there is no entries in the vocabulary %s.'
                  % sys.argv[idx + 1])
        for value in values:
            print(value)

    # get entry for vocabulary entry for name
    idx = _option_index('-Vd')
    if idx != -1:
        checkargv(idx + 1, '-Vd name')
        checkargv(idx + 2, '-Vd entry')
        print(server.get_entry_description(sys.argv[idx + 1],
                                           sys.argv[idx + 2]))

    # vocabulary entry for name exist ?
    idx = _option_index('-Ve')
    if idx != -1:
        checkargv(idx + 1, '-Ve name')
        checkargv(idx + 2, '-Ve entry')
        if server.contains(sys.argv[idx + 1], sys.argv[idx + 2]):
            print('entry %s::%s exists.'
                  % (sys.argv[idx + 1], sys.argv[idx + 2]))
        else:
            print('entry %s::%s doesn\'t exists.'
                  % (sys.argv[idx + 1], sys.argv[idx + 2]))

    # - separates find from rest so this is a Rubicon
    base = _option_index('-')
    if base == -1:
        sys.exit()

    # handle the find's (each successive find refines previous)
    collection = BiblioCollection(server)
    while 1:
        pos = _option_index('-find', base)
        if pos == -1:
            break
        checkargv(pos + 1, '-find')
        keys = sys.argv[pos + 1]
        attrs = ''
        # An optional "-attr <attribute>" may directly follow the keyword.
        if len(sys.argv) > pos + 2 and sys.argv[pos + 2] == '-attr':
            checkargv(pos + 3, '-attr')
            attrs = sys.argv[pos + 3]
        if fetch:
            collection.refine(keys, attrs, '')
        else:
            print('search with: %s %s' % (keys, attrs))
            collection.refine(keys, attrs, '')
            print('collection -> %s' % (collection.get_collection_id(),))
            if showsize:
                print('collection size is -> %s'
                      % (collection.get_count(),))
            ids = collection.get_all_ids()
            if len(ids) > 0:
                print('citations in collection ->')
            else:
                print('no citations in collection.')
            for citation_id in ids:
                print(citation_id)
        # Continue searching behind the -find that was just handled.
        base = pos + 1

    if fetch:
        if prefix is None:
            print(collection.get_all())
        elif indiv:
            for citation_id in collection.get_all_ids():
                print('saving %s ...' % (citation_id,))
                _save_text('%s-%s.xml' % (prefix, citation_id),
                           server.get_by_id(citation_id))
        else:
            _save_text(prefix + '.xml', collection.get_all())


# Run the command-line utility only when this file is executed as a script;
# importing the module does not call main().
if __name__ == "__main__":
    main()