Package Bio :: Module biblio
[hide private]
[frames | no frames]

Source Code for Module Bio.biblio

  1  #!/usr/bin/env python 
  2   
  3  """ 
  4  This file implements a SOAP interface into the Bibliographic database of 
  5  the European Bioinformatics Institute. It is a low-level interface and is 
  6  intended to be used by higher-level objects to build object representations 
  7  from data retrieved by this interface. This file needs a version of the 
  8  pywebsvcs package LATER than 0.97 available from www.sourceforge.net. 
  9  """ 
 10   
 11  import sys 
 12  import SOAP 
 13  import copy 
 14   
 15  #SOAP.Config.debug = 1 
 16  SOAP.Config.BuildWithNoType = 1 
 17  SOAP.Config.BuildWithNoNamespacePrefix = 1 
 18   
 19  namespace = 'http://industry.ebi.ac.uk/openBQS' 
 20   
class Biblio:
    """
    Low-level access to the EBI bibliographic services exported through SOAP.

    There is an almost 1-to-1 mapping between the methods of this class and
    the RPCs available on the SOAP server.  Throughout the class a collection
    id (``cid``) of -1 stands for "no collection".  Methods that need a real
    collection raise ``ValueError`` when given -1.
    """

    def __init__(self, serverurl):
        """Create a SOAP proxy for the service at *serverurl*."""
        self.serverurl = serverurl
        self.namespace = namespace

        self.server = SOAP.SOAPProxy(
            self.serverurl,
            namespace = self.namespace
        )

    def _require_cid(self, cid, message='no collection id'):
        """Raise ValueError(message) when *cid* is the -1 placeholder."""
        # String exceptions (raise 'text') are rejected by Python >= 2.6, so
        # a real exception class is raised instead.
        if cid == -1:
            raise ValueError(message)

    def get_count(self, cid):
        """Return getBibRefCount for collection *cid*, or without a
        collection argument when *cid* is -1."""
        if cid == -1:
            return self.server.getBibRefCount()
        return self.server.getBibRefCount(cid)

    def find(self, cid, keywords, attrs, criteria):
        """Run a find on the server and return its result.

        When *cid* is -1 the search is sent without a collection id,
        otherwise *cid* is passed so the server can search within it.
        """
        query = [keywords, attrs, criteria]
        if cid == -1:
            return self.server.find(query)
        return self.server.find(cid, query)

    def reset_retrieval(self, cid):
        """Call resetRetrieval for *cid*; raises ValueError if *cid* is -1."""
        self._require_cid(cid)
        self.server.resetRetrieval(cid)

    def has_next(self, cid):
        """Return hasNext for *cid*; raises ValueError if *cid* is -1."""
        self._require_cid(cid)
        return self.server.hasNext(cid)

    def get_next(self, cid):
        """Return getNext for *cid*; raises ValueError if *cid* is -1."""
        self._require_cid(cid)
        return self.server.getNext(cid)

    def get_more(self, cid, cnt):
        """Return getMore(cid, cnt).

        Raises ValueError if *cid* is -1 or *cnt* is not positive.
        """
        self._require_cid(cid)
        if cnt <= 0:
            # The original concatenated str + int here, which itself failed.
            raise ValueError('invalid count: %s' % (cnt,))
        return self.server.getMore(cid, cnt)

    def get_all_ids(self, cid):
        """Return getAllIDs for *cid*.

        Refuses to run without a collection (ValueError) as a safeguard
        against very large results.
        """
        self._require_cid(cid, 'no collection id (large result safeguard)')
        return self.server.getAllIDs(cid)

    def get_all(self, cid):
        """Return getAllBibRefs for *cid*.

        Refuses to run without a collection (ValueError) as a safeguard
        against very large results.
        """
        self._require_cid(cid, 'no collection id (large result safeguard)')
        return self.server.getAllBibRefs(cid)

    def get_by_id(self, id):
        """Return getById for the citation identifier *id*."""
        return self.server.getById(id)

    def exists(self, cid):
        """Return the server's exists answer for *cid*; ValueError if -1."""
        self._require_cid(cid)
        return self.server.exists(cid)

    def destroy(self, cid):
        """Call destroy for *cid* on the server; ValueError if *cid* is -1."""
        self._require_cid(cid)
        self.server.destroy(cid)

    def get_vocabulary_names(self):
        """Return getAllVocabularyNames from the server."""
        return self.server.getAllVocabularyNames()

    def get_all_values(self, vocab):
        """Return getAllValues for the vocabulary *vocab*."""
        return self.server.getAllValues(vocab)

    def get_entry_description(self, vocab, entry):
        """Return getEntryDescription for *entry* of vocabulary *vocab*."""
        return self.server.getEntryDescription(vocab, entry)

    def contains(self, vocab, entry):
        """Return the server's contains answer for *entry* in *vocab*."""
        return self.server.contains(vocab, entry)

class BiblioCollection:
    """
    Hide the concept of a collection id from users of Biblio.

    Each find action's results are grouped on the server under a unique
    collection id.  That id can be used in subsequent calls to narrow the
    content with more specific search criteria (``refine``), or to obtain a
    new, independent collection (``subcollection``).  An instance keeps its
    current collection id as returned through the lower-level Biblio object
    and frees that collection on the server when it is destroyed.  A
    collection id of -1 means that no find has been performed yet.
    """

    def __init__(self, biblio, cid = -1):
        """Wrap collection *cid* (default: none yet) reachable via *biblio*."""
        self.biblio = biblio
        self.cid = cid

    def __del__(self):
        """Free the server-side collection when this object is reclaimed."""
        self.destroy()

    def get_collection_id(self):
        """Return the current collection id.

        Raises ValueError when no find has produced a collection yet.
        """
        if self.cid == -1:
            # String exceptions are rejected by Python >= 2.6.
            raise ValueError('no collection id (use find)')
        return self.cid

    def get_count(self):
        """Return the count reported by Biblio.get_count for this collection."""
        return self.biblio.get_count(self.cid)

    def refine(self, keywords, attrs, criteria):
        """Replace this collection with the result of a find within it.

        The previous server-side collection, if any, is destroyed once the
        new one has been obtained.
        """
        previous = self.cid
        self.cid = self.biblio.find(self.cid, keywords, attrs, criteria)
        if previous != -1:
            self.biblio.destroy(previous)

    def subcollection(self, keywords, attrs, criteria):
        """Return a new BiblioCollection for a find within this one.

        This collection itself is left untouched.
        """
        new_cid = self.biblio.find(self.cid, keywords, attrs, criteria)
        return BiblioCollection(self.biblio, new_cid)

    def get_all_ids(self):
        """Return Biblio.get_all_ids for this collection."""
        return self.biblio.get_all_ids(self.cid)

    def get_all(self):
        """Return Biblio.get_all for this collection."""
        return self.biblio.get_all(self.cid)

    def exists(self):
        """Return 0 when there is no collection id, else the server's answer."""
        if self.cid == -1:
            return 0
        return self.biblio.exists(self.cid)

    def destroy(self):
        """Free the server-side collection, if there is one.

        The id is forgotten after a successful call, so a later destroy()
        or the implicit one in __del__ does not ask the server to destroy
        the same collection twice.
        """
        if self.cid == -1:
            return
        self.biblio.destroy(self.cid)
        self.cid = -1

def checkargv(idx, msg):
    """Ensure that sys.argv has an element at position *idx*.

    *msg* names the option that needs the argument and is only used in the
    error text.  Raises ValueError when the argument is missing.
    """
    # The original test (idx-1 > len(sys.argv)) could never fire for a
    # missing trailing argument; position idx exists only if idx < len.
    if idx >= len(sys.argv):
        raise ValueError(
            'argument expected at position %d for option %s' % (idx, msg)
        )


def _option_index(flag, start=0):
    """Return the position of *flag* in sys.argv at or after *start*, else -1."""
    try:
        return sys.argv.index(flag, start)
    except ValueError:
        return -1


def _dump_to_file(filename, fetch):
    """Write the text returned by calling *fetch* into *filename*.

    A failure to open the file is reported on stdout and otherwise ignored;
    *fetch* is only called once the file could be opened.
    """
    try:
        out = open(filename, 'w')
    except (IOError, OSError):
        print('failed to open %s.' % filename)
        return
    try:
        out.write(fetch())
    finally:
        # Close the file even when the fetch or the write fails.
        out.close()


def main():
    """
    Command-line utility built on the classes in this file; it serves as
    base-line access software to query the BQS.

    Options are read from sys.argv (see the usage text below).  Everything
    after a lone '-' is interpreted as a sequence of finds, each refining the
    result of the previous one.  The process exits after printing help, and
    also when no '-' separator is present once the options were handled.
    """
    # Output uses single-argument print(...) so the code runs unchanged on
    # both Python 2 and Python 3.

    # the default server location at EBI
    serverurl = 'http://industry.ebi.ac.uk/soap/openBQS'

    # is help requested
    if _option_index('-h') != -1:
        print("""
usage: biblio.py [options] [- [finds]]
  where options may be:
    -l <server URL> to change the server URL
       (the default URL is %s)
    -g <citation ID> to get the XML version of a citation
    -c to obtain the size of a citation collection with each refinement
    -a to retrieve the citations in a collection instead of showing only
       their citation id's
    -f <prefix> to specify the location whereto dump citations (implies -a)
       found in a collection
    -o get citations one-by-one i.e. each will end up in its own file if used
       in conjunction with -f
    -Vn to get the vocabulary names in the database
    -Vv <vocabulary> to get all antries for vocabulary
    -Vd <vocabulary> <entry> to get description for vocabulary entry
    -Ve <vocabulary> <entry> to determine whether vocabulary entry exist
  and finds are any number of successive occurrences of the following:
    -find <keyword> [-attr <attribute>]
  where each new find occurrence refines the result of the previous
  examples of using this script is:
    biblio.py -l http://192.168.0.163:8123 -g 21322295
    biblio.py -g 21322295
    biblio.py -a - -find study -find gene
    biblio.py -f genestudies - -find study -find gene
    biblio.py -f brazma - -find brazma -attr author
    biblio.py -Vn
    biblio.py -Vv MEDLINE/Person/properties
    biblio.py -Vd MEDLINE/Person/properties LAST_NAME
    biblio.py -Ve MEDLINE/Person/properties LAST_NAME
""" % serverurl)
        # The original wrote a bare `sys.exit`, which is a no-op.
        sys.exit(0)

    # make server. (see if different server URL specified with -l)
    idx = _option_index('-l')
    if idx != -1:
        checkargv(idx + 1, '-l')
        serverurl = sys.argv[idx + 1]
    server = Biblio(serverurl)

    # handle all the possible command-line options:

    # get a citation by its id
    idx = _option_index('-g')
    if idx != -1:
        checkargv(idx + 1, '-g')
        print(server.get_by_id(sys.argv[idx + 1]))

    # size of citation collection
    showsize = 0
    if _option_index('-c') != -1:
        # Biblio.get_count requires a collection id; -1 asks for the count
        # without a collection (the original call omitted the argument).
        print('total number of citations -> %s' % (server.get_count(-1),))
        showsize = 1

    # get all citations in collection
    fetch = 0
    if _option_index('-a') != -1:
        fetch = 1

    # dump to a file with prefix
    prefix = None
    idx = _option_index('-f')
    if idx != -1:
        checkargv(idx + 1, '-f')
        prefix = sys.argv[idx + 1]
        fetch = 1

    # get individually ?  (-o takes no argument, so nothing to check)
    indiv = 0
    if _option_index('-o') != -1:
        indiv = 1

    # get vocabulary names  (-Vn takes no argument either)
    if _option_index('-Vn') != -1:
        vocab = server.get_vocabulary_names()
        if len(vocab) > 0:
            print('the vocabulary names are:')
        else:
            print('there is no names in the vocabulary.')
        for name in vocab:
            print(name)

    # get entries for vocabulary name
    idx = _option_index('-Vv')
    if idx != -1:
        checkargv(idx + 1, '-Vv')
        vocab_name = sys.argv[idx + 1]
        values = server.get_all_values(vocab_name)
        if len(values) > 0:
            print('the vocabulary entries for %s are:' % vocab_name)
        else:
            print('there is no entries in the vocabulary %s.' % vocab_name)
        for value in values:
            print(value)

    # get entry for vocabulary entry for name
    idx = _option_index('-Vd')
    if idx != -1:
        checkargv(idx + 1, '-Vd name')
        checkargv(idx + 2, '-Vd entry')
        print(server.get_entry_description(sys.argv[idx + 1], sys.argv[idx + 2]))

    # vocabulary entry for name exist ?
    idx = _option_index('-Ve')
    if idx != -1:
        checkargv(idx + 1, '-Ve name')
        checkargv(idx + 2, '-Ve entry')
        pair = (sys.argv[idx + 1], sys.argv[idx + 2])
        if server.contains(pair[0], pair[1]):
            print('entry %s::%s exists.' % pair)
        else:
            print('entry %s::%s doesn\'t exists.' % pair)

    # - separates find from rest so this is a Rubicon
    base = _option_index('-')
    if base == -1:
        # Same no-op `sys.exit` as above in the original.
        sys.exit(0)

    # handle the find's (each successive find refines previous)
    collection = BiblioCollection(server)
    while 1:
        pos = _option_index('-find', base)
        if pos == -1:
            break
        checkargv(pos + 1, '-find')
        keys = sys.argv[pos + 1]
        attrs = ''
        # an optional "-attr <attribute>" may directly follow the keyword
        if pos + 2 < len(sys.argv) and sys.argv[pos + 2] == '-attr':
            checkargv(pos + 3, '-attr')
            attrs = sys.argv[pos + 3]
        if fetch:
            collection.refine(keys, attrs, '')
        else:
            print('search with: %s %s' % (keys, attrs))
            collection.refine(keys, attrs, '')
            print('collection -> %s' % (collection.get_collection_id(),))
            if showsize:
                print('collection size is -> %s' % (collection.get_count(),))
            ids = collection.get_all_ids()
            if len(ids) > 0:
                print('citations in collection ->')
            else:
                print('no citations in collection.')
            for cit_id in ids:
                print(cit_id)
        # continue searching after the -find that was just handled
        base = pos + 1

    if fetch:
        if prefix is None:
            print(collection.get_all())
        elif indiv:
            # one file per citation: <prefix>-<id>.xml
            for cit_id in collection.get_all_ids():
                print('saving %s ...' % (cit_id,))
                _dump_to_file(
                    '%s-%s.xml' % (prefix, cit_id),
                    lambda: server.get_by_id(cit_id),
                )
        else:
            _dump_to_file(prefix + '.xml', collection.get_all)


# Run the command-line utility only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()