Package Bio :: Package WWW :: Module NCBI
[hide private]
[frames] | no frames]

Source Code for Module Bio.WWW.NCBI

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Provides code to access NCBI over the WWW. 
  7   
  8  The main Entrez web page is available at: 
  9  http://www.ncbi.nlm.nih.gov/Entrez/ 
 10   
 11  A list of the Entrez utilities (will go away Dec 2002) is available 
 12  at: 
 13  http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html 
 14   
 15  Documentation for the e-utilities is available at: 
 16  http://www.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html 
 17   
 18  The main Blast web page is available at: 
 19  http://www.ncbi.nlm.nih.gov/BLAST/ 
 20   
 21   
 22  Functions: 
 23  query        Query Entrez. 
 24  pmfetch      Retrieve results using a unique identifier. 
 25  pmqty        Search PubMed. 
 26  pmneighbor   Return a list of related articles for a PubMed entry. 
 27   
 28  efetch       Access the efetch script. 
 29  _open        Open a handle to Entrez (internal helper). 
 30   
 31  """ 
 32  import string 
 33  import urllib 
 34   
 35  from Bio import File 
 36   
def query(cmd, db, cgi='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi',
          **keywds):
    """query(cmd, db, cgi='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi',
    **keywds) -> handle

    Send a query to Entrez and hand back a handle to the results.

    cmd and db are sent as the 'cmd' and 'db' CGI parameters; every
    extra keyword argument is passed along as an additional parameter.
    The parameters themselves are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html

    Raises an IOError exception if there's a network error.

    """
    params = {'cmd': cmd, 'db': db}
    # Extra keywords become extra CGI parameters, unchanged.
    for name, value in keywds.items():
        params[name] = value
    return _open(cgi, params)
52
def pmfetch(db, id, report=None, mode=None,
            cgi="http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch.fcgi"):
    """pmfetch(db, id, report=None, mode=None,
    cgi="http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch.fcgi") -> handle

    Ask PmFetch for a record and hand back a handle to the results.

    db and id are always sent; report and mode are only sent when they
    are not None.  The parameters themselves are explained in the online
    documentation:
    http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html

    Raises an IOError exception if there's a network error.

    """
    params = {'db': db, 'id': id}
    # Optional parameters are left out of the request entirely when unset.
    for name, value in (('report', report), ('mode', mode)):
        if value is not None:
            params[name] = value
    return _open(cgi, params)
71
def pmqty(db, term, dopt=None,
          cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmqty.fcgi',
          **keywds):
    """pmqty(db, term, dopt=None,
    cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmqty.fcgi',
    **keywds) -> handle

    Run a PmQty search and hand back a handle to the results.

    db and term are always sent; dopt is only sent when it is not None.
    Every extra keyword argument is passed along as an additional CGI
    parameter.  The parameters themselves are explained in the online
    documentation:
    http://www.ncbi.nlm.nih.gov/entrez/utils/pmqty_help.html

    Raises an IOError exception if there's a network error.

    """
    params = {'db': db, 'term': term}
    if dopt is not None:
        params['dopt'] = dopt
    # Extra keywords become extra CGI parameters, unchanged.
    for name, value in keywds.items():
        params[name] = value
    return _open(cgi, params)
90
def pmneighbor(pmid, display,
               cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi'):
    """pmneighbor(pmid, display,
    cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi') -> handle

    Ask PMNeighbor about a PubMed entry and hand back a handle to the
    results.  The parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor_help.html

    Raises an IOError exception if there's a network error.

    """
    # HACK: pmneighbor wants `display` as a bare tag with no value.
    # _open can only send name=value pairs (a limitation of
    # urllib.urlencode), so the query string is assembled by hand here
    # and _open is called without a parameter dictionary.  A cleaner
    # workaround is still needed.
    query_string = "pmid=%s&%s" % (pmid, display)
    return _open("%s?%s" % (cgi, query_string))
111 112 # XXX retmode?
def epost(db, id, cgi='http://www.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi',
          **keywds):
    """epost(db, id[, cgi][...]) -> handle

    Call the epost script and hand back a handle to the results.

    db and id are always sent; every extra keyword argument is passed
    along as an additional CGI parameter.  The parameters themselves are
    explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html

    Raises an IOError exception if there's a network error.

    """
    params = {}
    params['db'] = db
    params['id'] = id
    params.update(keywds)
    return _open(cgi, params)
127
def efetch(db, cgi='http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
           **keywds):
    """efetch(db[, cgi][...]) -> handle

    Call the efetch script and hand back a handle to the results.

    db is always sent; every extra keyword argument is passed along as
    an additional CGI parameter.  The parameters themselves are
    explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html

    Raises an IOError exception if there's a network error.

    """
    params = {'db': db}
    # Extra keywords become extra CGI parameters, unchanged.
    for name, value in keywds.items():
        params[name] = value
    return _open(cgi, params)
142
def esearch(db, term,
            cgi='http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
            **keywds):
    """esearch(db, term[, cgi][...]) -> handle

    Call the esearch script and hand back a handle to the results.

    db and term are always sent; every extra keyword argument is passed
    along as an additional CGI parameter.  The parameters themselves are
    explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html

    Raises an IOError exception if there's a network error.

    """
    params = {}
    params['db'] = db
    params['term'] = term
    params.update(keywds)
    return _open(cgi, params)
159 174
def _open(cgi, params=None, get=1):
    """_open(cgi, params=None, get=1) -> UndoHandle

    Open a handle to Entrez.

    cgi is the URL for the cgi script to access.  params is a dictionary
    with the options to pass to it; when it is omitted no options are
    sent.  get is a boolean that describes whether a GET should be used:
    if true the encoded options are appended to the URL, otherwise they
    are sent as POST data.

    The first 5 lines of the response are read and checked for a few
    known NCBI error messages, then handed back to the UndoHandle so the
    caller can still read them.

    Raises an IOError if one of those error messages is found.  The
    network handle is closed before the exception is raised.

    """
    # A fresh dictionary per call avoids sharing one mutable default.
    if params is None:
        params = {}

    # Open a handle to Entrez.
    options = urllib.urlencode(params)
    if get:  # do a GET
        fullcgi = cgi
        if options:
            fullcgi = "%s?%s" % (cgi, options)
        handle = urllib.urlopen(fullcgi)
    else:  # do a POST
        handle = urllib.urlopen(cgi, options)

    # Wrap the handle inside an UndoHandle.
    uhandle = File.UndoHandle(handle)

    # Peek at the first 5 lines, then give them back last-line-first
    # (presumably so that they are read again in their original order).
    lines = [uhandle.readline() for i in range(5)]
    for i in range(4, -1, -1):
        uhandle.saveline(lines[i])
    data = "".join(lines)

    # Sometimes Entrez returns an error page instead of results.
    error = None
    for marker in ("500 Proxy Error",
                   "502 Proxy Error",
                   "WWW Error 500 Diagnostic"):
        if data.find(marker) >= 0:
            error = "%s (NCBI busy?)" % marker
            break
    else:
        # XXX Possible bug here: it is not known whether this message
        # really occurs at the very start of the response.
        if data[:5] == "ERROR":
            error = "ERROR, possibly because id not available?"
    # Should 404, timeouts, etc. be checked for as well?

    if error is not None:
        # The caller never receives the handle on failure, so release
        # the connection here instead of leaking it.
        handle.close()
        # Call form of raise: equivalent to `raise IOError, msg` on
        # Python 2 and also valid syntax on Python 3.
        raise IOError(error)
    return uhandle
220