Package Bio :: Module EZRetrieve
[hide private]
[frames] | no frames]

Source Code for Module Bio.EZRetrieve

 1  """This module contains code to access EZRetrieve. 
 2   
 3  Functions: 
 4  retrieve_single  Retrieve a single sequence from EZRetrieve. 
 5  parse_single     Parse the results from EZRetrieve into FASTA format. 
 6   
 7  """ 
 8   
def retrieve_single(id, from_, to, retrieve_by=None, organism=None,
                    parse_results=1):
    """Retrieve a single sequence from the EZRetrieve web service.

    Arguments:
    id             Identifier to look up; sent to the server as str(id).
    from_, to      Sent as the "from" and "to" form fields (as strings).
                   NOTE(review): presumably the bounds of the region to
                   fetch; confirm against the service documentation.
    retrieve_by    Kind of identifier: "GenBank" (default), "UniGene",
                   "LocusLink" or "IMAGE".  Matched case-insensitively.
    organism       "Hs" (default), "Mm" or "Rn".  Case-sensitive.
    parse_results  If true (default), return the result of parse_single()
                   on the response; otherwise return the raw response body.

    Raises AssertionError if organism or retrieve_by is not one of the
    recognised values.  parse_single() may also raise AssertionError.

    """
    # urlencode/urlopen moved in Python 3; support both locations.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlencode, urlopen

    CGI = "http://siriusb.umdnj.edu:18080/EZRetrieve/single_r_run.jsp"
    org2value = {"Hs": "0", "Mm": "1", "Rn": "2"}
    acctype2value = {"genbank": 0, "unigene": 1, "locuslink": 2, "image": 3}

    # Validate with explicit raises rather than `assert` statements so the
    # checks still run under `python -O`.  AssertionError is kept as the
    # exception type so existing callers continue to work.
    organism = organism or "Hs"
    if organism not in org2value:
        raise AssertionError(
            "Unknown organism %r (expected one of: %s)"
            % (organism, ", ".join(sorted(org2value))))

    retrieve_by = (retrieve_by or "GenBank").lower()
    if retrieve_by not in acctype2value:
        raise AssertionError(
            "Unknown retrieve_by %r (expected one of: %s)"
            % (retrieve_by, ", ".join(sorted(acctype2value))))

    params = {
        "input": str(id),
        "from": str(from_),
        "to": str(to),
        "org": org2value[organism],
        "AccType": acctype2value[retrieve_by],
    }
    # Passing a data argument makes urlopen issue a POST.  Python 3 requires
    # the body to be bytes; urlencode output is pure ASCII, so this encode
    # is a no-op on Python 2.
    options = urlencode(params).encode("ascii")
    handle = urlopen(CGI, options)
    try:
        if parse_results:
            return parse_single(handle)
        return handle.read()
    finally:
        # Always release the connection, even if parsing raises.
        handle.close()
37
def parse_single(handle):
    """Return a FASTA-formatted string for the sequence.

    handle is a file-like object whose read() returns the HTML page
    produced by EZRetrieve, as text or as bytes.

    Raises AssertionError if the page contains an "Error: " message (the
    message text is used as the exception argument) or if no sequence
    record can be found in the page.

    """
    import re

    results = handle.read()
    # Python 3's urlopen yields bytes; decode so the text searches below
    # work.  On Python 2 a byte string is already `str` and is left alone.
    if not isinstance(results, str):
        results = results.decode("utf-8", "replace")

    # Tags are matched case-insensitively on the original text (rather than
    # on a lower-cased copy) so match offsets always index `results`.
    line_break = re.compile(r"<br>", re.IGNORECASE)
    record_start = re.compile(r"<b>>", re.IGNORECASE)
    record_end = re.compile(r"<br><br>", re.IGNORECASE)

    err_start = results.find("Error: ")
    if err_start >= 0:
        match = line_break.search(results, err_start)
        # If the message is not followed by <br>, report the rest of the
        # page instead of failing with an unrelated ValueError.
        err_end = match.start() if match else len(results)
        raise AssertionError(results[err_start:err_end].strip())

    match = record_start.search(results)
    if match is None:
        # Explicit raise: an `assert` statement would vanish under -O.
        raise AssertionError("Couldn't find sequence.")
    seq_start = match.start()

    match = record_end.search(results, seq_start)
    # Without the terminating <br><br>, take everything to the end of the
    # page; slicing with a "not found" index of -1 would drop a character.
    seq_end = match.start() if match else len(results)

    seqdata = results[seq_start:seq_end]
    seqdata = re.sub(r"<[^>]*>", "", seqdata)   # strip HTML tags
    seqdata = re.sub(r"\s+", "\n", seqdata)     # one token per line
    return seqdata.strip() + "\n"
62