Package Bio :: Package EUtils :: Module Datatypes
[hide private]
[frames | no frames]

Source Code for Module Bio.EUtils.Datatypes

  1  """various EUtils datatypes""" 
  2   
  3  from __future__ import generators 
  4   
  5  import re, types 
  6   
class EUtilsError(Exception):
    """Base class for all EUtils-specific errors

    Contains a single error string -- use str(err) to get it.
    """
    pass


class EUtilsSearchError(EUtilsError):
    """Used when the ESearch XML says there is an ERROR

    The main error is in err.errmsg but more information
    may be available in err.errors or err.warnings.  Eg,
    the error message is often "Can't run executor" but
    you can get more information from the list of errors.

    """
    def __init__(self, errmsg, errors=None, warnings=None):
        """errmsg, errors = None, warnings = None

        'errmsg' -- the main error message
        'errors' -- optional list of error details (defaults to [])
        'warnings' -- optional list of warning details (defaults to [])
        """
        EUtilsError.__init__(self, errmsg)

        # A fresh list per instance; never share a mutable default.
        if errors is None:
            errors = []
        if warnings is None:
            warnings = []

        self.errmsg = errmsg
        self.errors = errors
        self.warnings = warnings

    def __repr__(self):
        return "%s(%r, %r, %r)" % (self.__class__.__name__,
                                   self.errmsg, self.errors, self.warnings)

    def __str__(self):
        """errmsg followed by the errors and warnings, if there are any

        Always returns the interpreter's native 'str' type.
        """
        text = self.errmsg
        if self.errors:
            text = text + "; ERRORS: " + ", ".join(map(str, self.errors))
        if self.warnings:
            text = text + "; WARNINGS: " + ", ".join(map(str, self.warnings))
        # __str__ must return a native str.  Unconditionally calling
        # .encode() produced bytes on Python 3 (making str(err) fail)
        # and could raise on Python 2 byte strings holding non-ASCII
        # data.  Only a Python 2 unicode object still needs encoding;
        # unencodable characters are replaced rather than letting the
        # formatting of an error message raise a second exception.
        if isinstance(text, str):
            return text
        return text.encode("latin1", "replace")
42 43 44 45 ####################################
class DBIds:
    """Store a list of identifiers for a database

    This is used as input for the '*_using_dbids' functions.

    Constructed with the database name and list of identifier strings.

    Supports len(), iteration, indexing (returns one identifier),
    slicing (returns a new DBIds), == / != (identifier order is
    ignored) and '-' (identifiers not present in another DBIds).
    """
    def __init__(self, db, ids):
        """db, ids

        'db' -- the database for those identifiers
        'ids' -- a list of identifiers for the given database
        """
        self.db = db
        self.ids = ids

    def __len__(self):
        """number of identifiers"""
        return len(self.ids)

    def __getitem__(self, i):
        """get an identifier or a subset of the DBIds

        An integer index returns the bare identifier.  A slice returns
        a new instance of this class for the same database.
        """
        if isinstance(i, slice):
            # Pass the slice object straight to the underlying sequence.
            # Filling in start=0 / stop=len by hand is only right for a
            # positive step: with a negative step (eg dbids[::-1]) it
            # silently produced an empty result.
            return self.__class__(self.db, self.ids[i])
        # XXX Should this return a DBIds as well?  Because it doesn't,
        # the 'item' method is needed.
        return self.ids[i]

    def item(self, i):
        """Get a DBIds containing the item at position i

        Can't use dbids[i] since that returns only the identifier.
        This returns a DBIds, which can be used for another request.
        """
        return self.__class__(self.db, [self.ids[i]])

    def __iter__(self):
        """Iterate over the list of identifiers"""
        return iter(self.ids)

    def __repr__(self):
        return "DBIds(%r, %r)" % (self.db, self.ids)

    def __eq__(self, other):
        """does this DBIds equal the other?

        The database names must match, but the identifiers
        themselves can be in any order.  Repeated identifiers are
        ignored: only the set of distinct identifiers is compared.
        """
        if self.ids == other.ids:
            return self.db == other.db
        if self.db != other.db:
            return False
        # Could be in a different order, and there may be non-unique
        # keys, so compare the distinct identifiers (dict keys are
        # used as a set).
        mine = {}
        for x in self.ids:
            mine[x] = 0
        theirs = {}
        for x in other.ids:
            theirs[x] = 0
        return mine == theirs

    def __ne__(self, other):
        """check if this isn't equal to the other DBIds"""
        return not self == other

    def __sub__(self, other):
        """DBIds of the identifiers in this set which aren't in the other

        Raises TypeError if the two DBIds are for different databases.
        The order (and any repeats) of the remaining identifiers is
        kept as in this DBIds.
        """
        if self.db != other.db:
            raise TypeError("Different databases: %r and %r" % (
                self.db, other.db))
        # dict keys give constant-time membership tests
        other_d = {}
        for x in other.ids:
            other_d[x] = 0
        new_ids = [x for x in self.ids if x not in other_d]
        return DBIds(self.db, new_ids)
129
class WithinNDays:
    """Limit a search to records dated within the last N days

    For example, to find what PubMed has published about rabies
    during the last 20 days:

      client.search("rabies", daterange = WithinNDays(20, "pdat"))
    """
    def __init__(self, ndays, datetype=None):
        """ndays, datetype = None

        'ndays' -- how many days back from now to look (becomes the
              'reldate' field of the search)
        'datetype' -- which date field to restrict on (defaults to
              Entrez date, which is "edat")
        """
        self.datetype = datetype
        self.ndays = ndays

    def get_query_params(self):
        """returns the fields to add to the EUtils query

        This is an internal implementation feature you can ignore.
        """
        params = {}
        params["reldate"] = self.ndays
        params["datetype"] = self.datetype
        return params
# Could actually check the month and day fields...
# Anchored with \Z rather than $: '$' also matches just before a
# trailing newline, which let values like "1995\n" through validation.
_date_re_match = re.compile(r"\d{4}(/\d\d(/\d\d)?)?\Z").match


class DateRange:
    """Restrict a search to matches within a date range

    Some examples:
      matches between 1995 and 2000 -- DateRange("1995", "1999/12/31")
      matches before 1990 -- DateRange(maxdate = "1990/01/01")
      matches in 2002 or later -- DateRange(mindate = "2002/01/01")
      matches in June or July of 2001 -- DateRange("2001/06", "2001/07")

    """
    def __init__(self, mindate=None, maxdate=None, datetype=None):
        """mindate = None, maxdate = None, datetype = None

        'mindate' -- matches must be on or after this date
        'maxdate' -- matches must be on or before this date
        'datetype' -- the date field to use for the search (defaults
              to Entrez date, which is "edat")

        At least one of mindate or maxdate must be specified.
        If mindate is omitted, all results on or before maxdate are returned.
        If maxdate is omitted, all results on or after mindate are returned.

        Dates must be formatted as 'YYYY/MM/DD', 'YYYY/MM', or 'YYYY'.

        Raises TypeError if neither date is given or if a given date
        is not in one of those formats.
        """
        if mindate is None and maxdate is None:
            raise TypeError("Must specify at least one of mindate or maxdate")

        # Check mindate first so that it is the one reported when both
        # values are malformed.
        for label, value in (("mindate", mindate), ("maxdate", maxdate)):
            if value is not None and _date_re_match(value) is None:
                raise TypeError(
                    "%s is not in YYYY/MM/DD format (month and "
                    "day are optional): %r" % (label, value))
        self.mindate = mindate
        self.maxdate = maxdate
        self.datetype = datetype

    def get_query_params(self):
        """returns the fields to add to the EUtils query

        This is an internal implementation feature you can ignore.
        """
        # NOTE(review): an omitted bound is sent as the literal string
        # "None" because of str(); kept as-is since the consumer of
        # this dict is not visible here -- confirm that is intended.
        return {"mindate": str(self.mindate),
                "maxdate": str(self.maxdate),
                "datetype": self.datetype}
207 208 #################################### 209
class Expression:
    """Root class for the Expression reported in the eSearch output

    NCBI rewrites the request while processing it and hands the
    translated expression back with the search results.  Use
    str(expression) to get it as an Entrez string.

    iter(expression) walks the expression tree in postfix order.
    """
    def __and__(self, other):
        """intersection of two expressions"""
        both = And(self, other)
        return both

    def __or__(self, other):
        """union of two expressions"""
        either = Or(self, other)
        return either

    def __iter__(self):
        """Traverse the tree in postfix order (subclasses implement it)"""
        raise NotImplementedError
228
class Term(Expression):
    """A single Expression Term, the leaf node of the expression tree

    The fields are:
      term -- a word from the search term
      field -- the field searched by this term
      count -- the number of records matching this word
      explode -- no idea
    """
    def __init__(self, term, field, count, explode):
        self.term, self.field = term, field
        self.count, self.explode = count, explode

    def __str__(self):
        return self.term

    def __iter__(self):
        """Traverse the tree in postfix order; a leaf yields only itself"""
        yield self
248
class BinaryOp(Expression):
    """Common base for binary expressions: a left and a right child"""
    def __init__(self, left, right):
        self.left, self.right = left, right

    def __iter__(self):
        """Traverse the tree in postfix order

        Everything under the left child comes first, then everything
        under the right child, and finally this node.
        """
        for node in self.left:
            yield node
        for node in self.right:
            yield node
        yield self
# NCBI processes booleans left to right (no precedence).
# I'm not going to worry about using the minimal number of
# parentheses; every AND/OR/NOT expression is always wrapped
# in its own pair.
class And(BinaryOp):
    """intersection of two subexpressions"""
    def __str__(self):
        operands = (self.left, self.right)
        return "(%s AND %s)" % operands
269
class Or(BinaryOp):
    """union of two subexpressions"""
    def __str__(self):
        operands = (self.left, self.right)
        return "(%s OR %s)" % operands
274 275 # NOT and BUTNOT
class Not(BinaryOp):
    """the left child's set minus the elements of the right child

    This is used for something like "poliovirus NOT polio"
    """
    def __str__(self):
        operands = (self.left, self.right)
        return "(%s NOT %s)" % operands
283
class Range(BinaryOp):
    """Used to store a date range

    Both children must carry the same 'field'; TypeError is raised
    otherwise.
    """
    def __init__(self, left, right):
        if left.field != right.field:
            raise TypeError("dates must have the same field: %r and %r" %
                            (left.field, right.field))
        BinaryOp.__init__(self, left, right)

    def __str__(self):
        """'<left>:<right>[<field>]' built from the two terms

        Anything from the last "[" onwards is dropped from each term
        before the two are joined.
        """
        bounds = []
        for node in (self.left, self.right):
            text = node.term
            cut = text.rfind("[")
            if cut == -1:
                # no "[" in the term: keep all of it
                cut = len(text)
            bounds.append(text[:cut])
        return "%s:%s[%s]" % (bounds[0], bounds[1], self.left.field)
304 305 ################## 306
class SearchResult:
    """Holds what came back from a database search

    Attributes are:
      count -- total number of matches to the query
      retmax -- total number of identifiers requested
      retstart -- a search can return a portion of the total
          number of results.  retstart is the offset into this list
      ids -- matching identifiers (may be a subset of the full list)
      translation_set -- dict mapping an input name to the canonical
          form preferred by NCBI
      expression -- the full query as understood by NCBI
      webenv -- the WebEnv string (if use_history is set)
      query_key -- the query_key (if use_history is set)
      errors -- list of Problems in the ErrorList
      warnings -- list of Problems in the WarningList
      timestamp -- timestamp (from time.time()) when this record
          was received from the server.

    'ids' is a plain list of identifiers rather than a DBIds because
    the output from NCBI's eSearch doesn't include the database name.
    """
    def __init__(self,
                 count, retmax, retstart, ids,
                 translation_set, expression,
                 webenv, query_key, errors,
                 warnings, timestamp):
        # paging information
        self.count, self.retmax, self.retstart = count, retmax, retstart
        self.ids = ids
        # how NCBI interpreted the query
        self.translation_set, self.expression = translation_set, expression
        # history handles
        self.webenv, self.query_key = webenv, query_key
        # diagnostics and bookkeeping
        self.errors, self.warnings = errors, warnings
        self.timestamp = timestamp
345
class PostResult:
    """Holds what came back from a Post

    Attributes are:
      webenv -- the WebEnv string
      query_key -- the query_key
      invalid_ids -- the invalid_ids value passed to the constructor
      timestamp -- timestamp (from time.time()) when this record
          was received from the server.
    """
    def __init__(self, webenv, query_key, invalid_ids, timestamp):
        self.webenv, self.query_key = webenv, query_key
        self.invalid_ids, self.timestamp = invalid_ids, timestamp
360
class Summary:
    """Holds the information returned by an eSummary call

    Attributes are:
      id -- the identifier string for this record
      dataitems -- an OrderedDictList containing the parsed Item
           elements for this Summary.
    """
    def __init__(self, id, dataitems):
        self.id, self.dataitems = id, dataitems

    def __repr__(self):
        fields = (self.id, self.dataitems)
        return "Summary(%r, %r)" % fields

    def __str__(self):
        fields = (self.id, self.dataitems)
        return "<Summary id=%s, %s>" % fields
376 377 # XXX Use the new 'datetime' module when 2.3 is out!
class Date:
    """Minimal storage for a calendar date

    Parameters and attributes are 'year', 'month', and 'day'
    """
    def __init__(self, year, month, day):
        self.year, self.month, self.day = year, month, day

    def __repr__(self):
        name = self.__class__.__name__
        return "%s(%r, %r, %r)" % (name, self.year, self.month, self.day)

    def __str__(self):
        parts = (self.year, self.month, self.day)
        return "%4d/%02d/%02d" % parts

    def timetuple(self):
        """Return the 9-tuple needed by various time functions"""
        # NOTE: the last three fields (day of week, day of year,
        # isDST) are not computed; they are filled with 0, 0 and -1.
        return (self.year, self.month, self.day) + (0, 0, 0, 0, 0, -1)

    def __eq__(self, other):
        """Are these two times equal?"""
        if not self.year == other.year:
            return False
        if not self.month == other.month:
            return False
        return self.day == other.day

    def __ne__(self, other):
        """Are these two times dissimilar?"""
        return not (self == other)
405 406 407 # possible errors from eSearch 408 # <!ELEMENT ErrorList (PhraseNotFound*,FieldNotFound*)> 409 # <!ELEMENT WarningList (PhraseIgnored*, 410 # QuotedPhraseNotFound*, 411 # OutputMessage*)> 412
class Problem:
    """Common base for Search Errors and Warnings

    A problem has:
      text -- the text of the problem
      severity -- either Problem.ERROR or Problem.WARNING
      category -- how NCBI categorizes this problem

    'severity' and 'category' are not set here; they are expected
    to be supplied by subclasses as class attributes.
    """
    ERROR = "ERROR"
    WARNING = "WARNING"

    def __init__(self, text):
        self.text = text

    def __eq__(self, other):
        if not self.text == other.text:
            return False
        if not self.severity == other.severity:
            return False
        return self.category == other.category

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        name = self.__class__.__name__
        return "%s(%r)" % (name, self.text)

    def __str__(self):
        return str(self.text)
435
class ErrorProblem(Problem):
    """Base class for Problems whose severity is Problem.ERROR"""
    severity = Problem.ERROR
438
class WarningProblem(Problem):
    """Base class for Problems whose severity is Problem.WARNING"""
    severity = Problem.WARNING
441
class PhraseNotFound(ErrorProblem):
    """Error Problem with the category "PhraseNotFound" """
    category = "PhraseNotFound"
444
class FieldNotFound(ErrorProblem):
    """Error Problem with the category "FieldNotFound" """
    # Redundant with the value inherited from ErrorProblem.
    severity = Problem.ERROR
    category = "FieldNotFound"
448
class PhraseIgnored(WarningProblem):
    """Warning Problem with the category "PhraseIgnored" """
    category = "PhraseIgnored"
451
class QuotedPhraseNotFound(WarningProblem):
    """Warning Problem with the category "QuotedPhraseNotFound" """
    category = "QuotedPhraseNotFound"
454
class OutputMessage(WarningProblem):
    """Warning Problem with the category "OutputMessage" """
    category = "OutputMessage"
457
def _build_problem_mapping():
    """Internal: make a map from category name (in XML) to the right class

    Scans this module's globals for Problem subclasses that define a
    'category' attribute and keys each class by that category.
    """
    by_category = {}
    for candidate in globals().values():
        try:
            if not issubclass(candidate, Problem):
                continue
            if not hasattr(candidate, "category"):
                continue
            by_category[candidate.category] = candidate
        except TypeError:
            # issubclass() rejects anything that is not a class
            continue
    return by_category


problem_category_mapping = _build_problem_mapping()


# elinks with cmd=="neighbor"
class IdCheck:
    """Holds the result of an lcheck link for one record

    Attributes are:
      id -- the id of the requested record
      has_linkout -- boolean, either it does or doesn't
      has_neighbor -- boolean, either it does or doesn't
    """
    def __init__(self, id, has_linkout=0, has_neighbor=0):
        self.id = id
        self.has_linkout, self.has_neighbor = has_linkout, has_neighbor

    def __eq__(self, other):
        if not self.id == other.id:
            return False
        if not self.has_linkout == other.has_linkout:
            return False
        return self.has_neighbor == other.has_neighbor

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.id, self.has_linkout, self.has_neighbor)
        return "IdCheck(%r, %r, %r)" % fields
510
class LinkSetDb(object):
    """Used in eLink with cmd == neighbor

    Attributes are:
      dbto -- the links are TO this database name
      linkname -- the name for this set (eg, "pubmed_protein")
      links -- list of Links, one per matching record (includes score)
          List order is the same as the XML, which is ordered from
          most likely to least.  The identifier is from 'dbto'
      info -- ignored; this is only used as a warning when there is
          an empty list

    You can also use
      dbids -- get a DBIds of dbto and the identifiers in each Link
    """
    def __init__(self, dbto, linkname, links=None, info=None):
        if links is None and info is None:
            raise TypeError("At least one of 'links' and 'info' must be set")
        if links is None:
            # only 'info' was supplied: there are no links
            links = []
        self.links = links
        self.linkname = linkname
        self.dbto = dbto

    def _get_dbids(self):
        ids = []
        for link in self.links:
            ids.append(link.id)
        return DBIds(self.dbto, ids)
    dbids = property(_get_dbids)

    def __eq__(self, other):
        if not self.dbto == other.dbto:
            return False
        if not self.linkname == other.linkname:
            return False
        return self.links == other.links

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbto, self.linkname, self.links)
        return "LinkSetDb(%r, %r, %r)" % fields
547
class NeighborLinkSet:
    """What an eLink neighbor search returned

    Attributes are:
      dbids -- the DBIds of the *REQUESTED* identifiers
      linksetdbs -- an OrderedMultiDict of LinkSetDb objects

    """
    def __init__(self, dbids, linksetdbs):
        self.dbids, self.linksetdbs = dbids, linksetdbs

    def __eq__(self, other):
        if not self.dbids == other.dbids:
            return False
        return self.linksetdbs == other.linksetdbs

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbids, self.linksetdbs)
        return "NeighborLinkSet(%r, %r)" % fields
567 568 # elinks with cmd in ("ncheck", "lcheck")
class CheckLinkSet(object):
    """What 'ncheck' and 'lcheck' searches returned

    This is used to check if a set of records has neighbors
    or links.

    Attributes are:
      dbfrom -- the database containing those records
      idchecks -- list of IdCheck objects, one per id

      dbids -- the DBIds made from dbfrom and the idchecks
    """
    def __init__(self, dbfrom, idchecks):
        self.dbfrom, self.idchecks = dbfrom, idchecks

    def _get_dbids(self):
        ids = []
        for idcheck in self.idchecks:
            ids.append(idcheck.id)
        return DBIds(self.dbfrom, ids)
    dbids = property(_get_dbids)

    def __eq__(self, other):
        if not self.dbfrom == other.dbfrom:
            return False
        return self.idchecks == other.idchecks

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbfrom, self.idchecks)
        return "CheckLinkSet(%r, %r)" % fields
596 597 598 # elinks with cmd == "llinks"
class Provider:
    """A Provider as it is listed in 'llinks' (LinkOut) results

    Attributes are:
      name -- name of the provider
      name_abbr -- an abbreviated name for the provider
      id -- a unique id for the provider
      url -- where to go for more information about the provider
      icon_url -- a small image to use for the provider

    """
    def __init__(self, name, name_abbr, id,
                 url=None, icon_url=None):
        self.name, self.name_abbr, self.id = name, name_abbr, id
        self.url, self.icon_url = url, icon_url

    def __eq__(self, other):
        # compared in this order, stopping at the first mismatch
        for attr in ("name", "name_abbr", "id", "url", "icon_url"):
            if not getattr(self, attr) == getattr(other, attr):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.name, self.name_abbr, self.id,
                  self.url, self.icon_url)
        return "Provider(%r, %r, %r, %r, %r)" % fields
628 629
class ObjUrl:
    """One ObjUrl: the LinkOut information for a record

    Attributes are:
      subject_types -- list of strings describing this link (0 or more)
      provider -- a Provider instance
      linkname -- a name used to categorize this link (optional)
      attributes -- list of attributes (text strings), (0 or more)
      url -- URL of the link (optional)
    """
    def __init__(self, subject_types, provider,
                 linkname=None, url=None, attributes=None):
        assert isinstance(subject_types, list)
        if attributes is None:
            # a fresh list per instance
            attributes = []
        self.subject_types, self.provider = subject_types, provider
        self.linkname, self.url = linkname, url
        self.attributes = attributes

    def __eq__(self, other):
        # compared in this order, stopping at the first mismatch
        for attr in ("linkname", "subject_types", "url",
                     "attributes", "provider"):
            if not getattr(self, attr) == getattr(other, attr):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.subject_types, self.provider, self.linkname,
                  self.url, self.attributes)
        return "ObjUrl(%r, %r, %r, %r, %r)" % fields
663
class IdUrlSet:
    """The ObjUrls belonging to the record with the given 'id'"""
    def __init__(self, id, objurls):
        self.id, self.objurls = id, objurls

    def __eq__(self, other):
        if not self.id == other.id:
            return False
        return self.objurls == other.objurls

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.id, self.objurls)
        return "IdUrlSet(%r, %r)" % fields
676
class LinksLinkSet:
    """What an 'llink' (LinkOut) search returned

    Finds links from records in a given database to external
    resources.

    Fields are:
      dbfrom -- the database in which search started
      idurlset -- a list of IdUrlSet, one for each identifier
    """

    def __init__(self, dbfrom, idurlset):
        self.dbfrom, self.idurlset = dbfrom, idurlset

    def __eq__(self, other):
        if not self.dbfrom == other.dbfrom:
            return False
        return self.idurlset == other.idurlset

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        fields = (self.dbfrom, self.idurlset)
        return "LinksLinkSet(%r, %r)" % fields
698