Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """ Notes about the diverses class of the restriction enzyme implementation. 
  12   
  13          RestrictionType is the type of all restriction enzymes. 
  14      ---------------------------------------------------------------------------- 
  15          AbstractCut implements some methods that are common to all enzymes. 
  16      ---------------------------------------------------------------------------- 
  17          NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  18                                  produced by the enzyme. 
  19                                  they correspond to the 4th field of the rebase 
  20                                  record emboss_e.NNN. 
  21                  0->NoCut    : the enzyme is not characterised. 
  22                  2->OneCut   : the enzyme produces one double strand cut. 
  23                  4->TwoCuts  : two double strand cuts. 
  24      ---------------------------------------------------------------------------- 
  25          Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  26                                  the enzyme. 
  27                                  Not implemented yet. 
  28      ---------------------------------------------------------------------------- 
  29          Palindromic,            if the site is palindromic or not. 
  30          NotPalindromic          allow some optimisations of the code. 
  31                                  No need to check the reverse strand 
  32                                  with palindromic sites. 
  33      ----------------------------------------------------------------------------                                     
  34          Unknown, Blunt,         represent the overhang. 
  35          Ov5, Ov3                Unknown is here for symmetry reasons and 
  36                                  corresponds to enzymes that are not characterised 
  37                                  in rebase. 
  38      ---------------------------------------------------------------------------- 
  39          Defined, Ambiguous,     represent the sequence of the overhang. 
  40          NotDefined              
  41                                  NotDefined is for enzymes not characterised in 
  42                                  rebase. 
  43                                   
  44                                  Defined correspond to enzymes that display a 
  45                                  constant overhang whatever the sequence. 
  46                                  ex : EcoRI. G^AATTC -> overhang :AATT 
  47                                              CTTAA^G 
  48   
  49                                  Ambiguous : the overhang varies with the 
  50                                  sequence restricted. 
  51                                  Typically enzymes which cut outside their 
  52                                  restriction site or (but not always) 
  53                                  inside an ambiguous site. 
  54                                  ex : 
  55                                  AcuI CTGAAG(22/20)  -> overhang : NN 
  56                                  AasI GACNNN^NNNGTC  -> overhang : NN 
  57                                       CTGN^NNNNNCAG 
  58   
  59              note : these 3 classes refer to the overhang, not the site. 
  60                 So the enzyme ApoI (RAATTY) is defined even if its restriction 
  61                 site is ambiguous. 
  62                                   
  63                      ApoI R^AATTY -> overhang : AATT -> Defined 
  64                           YTTAA^R 
  65                 Accordingly, blunt enzymes are always Defined even 
  66                 when they cut outside their restriction site. 
  67      ---------------------------------------------------------------------------- 
  68          Not_available,          as found in rebase file emboss_r.NNN files. 
  69          Commercially_available 
  70                                  allow the selection of the enzymes according to 
  71                                  their suppliers to reduce the quantity 
  72                                  of results. 
  73                                  Also will allow the implementation of buffer 
  74                                  compatibility tables. Not implemented yet. 
  75   
  76                                  the list of suppliers is extracted from 
  77                                  emboss_s.NNN 
  78      ---------------------------------------------------------------------------- 
  79          """ 
  80   
  81  import re 
  82   
  83  from sets import Set 
  84  import itertools 
  85   
  86  from Bio.Seq import Seq, MutableSeq 
  87  from Bio.Alphabet import IUPAC 
  88   
  89  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict,\ 
  90       typedict, suppliers as suppliers_dict 
  91  from Bio.Restriction.RanaConfig import * 
  92  from Bio.Restriction.PrintFormat import PrintFormat 
  93  from Bio.Restriction.DNAUtils import check_bases 
  94   
  95   
  96   
  97  matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN', 
  98              'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY', 
  99              'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY', 
 100              'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY', 
 101              'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY', 
 102              'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'} 
 103   
 104  DNA = Seq 
 105       
class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly:
        remove anything which is not IUPAC alphabet and then add a space
        in front of the sequence to get a biological index instead of a
        python index (i.e. index of the first base is 1 not 0).

    Retains information about the shape of the molecule: linear (default)
    or circular. Restriction sites are searched over the edges of circular
    sequences."""

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        if seq is a FormattedSeq, linear will have no effect on the
        shape of the sequence (the shape of seq is copied instead).

        Raises TypeError for any other type of seq."""
        if isinstance(seq, (Seq, MutableSeq)):
            stringy = seq.tostring()
            # Remember the case so that __getitem__ can give back lower
            # case slices when the input was lower case.
            self.lower = stringy.islower()
            # check_bases is defined in Bio.Restriction.DNAUtils; the class
            # docstring says the result is cleaned and padded with one
            # leading character (see __len__).
            self.data = check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        elif isinstance(seq, FormattedSeq):
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """len(FS) -> int. Number of bases, not counting the leading pad."""
        return len(self.data) - 1

    def __repr__(self):
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def __eq__(self, other):
        """FS == other -> bool.

        True if other is a FormattedSeq with the same repr, i.e. the same
        sequence and the same shape."""
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def __ne__(self, other):
        """FS != other -> bool. Negation of ==.

        Needed on Python 2, where != is not derived from __eq__."""
        return not self == other

    def circularise(self):
        """FS.circularise() -> circularise FS"""
        self.linear = False

    def linearise(self):
        """FS.linearise() -> linearise FS"""
        self.linear = True

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance"""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance"""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence."""
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        return a list of the matches of pattern in the sequence.
        the list is made of tuples (location, pattern.group).
        the latter is used with non palindromic sites.
        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition site."""
        if self.is_linear():
            data = self.data
        else:
            # A site spanning the origin needs at most size-1 bases from
            # the start of the sequence. Appending a full `size` bases
            # would let a site lying entirely at position 1 match a second
            # time in the appended copy.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """FS[i] -> instance of the original sequence class.

        The result is lower-cased when the original sequence was lower
        case."""
        if self.lower:
            return self.klass((self.data[i]).lower(), self.alphabet)
        return self.klass(self.data[i], self.alphabet)
203 204
class RestrictionType(type):
    """RestrictionType. Type from which derives all enzyme classes.

    Implement the operator methods."""

    def __init__(cls, name='', bases=(), dct=None):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        The class attribute compsite (a pattern string) is replaced by
        its compiled regular expression."""
        if dct is None:
            dct = {}
        # Forward the namespace actually received (the builtin `dict` type
        # used to be passed here by mistake).
        super(RestrictionType, cls).__init__(name, bases, dct)
        cls.compsite = re.compile(cls.compsite)

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        if other is an enzyme returns a batch of the two enzymes.
        if other is already a RestrictionBatch add enzyme to it.
        Raises TypeError for anything else."""
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        elif isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        else:
            raise TypeError

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    # Classic division (Python 2 without `from __future__ import division`)
    # behaves exactly like true division here.
    __div__ = __truediv__
    __rdiv__ = __rtruediv__

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        return the name of the enzyme."""
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        used with eval or exec will instantiate the enzyme."""
        return "%s" % cls.__name__

    def __len__(cls):
        """RE.__len__() -> int.

        length of the recognition site."""
        return cls.size

    def __hash__(cls):
        """hash(RE) -> int.

        Identity based, consistent with __eq__. Defined explicitly because
        Python 3 removes the inherited hash when __eq__ is overridden."""
        return id(cls)

    def __eq__(cls, other):
        """RE == other -> bool

        True if RE and other are the same enzyme."""
        return other is cls

    def __ne__(cls, other):
        """RE != other -> bool.

        isoschizomer strict, same recognition site, same restriction -> False
        all the others -> True

        Note: this is deliberately NOT the negation of ==; two distinct
        enzymes with the same charac compare neither == nor !=."""
        if not isinstance(other, RestrictionType):
            return True
        return cls.charac != other.charac

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction. -> True
        all the others : -> False"""
        if not isinstance(other, RestrictionType):
            return False
        return cls.site == other.site and cls.charac != other.charac

    def __mod__(cls, other):
        """a % b -> bool.

        Test compatibility of the overhang of a and b.
        True if a and b have compatible overhang.
        Raises TypeError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise TypeError('expected RestrictionType, got %s instead'
                            % type(other))
        return cls._mod1(other)

    def __ge__(cls, other):
        """a >= b -> bool.

        a is greater or equal than b if the a site is longer than b site.
        if their site have the same length sort by alphabetical order of their
        names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        return len(cls) == len(other) and cls.__name__ >= other.__name__

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        return len(cls) == len(other) and cls.__name__ > other.__name__

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ <= other.__name__

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ < other.__name__
390 391
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated."""

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        return a list of all the site of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
        A FormattedSeq is used as is (linear is then ignored).

        if linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut. """
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        # The sequence is stored on the enzyme class itself; _search reads it.
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """RE.all_suppliers -> print all the suppliers of RE.

        Prints the first element of every entry of the suppliers dictionary,
        sorted, one per line."""
        supply = [x[0] for x in suppliers_dict.values()]
        supply.sort()
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(cls, other):
        """RE.is_equischizomers(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction."""
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """RE.is_neoschizomers(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction."""
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """RE.is_isoschizomers(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site."""
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        return a sorted list of all the equischizomers of RE, RE excluded.
        if batch is supplied it is used instead of the default AllEnzymes.

        equischizomer <=> same site, same position of restriction."""
        if not batch:
            batch = AllEnzymes
        # RE itself is filtered out by identity; this also works when the
        # supplied batch does not contain RE (list.index used to raise
        # ValueError in that case).
        r = [x for x in batch if x is not cls and not cls != x]
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        return a sorted list of all the neoschizomers of RE.
        if batch is supplied it is used instead of the default AllEnzymes.

        neoschizomer <=> same site, different position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if cls >> x]
        r.sort()
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        return a sorted list of all the equischizomers and neoschizomers
        of RE, RE excluded.
        if batch is supplied it is used instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Same identity filter as in equischizomers: no ValueError when RE
        # is absent from the batch.
        r = [x for x in batch
             if x is not cls and ((cls >> x) or (not cls != x))]
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """RE.frequency() -> int.

        frequency of the site."""
        return cls.freq
512 513
class NoCut(AbstractCut):
    """Methods specific to the enzymes for which no cut is recorded.

    These are generally enzymes that have been only partially
    characterised, so that the way they cut the DNA is unknown, or enzymes
    whose cutting pattern is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    With these enzymes search() reports the start of the restriction site,
    since _modify leaves the match location untouched.

    Unknown and NotDefined are also part of the base classes of these
    enzymes; Unknown.catalyse raises NotImplementedError.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here."""
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here."""
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the position of a match of the enzyme pattern in the
        sequence. As no cut position is known, the location itself is the
        single value produced."""
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        same as _modify, for sites found on the antiparallel strand when
        the enzyme is not palindromic."""
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        All four cut positions are None for these enzymes."""
        return (None, None, None, None, cls.site)
594
class OneCut(AbstractCut):
    """Methods specific to the enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always True here."""
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here."""
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the position of a match of the enzyme pattern in the
        sequence; the value produced is the place where the enzyme cuts,
        i.e. location shifted by fst5.

        example :
            EcoRI pattern : GAATTC, EcoRI cuts after the G.
            if the pattern is matched at position 10 :
            EcoRI._modify(10) -> 11."""
        yield location + cls.fst5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        same as _modify, for sites found on the antiparallel strand when
        the enzyme is not palindromic: location shifted back by fst3."""
        yield location - cls.fst3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The two second-cut entries are None for these enzymes."""
        return (cls.fst5, cls.fst3, None, None, cls.site)
665 666
class TwoCuts(AbstractCut):
    """Methods specific to the enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here."""
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always True here."""
        return True

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the position of a match of the enzyme pattern in the
        sequence. Two values are produced, one per cutting site:
        location shifted by fst5, then location shifted by scd5."""
        yield location + cls.fst5
        yield location + cls.scd5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        same as _modify, for sites found on the antiparallel strand when
        the enzyme is not palindromic: location shifted back by fst3,
        then by scd3."""
        yield location - cls.fst3
        yield location - cls.scd3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site."""
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)
739 740
class Meth_Dep(AbstractCut):
    """Information about methylation.

    Enzymes of this class possess a site which is methylable."""

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable. Always True here."""
        return True
752
class Meth_Undep(AbstractCut):
    """Information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation."""

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable. Always False here."""
        return False
764
class Palindromic(AbstractCut):
    """Methods specific to the enzymes which are palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        search implementation for palindromic enzymes: every match of the
        compiled site in cls.dna is converted to its cut position(s) by
        _modify. The result is stored in cls.results and returned."""
        cuts = []
        # The second member of each pair (the match's group method) is only
        # needed for non palindromic sites.
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            # _drop is defined outside this class; it may replace
            # cls.results, hence the attribute is re-read below.
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome. Always True here."""
        return True
795 796
class NonPalindromic(AbstractCut):
    """Methods specific to the enzymes which are not palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        search implementation for non palindromic enzymes. Matches for
        which the regex group named after the enzyme is set are converted
        with _modify, the other matches with _rev_modify (these are also
        kept in cls.on_minus). The sorted result is stored in cls.results
        and returned."""
        enzyme_name = str(cls)
        to_plus = cls._modify
        to_minus = cls._rev_modify
        plus_cuts = cls.results = []
        minus_cuts = cls.on_minus = []
        for start, group in cls.dna.finditer(cls.compsite, cls.size):
            # presumably the compiled site names its sense-strand
            # alternative after the enzyme -- confirm where compsite is built
            if group(enzyme_name):
                plus_cuts.extend(to_plus(start))
            else:
                minus_cuts.extend(to_minus(start))
        plus_cuts.extend(minus_cuts)
        if plus_cuts:
            plus_cuts.sort()
            # _drop is defined outside this class; it may replace
            # cls.results, hence the attribute is re-read below.
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome. Always False here."""
        return False
836
class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Always raises NotImplementedError: the way these enzymes restrict
        the dna is unknown, so no fragment can be computed.

        The signature mirrors the catalyse method of the other overhang
        classes."""
        raise NotImplementedError('%s restriction is unknown.'
                                  % cls.__name__)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return 'unknown'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        list of all the enzymes that share compatible end with RE.
        Always empty here, whatever the batch. batch is accepted so that
        the call signature matches Blunt.compatible_end."""
        return []

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Always False here."""
        return False
917
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = cls.search(dna, linear)
        # search() leaves the formatted sequence on the class.
        d = cls.dna
        if not r:
            # No site: the whole sequence is the only fragment.
            return d[1:],
        # Fragments delimited by two consecutive sites (none if one site).
        between = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the fragments in
            # between, then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)

    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always True here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. all the blunt cutters.
        if batch is not supplied (or is empty) AllEnzymes is used."""
        if not batch:
            batch = AllEnzymes
        # Iterate the batch that was asked for (AllEnzymes used to be
        # scanned unconditionally, ignoring the argument).
        r = [x for x in iter(batch) if x.is_blunt()]
        r.sort()
        return r

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        True if other is a blunt cutter too."""
        return issubclass(other, Blunt)
1036
class Ov5(AbstractCut) :
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True) :
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # cls.dna is read after the search call (presumably set by it --
        # TODO confirm against the search implementation).
        seq = cls.dna
        if not cuts :
            # no site : the whole sequence, as a 1-tuple.
            # NOTE(review): slices start at 1, not 0 -- presumably positions
            # are 1-based in this sequence type; confirm.
            return seq[1:],
        # fragments delimited by two consecutive sites (empty for one site).
        inner = [seq[cuts[i]:cuts[i+1]] for i in range(len(cuts)-1)]
        if seq.is_linear() :
            # START -> FIRST site, site -> site, LAST site -> END.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else :
            # circular : the first fragment bridges LAST site to FIRST site.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls) :
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls) :
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls) :
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls) :
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "5' overhang"
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None) :
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE : the 5' overhang cutters x for which x % RE is true.
        if batch is not given (or is empty) AllEnzymes is searched."""
        if not batch :
            batch = AllEnzymes
        # iterate over batch : the original looped over AllEnzymes here,
        # which silently ignored the batch argument.
        found = [enz for enz in batch if enz.is_5overhang() and enz % cls]
        found.sort()
        return found
    compatible_end = classmethod(compatible_end)

    def _mod1(cls, other) :
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang cutter can be compatible; the comparison of
        the overhangs themselves is delegated to RE._mod2(other)."""
        if not issubclass(other, Ov5) :
            return False
        return cls._mod2(other)
    _mod1 = classmethod(_mod1)
1154 1155
class Ov3(AbstractCut) :
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True) :
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # cls.dna is read after the search call (presumably set by it --
        # TODO confirm against the search implementation).
        seq = cls.dna
        if not cuts :
            # no site : the whole sequence, as a 1-tuple.
            # NOTE(review): slices start at 1, not 0 -- presumably positions
            # are 1-based in this sequence type; confirm.
            return seq[1:],
        # fragments delimited by two consecutive sites (empty for one site).
        inner = [seq[cuts[i]:cuts[i+1]] for i in range(len(cuts)-1)]
        if seq.is_linear() :
            # START -> FIRST site, site -> site, LAST site -> END.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else :
            # circular : the first fragment bridges LAST site to FIRST site.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls) :
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls) :
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls) :
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls) :
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "3' overhang"
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None) :
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE : the 3' overhang cutters x for which x % RE is true.
        if batch is not given (or is empty) AllEnzymes is searched."""
        if not batch :
            batch = AllEnzymes
        # iterate over batch : the original looped over AllEnzymes here,
        # which silently ignored the batch argument.
        found = [enz for enz in batch if enz.is_3overhang() and enz % cls]
        found.sort()
        return found
    compatible_end = classmethod(compatible_end)

    def _mod1(cls, other) :
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang cutter can be compatible; the comparison of
        the overhangs themselves is delegated to RE._mod2(other)."""
        if not issubclass(other, Ov3) :
            return False
        return cls._mod2(other)
    _mod1 = classmethod(_mod1)
1276 1277
class Defined(AbstractCut) :
    """Implement the methods specific to the enzymes for which the overhang
    and the cut are not variable.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes :
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    def _drop(cls) :
        """RE._drop() -> None.

        for internal use only.

        drop the sites that are situated outside the sequence in linear
        sequence. modify the index for site in circular sequences.
        RE.results is updated; nothing is returned."""
        #
        #   The cut positions can fall outside the sequence because the
        #   distance from the site to the cut is added after the site has
        #   been found (_modify and _rev_modify methods).
        #   Linear sequence   : such positions are discarded.
        #   Circular sequence : the site is in the sequence, so the position
        #                       is wrapped around instead of being dropped.
        #
        size = len(cls.dna)
        if cls.dna.is_linear() :
            # skip the leading positions < 1, then keep positions while they
            # are < size (stops at the first position that is not).
            tail = itertools.dropwhile(lambda pos : pos < 1, cls.results)
            cls.results = list(itertools.takewhile(lambda pos : pos < size,
                                                   tail))
            return
        positions = cls.results
        # from the front : shift positions < 1 forward by one sequence length.
        for index, location in enumerate(positions) :
            if location >= 1 :
                break
            positions[index] += size
        # from the back : shift positions > size backward by one length.
        for index, location in enumerate(positions[::-1]) :
            if location <= size :
                break
            positions[-(index+1)] -= size
        return
    _drop = classmethod(_drop)

    def is_defined(cls) :
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return True
    is_defined = classmethod(is_defined)

    def is_ambiguous(cls) :
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return False
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(cls) :
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return False
    is_unknown = classmethod(is_unknown)

    def elucidate(cls) :
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        plus = cls.fst5
        minus = cls.fst3
        site = cls.site
        uncut_inside = plus == 0 and minus == 0
        if cls.cut_twice() :
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang() :
            if uncut_inside :
                return 'N^' + cls.site + '_N'
            if minus == 0 :
                return site[:plus] + '^' + site[plus:] + '_N'
            return site[:plus] + '^' + site[plus:minus] + '_' + site[minus:]
        if cls.is_blunt() :
            return site[:plus] + '^_' + site[plus:]
        # remaining case : 3' overhang.
        if uncut_inside :
            return 'N_' + site + '^N'
        return site[:minus] + '_' + site[minus:plus] + '^' + site[plus:]
    elucidate = classmethod(elucidate)

    def _mod2(cls, other) :
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other."""
        #
        #   Identical overhang sequences are compatible. Otherwise, when
        #   other is Ambiguous, the decision is left to other._mod2(RE).
        #
        if other.ovhgseq == cls.ovhgseq :
            return True
        if issubclass(other, Ambiguous) :
            return other._mod2(cls)
        return False
    _mod2 = classmethod(_mod2)
1411 1412
class Ambiguous(AbstractCut) :
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes :
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    def _drop(cls) :
        """RE._drop() -> None.

        for internal use only.

        drop the sites that are situated outside the sequence in linear
        sequence. modify the index for site in circular sequences.
        RE.results is updated; nothing is returned."""
        length = len(cls.dna)
        if cls.dna.is_linear() :
            # skip the leading positions < 1, then keep positions while they
            # are < length (stops at the first position that is not).
            tail = itertools.dropwhile(lambda x : x < 1, cls.results)
            cls.results = list(itertools.takewhile(lambda x : x < length,
                                                   tail))
            return
        # circular : wrap the out-of-range positions around the sequence,
        # first from the front (< 1), then from the back (> length).
        for index, location in enumerate(cls.results) :
            if location < 1 :
                cls.results[index] += length
            else :
                break
        for index, location in enumerate(cls.results[::-1]) :
            if location > length :
                cls.results[-(index+1)] -= length
            else :
                break
        return
    _drop = classmethod(_drop)

    def is_defined(cls) :
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False
    is_defined = classmethod(is_defined)

    def is_ambiguous(cls) :
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return True
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(cls) :
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return False
    is_unknown = classmethod(is_unknown)

    def _mod2(cls, other) :
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        The overhang of RE is turned into a regular expression which is
        matched against the overhang of other."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq) :
            return False
        pattern = cls.ovhgseq
        for base in cls.ovhgseq :
            if base == 'N' :
                # N stands for any base.
                pattern = pattern.replace('N', '.')
            elif base in 'RYWMSKHDBV' :
                # other ambiguity codes : character class built from the
                # module-level matching table.
                pattern = pattern.replace(base, '[' + matching[base] + ']')
            # A, T, C, G are kept as they are.
        if re.match(pattern, other.ovhgseq) :
            return True
        return False
    _mod2 = classmethod(_mod2)

    def elucidate(cls) :
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt enzyme whose cut lies within
        0..len(RE) (not expected for this class)."""
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # the result is named rep (not re) so that the re module is not
        # shadowed inside this method.
        if cls.cut_twice() :
            rep = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang() :
            if f3 == f5 == 0 :
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3+length <= length :
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length :
                rep = site[:f5] + '^' + site[f5:] + f3*'N' + '_N'
            elif 0 <= f3+length <= length :
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3+length < 0 :
                # was 'N^'*abs(f5)*'N' (str * int * str -> TypeError);
                # written like the sibling branches.
                rep = 'N^' + abs(f5)*'N' + '_' + abs(length+f3)*'N' + site
            elif f5 > length :
                rep = site + (f5-length)*'N'+'^'+(length+f3-f5)*'N'+'_N'
            else :
                rep = 'N^' + abs(f5) * 'N' + site + f3*'N' + '_N'
        elif cls.is_blunt() :
            if f5 < 0 :
                rep = 'N^_' + abs(f5)*'N' + site
            elif f5 > length :
                rep = site + (f5-length)*'N' + '^_N'
            else :
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (cls.name, f5))
        else :
            # 3' overhang.
            if f3 == 0 :
                if f5 == 0 :
                    rep = 'N_' + site + '^N'
                else :
                    rep = site + '_' + (f5-length)*'N' + '^N'
            elif 0 < f3+length <= length and 0 <= f5 <= length :
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3+length <= length :
                rep = site[:f3] + '_' + site[f3:] + (f5-length)*'N' + '^N'
            elif 0 <= f5 <= length :
                rep = 'N_' + 'N'*(f3+length) + site[:f5] + '^' + site[f5:]
            elif f3 > 0 :
                rep = site + f3*'N' + '_' + (f5-f3-length)*'N' + '^N'
            elif f5 < 0 :
                rep = 'N_' + abs(f3-f5+length)*'N' + '^' + abs(f5)*'N' + site
            else :
                rep = ('N_' + abs(f3+length)*'N' + site
                       + (f5-length)*'N' + '^N')
        return rep
    elucidate = classmethod(elucidate)
1579 1580
class NotDefined(AbstractCut) :
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated."""

    def _drop(cls) :
        """RE._drop() -> None.

        for internal use only.

        linear sequence   : nothing is dropped, RE.results is left untouched.
        circular sequence : the positions lying outside the sequence are
                            wrapped around, in place, in RE.results."""
        if cls.dna.is_linear() :
            return
        length = len(cls.dna)
        # from the front : shift positions < 1 forward by one length.
        for index, location in enumerate(cls.results) :
            if location < 1 :
                cls.results[index] += length
            else :
                break
        # from the back : shift positions > length backward by one length.
        # The list must be walked in REVERSE ([::-1]) for the index
        # -(index+1) to designate the element being tested; the original
        # used [:-1], which tested the front elements while modifying the
        # back ones.
        for index, location in enumerate(cls.results[::-1]) :
            if location > length :
                cls.results[-(index+1)] -= length
            else :
                break
        return
    _drop = classmethod(_drop)

    def is_defined(cls) :
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False
    is_defined = classmethod(is_defined)

    def is_ambiguous(cls) :
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return False
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(cls) :
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return True
    is_unknown = classmethod(is_unknown)

    def _mod2(cls, other) :
        """RE._mod2(other) -> never returns, always raise a ValueError.

        for internal use only

        test for the compatibility of restriction ending of RE and other."""
        #
        #   Normally we should not arrive here. But well better safe than sorry.
        #   the overhang is not defined we are compatible with nobody.
        #   An error is raised rather than returning False quietly.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(cls), str(other), str(cls)))
    _mod2 = classmethod(_mod2)

    def elucidate(cls) :
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        # cut profile unknown : the site is shown between question marks.
        return '? %s ?' % cls.site
    elucidate = classmethod(elucidate)
1686 1687
class Commercially_available(AbstractCut) :
    #
    #   Recent addition to Rebase make this naming convention uncertain.
    #   May be better to says enzymes which have a supplier.
    #
    """Implement the methods specific to the enzymes which are commercially
    available.

    Internal use only. Not meant to be instantiated."""

    def suppliers(cls) :
        """RE.suppliers() -> print the suppliers of RE.

        one supplier name per line, each followed by a comma.
        return None."""
        for code, entry in suppliers_dict.items() :
            # entry[0] is used as the supplier name.
            if code in cls.suppl :
                print(entry[0] + ',')
        return
    suppliers = classmethod(suppliers)

    def supplier_list(cls) :
        """RE.supplier_list() -> list.

        list of the supplier names for RE."""
        names = []
        for code, entry in suppliers_dict.items() :
            if code in cls.suppl :
                names.append(entry[0])
        return names
    supplier_list = classmethod(supplier_list)

    def buffers(cls, supplier) :
        """RE.buffers(supplier) -> string.

        not implemented yet. (currently returns None)"""
        return
    buffers = classmethod(buffers)

    def is_comm(cls) :
        """RE.is_comm() -> bool.

        True if RE has suppliers."""
        return True
    is_comm = classmethod(is_comm)
1727 1728
class Not_available(AbstractCut) :
    """Implement the methods specific to the enzymes which are not commercially
    available.

    Internal use only. Not meant to be instantiated."""

    def suppliers() :
        """RE.suppliers() -> None.

        there is no supplier to print for these enzymes."""
        return None
    suppliers = staticmethod(suppliers)

    def supplier_list(cls) :
        """RE.supplier_list() -> list.

        list of the supplier names for RE : always empty here."""
        return list()
    supplier_list = classmethod(supplier_list)

    def buffers(cls, supplier) :
        """RE.buffers(supplier) -> string.

        not implemented yet.
        always raise a TypeError : the enzyme has no supplier."""
        raise TypeError("Enzyme not commercially available.")
    buffers = classmethod(buffers)

    def is_comm(cls) :
        """RE.is_comm() -> bool.

        True if RE has suppliers."""
        return False
    is_comm = classmethod(is_comm)
1760 1761 1762 ############################################################################### 1763 # # 1764 # Restriction Batch # 1765 # # 1766 ############################################################################### 1767 1768
class RestrictionBatch(Set) :
    """Set of restriction enzymes which can be searched against a sequence
    in one call.

    The last sequence searched and the result of that search are kept
    (already_mapped, mapping) so that repeating the same search returns the
    stored result."""

    def __init__(self, first=(), suppliers=()) :
        """RestrictionBatch([sequence [, suppliers]]) -> new RestrictionBatch.

        first     : iterable of enzymes (or enzyme names), each one is
                    checked with B.format().
        suppliers : iterable of supplier codes (keys of suppliers_dict);
                    all the enzymes of these suppliers are added.

        raise a ValueError if an element of first is not an enzyme.
        raise a KeyError if a code of suppliers is unknown."""
        enzymes = [self.format(x) for x in first]
        # the enzyme names come from the module's own supplier table.
        enzymes += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        Set.__init__(self, enzymes)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = DNA('')
        # codes of the suppliers used to build the batch. This attribute is
        # needed by add_supplier() and current_suppliers() and was never
        # initialised before.
        self.suppliers = list(suppliers)

    def __str__(self) :
        """names joined with '+'; from 5 enzymes on, only the two first and
        the two last names (alphabetical order) are shown."""
        names = self.elements()
        if len(self) < 5 :
            return '+'.join(names)
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self) :
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other) :
        """other in B -> bool. False if other is not a restriction enzyme."""
        try :
            other = self.format(other)
        except ValueError : # other is not a restriction enzyme
            return False
        return Set.__contains__(self, other)

    def __div__(self, other) :
        """B / other -> B.search(other)."""
        return self.search(other)

    def __rdiv__(self, other) :
        """other / B -> B.search(other)."""
        return self.search(other)

    def get(self, enzyme, add=False) :
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        raise a ValueError as well if enzyme is not in B and add is False."""
        e = self.format(enzyme)
        if e in self :
            return e
        if add :
            self.add(e)
            return e
        raise ValueError('enzyme %s is not in RestrictionBatch' % e.__name__)

    def lambdasplit(self, func) :
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True."""
        kept = list(filter(func, self))
        new = RestrictionBatch()
        # the content of the new set is written directly (no type check).
        new._data = dict.fromkeys(kept, True)
        return new

    def add_supplier(self, letter) :
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code."""
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for name in supplier[1] :
            self.add_nocheck(eval(name))
        return

    def current_suppliers(self) :
        """B.current_suppliers() -> list.

        return a sorted list of the suppliers which have been used to
        create the batch."""
        suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
        suppl_list.sort()
        return suppl_list

    def __iadd__(self, other) :
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other) :
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other) :
        """B.remove(other) -> remove other from B if other is a RestrictionType.

        Safe Set.remove method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B."""
        return Set.remove(self, self.format(other))

    def add(self, other) :
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe Set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return Set.add(self, self.format(other))

    def add_nocheck(self, other) :
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return Set.add(self, other)

    def format(self, y) :
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if y can be evaluated to a RestrictionType return eval(y)
        raise a Value Error in all other case."""
        if isinstance(y, RestrictionType) :
            return y
        # NOTE(review): eval() runs arbitrary expressions; y must never come
        # from an untrusted source.
        # The expression is evaluated only once : the original evaluated
        # str(y) for the test and then y itself, which fails when y is not
        # a string.
        try :
            candidate = eval(str(y))
        except (NameError, SyntaxError) :
            candidate = None
        if isinstance(candidate, RestrictionType) :
            return candidate
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y) :
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType.
        the exceptions raised by the evaluation of str(y) (NameError for an
        unknown name for instance) are not caught."""
        return isinstance(y, RestrictionType) or \
               isinstance(eval(str(y)), RestrictionType)

    def split(self, *classes, **wanted) :
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        keep the enzymes which are a subclass of every class of classes.
        a keyword class.__name__ = False reverses the test for that class :
        the enzymes kept must NOT be a subclass of it.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions."""
        def splittest(element) :
            for klass in classes :
                expected = wanted.get(klass.__name__, True)
                is_sub = issubclass(element, klass)
                if (is_sub and not expected) or (expected and not is_sub) :
                    return False
            return True
        kept = [k for k in self if splittest(k)]
        new = RestrictionBatch()
        # the content of the new set is written directly (no type check).
        new._data = dict.fromkeys(kept, True)
        return new

    def elements(self) :
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically."""
        names = [str(e) for e in self]
        names.sort()
        return names

    def as_string(self) :
        """B.as_string() -> list.

        return a list of the name of the elements of B."""
        return [str(e) for e in self]

    def suppl_codes(cls) :
        """B.suppl_codes() -> dict

        letter code for the suppliers"""
        return dict([(k, v[0]) for k, v in suppliers_dict.items()])
    suppl_codes = classmethod(suppl_codes)

    def show_codes(cls) :
        """B.show_codes() -> print the letter codes for the suppliers.

        one 'code = name' line per supplier. return None."""
        supply = [' = '.join(i) for i in cls.suppl_codes().items()]
        print('\n'.join(supply))
        return
    show_codes = classmethod(show_codes)

    def search(self, dna, linear=True) :
        """B.search(dna[, linear]) -> dict.

        map every enzyme of B to the result of its own search on dna.
        dna is a sequence or an already formatted sequence (FormattedSeq);
        in the latter case dna.linear is used and linear is ignored.
        The result is stored in B.mapping and returned as it is when the
        same (dna, linear) pair is searched again.

        raise a TypeError if dna is of another type."""
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        if isinstance(dna, DNA) :
            key = (dna, linear)
            needs_formatting = True
        elif isinstance(dna, FormattedSeq) :
            key = (dna, dna.linear)
            needs_formatting = False
        else :
            raise TypeError(
                "Expected Seq or MutableSeq instance, got %s instead"
                % type(dna))
        if key == self.already_mapped :
            return self.mapping
        self.already_mapped = key
        if needs_formatting :
            fseq = FormattedSeq(dna, linear)
        else :
            fseq = dna
        self.mapping = dict([(x, x.search(fseq)) for x in self])
        return self.mapping
1985 1986 ############################################################################### 1987 # # 1988 # Restriction Analysis # 1989 # # 1990 ############################################################################### 1991
1992 -class Analysis(RestrictionBatch, PrintFormat) :
1993
1994 - def __init__(self, restrictionbatch=RestrictionBatch(),sequence=DNA(''), 1995 linear=True) :
1996 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class. 1997 1998 For most of the method of this class if a dictionary is given it will 1999 be used as the base to calculate the results. 2000 If no dictionary is given a new analysis using the Restriction Batch 2001 which has been given when the Analysis class has been instantiated.""" 2002 RestrictionBatch.__init__(self, restrictionbatch) 2003 self.rb = restrictionbatch 2004 self.sequence = sequence 2005 self.linear = linear 2006 if self.sequence : 2007 self.search(self.sequence, self.linear)
2008
2009 - def __repr__(self) :
2010 return 'Analysis(%s,%s,%s)'%\ 2011 (repr(self.rb),repr(self.sequence),self.linear)
2012
2013 - def _sub_set(self, set) :
2014 """A._sub_set(set) -> dict. 2015 2016 Internal use only. 2017 2018 screen the results through set. 2019 Keep only the results for which the enzymes is in set. 2020 """ 2021 return dict([(k,v) for k,v in self.mapping.iteritems() if k in set])
2022
2023 - def _boundaries(self, start, end) :
2024 """A._boundaries(start, end) -> tuple. 2025 2026 Format the boundaries for use with the methods that limit the 2027 search to only part of the sequence given to analyse. 2028 """ 2029 if not isinstance(start, int) : 2030 raise TypeError, 'expected int, got %s instead' % type(start) 2031 if not isinstance(end, int) : 2032 raise TypeError, 'expected int, got %s instead' % type(end) 2033 if start < 1 : 2034 start += len(self.sequence) 2035 if end < 1 : 2036 end += len(self.sequence) 2037 if start < end : 2038 pass 2039 else : 2040 start, end == end, start 2041 if start < 1 : 2042 start == 1 2043 if start < end : 2044 return start, end, self._test_normal 2045 else : 2046 return start, end, self._test_reverse
2047
2048 - def _test_normal(self, start, end, site) :
2049 """A._test_normal(start, end, site) -> bool. 2050 2051 Internal use only 2052 Test if site is in between start and end. 2053 """ 2054 return start <= site < end
2055
def _test_reverse(self, start, end, site):
    """A._test_reverse(start, end, site) -> bool.

    Internal use only.
    Test for a region that wraps around the end of the sequence (circular
    sequences): site is accepted when it lies between start and the
    sequence length (both included), or between 1 (included) and end
    (excluded).
    """
    # Tail part of the region: from start up to the last position.
    if start <= site <= len(self.sequence):
        return True
    # Head part of the region: from position 1 up to, but excluding, end.
    return 1 <= site < end
2063
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    When dct is missing or empty, self.mapping is printed instead.
    The actual formatting is delegated to PrintFormat.print_that, whose
    return value is passed back to the caller.
    """
    results = dct if dct else self.mapping
    print
    return PrintFormat.print_that(self, results, title, s1)
2073
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    Accepted keywords:
        NameWidth, ConsoleWidth -- set the attribute, then recompute
                                   self.Cmodulo and self.PrefWidth.
        sequence                -- set the sequence and search it again.
        rb                      -- re-initialise the analysis with a new
                                   RestrictionBatch.
        linear                  -- set the flag and search again.
        Indent, Maxsize         -- set the attribute.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refers to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.

    Raises AttributeError for Cmodulo and PrefWidth (they are derived from
    NameWidth and ConsoleWidth) and for any unknown keyword.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        # Strings are compared with ==; `is` only worked by accident of
        # string interning.
        elif k == 'sequence':
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None: call it for its effect only and do not
            # rebind self, otherwise the next iteration would work on None.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            self.linear = v
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth' % k)
        else:
            raise AttributeError('Analysis has no attribute %s' % k)
    return
2106
def full(self, linear=True):
    """A.full() -> dict.

    Full restriction map of the sequence: returns self.mapping itself.
    The linear argument is accepted but not used.
    """
    whole_map = self.mapping
    return whole_map
2112
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes for which is_blunt() is true.

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct (e.g. the empty
    # result of another filter) must not fall back to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if k.is_blunt()])
2120
def overhang5(self, dct=None):
    """A.overhang5([dct]) -> dict.

    Only the enzymes which have a 5' overhang restriction site
    (is_5overhang() is true).

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if k.is_5overhang()])
2128 2129
def overhang3(self, dct=None):
    """A.overhang3([dct]) -> dict.

    Only the enzymes which have a 3' overhang restriction site
    (is_3overhang() is true).

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if k.is_3overhang()])
2137 2138
def defined(self, dct=None):
    """A.defined([dct]) -> dict.

    Only the enzymes that have a defined restriction site in Rebase
    (is_defined() is true).

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if k.is_defined()])
2146
def with_sites(self, dct=None):
    """A.with_sites([dct]) -> dict.

    Enzymes which have at least one site in the sequence, i.e. the entries
    whose list of sites is not empty.

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if v])
2154
def without_site(self, dct=None):
    """A.without_site([dct]) -> dict.

    Enzymes which have no site in the sequence, i.e. the entries whose
    list of sites is empty.

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if not v])
2162
def with_N_sites(self, N, dct=None):
    """A.with_N_sites(N [, dct]) -> dict.

    Enzymes which cut the sequence exactly N times.

    N   -- the required number of sites.
    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if len(v) == N])
2170
def with_number_list(self, list, dct=None):
    """A.with_number_list(list [, dct]) -> dict.

    Enzymes whose number of sites is one of the values in list.

    list -- container of accepted site counts.
    dct  -- results to filter; self.mapping is used when it is not given.
            An empty dictionary is a valid input and gives an empty result.
    """
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if len(v) in list])
2175
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) -> dict.

    Limit the search to the enzymes named in list_of_names.

    names -- the enzymes to keep. Entries that are not in AllEnzymes are
             reported on standard output and ignored. The list passed by
             the caller is left untouched.
    dct   -- results to filter. When it is not given, a new search of
             self.sequence is made with the known enzymes only.
    """
    # Build a new list instead of deleting from names while iterating over
    # it: deleting shifts the following items, so some entries were never
    # checked, and the caller's list was modified as a side effect.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            print("no datas for the enzyme: %s" % str(enzyme))
    # Test against None, not truthiness: an empty dct is a valid (empty)
    # set of results, not a request for a new search.
    if dct is None:
        return RestrictionBatch(known).search(self.sequence)
    return dict([(n, dct[n]) for n in known if n in dct])
2187
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) -> dict.

    Limit the search to the enzymes whose site is of size <site_size>.

    site_size -- the value compared with the size attribute of each enzyme
                 of this Analysis.
    dct       -- results to filter. When it is not given, a new search of
                 self.sequence is made with the matching enzymes only.
    """
    sites = [name for name in self if name.size == site_size]
    # Test against None, not truthiness: an empty dct is a valid (empty)
    # set of results, not a request for a new search.
    if dct is None:
        return RestrictionBatch(sites).search(self.sequence)
    # Membership is tested against the selected enzymes; testing against
    # site_size itself (a number) raised TypeError.
    return dict([(k, v) for k, v in dct.items() if k in sites])
2196
def only_between(self, start, end, dct=None):
    """A.only_between(start, end[, dct]) -> dict.

    Enzymes that cut the sequence only in between start and end: they have
    at least one site and every one of their sites passes the boundary
    test returned by self._boundaries(start, end).

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        if not sites:
            # Enzymes that do not cut at all are not reported.
            continue
        for site in sites:
            if not test(start, end, site):
                break
        else:
            # No site fell outside the region.
            d[key] = sites
    return d
2216
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least once in between start and end.
    They may cut outside as well. All the sites of a selected enzyme are
    kept in the result.

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if test(start, end, site):
                # One site inside the region is enough.
                d[key] = sites
                break
    return d
2233
def show_only_between(self, start, end, dct=None):
    """A.show_only_between(start, end [, dct]) -> dict.

    Enzymes selected by self.between(start, end, dct), each one listed
    with only those of its sites that lie in between start and end.

    When start <= end a site is kept if start <= site <= end; otherwise
    it is kept if site >= start or site <= end (region wrapping around the
    end of the sequence).

    NOTE(review): the sites are compared with start and end as given by
    the caller, both bounds included; whether that matches the boundary
    handling used by between() should be confirmed.
    """
    # Iterate over (enzyme, sites) pairs: looping over the dictionary
    # itself only yields the keys and left the enzyme name unbound.
    selected = self.between(start, end, dct).items()
    if start <= end:
        d = [(k, [vv for vv in v if start <= vv <= end])
             for k, v in selected]
    else:
        d = [(k, [vv for vv in v if start <= vv or vv <= end])
             for k, v in selected]
    return dict(d)
2247
def only_outside(self, start, end, dct=None):
    """A.only_outside(start, end [, dct]) -> dict.

    Enzymes that cut the sequence outside of the region in between start
    and end but do not cut inside: they have at least one site and none of
    their sites passes the boundary test returned by
    self._boundaries(start, end).

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        if not sites:
            # Enzymes that do not cut at all are not reported.
            continue
        for site in sites:
            if test(start, end, site):
                break
        else:
            # No site fell inside the region.
            d[key] = sites
    return d
2267
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes that cut at least once outside the region in between start
    and end. No test is made to know if they cut or not inside this
    region. All the sites of a selected enzyme are kept in the result.

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    start, end, test = self._boundaries(start, end)
    # Test against None, not truthiness: an empty dct must not fall back
    # to the full mapping.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if not test(start, end, site):
                # One site outside the region is enough.
                d[key] = sites
                break
    return d
2285 2286
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end: the
    enzymes without any site together with those that only cut outside of
    the region.

    dct -- results to filter; self.mapping is used when it is not given.
           An empty dictionary is a valid input and gives an empty result.
    """
    if dct is None:
        dct = self.mapping
    if not dct:
        # Nothing to filter; also avoids handing an empty dictionary to
        # helpers that might treat it as "not given".
        return {}
    # Both parts must be computed from the same dct: calling
    # without_site() with no argument mixed enzymes from the full mapping
    # into the result of a filtered dct.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# The reason for the two dictionaries in Restriction_Dictionary,
# one for the types (which will be called pseudo-types as they really
# correspond to the values that instances of RestrictionType can take)
# and one for the enzymes, is efficiency, as the bases are evaluated
# once per pseudo-type.
#
# However, Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. However, this processing takes place only
# once.
# This inefficiency is however largely compensated by the use of metaclasses,
# which provide a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else branches in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO switch)
# as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.iteritems() :
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...).
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all the
    # types are present, as those without corresponding enzymes have been
    # removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases: each string is turned into the object it names.
    # NOTE(review): eval is applied to the strings stored in typedict;
    # presumably this is trusted data shipped with the package -- confirm.
    #
    bases = tuple([eval(x) for x in bases])
    #
    # Now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes :
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # We add the enzyme to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase
# (the union of the two batches filled above).
#
AllEnzymes = CommOnly | NonComm
#
# Now, place the enzymes in locals so they can be imported.
# Each enzyme is stored under its str() name; those names are also
# appended to __all__.
#
names = [str(x) for x in AllEnzymes]
locals().update(dict(map(None, names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
#
# Remove the temporary names used while building the classes.
# NOTE(review): x is presumably the loop variable left behind by the list
# comprehensions above (Python 2 behaviour) -- confirm before porting.
#
del k, x, enzymes, TYPE, bases, names