1
2
3
4
5
6
7
8
9
10
11 """ Notes about the various classes of the restriction enzyme implementation.
12
13 RestrictionType is the type of all restriction enzymes.
14 ----------------------------------------------------------------------------
15 AbstractCut implements some methods that are common to all enzymes.
16 ----------------------------------------------------------------------------
17 NoCut, OneCut,TwoCuts represent the number of double strand cuts
18 produced by the enzyme.
19 they correspond to the 4th field of the rebase
20 record emboss_e.NNN.
21 0->NoCut : the enzyme is not characterised.
22 2->OneCut : the enzyme produces one double strand cut.
23 4->TwoCuts : two double strand cuts.
24 ----------------------------------------------------------------------------
25 Meth_Dep, Meth_Undep represent the methylation susceptibility to
26 the enzyme.
27 Not implemented yet.
28 ----------------------------------------------------------------------------
29 Palindromic, if the site is palindromic or not.
30 NotPalindromic allow some optimisations of the code.
31 No need to check the reverse strand
32 with palindromic sites.
33 ----------------------------------------------------------------------------
34 Unknown, Blunt, represent the overhang.
35 Ov5, Ov3 Unknown is here for symmetry reasons and
36 corresponds to enzymes that are not characterised
37 in rebase.
38 ----------------------------------------------------------------------------
39 Defined, Ambiguous, represent the sequence of the overhang.
40 NotDefined
41 NotDefined is for enzymes not characterised in
42 rebase.
43
44 Defined correspond to enzymes that display a
45 constant overhang whatever the sequence.
46 ex : EcoRI. G^AATTC -> overhang :AATT
47 CTTAA^G
48
49 Ambiguous : the overhang varies with the
50 sequence restricted.
51 Typically enzymes which cut outside their
52 restriction site or (but not always)
53 inside an ambiguous site.
54 ex :
55 AcuI CTGAAG(22/20) -> overhang : NN
56 AasI GACNNN^NNNGTC -> overhang : NN
57 CTGN^NNNNNCAG
58
59 note : these 3 classes refer to the overhang, not the site.
60 So the enzyme ApoI (RAATTY) is defined even if its restriction
61 site is ambiguous.
62
63 ApoI R^AATTY -> overhang : AATT -> Defined
64 YTTAA^R
65 Accordingly, blunt enzymes are always Defined even
66 when they cut outside their restriction site.
67 ----------------------------------------------------------------------------
68 Not_available, as found in rebase file emboss_r.NNN files.
69 Commercially_available
70 allow the selection of the enzymes according to
71 their suppliers to reduce the quantity
72 of results.
73 Also will allow the implementation of buffer
74 compatibility tables. Not implemented yet.
75
76 the list of suppliers is extracted from
77 emboss_s.NNN
78 ----------------------------------------------------------------------------
79 """
80
81 import re
82
83 from sets import Set
84 import itertools
85
86 from Bio.Seq import Seq, MutableSeq
87 from Bio.Alphabet import IUPAC
88
89 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict,\
90 typedict, suppliers as suppliers_dict
91 from Bio.Restriction.RanaConfig import *
92 from Bio.Restriction.PrintFormat import PrintFormat
93 from Bio.Restriction.DNAUtils import check_bases
94
95
96
97 matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
98 'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
99 'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
100 'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
101 'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
102 'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}
103
104 DNA = Seq
105
203
204
206 """RestrictionType. Type from which derives all enzyme classes.
207
208 Implement the operator methods."""
209
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Metaclass initialiser. Not intended to be used in normal operation:
    the enzymes are instantiated when the module is imported.

    name  -- name of the class being initialised.
    bases -- tuple of its base classes.
    dct   -- namespace dictionary of the class.

    Side effect: the class attribute ``compsite`` is replaced by the
    result of ``re.compile(cls.compsite)``.
    """
    # Forward the namespace that was actually received. The previous code
    # passed the builtin ``dict`` type here instead of the ``dct`` argument.
    super(RestrictionType, cls).__init__(name, bases, dct)
    cls.compsite = re.compile(cls.compsite)
219
221 """RE.__add__(other) -> RestrictionBatch().
222
223 if other is an enzyme returns a batch of the two enzymes.
224 if other is already a RestrictionBatch add enzyme to it."""
225 if isinstance(other, RestrictionType) :
226 return RestrictionBatch([cls, other])
227 elif isinstance(other, RestrictionBatch) :
228 return other.add_nocheck(cls)
229 else :
230 raise TypeError
231
233 """RE.__div__(other) -> list.
234
235 RE/other
236 returns RE.search(other)."""
237 return cls.search(other)
238
240 """RE.__rdiv__(other) -> list.
241
242 other/RE
243 returns RE.search(other)."""
244 return cls.search(other)
245
247 """RE.__truediv__(other) -> list.
248
249 RE/other
250 returns RE.search(other)."""
251 return cls.search(other)
252
254 """RE.__rtruediv__(other) -> list.
255
256 other/RE
257 returns RE.search(other)."""
258 return cls.search(other)
259
261 """RE.__floordiv__(other) -> list.
262
263 RE//other
264 returns RE.catalyse(other)."""
265 return cls.catalyse(other)
266
268 """RE.__rfloordiv__(other) -> list.
269
270 other//RE
271 returns RE.catalyse(other)."""
272 return cls.catalyse(other)
273
275 """RE.__str__() -> str.
276
277 return the name of the enzyme."""
278 return cls.__name__
279
281 """RE.__repr__() -> str.
282
283 used with eval or exec will instantiate the enzyme."""
284 return "%s" % cls.__name__
285
287 """RE.__len__() -> int.
288
289 length of the recognition site."""
290 return cls.size
291
293 """RE == other -> bool
294
295 True if RE and other are the same enzyme."""
296 return other is cls
297
299 """RE != other -> bool.
300 isoschizomer strict, same recognition site, same restriction -> False
301 all the other-> True"""
302 if not isinstance(other, RestrictionType) :
303 return True
304 elif cls.charac == other.charac :
305 return False
306 else :
307 return True
308
310 """RE >> other -> bool.
311
312 neoschizomer : same recognition site, different restriction. -> True
313 all the others : -> False"""
314 if not isinstance(other, RestrictionType) :
315 return False
316 elif cls.site == other.site and cls.charac != other.charac :
317 return True
318 else :
319 return False
320
322 """a % b -> bool.
323
324 Test compatibility of the overhang of a and b.
325 True if a and b have compatible overhang."""
326 if not isinstance(other, RestrictionType) :
327 raise TypeError,\
328 'expected RestrictionType, got %s instead' % type(other)
329 return cls._mod1(other)
330
332 """a >= b -> bool.
333
334 a is greater or equal than b if the a site is longer than b site.
335 if their site have the same length sort by alphabetical order of their
336 names."""
337 if not isinstance(other, RestrictionType) :
338 raise NotImplementedError
339 if len(cls) > len(other) :
340 return True
341 elif cls.size == len(other) and cls.__name__ >= other.__name__ :
342 return True
343 else :
344 return False
345
347 """a > b -> bool.
348
349 sorting order :
350 1. size of the recognition site.
351 2. if equal size, alphabetical order of the names."""
352 if not isinstance(other, RestrictionType) :
353 raise NotImplementedError
354 if len(cls) > len(other) :
355 return True
356 elif cls.size == len(other) and cls.__name__ > other.__name__ :
357 return True
358 else :
359 return False
360
362 """a <= b -> bool.
363
364 sorting order :
365 1. size of the recognition site.
366 2. if equal size, alphabetical order of the names."""
367 if not isinstance(other, RestrictionType) :
368 raise NotImplementedError
369 elif len(cls) < len(other) :
370 return True
371 elif len(cls) == len(other) and cls.__name__ <= other.__name__ :
372 return True
373 else :
374 return False
375
377 """a < b -> bool.
378
379 sorting order :
380 1. size of the recognition site.
381 2. if equal size, alphabetical order of the names."""
382 if not isinstance(other, RestrictionType) :
383 raise NotImplementedError
384 elif len(cls) < len(other) :
385 return True
386 elif len(cls) == len(other) and cls.__name__ < other.__name__ :
387 return True
388 else :
389 return False
390
391
393 """Implement the methods that are common to all restriction enzymes.
394
395 All the methods are classmethod.
396
397 For internal use only. Not meant to be instantiate."""
398
400 """RE.search(dna, linear=True) -> list.
401
402 return a list of all the site of RE in dna. Compensate for circular
403 sequences and so on.
404
405 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
406
407 if linear is False, the restriction sites that span over the boundaries
408 will be included.
409
410 The positions are the first base of the 3' fragment,
411 i.e. the first base after the position the enzyme will cut. """
412
413
414
415
416
417
418
419 if isinstance(dna, FormattedSeq) :
420 cls.dna = dna
421 return cls._search()
422 else :
423 cls.dna = FormattedSeq(dna, linear)
424 return cls._search()
425 search = classmethod(search)
426
428 """RE.all_suppliers -> print all the suppliers of R"""
429 supply = [x[0] for x in suppliers_dict.itervalues()]
430 supply.sort()
431 print ",\n".join(supply)
432 return
433 all_suppliers = classmethod(all_suppliers)
434
436 """RE.is_equischizomer(other) -> bool.
437
438 True if other is an equischizomer of RE.
439 False else.
440
441 equischizomer <=> same site, same position of restriction."""
442 return not self != other
443 is_equischizomer = classmethod(is_equischizomer)
444
446 """RE.is_neoschizomer(other) -> bool.
447
448 True if other is a neoschizomer of RE.
449 False else.
450
451 neoschizomer <=> same site, different position of restriction."""
452 return self >> other
453 is_neoschizomer = classmethod(is_neoschizomer)
454
456 """RE.is_isoschizomer(other) -> bool.
457
458 True if other is an isoschizomer of RE.
459 False else.
460
461 isoschizomer <=> same site."""
462 return (not self != other) or self >> other
463 is_isoschizomer = classmethod(is_isoschizomer)
464
466 """RE.equischizomers([batch]) -> list.
467
468 return a list of all the equischizomers of RE.
469 if batch is supplied it is used instead of the default AllEnzymes.
470
471 equischizomer <=> same site, same position of restriction."""
472 if not batch : batch = AllEnzymes
473 r = [x for x in batch if not self != x]
474 i = r.index(self)
475 del r[i]
476 r.sort()
477 return r
478 equischizomers = classmethod(equischizomers)
479
481 """RE.neoschizomers([batch]) -> list.
482
483 return a list of all the neoschizomers of RE.
484 if batch is supplied it is used instead of the default AllEnzymes.
485
486 neoschizomer <=> same site, different position of restriction."""
487 if not batch : batch = AllEnzymes
488 r = [x for x in batch if self >> x]
489 r.sort()
490 return r
491 neoschizomers = classmethod(neoschizomers)
492
494 """RE.isoschizomers([batch]) -> list.
495
496 return a list of all the equischizomers and neoschizomers of RE.
497 if batch is supplied it is used instead of the default AllEnzymes."""
498 if not batch : batch = AllEnzymes
499 r = [x for x in batch if (self >> x) or (not self != x)]
500 i = r.index(self)
501 del r[i]
502 r.sort()
503 return r
504 isoschizomers = classmethod(isoschizomers)
505
507 """RE.frequency() -> int.
508
509 frequency of the site."""
510 return self.freq
511 frequency = classmethod(frequency)
512
513
514 -class NoCut(AbstractCut) :
515 """Implement the methods specific to the enzymes that do not cut.
516
517 These enzymes are generally enzymes that have been only partially
518 characterised and the way they cut the DNA is unknown or enzymes for
519 which the pattern of cut is too complex to be recorded in Rebase
520 (ncuts values of 0 in emboss_e.###).
521
522 When using search() with these enzymes the values returned are at the start of
523 the restriction site.
524
525 Their catalyse() method raises a NotImplementedError.
526
527 Unknown and NotDefined are also part of the base classes of these enzymes.
528
529 Internal use only. Not meant to be instantiated."""
530
532 """RE.cut_once() -> bool.
533
534 True if the enzyme cut the sequence one time on each strand."""
535 return False
536 cut_once = classmethod(cut_once)
537
539 """RE.cut_twice() -> bool.
540
541 True if the enzyme cut the sequence twice on each strand."""
542 return False
543 cut_twice = classmethod(cut_twice)
544
546 """RE._modify(location) -> int.
547
548 for internal use only.
549
550 location is an integer corresponding to the location of the match for
551 the enzyme pattern in the sequence.
552 _modify returns the real place where the enzyme will cut.
553
554 example :
555 EcoRI pattern : GAATTC
556 EcoRI will cut after the G.
557 so in the sequence :
558 ______
559 GAATACACGGAATTCGA
560 |
561 10
562 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
563 EcoRI cut after the G so :
564 EcoRI._modify(10) -> 11.
565
566 if the enzyme cut twice _modify will returns two integer corresponding
567 to each cutting site.
568 """
569 yield location
570 _modify = classmethod(_modify)
571
573 """RE._rev_modify(location) -> generator of int.
574
575 for internal use only.
576
577 as _modify for site situated on the antiparallel strand when the
578 enzyme is not palindromic
579 """
580 yield location
581 _rev_modify = classmethod(_rev_modify)
582
584 """RE.characteristic() -> tuple.
585
586 the tuple contains the attributes :
587 fst5 -> first 5' cut (current strand) or None
588 fst3 -> first 3' cut (complementary strand) or None
589 scd5 -> second 5' cut (current strand) or None
590 scd3 -> second 3' cut (complementary strand) or None
591 site -> recognition site."""
592 return None, None, None, None, self.site
593 characteristic = classmethod(characteristic)
594
596 """Implement the methods specific to the enzymes that cut the DNA only once
597
598 Correspond to ncuts values of 2 in emboss_e.###
599
600 Internal use only. Not meant to be instantiated."""
601
603 """RE.cut_once() -> bool.
604
605 True if the enzyme cut the sequence one time on each strand."""
606 return True
607 cut_once = classmethod(cut_once)
608
610 """RE.cut_twice() -> bool.
611
612 True if the enzyme cut the sequence twice on each strand."""
613 return False
614 cut_twice = classmethod(cut_twice)
615
617 """RE._modify(location) -> int.
618
619 for internal use only.
620
621 location is an integer corresponding to the location of the match for
622 the enzyme pattern in the sequence.
623 _modify returns the real place where the enzyme will cut.
624
625 example :
626 EcoRI pattern : GAATTC
627 EcoRI will cut after the G.
628 so in the sequence :
629 ______
630 GAATACACGGAATTCGA
631 |
632 10
633 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
634 EcoRI cut after the G so :
635 EcoRI._modify(10) -> 11.
636
637 if the enzyme cut twice _modify will returns two integer corresponding
638 to each cutting site.
639 """
640 yield location + self.fst5
641 _modify = classmethod(_modify)
642
644 """RE._rev_modify(location) -> generator of int.
645
646 for internal use only.
647
648 as _modify for site situated on the antiparallel strand when the
649 enzyme is not palindromic
650 """
651 yield location - self.fst3
652 _rev_modify = classmethod(_rev_modify)
653
655 """RE.characteristic() -> tuple.
656
657 the tuple contains the attributes :
658 fst5 -> first 5' cut (current strand) or None
659 fst3 -> first 3' cut (complementary strand) or None
660 scd5 -> second 5' cut (current strand) or None
661 scd3 -> second 3' cut (complementary strand) or None
662 site -> recognition site."""
663 return self.fst5, self.fst3, None, None, self.site
664 characteristic = classmethod(characteristic)
665
666
668 """Implement the methods specific to the enzymes that cut the DNA twice
669
670 Correspond to ncuts values of 4 in emboss_e.###
671
672 Internal use only. Not meant to be instantiated."""
673
675 """RE.cut_once() -> bool.
676
677 True if the enzyme cut the sequence one time on each strand."""
678 return False
679 cut_once = classmethod(cut_once)
680
682 """RE.cut_twice() -> bool.
683
684 True if the enzyme cut the sequence twice on each strand."""
685 return True
686 cut_twice = classmethod(cut_twice)
687
689 """RE._modify(location) -> int.
690
691 for internal use only.
692
693 location is an integer corresponding to the location of the match for
694 the enzyme pattern in the sequence.
695 _modify returns the real place where the enzyme will cut.
696
697 example :
698 EcoRI pattern : GAATTC
699 EcoRI will cut after the G.
700 so in the sequence :
701 ______
702 GAATACACGGAATTCGA
703 |
704 10
705 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
706 EcoRI cut after the G so :
707 EcoRI._modify(10) -> 11.
708
709 if the enzyme cut twice _modify will returns two integer corresponding
710 to each cutting site.
711 """
712 yield location + self.fst5
713 yield location + self.scd5
714 _modify = classmethod(_modify)
715
717 """RE._rev_modify(location) -> generator of int.
718
719 for internal use only.
720
721 as _modify for site situated on the antiparallel strand when the
722 enzyme is not palindromic
723 """
724 yield location - self.fst3
725 yield location - self.scd3
726 _rev_modify = classmethod(_rev_modify)
727
729 """RE.characteristic() -> tuple.
730
731 the tuple contains the attributes :
732 fst5 -> first 5' cut (current strand) or None
733 fst3 -> first 3' cut (complementary strand) or None
734 scd5 -> second 5' cut (current strand) or None
735 scd3 -> second 3' cut (complementary strand) or None
736 site -> recognition site."""
737 return self.fst5, self.fst3, self.scd5, self.scd3, self.site
738 characteristic = classmethod(characteristic)
739
740
742 """Implement the information about methylation.
743
744 Enzymes of this class possess a site which is methylable."""
745
747 """RE.is_methylable() -> bool.
748
749 True if the recognition site is a methylable."""
750 return True
751 is_methylable = classmethod(is_methylable)
752
754 """Implement information about methylation sensitivity.
755
756 Enzymes of this class are not sensitive to methylation."""
757
759 """RE.is_methylable() -> bool.
760
761 True if the recognition site is a methylable."""
762 return False
763 is_methylable = classmethod(is_methylable)
764
766 """Implement the methods specific to the enzymes which are palindromic
767
768 palindromic means : the recognition site and its reverse complement are
769 identical.
770 Remarks : an enzyme with a site CGNNCG is palindromic even if some
771 of the sites that it will recognise are not.
772 for example here : CGAACG
773
774 Internal use only. Not meant to be instantiated."""
775
777 """RE._search() -> list.
778
779 for internal use only.
780
781 implement the search method for palindromic and non palindromic enzyme.
782 """
783 siteloc = self.dna.finditer(self.compsite,self.size)
784 self.results = [r for s,g in siteloc for r in self._modify(s)]
785 if self.results : self._drop()
786 return self.results
787 _search = classmethod(_search)
788
790 """RE.is_palindromic() -> bool.
791
792 True if the recognition site is a palindrom."""
793 return True
794 is_palindromic = classmethod(is_palindromic)
795
796
798 """Implement the methods specific to the enzymes which are not palindromic
799
800 palindromic means : the recognition site and its reverse complement are
801 identical.
802
803 Internal use only. Not meant to be instantiated."""
804
806 """RE._search() -> list.
807
808 for internal use only.
809
810 implement the search method for palindromic and non palindromic enzyme.
811 """
812 iterator = self.dna.finditer(self.compsite, self.size)
813 self.results = []
814 modif = self._modify
815 revmodif = self._rev_modify
816 s = str(self)
817 self.on_minus = []
818 for start, group in iterator :
819 if group(s) :
820 self.results += [r for r in modif(start)]
821 else :
822 self.on_minus += [r for r in revmodif(start)]
823 self.results += self.on_minus
824 if self.results :
825 self.results.sort()
826 self._drop()
827 return self.results
828 _search = classmethod(_search)
829
831 """RE.is_palindromic() -> bool.
832
833 True if the recognition site is a palindrom."""
834 return False
835 is_palindromic = classmethod(is_palindromic)
836
838 """Implement the methods specific to the enzymes for which the overhang
839 is unknown.
840
841 These enzymes are also NotDefined and NoCut.
842
843 Internal use only. Not meant to be instantiated."""
844
846 """RE.catalyse(dna, linear=True) -> tuple of DNA.
847 RE.catalyze(dna, linear=True) -> tuple of DNA.
848
849 return a tuple of dna as will be produced by using RE to restrict the
850 dna.
851
852 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
853
854 if linear is False, the sequence is considered to be circular and the
855 output will be modified accordingly."""
856 raise NotImplementedError, '%s restriction is unknown.' % self.__name__
857 catalyze = catalyse = classmethod(catalyse)
858
860 """RE.is_blunt() -> bool.
861
862 True if the enzyme produces blunt end.
863
864 see also :
865 RE.is_3overhang()
866 RE.is_5overhang()
867 RE.is_unknown()"""
868 return False
869 is_blunt = classmethod(is_blunt)
870
872 """RE.is_5overhang() -> bool.
873
874 True if the enzyme produces 5' overhang sticky end.
875
876 see also :
877 RE.is_3overhang()
878 RE.is_blunt()
879 RE.is_unknown()"""
880 return False
881 is_5overhang = classmethod(is_5overhang)
882
884 """RE.is_3overhang() -> bool.
885
886 True if the enzyme produces 3' overhang sticky end.
887
888 see also :
889 RE.is_5overhang()
890 RE.is_blunt()
891 RE.is_unknown()"""
892 return False
893 is_3overhang = classmethod(is_3overhang)
894
896 """RE.overhang() -> str. type of overhang of the enzyme.,
897
898 can be "3' overhang", "5' overhang", "blunt", "unknown" """
899 return 'unknown'
900 overhang = classmethod(overhang)
901
903 """RE.compatible_end() -> list.
904
905 list of all the enzymes that share compatible end with RE."""
906 return []
907 compatible_end = classmethod(compatible_end)
908
909 - def _mod1(self, other) :
910 """RE._mod1(other) -> bool.
911
912 For internal use only.
913
914 Overhang-compatibility test between RE and other; this implementation always returns False."""
915 return False
916 _mod1 = classmethod(_mod1)
917
918 -class Blunt(AbstractCut) :
919 """Implement the methods specific to the enzymes for which the overhang
920 is blunt.
921
922 The enzyme cuts the + strand and the - strand of the DNA at the same
923 place.
924
925 Internal use only. Not meant to be instantiated."""
926
928 """RE.catalyse(dna, linear=True) -> tuple of DNA.
929 RE.catalyze(dna, linear=True) -> tuple of DNA.
930
931 return a tuple of dna as will be produced by using RE to restrict the
932 dna.
933
934 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
935
936 if linear is False, the sequence is considered to be circular and the
937 output will be modified accordingly."""
938 r = self.search(dna, linear)
939 d = self.dna
940 if not r : return d[1:],
941 fragments = []
942 length = len(r)-1
943 if d.is_linear() :
944
945
946
947 fragments.append(d[1:r[0]])
948 if length :
949
950
951
952 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
953
954
955
956 fragments.append(d[r[-1]:])
957 else :
958
959
960
961 fragments.append(d[r[-1]:]+d[1:r[0]])
962 if not length:
963
964
965
966 return tuple(fragments)
967
968
969
970 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
971 return tuple(fragments)
972 catalyze = catalyse = classmethod(catalyse)
973
975 """RE.is_blunt() -> bool.
976
977 True if the enzyme produces blunt end.
978
979 see also :
980 RE.is_3overhang()
981 RE.is_5overhang()
982 RE.is_unknown()"""
983 return True
984 is_blunt = classmethod(is_blunt)
985
987 """RE.is_5overhang() -> bool.
988
989 True if the enzyme produces 5' overhang sticky end.
990
991 see also :
992 RE.is_3overhang()
993 RE.is_blunt()
994 RE.is_unknown()"""
995 return False
996 is_5overhang = classmethod(is_5overhang)
997
999 """RE.is_3overhang() -> bool.
1000
1001 True if the enzyme produces 3' overhang sticky end.
1002
1003 see also :
1004 RE.is_5overhang()
1005 RE.is_blunt()
1006 RE.is_unknown()"""
1007 return False
1008 is_3overhang = classmethod(is_3overhang)
1009
1011 """RE.overhang() -> str. type of overhang of the enzyme.,
1012
1013 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1014 return 'blunt'
1015 overhang = classmethod(overhang)
1016
1018 """RE.compatible_end() -> list.
1019
1020 list of all the enzymes that share compatible end with RE."""
1021 if not batch : batch = AllEnzymes
1022 r = [x for x in iter(AllEnzymes) if x.is_blunt()]
1023 r.sort()
1024 return r
1025 compatible_end = classmethod(compatible_end)
1026
1028 """RE._mod1(other) -> bool.
1029
1030 for internal use only
1031
1032 test for the compatibility of restriction ending of RE and other."""
1033 if issubclass(other, Blunt) : return True
1034 else : return False
1035 _mod1 = staticmethod(_mod1)
1036
1037 -class Ov5(AbstractCut) :
1038 """Implement the methods specific to the enzymes for which the overhang
1039 is recessed in 3'.
1040
1041 The enzyme cuts the + strand after the - strand of the DNA.
1042
1043 Internal use only. Not meant to be instantiated."""
1044
1046 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1047 RE.catalyze(dna, linear=True) -> tuple of DNA.
1048
1049 return a tuple of dna as will be produced by using RE to restrict the
1050 dna.
1051
1052 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1053
1054 if linear is False, the sequence is considered to be circular and the
1055 output will be modified accordingly."""
1056 r = self.search(dna, linear)
1057 d = self.dna
1058 if not r : return d[1:],
1059 length = len(r)-1
1060 fragments = []
1061 if d.is_linear() :
1062
1063
1064
1065 fragments.append(d[1:r[0]])
1066 if length :
1067
1068
1069
1070 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1071
1072
1073
1074 fragments.append(d[r[-1]:])
1075 else :
1076
1077
1078
1079 fragments.append(d[r[-1]:]+d[1:r[0]])
1080 if not length:
1081
1082
1083
1084 return tuple(fragments)
1085
1086
1087
1088 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1089 return tuple(fragments)
1090 catalyze = catalyse = classmethod(catalyse)
1091
1093 """RE.is_blunt() -> bool.
1094
1095 True if the enzyme produces blunt end.
1096
1097 see also :
1098 RE.is_3overhang()
1099 RE.is_5overhang()
1100 RE.is_unknown()"""
1101 return False
1102 is_blunt = classmethod(is_blunt)
1103
1105 """RE.is_5overhang() -> bool.
1106
1107 True if the enzyme produces 5' overhang sticky end.
1108
1109 see also :
1110 RE.is_3overhang()
1111 RE.is_blunt()
1112 RE.is_unknown()"""
1113 return True
1114 is_5overhang = classmethod(is_5overhang)
1115
1117 """RE.is_3overhang() -> bool.
1118
1119 True if the enzyme produces 3' overhang sticky end.
1120
1121 see also :
1122 RE.is_5overhang()
1123 RE.is_blunt()
1124 RE.is_unknown()"""
1125 return False
1126 is_3overhang = classmethod(is_3overhang)
1127
1129 """RE.overhang() -> str. type of overhang of the enzyme.,
1130
1131 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1132 return "5' overhang"
1133 overhang = classmethod(overhang)
1134
1136 """RE.compatible_end() -> list.
1137
1138 list of all the enzymes that share compatible end with RE."""
1139 if not batch : batch = AllEnzymes
1140 r = [x for x in iter(AllEnzymes) if x.is_5overhang() and x % self]
1141 r.sort()
1142 return r
1143 compatible_end = classmethod(compatible_end)
1144
def _mod1(self, other):
    """RE._mod1(other) -> bool.

    For internal use only.

    Overhang-compatibility test between RE and other. Anything that is
    not a subclass of Ov5 is rejected with False; otherwise the answer
    is whatever self._mod2(other) returns.
    """
    if not issubclass(other, Ov5):
        return False
    return self._mod2(other)
_mod1 = classmethod(_mod1)
1154
1155
1156 -class Ov3(AbstractCut) :
1157 """Implement the methods specific to the enzymes for which the overhang
1158 is recessed in 5'.
1159
1160 The enzyme cuts the - strand after the + strand of the DNA.
1161
1162 Internal use only. Not meant to be instantiated."""
1163
1165 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1166 RE.catalyze(dna, linear=True) -> tuple of DNA.
1167
1168 return a tuple of dna as will be produced by using RE to restrict the
1169 dna.
1170
1171 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1172
1173 if linear is False, the sequence is considered to be circular and the
1174 output will be modified accordingly."""
1175 r = self.search(dna, linear)
1176 d = self.dna
1177 if not r : return d[1:],
1178 fragments = []
1179 length = len(r)-1
1180 if d.is_linear() :
1181
1182
1183
1184 fragments.append(d[1:r[0]])
1185 if length :
1186
1187
1188
1189 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1190
1191
1192
1193 fragments.append(d[r[-1]:])
1194 else :
1195
1196
1197
1198 fragments.append(d[r[-1]:]+d[1:r[0]])
1199 if not length:
1200
1201
1202
1203 return tuple(fragments)
1204
1205
1206
1207 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1208 return tuple(fragments)
1209 catalyze = catalyse = classmethod(catalyse)
1210
1212 """RE.is_blunt() -> bool.
1213
1214 True if the enzyme produces blunt end.
1215
1216 see also :
1217 RE.is_3overhang()
1218 RE.is_5overhang()
1219 RE.is_unknown()"""
1220 return False
1221 is_blunt = classmethod(is_blunt)
1222
1224 """RE.is_5overhang() -> bool.
1225
1226 True if the enzyme produces 5' overhang sticky end.
1227
1228 see also :
1229 RE.is_3overhang()
1230 RE.is_blunt()
1231 RE.is_unknown()"""
1232 return False
1233 is_5overhang = classmethod(is_5overhang)
1234
1236 """RE.is_3overhang() -> bool.
1237
1238 True if the enzyme produces 3' overhang sticky end.
1239
1240 see also :
1241 RE.is_5overhang()
1242 RE.is_blunt()
1243 RE.is_unknown()"""
1244 return True
1245 is_3overhang = classmethod(is_3overhang)
1246
1248 """RE.overhang() -> str. type of overhang of the enzyme.,
1249
1250 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1251 return "3' overhang"
1252 overhang = classmethod(overhang)
1253
1255 """RE.compatible_end() -> list.
1256
1257 list of all the enzymes that share compatible end with RE."""
1258 if not batch : batch = AllEnzymes
1259 r = [x for x in iter(AllEnzymes) if x.is_3overhang() and x % self]
1260 r.sort()
1261 return r
1262 compatible_end = classmethod(compatible_end)
1263
def _mod1(self, other):
    """RE._mod1(other) -> bool.

    For internal use only.

    Overhang-compatibility test between RE and other. Anything that is
    not a subclass of Ov3 is rejected with False; otherwise the answer
    is whatever self._mod2(other) returns.
    """
    if not issubclass(other, Ov3):
        return False
    return self._mod2(other)
_mod1 = classmethod(_mod1)
1276
1277
1279 """Implement the methods specific to the enzymes for which the overhang
1280 and the cut are not variable.
1281
1282 Typical example : EcoRI -> G^AATT_C
1283 The overhang will always be AATT
1284 Notes :
1285 Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
1286 Their overhang is always the same : blunt!
1287
1288 Internal use only. Not meant to be instantiated."""
1289
1291 """RE._drop() -> list.
1292
1293 for internal use only.
1294
1295 drop the site that are situated outside the sequence in linear sequence.
1296 modify the index for site in circular sequences."""
1297
1298
1299
1300
1301
1302
1303
1304
1305 length = len(self.dna)
1306 drop = itertools.dropwhile
1307 take = itertools.takewhile
1308 if self.dna.is_linear() :
1309 self.results = [x for x in drop(lambda x:x<1, self.results)]
1310 self.results = [x for x in take(lambda x:x<length, self.results)]
1311 else :
1312 for index, location in enumerate(self.results) :
1313 if location < 1 :
1314 self.results[index] += length
1315 else :
1316 break
1317 for index, location in enumerate(self.results[::-1]) :
1318 if location > length :
1319 self.results[-(index+1)] -= length
1320 else :
1321 break
1322 return
1323 _drop = classmethod(_drop)
1324
1326 """RE.is_defined() -> bool.
1327
1328 True if the sequence recognised and cut is constant,
1329 i.e. the recognition site is not degenerated AND the enzyme cut inside
1330 the site.
1331
1332 see also :
1333 RE.is_ambiguous()
1334 RE.is_unknown()"""
1335 return True
1336 is_defined = classmethod(is_defined)
1337
1339 """RE.is_ambiguous() -> bool.
1340
1341 True if the sequence recognised and cut is ambiguous,
1342 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1343 the site.
1344
1345 see also :
1346 RE.is_defined()
1347 RE.is_unknown()"""
1348 return False
1349 is_ambiguous = classmethod(is_ambiguous)
1350
1352 """RE.is_unknown() -> bool.
1353
1354 True if the sequence is unknown,
1355 i.e. the recognition site has not been characterised yet.
1356
1357 see also :
1358 RE.is_defined()
1359 RE.is_ambiguous()"""
1360 return False
1361 is_unknown = classmethod(is_unknown)
1362
1364 """RE.elucidate() -> str
1365
1366 return a representation of the site with the cut on the (+) strand
1367 represented as '^' and the cut on the (-) strand as '_'.
1368 ie :
1369 >>> EcoRI.elucidate() # 5' overhang
1370 'G^AATT_C'
1371 >>> KpnI.elucidate() # 3' overhang
1372 'G_GTAC^C'
1373 >>> EcoRV.elucidate() # blunt
1374 'GAT^_ATC'
1375 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1376 '? GTATAC ?'
1377 >>>
1378 """
1379 f5 = self.fst5
1380 f3 = self.fst3
1381 site = self.site
1382 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1383 elif self.is_5overhang() :
1384 if f5 == f3 == 0 : re = 'N^'+ self.site + '_N'
1385 elif f3 == 0 : re = site[:f5] + '^' + site[f5:] + '_N'
1386 else : re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1387 elif self.is_blunt() :
1388 re = site[:f5] + '^_' + site[f5:]
1389 else :
1390 if f5 == f3 == 0 : re = 'N_'+ site + '^N'
1391 else : re = site[:f3] + '_' + site[f3:f5] +'^'+ site[f5:]
1392 return re
1393 elucidate = classmethod(elucidate)
1394
def _mod2(self, other):
    """RE._mod2(other) -> bool.

    Internal use only.

    Second stage of the end-compatibility test between RE and other.
    Identical overhang sequences are compatible.  Otherwise, when other
    is a subclass of Ambiguous, the decision is handed over to
    other._mod2(RE); in every remaining case the ends are not compatible.
    """
    if other.ovhgseq == self.ovhgseq:
        return True
    if issubclass(other, Ambiguous):
        return other._mod2(self)
    return False
_mod2 = classmethod(_mod2)
1411
1412
1414 """Implement the methods specific to the enzymes for which the overhang
1415 is variable.
1416
1417 Typical example : BstXI -> CCAN_NNNN^NTGG
1418 The overhang can be any sequence of 4 bases.
1419 Notes :
1420 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1421 There overhang is always the same : blunt!
1422
1423 Internal use only. Not meant to be instantiated."""
1424
1426 """RE._drop() -> list.
1427
1428 for internal use only.
1429
1430 drop the site that are situated outside the sequence in linear sequence.
1431 modify the index for site in circular sequences."""
1432 length = len(self.dna)
1433 drop = itertools.dropwhile
1434 take = itertools.takewhile
1435 if self.dna.is_linear() :
1436 self.results = [x for x in drop(lambda x : x < 1, self.results)]
1437 self.results = [x for x in take(lambda x : x <length, self.results)]
1438 else :
1439 for index, location in enumerate(self.results) :
1440 if location < 1 :
1441 self.results[index] += length
1442 else :
1443 break
1444 for index, location in enumerate(self.results[::-1]) :
1445 if location > length :
1446 self.results[-(index+1)] -= length
1447 else :
1448 break
1449 return
1450 _drop = classmethod(_drop)
1451
1453 """RE.is_defined() -> bool.
1454
1455 True if the sequence recognised and cut is constant,
1456 i.e. the recognition site is not degenerated AND the enzyme cut inside
1457 the site.
1458
1459 see also :
1460 RE.is_ambiguous()
1461 RE.is_unknown()"""
1462 return False
1463 is_defined = classmethod(is_defined)
1464
1466 """RE.is_ambiguous() -> bool.
1467
1468 True if the sequence recognised and cut is ambiguous,
1469 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1470 the site.
1471
1472
1473 see also :
1474 RE.is_defined()
1475 RE.is_unknown()"""
1476 return True
1477 is_ambiguous = classmethod(is_ambiguous)
1478
1480 """RE.is_unknown() -> bool.
1481
1482 True if the sequence is unknown,
1483 i.e. the recognition site has not been characterised yet.
1484
1485 see also :
1486 RE.is_defined()
1487 RE.is_ambiguous()"""
1488 return False
1489 is_unknown = classmethod(is_unknown)
1490
def _mod2(self, other) :
    """RE._mod2(other) -> bool.

    for internal use only

    test for the compatibility of restriction ending of RE and other.

    The overhang of RE (self.ovhgseq) is rewritten as a regular
    expression and matched against other.ovhgseq.  Overhangs of
    different lengths are never compatible."""
    #
    #   Different lengths: no need to build a pattern.
    #
    if len(self.ovhgseq) != len(other.ovhgseq) :
        return False
    else :
        se = self.ovhgseq
        #   The loop walks the original overhang string; se is rebound to
        #   the pattern built so far at each substitution.
        for base in se :
            if base in 'ATCG' :
                #   plain bases are kept as they are.
                pass
            if base in 'N' :
                #   N matches any single character.
                se = '.'.join(se.split('N'))
            if base in 'RYWMSKHDBV':
                #   degenerate code: replaced by a character set made of
                #   matching[base].
                #   NOTE(review): split/join acts on the whole pattern, so
                #   letters introduced by an earlier '[...]' expansion may
                #   be rewritten again if matching[...] contains such
                #   letters -- matching is defined outside this block,
                #   confirm its content.
                expand = '['+ matching[base] + ']'
                se = expand.join(se.split(base))
        #   re.match anchors the pattern at the start of other.ovhgseq.
        if re.match(se, other.ovhgseq) :
            return True
        else :
            return False
_mod2 = classmethod(_mod2)
1517
1519 """RE.elucidate() -> str
1520
1521 return a representation of the site with the cut on the (+) strand
1522 represented as '^' and the cut on the (-) strand as '_'.
1523 ie :
1524 >>> EcoRI.elucidate() # 5' overhang
1525 'G^AATT_C'
1526 >>> KpnI.elucidate() # 3' overhang
1527 'G_GTAC^C'
1528 >>> EcoRV.elucidate() # blunt
1529 'GAT^_ATC'
1530 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1531 '? GTATAC ?'
1532 >>>
1533 """
1534 f5 = self.fst5
1535 f3 = self.fst3
1536 length = len(self)
1537 site = self.site
1538 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1539 elif self.is_5overhang() :
1540 if f3 == f5 == 0 :
1541 re = 'N^' + site +'_N'
1542 elif 0 <= f5 <= length and 0 <= f3+length <= length :
1543 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1544 elif 0 <= f5 <= length :
1545 re = site[:f5] + '^' + site[f5:] + f3*'N' + '_N'
1546 elif 0 <= f3+length <= length :
1547 re = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
1548 elif f3+length < 0 :
1549 re = 'N^'*abs(f5)*'N' + '_' + abs(length+f3)*'N' + site
1550 elif f5 > length :
1551 re = site + (f5-length)*'N'+'^'+(length+f3-f5)*'N'+'_N'
1552 else :
1553 re = 'N^' + abs(f5) * 'N' + site + f3*'N' + '_N'
1554 elif self.is_blunt() :
1555 if f5 < 0 :
1556 re = 'N^_' + abs(f5)*'N' + site
1557 elif f5 > length :
1558 re = site + (f5-length)*'N' + '^_N'
1559 else :
1560 raise ValueError, '%s.easyrepr() : error f5=%i'% (self.name,f5)
1561 else :
1562 if f3 == 0 :
1563 if f5 == 0 : re = 'N_' + site + '^N'
1564 else : re = site + '_' + (f5-length)*'N' + '^N'
1565 elif 0 < f3+length <= length and 0 <= f5 <= length :
1566 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1567 elif 0 < f3+length <= length :
1568 re = site[:f3] + '_' + site[f3:] + (f5-length)*'N' + '^N'
1569 elif 0 <= f5 <= length:
1570 re = 'N_' +'N'*(f3+length) + site[:f5] + '^' + site[f5:]
1571 elif f3 > 0 :
1572 re = site + f3*'N' + '_' + (f5-f3-length)*'N' + '^N'
1573 elif f5 < 0 :
1574 re = 'N_' + abs(f3-f5+length)*'N' + '^' + abs(f5)*'N' + site
1575 else :
1576 re = 'N_' + abs(f3+length)*'N' + site + (f5-length)*'N' + '^N'
1577 return re
1578 elucidate = classmethod(elucidate)
1579
1580
1582 """Implement the methods specific to the enzymes for which the overhang
1583 is not characterised.
1584
1585 Correspond to NoCut and Unknown.
1586
1587 Internal use only. Not meant to be instantiated."""
1588
def _drop(self):
    """RE._drop() -> None.

    for internal use only.

    Linear sequence: self.results is left untouched.
    Circular sequence: the positions lower than 1 found at the head of
    self.results are increased by len(self.dna), and the positions greater
    than len(self.dna) found at the tail are decreased by it.
    self.results is modified in place.
    """
    if self.dna.is_linear():
        return
    length = len(self.dna)
    for index, location in enumerate(self.results):
        if location < 1:
            self.results[index] += length
        else:
            break
    #   Walk the results backwards: the reversed copy [::-1] is what pairs
    #   with the -(index+1) indexing below, as in the other _drop methods
    #   of this file.  The former slice [:-1] scanned from the front and
    #   never looked at the last position.
    for index, location in enumerate(self.results[::-1]):
        if location > length:
            self.results[-(index+1)] -= length
        else:
            break
    return
_drop = classmethod(_drop)
1612
1614 """RE.is_defined() -> bool.
1615
1616 True if the sequence recognised and cut is constant,
1617 i.e. the recognition site is not degenerated AND the enzyme cut inside
1618 the site.
1619
1620 see also :
1621 RE.is_ambiguous()
1622 RE.is_unknown()"""
1623 return False
1624 is_defined = classmethod(is_defined)
1625
1627 """RE.is_ambiguous() -> bool.
1628
1629 True if the sequence recognised and cut is ambiguous,
1630 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1631 the site.
1632
1633
1634 see also :
1635 RE.is_defined()
1636 RE.is_unknown()"""
1637 return False
1638 is_ambiguous = classmethod(is_ambiguous)
1639
1641 """RE.is_unknown() -> bool.
1642
1643 True if the sequence is unknown,
1644 i.e. the recognition site has not been characterised yet.
1645
1646 see also :
1647 RE.is_defined()
1648 RE.is_ambiguous()"""
1649 return True
1650 is_unknown = classmethod(is_unknown)
1651
def _mod2(self, other):
    """RE._mod2(other) -> never returns.

    Internal use only.

    The end compatibility cannot be tested for this kind of enzyme:
    a ValueError naming RE and other is always raised.
    """
    message = "%s.mod2(%s), %s : NotDefined. pas glop pas glop!" \
              % (str(self), str(other), str(self))
    raise ValueError(message)
_mod2 = classmethod(_mod2)
1667
1669 """RE.elucidate() -> str
1670
1671 return a representation of the site with the cut on the (+) strand
1672 represented as '^' and the cut on the (-) strand as '_'.
1673 ie :
1674 >>> EcoRI.elucidate() # 5' overhang
1675 'G^AATT_C'
1676 >>> KpnI.elucidate() # 3' overhang
1677 'G_GTAC^C'
1678 >>> EcoRV.elucidate() # blunt
1679 'GAT^_ATC'
1680 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1681 '? GTATAC ?'
1682 >>>
1683 """
1684 return '? %s ?' % self.site
1685 elucidate = classmethod(elucidate)
1686
1687
1689
1690
1691
1692
1693 """Implement the methods specific to the enzymes which are commercially
1694 available.
1695
1696 Internal use only. Not meant to be instantiated."""
1697
1699 """RE.suppliers() -> print the suppliers of RE."""
1700 supply = suppliers_dict.items()
1701 for k,v in supply :
1702 if k in self.suppl :
1703 print v[0]+','
1704 return
1705 suppliers = classmethod(suppliers)
1706
1708 """RE.supplier_list() -> list.
1709
1710 list of the supplier names for RE."""
1711 return [v[0] for k,v in suppliers_dict.items() if k in self.suppl]
1712 supplier_list = classmethod(supplier_list)
1713
1715 """RE.buffers(supplier) -> string.
1716
1717 not implemented yet."""
1718 return
1719 buffers = classmethod(buffers)
1720
1722 """RE.iscomm() -> bool.
1723
1724 True if RE has suppliers."""
1725 return True
1726 is_comm = classmethod(is_comm)
1727
1728
1730 """Implement the methods specific to the enzymes which are not commercially
1731 available.
1732
1733 Internal use only. Not meant to be instantiated."""
1734
1736 """RE.suppliers() -> print the suppliers of RE."""
1737 return None
1738 suppliers = staticmethod(suppliers)
1739
1741 """RE.supplier_list() -> list.
1742
1743 list of the supplier names for RE."""
1744 return []
1745 supplier_list = classmethod(supplier_list)
1746
1748 """RE.buffers(supplier) -> string.
1749
1750 not implemented yet."""
1751 raise TypeError, "Enzyme not commercially available."
1752 buffers = classmethod(buffers)
1753
1755 """RE.iscomm() -> bool.
1756
1757 True if RE has suppliers."""
1758 return False
1759 is_comm = classmethod(is_comm)
1760
1761
1762
1763
1764
1765
1766
1767
1768
1770
def __init__(self, first=[], suppliers=[]):
    """RestrictionBatch([sequence [, suppliers]]) -> new RestrictionBatch.

    first     -- iterable of enzymes; every item goes through
                 self.format() before being stored.
    suppliers -- iterable of supplier codes (keys of suppliers_dict); the
                 enzymes listed for each code are added to the batch.

    An unknown supplier code makes the suppliers_dict lookup raise a
    KeyError.
    """
    #   The list defaults are kept for interface compatibility; neither of
    #   them is modified in place (first is rebound just below).
    supplier_codes = list(suppliers)
    first = [self.format(x) for x in first]
    first += [eval(x) for n in supplier_codes for x in suppliers_dict[n][1]]
    Set.__init__(self, first)
    #   add_supplier() appends to self.suppliers and current_suppliers()
    #   reads it, so the attribute has to exist on every new batch.
    self.suppliers = supplier_codes
    self.mapping = dict.fromkeys(self)
    self.already_mapped = DNA('')
1778
1780 if len(self) < 5 :
1781 return '+'.join(self.elements())
1782 else :
1783 return '...'.join(('+'.join(self.elements()[:2]),\
1784 '+'.join(self.elements()[-2:])))
1785
1787 return 'RestrictionBatch(%s)' % self.elements()
1788
1790 try :
1791 other = self.format(other)
1792 except ValueError :
1793 return False
1794 return Set.__contains__(self, other)
1795
1797 return self.search(other)
1798
1800 return self.search(other)
1801
1803 """B.get(enzyme[, add]) -> enzyme class.
1804
1805 if add is True and enzyme is not in B add enzyme to B.
1806 if add is False (which is the default) only return enzyme.
1807 if enzyme is not a RestrictionType or can not be evaluated to
1808 a RestrictionType, raise a ValueError."""
1809 e = self.format(enzyme)
1810 if e in self :
1811 return e
1812 elif add :
1813 self.add(e)
1814 return e
1815 else :
1816 raise ValueError, 'enzyme %s is not in RestrictionBatch'%e.__name__
1817
1819 """B.lambdasplit(func) -> RestrictionBatch .
1820
1821 the new batch will contains only the enzymes for which
1822 func return True."""
1823 d = [x for x in itertools.ifilter(func, self)]
1824 new = RestrictionBatch()
1825 new._data = dict(map(None, d, [True]*len(d)))
1826 return new
1827
1829 """B.add_supplier(letter) -> add a new set of enzyme to B.
1830
1831 letter represents the suppliers as defined in the dictionary
1832 RestrictionDictionary.suppliers
1833 return None.
1834 raise a KeyError if letter is not a supplier code."""
1835 supplier = suppliers_dict[letter]
1836 self.suppliers.append(letter)
1837 for x in supplier[1] :
1838 self.add_nocheck(eval(x))
1839 return
1840
1842 """B.current_suppliers() -> add a new set of enzyme to B.
1843
1844 return a sorted list of the suppliers which have been used to
1845 create the batch."""
1846 suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
1847 suppl_list.sort()
1848 return suppl_list
1849
1851 """ b += other -> add other to b, check the type of other."""
1852 self.add(other)
1853 return self
1854
1856 """ b + other -> new RestrictionBatch."""
1857 new = self.__class__(self)
1858 new.add(other)
1859 return new
1860
1862 """B.remove(other) -> remove other from B if other is a RestrictionType.
1863
1864 Safe Set.remove method. Verify that other is a RestrictionType or can be
1865 evaluated to a RestrictionType.
1866 raise a ValueError if other can not be evaluated to a RestrictionType.
1867 raise a KeyError if other is not in B."""
1868 return Set.remove(self, self.format(other))
1869
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Checked version of Set.add: other is first passed through
    self.format(), and the formatted value is what gets stored.
    Whatever self.format() raises for an unsuitable value is propagated
    (a ValueError according to the original documentation).
    """
    enzyme = self.format(other)
    return Set.add(self, enzyme)
1878
1880 """B.add_nocheck(other) -> add other to B. don't check type of other.
1881 """
1882 return Set.add(self, other)
1883
1901
1902
1904 """B.is_restriction(y) -> bool.
1905
1906 True is y or eval(y) is a RestrictionType."""
1907 return isinstance(y, RestrictionType) or \
1908 isinstance(eval(str(y)), RestrictionType)
1909
def split(self, *classes, **bool):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Keep the enzymes of B that satisfy every condition: for each class
    given, an enzyme must be a subclass of it, unless the keyword
    <class.__name__> is passed with a false value, in which case the
    enzyme must not be a subclass of it.

    It works but it is slow, so it is mostly of interest when splitting
    over multiple conditions.
    """
    #   the keyword dictionary hides the builtin of the same name,
    #   give it a usable local name.
    wanted = bool

    def keep(enzyme):
        for klass in classes:
            expected = wanted.get(klass.__name__, True)
            member = issubclass(enzyme, klass)
            if member and not expected:
                return False
            if expected and not member:
                return False
        return True

    selected = [enzyme for enzyme in self if keep(enzyme)]
    new = RestrictionBatch()
    new._data = dict([(enzyme, True) for enzyme in selected])
    return new
1932
1934 """B.elements() -> tuple.
1935
1936 give all the names of the enzymes in B sorted alphabetically."""
1937 l = [str(e) for e in self]
1938 l.sort()
1939 return l
1940
1942 """B.as_string() -> list.
1943
1944 return a list of the name of the elements of B."""
1945 return [str(e) for e in self]
1946
1948 """B.suppl_codes() -> dict
1949
1950 letter code for the suppliers"""
1951 supply = dict([(k,v[0]) for k,v in suppliers_dict.iteritems()])
1952 return supply
1953 suppl_codes = classmethod(suppl_codes)
1954
1956 "B.show_codes() -> letter codes for the suppliers"""
1957 supply = [' = '.join(i) for i in self.suppl_codes().iteritems()]
1958 print '\n'.join(supply)
1959 return
1960 show_codes = classmethod(show_codes)
1961
1963 """B.search(dna) -> dict."""
1964
1965
1966
1967
1968 if isinstance(dna, DNA) :
1969 if (dna, linear) == self.already_mapped :
1970 return self.mapping
1971 else :
1972 self.already_mapped = dna, linear
1973 fseq = FormattedSeq(dna, linear)
1974 self.mapping = dict([(x, x.search(fseq)) for x in self])
1975 return self.mapping
1976 elif isinstance(dna, FormattedSeq) :
1977 if (dna, dna.linear) == self.already_mapped :
1978 return self.mapping
1979 else :
1980 self.already_mapped = dna, dna.linear
1981 self.mapping = dict([(x, x.search(dna)) for x in self])
1982 return self.mapping
1983 raise TypeError,\
1984 "Expected Seq or MutableSeq instance, got %s instead"%type(dna)
1985
1986
1987
1988
1989
1990
1991
1992 -class Analysis(RestrictionBatch, PrintFormat) :
1993
1996 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
1997
1998 For most of the method of this class if a dictionary is given it will
1999 be used as the base to calculate the results.
2000 If no dictionary is given a new analysis using the Restriction Batch
2001 which has been given when the Analysis class has been instantiated."""
2002 RestrictionBatch.__init__(self, restrictionbatch)
2003 self.rb = restrictionbatch
2004 self.sequence = sequence
2005 self.linear = linear
2006 if self.sequence :
2007 self.search(self.sequence, self.linear)
2008
2010 return 'Analysis(%s,%s,%s)'%\
2011 (repr(self.rb),repr(self.sequence),self.linear)
2012
2014 """A._sub_set(set) -> dict.
2015
2016 Internal use only.
2017
2018 screen the results through set.
2019 Keep only the results for which the enzymes is in set.
2020 """
2021 return dict([(k,v) for k,v in self.mapping.iteritems() if k in set])
2022
2024 """A._boundaries(start, end) -> tuple.
2025
2026 Format the boundaries for use with the methods that limit the
2027 search to only part of the sequence given to analyse.
2028 """
2029 if not isinstance(start, int) :
2030 raise TypeError, 'expected int, got %s instead' % type(start)
2031 if not isinstance(end, int) :
2032 raise TypeError, 'expected int, got %s instead' % type(end)
2033 if start < 1 :
2034 start += len(self.sequence)
2035 if end < 1 :
2036 end += len(self.sequence)
2037 if start < end :
2038 pass
2039 else :
2040 start, end == end, start
2041 if start < 1 :
2042 start == 1
2043 if start < end :
2044 return start, end, self._test_normal
2045 else :
2046 return start, end, self._test_reverse
2047
2049 """A._test_normal(start, end, site) -> bool.
2050
2051 Internal use only
2052 Test if site is in between start and end.
2053 """
2054 return start <= site < end
2055
2057 """A._test_reverse(start, end, site) -> bool.
2058
2059 Internal use only
2060 Test if site is in between end and start (for circular sequences).
2061 """
2062 return start <= site <= len(self.sequence) or 1 <= site < end
2063
def print_that(self, dct=None, title='', s1='') :
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    If dct is not given (or is empty) the full dictionary self.mapping
    is used.  title and s1 are handed over unchanged to
    PrintFormat.print_that, whose return value is returned.
    """
    if not dct :
        dct = self.mapping
    #   bare print statement (Python 2): emits an empty line before the
    #   formatted output.
    print
    return PrintFormat.print_that(self, dct, title, s1)
2073
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    Recognised keywords:
        NameWidth, ConsoleWidth -- set the attribute, then recompute
                                   self.Cmodulo and self.PrefWidth.
        sequence                -- set self.sequence and search it again.
        rb                      -- run Analysis.__init__ again with the new
                                   batch and the current sequence/linear.
        linear                  -- set self.linear and search again.
        Indent, Maxsize         -- set the attribute.

    Cmodulo and PrefWidth can not be set directly, and any other keyword
    is unknown: both cases raise an AttributeError.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right."""
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        #   keyword names are compared by value; identity ('is') of two
        #   equal strings is not guaranteed.
        elif k == 'sequence':
            setattr(self, 'sequence', v)
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            #   __init__ returns None: do not rebind self with its result,
            #   the remaining keywords still have to be applied to self.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            setattr(self, 'linear', v)
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth' % k)
        else:
            raise AttributeError('Analysis has no attribute %s' % k)
    return
2106
2108 """A.full() -> dict.
2109
2110 Full Restriction Map of the sequence."""
2111 return self.mapping
2112
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes for which is_blunt() is true, with their sites.
    If dct is not given (or is empty) self.mapping is screened.
    """
    if dct:
        source = dct
    else:
        source = self.mapping
    selected = {}
    for enzyme, sites in source.items():
        if enzyme.is_blunt():
            selected[enzyme] = sites
    return selected
2120
2122 """A.overhang5([dct]) -> dict.
2123
2124 Only the enzymes which have a 5' overhang restriction site."""
2125 if not dct :
2126 dct = self.mapping
2127 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2128
2129
2131 """A.Overhang3([dct]) -> dict.
2132
2133 Only the enzymes which have a 3'overhang restriction site."""
2134 if not dct :
2135 dct = self.mapping
2136 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2137
2138
2140 """A.defined([dct]) -> dict.
2141
2142 Only the enzymes that have a defined restriction site in Rebase."""
2143 if not dct :
2144 dct = self.mapping
2145 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2146
2148 """A.with_sites([dct]) -> dict.
2149
2150 Enzymes which have at least one site in the sequence."""
2151 if not dct :
2152 dct = self.mapping
2153 return dict([(k,v) for k,v in dct.iteritems() if v])
2154
2156 """A.without_site([dct]) -> dict.
2157
2158 Enzymes which have no site in the sequence."""
2159 if not dct :
2160 dct = self.mapping
2161 return dict([(k,v) for k,v in dct.iteritems() if not v])
2162
2164 """A.With_N_Sites(N [, dct]) -> dict.
2165
2166 Enzymes which cut N times the sequence."""
2167 if not dct :
2168 dct = self.mapping
2169 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2170
2172 if not dct :
2173 dct = self.mapping
2174 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2175
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) -> dict.

    Limit the search to the enzymes named in list_of_names.

    The names that are not in AllEnzymes are reported on the standard
    output and ignored.  Without dct, a new RestrictionBatch made of the
    remaining names is searched against self.sequence; with dct, the
    entries of dct for the remaining names are returned.
    """
    #   Build a filtered list instead of deleting from names while
    #   enumerating it: deleting shifted the following items and made the
    #   loop skip them.  The caller's list is left untouched.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            print('no datas for the enzyme: ' + str(enzyme))
    if not dct:
        return RestrictionBatch(known).search(self.sequence)
    return dict([(n, dct[n]) for n in known if n in dct])
2187
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) -> dict.

    Limit the search to the enzymes whose site is of size <site_size>,
    i.e. the enzymes of A whose size attribute equals site_size.

    Without dct, a new RestrictionBatch made of these enzymes is searched
    against self.sequence; with dct, the entries of dct whose key is one
    of these enzymes are returned.
    """
    sites = [name for name in self if name.size == site_size]
    if not dct:
        return RestrictionBatch(sites).search(self.sequence)
    #   membership is tested against the selected enzymes; site_size itself
    #   is a number and can not be used as a container.
    return dict([(k, v) for k, v in dct.items() if k in sites])
2196
2198 """A.only_between(start, end[, dct]) -> dict.
2199
2200 Enzymes that cut the sequence only in between start and end."""
2201 start, end, test = self._boundaries(start, end)
2202 if not dct :
2203 dct = self.mapping
2204 d = dict(dct)
2205 for key, sites in dct.iteritems() :
2206 if not sites :
2207 del d[key]
2208 continue
2209 for site in sites:
2210 if test(start, end, site) :
2211 continue
2212 else :
2213 del d[key]
2214 break
2215 return d
2216
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes with at least one site accepted by the boundary test that
    self._boundaries(start, end) returns, i.e. enzymes cutting in between
    start and end.  They may cut outside as well: all their sites are
    reported.  If dct is not given (or is empty) self.mapping is screened.
    """
    lower, upper, inside = self._boundaries(start, end)
    source = dct or self.mapping

    def cuts_inside(positions):
        #   stop at the first site lying in the region.
        for position in positions:
            if inside(lower, upper, position):
                return True
        return False

    selected = {}
    for enzyme, positions in source.items():
        if cuts_inside(positions):
            selected[enzyme] = positions
    return selected
2233
def show_only_between(self, start, end, dct=None):
    """A.show_only_between(start, end [, dct]) -> dict.

    Enzymes returned by A.between(start, end, dct), each one associated
    with only those of its sites that lie in the region:
    start <= site <= end when start <= end, otherwise (region running
    over the origin) site >= start or site <= end.
    """
    #   between() returns a dictionary: iterate over its items to get both
    #   the enzyme and its sites (iterating the dictionary itself only
    #   yields the enzymes).
    hits = self.between(start, end, dct).items()
    if start <= end:
        d = [(k, [vv for vv in v if start <= vv <= end]) for k, v in hits]
    else:
        d = [(k, [vv for vv in v if start <= vv or vv <= end])
             for k, v in hits]
    return dict(d)
2247
2249 """A.only_outside(start, end [, dct]) -> dict.
2250
2251 Enzymes that cut the sequence outside of the region
2252 in between start and end but do not cut inside."""
2253 start, end, test = self._boundaries(start, end)
2254 if not dct : dct = self.mapping
2255 d = dict(dct)
2256 for key, sites in dct.iteritems() :
2257 if not sites :
2258 del d[key]
2259 continue
2260 for site in sites :
2261 if test(start, end, site) :
2262 del d[key]
2263 break
2264 else :
2265 continue
2266 return d
2267
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes with at least one site rejected by the boundary test that
    self._boundaries(start, end) returns, i.e. enzymes cutting outside
    the region in between start and end.
    No test is made to know if they cut or not inside this region.
    If dct is not given (or is empty) self.mapping is screened.
    """
    lower, upper, inside = self._boundaries(start, end)
    source = dct or self.mapping
    selected = {}
    for enzyme, positions in source.items():
        for position in positions:
            if not inside(lower, upper, position):
                #   one site out of the region is enough.
                selected[enzyme] = positions
                break
    return selected
2285
2286
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end: the
    enzymes of dct without any site, plus those returned by
    A.only_outside(start, end, dct).
    If dct is not given (or is empty) self.mapping is screened.
    """
    if not dct:
        dct = self.mapping
    #   screen the same dictionary in both steps; without_site() used to be
    #   called without argument and always fell back on self.mapping.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
#
#   Build every enzyme class once, when the module is loaded, and sort the
#   enzymes into two batches according to newenz.is_comm().
#
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
for TYPE, (bases, enzymes) in typedict.iteritems() :
    #
    #   Each value of typedict unpacks into two sequences of strings:
    #   bases, evaluated below into the objects used as base classes, and
    #   enzymes, whose items are used as keys of enzymedict.
    #
    bases = tuple([eval(x) for x in bases])
    #
    #   T is created through type.__new__ with RestrictionType as the
    #   metaclass argument and the evaluated bases.
    #   NOTE(review): what calling T(k, bases, enzymedict[k]) builds depends
    #   on RestrictionType, which is defined outside this block -- confirm.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes :
        #
        #   One object per enzyme name k, created with the entry of
        #   enzymedict stored under that name.
        #
        newenz = T(k, bases, enzymedict[k])
        #
        #   add_nocheck stores the enzyme without the format() check that
        #   add() performs.
        #
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)
#
#   AllEnzymes is the union of the two batches.
#
AllEnzymes = CommOnly | NonComm
#
#   Publish every enzyme under its own name: at module level locals() is
#   the module namespace, so each str(enzyme) becomes a module attribute,
#   and the same names are appended to __all__.
#
names = [str(x) for x in AllEnzymes]
locals().update(dict(map(None, names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
#
#   Remove the temporary names.  x is the variable of the list
#   comprehensions above, which Python 2 leaves bound in the enclosing
#   scope.
#
del k, x, enzymes, TYPE, bases, names
2370