1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 """ SCOP: Structural Classification of Proteins.
16
17 The SCOP database aims to provide a manually constructed classification of
18 all know protein structures into a hierarchy, the main levels of which
19 are family, superfamily and fold.
20
21 * "SCOP":http://scop.mrc-lmb.cam.ac.uk/scop/
22
23 * "Introduction":http://scop.mrc-lmb.cam.ac.uk/scop/intro.html
24
25 * "SCOP parsable files":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
26
27 The Scop object in this module represents the entire SCOP classification. It
28 can be built from the three SCOP parsable files, modified is so desired, and
29 converted back to the same file formats. A single SCOP domain (represented
30 by the Domain class) can be obtained from Scop using the domain's SCOP
31 identifier (sid).
32
33
34 nodeCodeDict -- A mapping between known 2 letter node codes and a longer
35 description. The known node types are 'cl' (class), 'cf'
36 (fold), 'sf' (superfamily), 'fa' (family), 'dm' (domain),
37 'sp' (species), 'px' (domain). Additional node types may
38 be added in the future.
39 """
40 from types import *
41 import os, string
42
43 import Des
44 import Cla
45 import Hie
46 from Residues import *
47 from Bio import SeqIO
48 from Bio.Alphabet import IUPAC
49 from Bio.Seq import Seq
50
51
52 nodeCodeDict = { 'cl':'class', 'cf':'fold', 'sf':'superfamily',
53 'fa':'family', 'dm':'protein', 'sp':'species', 'px':'domain'}
54
55
56 _nodetype_to_code= { 'class': 'cl', 'fold': 'cf', 'superfamily': 'sf',
57 'family': 'fa', 'protein': 'dm', 'species': 'sp', 'domain': 'px'}
58
59 nodeCodeOrder = [ 'ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px' ]
60
61 astralBibIds = [10,20,25,30,35,40,50,70,90,95,100]
62
63 astralEvs = [10, 5, 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 1e-4, 1e-5, 1e-10, 1e-15,
64 1e-20, 1e-25, 1e-50]
65
66 astralEv_to_file = { 10: 'e+1', 5: 'e+0,7', 1: 'e+0', 0.5: 'e-0,3', 0.1: 'e-1',
67 0.05: 'e-1,3', 0.01: 'e-2', 0.005: 'e-2,3', 0.001: 'e-3',
68 1e-4: 'e-4', 1e-5: 'e-5', 1e-10: 'e-10', 1e-15: 'e-15',
69 1e-20: 'e-20', 1e-25: 'e-25', 1e-50: 'e-50' }
70
71 astralEv_to_sql = { 10: 'e1', 5: 'e0_7', 1: 'e0', 0.5: 'e_0_3', 0.1: 'e_1',
72 0.05: 'e_1_3', 0.01: 'e_2', 0.005: 'e_2_3', 0.001: 'e_3',
73 1e-4: 'e_4', 1e-5: 'e_5', 1e-10: 'e_10', 1e-15: 'e_15',
74 1e-20: 'e_20', 1e-25: 'e_25', 1e-50: 'e_50' }
75
76
78 """Order SCOP concise classification strings (sccs).
79
80 a.4.5.1 < a.4.5.11 < b.1.1.1
81
82 A sccs (e.g. a.4.5.11) compactly represents a domain's classification.
83 The letter represents the class, and the numbers are the fold,
84 superfamily, and family, respectively.
85
86 """
87
88 s1 = sccs1.split(".")
89 s2 = sccs2.split(".")
90
91 if s1[0] != s2[0]: return cmp(s1[0], s2[0])
92
93 s1 = map(int, s1[1:])
94 s2 = map(int, s2[1:])
95
96 return cmp(s1,s2)
97
98
99 _domain_re = re.compile(r">?([\w_\.]*)\s+([\w\.]*)\s+\(([^)]*)\) (.*)")
100
101 -def parse_domain(str) :
102 """Convert an ASTRAL header string into a Scop domain.
103
104 An ASTRAL (http://astral.stanford.edu/) header contains a concise
105 description of a SCOP domain. A very similar format is used when a
106 Domain object is converted into a string. The Domain returned by this
107 method contains most of the SCOP information, but it will not be located
108 within the SCOP hierarchy (i.e. The parent node will be None). The
109 description is composed of the SCOP protein and species descriptions.
110
111 A typical ASTRAL header looks like --
112 >d1tpt_1 a.46.2.1 (1-70) Thymidine phosphorylase {Escherichia coli}
113 """
114
115 m = _domain_re.match(str)
116 if (not m) : raise SyntaxError, "Domain: "+ str
117
118 dom = Domain()
119 dom.sid = m.group(1)
120 dom.sccs = m.group(2)
121 dom.residues = Residues(m.group(3))
122 if not dom.residues.pdbid :
123 dom.residues.pdbid= dom.sid[1:5]
124 dom.description = m.group(4).strip()
125
126 return dom
127
129 filename = "dir.%s.scop.txt_%s" % (filetype,version)
130 handle = open(os.path.join( scop_dir_path, filename))
131 return handle
132
133
135 """The entire SCOP hierarchy.
136
137 root -- The root node of the hierarchy
138 """
139 - def __init__(self, cla_handle=None, des_handle=None, hie_handle=None,
140 dir_path=None, db_handle=None, version=None):
141 """Build the SCOP hierarchy from the SCOP parsable files, or a sql backend.
142
143 If no file handles are given, then a Scop object with a single
144 empty root node is returned.
145
146 If a directory and version are given (with dir_path=.., version=...) or
147 file handles for each file, the whole scop tree will be built in memory.
148
149 If a MySQLdb database handle is given, the tree will be built as needed,
150 minimising construction times. To build the SQL database to the methods
151 write_xxx_sql to create the tables.
152
153 """
154 self._sidDict = {}
155 self._sunidDict = {}
156
157 if cla_handle==des_handle==hie_handle==dir_path==db_handle==None: return
158
159 if dir_path is None and db_handle is None:
160 if cla_handle == None or des_handle==None or hie_handle==None:
161 raise RuntimeError,"Need CLA, DES and HIE files to build SCOP"
162
163 sunidDict = {}
164
165 self.db_handle = db_handle
166 try:
167
168 if db_handle:
169
170 pass
171
172 else:
173
174 if dir_path:
175 if not version :
176 raise RuntimeError, "Need SCOP version to find parsable files in directory"
177 if cla_handle or des_handle or hie_handle:
178 raise RuntimeError,"Cannot specify SCOP directory and specific files"
179
180 cla_handle = _open_scop_file( dir_path, version, 'cla')
181 des_handle = _open_scop_file( dir_path, version, 'des')
182 hie_handle = _open_scop_file( dir_path, version, 'hie')
183
184 root = Node()
185 domains = []
186 root.sunid=0
187 root.type='ro'
188 sunidDict[root.sunid] = root
189 self.root = root
190 root.description = 'SCOP Root'
191
192
193 i = Des.Iterator(des_handle, Des.Parser())
194 while 1 :
195 rec = i.next()
196 if rec is None : break
197 if rec.nodetype =='px' :
198 n = Domain()
199 n.sid = rec.name
200 domains.append(n)
201 else :
202 n = Node()
203 n.sunid = rec.sunid
204 n.type = rec.nodetype
205 n.sccs = rec.sccs
206 n.description = rec.description
207
208 sunidDict[n.sunid] = n
209
210
211 i = Hie.Iterator(hie_handle, Hie.Parser())
212 while 1 :
213 rec = i.next()
214 if rec is None : break
215 if not sunidDict.has_key(rec.sunid) :
216 print rec.sunid
217
218 n = sunidDict[rec.sunid]
219
220 if rec.parent != '' :
221
222 if not sunidDict.has_key(rec.parent):
223 raise SyntaxError, "Incomplete data?"
224
225 n.parent = sunidDict[rec.parent]
226
227 for c in rec.children:
228 if not sunidDict.has_key(c) :
229 raise SyntaxError, "Incomplete data?"
230 n.children.append(sunidDict[c])
231
232
233
234 sidDict = {}
235 i = Cla.Iterator(cla_handle, Cla.Parser())
236 while 1 :
237 rec = i.next()
238 if rec is None : break
239 n = sunidDict[rec.sunid]
240 assert n.sccs == rec.sccs
241 assert n.sid == rec.sid
242 n.residues = rec.residues
243 sidDict[n.sid] = n
244
245
246 self._sunidDict = sunidDict
247 self._sidDict = sidDict
248 self._domains = tuple(domains)
249
250 finally:
251 if dir_path :
252
253 if cla_handle : cla_handle.close()
254 if des_handle : des_handle.close()
255 if hie_handle : hie_handle.close()
256
257
260
261
262 - def getDomainBySid(self, sid) :
263 """Return a domain from its sid"""
264 if self._sidDict.has_key(sid):
265 return self._sidDict[sid]
266 if self.db_handle:
267 self.getDomainFromSQL(sid=sid)
268 if self._sidDict.has_key(sid):
269 return self._sidDict[sid]
270 else:
271 return None
272
273
275 """Return a node from its sunid"""
276 if self._sunidDict.has_key(sunid):
277 return self._sunidDict[sunid]
278 if self.db_handle:
279 self.getDomainFromSQL(sunid=sunid)
280 if self._sunidDict.has_key(sunid):
281 return self._sunidDict[sunid]
282 else:
283 return None
284
285
286 - def getDomains(self) :
287 """Returns an ordered tuple of all SCOP Domains"""
288 if self.db_handle:
289 return self.getRoot().getDescendents('px')
290 else:
291 return self._domains
292
293
294
303
304
314
315
324
325
326 - def getDomainFromSQL(self, sunid=None, sid=None):
327 """Load a node from the SQL backend using sunid or sid"""
328 if sunid==sid==None: return None
329
330 cur = self.db_handle.cursor()
331
332 if sid:
333 cur.execute("SELECT sunid FROM cla WHERE sid=%s", sid)
334 res = cur.fetchone()
335 if res is None:
336 return None
337 sunid = res[0]
338
339 cur.execute("SELECT * FROM des WHERE sunid=%s", sunid)
340 data = cur.fetchone()
341
342 if data is not None:
343 n = None
344
345
346 if data[1] != "px":
347 n = Node(scop=self)
348
349 cur.execute("SELECT child FROM hie WHERE parent=%s", sunid)
350 children = []
351 for c in cur.fetchall():
352 children.append(c[0])
353 n.children = children
354
355
356 else:
357 n = Domain(scop=self)
358 cur.execute("select sid, residues, pdbid from cla where sunid=%s",
359 sunid)
360
361 [n.sid,n.residues,pdbid] = cur.fetchone()
362 n.residues = Residues(n.residues)
363 n.residues.pdbid=pdbid
364 self._sidDict[n.sid] = n
365
366 [n.sunid,n.type,n.sccs,n.description] = data
367
368 if data[1] != 'ro':
369 cur.execute("SELECT parent FROM hie WHERE child=%s", sunid)
370 n.parent = cur.fetchone()[0]
371
372 n.sunid = int(n.sunid)
373
374 self._sunidDict[n.sunid] = n
375
376
388
389
391 """Get descendents of a node using the database backend. This avoids
392 repeated iteration of SQL calls and is therefore much quicker than
393 repeatedly calling node.getChildren().
394 """
395 if nodeCodeOrder.index(type) <= nodeCodeOrder.index(node.type): return []
396
397 des_list = []
398
399
400
401 if node.type == 'ro':
402 for c in node.getChildren():
403 for d in self.getDescendentsFromSQL(c,type):
404 des_list.append(d)
405 return des_list
406
407 cur = self.db_handle.cursor()
408
409
410 if type != 'px':
411 cur.execute("SELECT DISTINCT des.sunid,des.type,des.sccs,description FROM \
412 cla,des WHERE cla."+node.type+"=%s AND cla."+type+"=des.sunid", (node.sunid))
413 data = cur.fetchall()
414 for d in data:
415 if not self._sunidDict.has_key(int(d[0])):
416 n = Node(scop=self)
417 [n.sunid,n.type,n.sccs,n.description] = d
418 n.sunid=int(n.sunid)
419 self._sunidDict[n.sunid] = n
420
421 cur.execute("SELECT parent FROM hie WHERE child=%s", n.sunid)
422 n.parent = cur.fetchone()[0]
423
424 cur.execute("SELECT child FROM hie WHERE parent=%s", n.sunid)
425 children = []
426 for c in cur.fetchall():
427 children.append(c[0])
428 n.children = children
429
430
431 des_list.append( self._sunidDict[int(d[0])] )
432
433 else:
434 cur.execute("SELECT cla.sunid,sid,pdbid,residues,cla.sccs,type,description,sp\
435 FROM cla,des where cla.sunid=des.sunid and cla."+node.type+"=%s",
436 node.sunid)
437
438 data = cur.fetchall()
439 for d in data:
440 if not self._sunidDict.has_key(int(d[0])):
441 n = Domain(scop=self)
442
443
444 [n.sunid,n.sid, pdbid,n.residues,n.sccs,n.type,n.description,
445 n.parent] = d[0:8]
446 n.residues = Residues(n.residues)
447 n.residues.pdbid = pdbid
448 n.sunid = int(n.sunid)
449 self._sunidDict[n.sunid] = n
450 self._sidDict[n.sid] = n
451
452
453 des_list.append( self._sunidDict[int(d[0])] )
454
455 return des_list
456
457
458
459
461 """Write HIE data to SQL database"""
462 cur = handle.cursor()
463
464 cur.execute("DROP TABLE IF EXISTS hie")
465 cur.execute("CREATE TABLE hie (parent INT, child INT, PRIMARY KEY (child),\
466 INDEX (parent) )")
467
468 for p in self._sunidDict.values():
469 for c in p.children:
470 cur.execute("INSERT INTO hie VALUES (%s,%s)" % (p.sunid, c.sunid))
471
472
474 """Write CLA data to SQL database"""
475 cur = handle.cursor()
476
477 cur.execute("DROP TABLE IF EXISTS cla")
478 cur.execute("CREATE TABLE cla (sunid INT, sid CHAR(8), pdbid CHAR(4),\
479 residues VARCHAR(50), sccs CHAR(10), cl INT, cf INT, sf INT, fa INT,\
480 dm INT, sp INT, px INT, PRIMARY KEY (sunid), INDEX (SID) )")
481
482 for n in self._sidDict.values():
483 c = n.toClaRecord()
484 cur.execute( "INSERT INTO cla VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
485 (n.sunid, n.sid, c.residues.pdbid, c.residues, n.sccs,
486 n.getAscendent('cl').sunid, n.getAscendent('cf').sunid,
487 n.getAscendent('sf').sunid, n.getAscendent('fa').sunid,
488 n.getAscendent('dm').sunid, n.getAscendent('sp').sunid,
489 n.sunid ))
490
491
493 """Write DES data to SQL database"""
494 cur = handle.cursor()
495
496 cur.execute("DROP TABLE IF EXISTS des")
497 cur.execute("CREATE TABLE des (sunid INT, type CHAR(2), sccs CHAR(10),\
498 description VARCHAR(255),\
499 PRIMARY KEY (sunid) )")
500
501 for n in self._sunidDict.values():
502 cur.execute( "INSERT INTO des VALUES (%s,%s,%s,%s)",
503 ( n.sunid, n.type, n.sccs, n.description ) )
504
505
506
508 """ A node in the Scop hierarchy
509
510 sunid -- SCOP unique identifiers. e.g. '14986'
511
512 parent -- The parent node
513
514 children -- A list of child nodes
515
516 sccs -- SCOP concise classification string. e.g. 'a.1.1.2'
517
518 type -- A 2 letter node type code. e.g. 'px' for domains
519
520 description --
521
522 """
524 """Create a Node in the scop hierarchy. If a Scop instance is provided to the
525 constructor, this will be used to lookup related references using the SQL
526 methods. If no instance is provided, it is assumed the whole tree exists
527 and is connected."""
528 self.sunid=''
529 self.parent = None
530 self.children=[]
531 self.sccs = ''
532 self.type =''
533 self.description =''
534 self.scop=scop
535
544
546 """Return an Hie.Record"""
547 rec = Hie.Record()
548 rec.sunid = str(self.sunid)
549 if self.getParent() :
550 rec.parent = str(self.getParent().sunid)
551 else:
552 rec.parent = '-'
553 for c in self.getChildren() :
554 rec.children.append(str(c.sunid))
555 return rec
556
558 """Return a Des.Record"""
559 rec = Des.Record()
560 rec.sunid = str(self.sunid)
561 rec.nodetype = self.type
562 rec.sccs = self.sccs
563 rec.description = self.description
564 return rec
565
567 """Return a list of children of this Node"""
568 if self.scop is None:
569 return self.children
570 else:
571 return map ( self.scop.getNodeBySunid, self.children )
572
574 """Return the parent of this Node"""
575 if self.scop is None:
576 return self.parent
577 else:
578 return self.scop.getNodeBySunid( self.parent )
579
599
600
602 """ Return the ancenstor node of the given type, or None.Node type can a
603 two letter code or longer description. e.g. 'fa' or 'family'"""
604 if _nodetype_to_code.has_key(node_type):
605 node_type = _nodetype_to_code[node_type]
606
607 if self.scop:
608 return self.scop.getAscendentFromSQL(self,node_type)
609 else:
610 n = self
611 if n.type == node_type: return None
612 while n.type != node_type:
613 if n.type == 'ro': return None
614 n = n.getParent()
615
616 return n
617
618
619
620 -class Domain(Node) :
621 """ A SCOP domain. A leaf node in the Scop hierarchy.
622
623 sid -- The SCOP domain identifier. e.g. 'd5hbib_'
624
625 residues -- A Residue object. It defines the collection
626 of PDB atoms that make up this domain.
627 """
628 - def __init__(self,scop=None) :
629 Node.__init__(self,scop=scop)
630 self.sid = ''
631 self.residues = None
632
633 - def __str__(self) :
634 s = []
635 s.append(self.sid)
636 s.append(self.sccs)
637 s.append("("+str(self.residues)+")")
638
639 if not self.getParent() :
640 s.append(self.description)
641 else :
642 sp = self.getParent()
643 dm = sp.getParent()
644 s.append(dm.description)
645 s.append("{"+sp.description+"}")
646
647 return " ".join(s)
648
649 - def toDesRecord(self):
650 """Return a Des.Record"""
651 rec = Node.toDesRecord(self)
652 rec.name = self.sid
653 return rec
654
655 - def toClaRecord(self) :
656 """Return a Cla.Record"""
657 rec = Cla.Record()
658 rec.sid = self.sid
659 rec.residues = self.residues
660 rec.sccs = self.sccs
661 rec.sunid = self.sunid
662
663 n = self
664 while n.sunid != 0:
665 rec.hierarchy.append( (n.type, str(n.sunid)) )
666 n = n.getParent()
667
668 rec.hierarchy.reverse()
669
670 return rec
671
673 """Abstraction of the ASTRAL database, which has sequences for all the SCOP domains,
674 as well as clusterings by percent id or evalue.
675 """
676
677 - def __init__( self, dir_path=None, version=None, scop=None, ind_file=None,
678 astral_file=None, db_handle=None):
679 """
680 Initialise the astral database.
681
682 You must provide either a directory of SCOP files:
683
684 dir_path - string, the path to location of the scopseq-x.xx directory
685 (not the directory itself), and
686 version -a version number.
687
688 or, a FASTA file:
689
690 astral_file - string, a path to a fasta file (which will be loaded in memory)
691
692 or, a MYSQL database:
693
694 db_handle - a database handle for a MYSQL database containing a table
695 'astral' with the astral data in it. This can be created
696 using writeToSQL.
697
698 Note that the ind_file argument is deprecated.
699 """
700 if ind_file :
701 raise RuntimeError, "The ind_file (index file) argument is deprecated"
702
703 if astral_file==dir_path==db_handle==None:
704 raise RunTimeError,"Need either file handle, or (dir_path + version)\
705 or database handle to construct Astral"
706 if not scop:
707 raise RuntimeError, "Must provide a Scop instance to construct"
708
709 self.scop = scop
710 self.db_handle = db_handle
711
712
713 if not astral_file and not db_handle:
714 if dir_path == None or version == None:
715 raise RuntimeError, "must provide dir_path and version"
716
717 self.version = version
718 self.path = os.path.join( dir_path, "scopseq-%s" % version)
719 astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
720 astral_file = os.path.join (self.path, astral_file)
721
722 if astral_file:
723
724 self.fasta_dict = SeqIO.to_dict(SeqIO.parse(open(astral_file), "fasta"))
725
726 self.astral_file = astral_file
727 self.EvDatasets = {}
728 self.EvDatahash = {}
729 self.IdDatasets = {}
730 self.IdDatahash = {}
731
732
734 """get domains clustered by evalue"""
735 if not self.EvDatasets.has_key(id):
736 if self.db_handle:
737 self.EvDatasets[id] = self.getAstralDomainsFromSQL(astralEv_to_sql[id])
738
739 else:
740 if not self.path:
741 raise RuntimeError, "No scopseq directory specified"
742
743 file_prefix = "astral-scopdom-seqres-sel-gs"
744 filename = "%s-e100m-%s-%s.id" % (file_prefix, astralEv_to_file[id] ,
745 self.version)
746 filename = os.path.join(self.path,filename)
747 self.EvDatasets[id] = self.getAstralDomainsFromFile(filename)
748 return self.EvDatasets[id]
749
750
752 """get domains clustered by percent id"""
753 if not self.IdDatasets.has_key(id):
754 if self.db_handle:
755 self.IdDatasets[id] = self.getAstralDomainsFromSQL("id"+str(id))
756
757 else:
758 if not self.path:
759 raise RuntimeError, "No scopseq directory specified"
760
761 file_prefix = "astral-scopdom-seqres-sel-gs"
762 filename = "%s-bib-%s-%s.id" % (file_prefix, id, self.version)
763 filename = os.path.join(self.path,filename)
764 self.IdDatasets[id] = self.getAstralDomainsFromFile(filename)
765 return self.IdDatasets[id]
766
767
768 - def getAstralDomainsFromFile(self,filename=None,file_handle=None):
769 """Get the scop domains from a file containing a list of sids"""
770 if file_handle == filename == none:
771 raise RuntimeError, "You must provide a filename or handle"
772 if not file_handle:
773 file_handle = open(filename)
774 doms = []
775 while 1:
776 line = file_handle.readline()
777 if not line:
778 break
779 line = line.rstrip()
780 doms.append(line)
781 if filename:
782 file_handle.close()
783
784 doms = filter( lambda a: a[0]=='d', doms )
785 doms = map( self.scop.getDomainBySid, doms )
786 return doms
787
788 - def getAstralDomainsFromSQL(self, column):
789 """Load a set of astral domains from a column in the astral table of a MYSQL
790 database (which can be created with writeToSQL(...)"""
791 cur = self.db_handle.cursor()
792 cur.execute("SELECT sid FROM astral WHERE "+column+"=1")
793 data = cur.fetchall()
794 data = map( lambda x: self.scop.getDomainBySid(x[0]), data)
795
796 return data
797
798
800 """get the seq record of a given domain from its sid"""
801 if self.db_handle is None:
802 return self.fasta_dict[domain].seq
803
804 else:
805 cur = self.db_handle.cursor()
806 cur.execute("SELECT seq FROM astral WHERE sid=%s", domain)
807 return Seq(cur.fetchone()[0])
808
810 """Return seq associated with domain"""
811 return self.getSeqBySid(domain.sid)
812
813
814 - def hashedDomainsById(self,id):
815 """Get domains clustered by sequence identity in a dict"""
816 if not self.IdDatahash.has_key(id):
817 self.IdDatahash[id] = {}
818 for d in self.domainsClusteredById(id):
819 self.IdDatahash[id][d] = 1
820 return self.IdDatahash[id]
821
822 - def hashedDomainsByEv(self,id):
823 """Get domains clustered by evalue in a dict"""
824 if not self.EvDatahash.has_key(id):
825 self.EvDatahash[id] = {}
826 for d in self.domainsClusteredByEv(id):
827 self.EvDatahash[id][d] = 1
828 return self.EvDatahash[id]
829
830
831 - def isDomainInId(self,dom,id):
832 """Returns true if the domain is in the astral clusters for percent ID"""
833 return self.hashedDomainsById(id).has_key(dom)
834
835 - def isDomainInEv(self,dom,id):
836 """Returns true if the domain is in the ASTRAL clusters for evalues"""
837 return self.hashedDomainsByEv(id).has_key(dom)
838
839
841 """Write the ASTRAL database to a MYSQL database"""
842 cur = db_handle.cursor()
843
844 cur.execute("DROP TABLE IF EXISTS astral")
845 cur.execute("CREATE TABLE astral (sid CHAR(8), seq TEXT, PRIMARY KEY (sid))")
846
847 for dom in self.fasta_dict.keys():
848 cur.execute( "INSERT INTO astral (sid,seq) values (%s,%s)",
849 (dom, self.fasta_dict[dom].seq.data))
850
851
852 for i in astralBibIds:
853 cur.execute("ALTER TABLE astral ADD (id"+str(i)+" TINYINT)")
854
855 for d in self.domainsClusteredById(i):
856 cur.execute("UPDATE astral SET id"+str(i)+"=1 WHERE sid=%s",
857 d.sid)
858
859 for ev in astralEvs:
860 cur.execute("ALTER TABLE astral ADD ("+astralEv_to_sql[ev]+" TINYINT)")
861
862 for d in self.domainsClusteredByEv(ev):
863
864 cur.execute("UPDATE astral SET "+astralEv_to_sql[ev]+"=1 WHERE sid=%s",
865 d.sid)
866