1
2
3
4
5
6 from Numeric import sum
7 from types import StringType
8
9 from Bio.Alphabet import ProteinAlphabet
10 from Bio.Seq import Seq
11 from Bio.SCOP.Raf import to_one_letter_code
12 from Bio.PDB.PDBExceptions import PDBException
13 from Bio.PDB.Residue import Residue, DisorderedResidue
14 from Vector import calc_dihedral, calc_angle
15
16 __doc__="""
17 Polypeptide related classes (construction and representation).
18
19 Example:
20
21 >>> ppb=PPBuilder()
22 >>> for pp in ppb.build_peptides(structure):
23 >>> print pp.get_sequence()
24 """
25
26 standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",
27 "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL",
28 "TRP", "TYR"]
29
30
31 aa1="ACDEFGHIKLMNPQRSTVWY"
32 aa3=standard_aa_names
33
34 d1_to_index={}
35 dindex_to_1={}
36 d3_to_index={}
37 dindex_to_3={}
38
39
40 for i in range(0, 20):
41 n1=aa1[i]
42 n3=aa3[i]
43 d1_to_index[n1]=i
44 dindex_to_1[i]=n1
45 d3_to_index[n3]=i
46 dindex_to_3[i]=n3
47
49 """
50 Index to corresponding one letter amino acid name.
51 For example: 0 to A.
52 """
53 return dindex_to_1[index]
54
56 """
57 One letter code to index.
58 For example: A to 0.
59 """
60 return d1_to_index[s]
61
63 """
64 Index to corresponding three letter amino acid name.
65 For example: 0 to ALA.
66 """
67 return dindex_to_3[i]
68
70 """
71 Three letter code to index.
72 For example: ALA to 0.
73 """
74 return d3_to_index[s]
75
77 """
78 Three letter code to one letter code.
79 For example: ALA to A.
80 """
81 i=d3_to_index[s]
82 return dindex_to_1[i]
83
85 """
86 One letter code to three letter code.
87 For example: A to ALA.
88 """
89 i=d1_to_index[s]
90 return dindex_to_3[i]
91
92 -def is_aa(residue, standard=0):
93 """
94 Return 1 if residue object/string is an amino acid.
95
96 @param residue: a L{Residue} object OR a three letter amino acid code
97 @type residue: L{Residue} or string
98
99 @param standard: flag to check for the 20 AA (default false)
100 @type standard: boolean
101 """
102 if not type(residue)==StringType:
103 residue=residue.get_resname()
104 residue=residue.upper()
105 if standard:
106 return d3_to_index.has_key(residue)
107 else:
108 return to_one_letter_code.has_key(residue)
109
110
112 """
113 A polypeptide is simply a list of L{Residue} objects.
114 """
116 """
117 @return: the list of C-alpha atoms
118 @rtype: [L{Atom}, L{Atom}, ...]
119 """
120 ca_list=[]
121 for res in self:
122 ca=res["CA"]
123 ca_list.append(ca)
124 return ca_list
125
127 """
128 Return the list of phi/psi dihedral angles
129 """
130 ppl=[]
131 lng=len(self)
132 for i in range(0, lng):
133 res=self[i]
134 try:
135 n=res['N'].get_vector()
136 ca=res['CA'].get_vector()
137 c=res['C'].get_vector()
138 except:
139
140
141 ppl.append((None, None))
142 res.xtra["PHI"]=None
143 res.xtra["PSI"]=None
144 continue
145
146 if i>0:
147 rp=self[i-1]
148 try:
149 cp=rp['C'].get_vector()
150 phi=calc_dihedral(cp, n, ca, c)
151 except:
152 phi=None
153 else:
154
155 phi=None
156
157 if i<(lng-1):
158 rn=self[i+1]
159 try:
160 nn=rn['N'].get_vector()
161 psi=calc_dihedral(n, ca, c, nn)
162 except:
163 psi=None
164 else:
165
166 psi=None
167 ppl.append((phi, psi))
168
169 res.xtra["PHI"]=phi
170 res.xtra["PSI"]=psi
171 return ppl
172
174 """
175 Return list of tau torsions angles for all 4 consecutive
176 Calpha atoms.
177 """
178 ca_list=self.get_ca_list()
179 tau_list=[]
180 for i in range(0, len(ca_list)-3):
181 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3]]
182 vector_list=map(lambda a: a.get_vector(), atom_list)
183 v1, v2, v3, v4=vector_list
184 tau=calc_dihedral(v1, v2, v3, v4)
185 tau_list.append(tau)
186
187 res=ca_list[i+2].get_parent()
188 res.xtra["TAU"]=tau
189 return tau_list
190
192 """
193 Return list of theta angles for all 3 consecutive
194 Calpha atoms.
195 """
196 theta_list=[]
197 ca_list=self.get_ca_list()
198 for i in range(0, len(ca_list)-2):
199 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2]]
200 vector_list=map(lambda a: a.get_vector(), atom_list)
201 v1, v2, v3=vector_list
202 theta=calc_angle(v1, v2, v3)
203 theta_list.append(theta)
204
205 res=ca_list[i+1].get_parent()
206 res.xtra["THETA"]=theta
207 return theta_list
208
226
228 """
229 Return <Polypeptide start=START end=END>, where START
230 and END are sequence identifiers of the outer residues.
231 """
232 start=self[0].get_id()[1]
233 end=self[-1].get_id()[1]
234 s="<Polypeptide start=%s end=%s>" % (start, end)
235 return s
236
238 """
239 Base class to extract polypeptides.
240 It checks if two consecutive residues in a chain
241 are connected. The connectivity test is implemented by a
242 subclass.
243 """
245 """
246 @param radius: distance
247 @type radius: float
248 """
249 self.radius=radius
250
252 "Check if the residue is an amino acid."
253 if is_aa(residue):
254
255 return 1
256 else:
257 return 0
258
260 """
261 Build and return a list of Polypeptide objects.
262
263 @param entity: polypeptides are searched for in this object
264 @type entity: L{Structure}, L{Model} or L{Chain}
265
266 @param aa_only: if 1, the residue needs to be a standard AA
267 @type aa_only: int
268 """
269 is_connected=self._is_connected
270 accept=self._accept
271 level=entity.get_level()
272
273 if level=="S":
274 model=entity[0]
275 chain_list=model.get_list()
276 elif level=="M":
277 chain_list=entity.get_list()
278 elif level=="C":
279 chain_list=[entity]
280 else:
281 raise PDBException, "Entity should be Structure, Model or Chain."
282 pp_list=[]
283 for chain in chain_list:
284 chain_it=iter(chain)
285 prev=chain_it.next()
286 pp=None
287 for next in chain_it:
288 if aa_only and not accept(prev):
289 prev=next
290 continue
291 if is_connected(prev, next):
292 if pp is None:
293 pp=Polypeptide()
294 pp.append(prev)
295 pp_list.append(pp)
296 pp.append(next)
297 else:
298 pp=None
299 prev=next
300 return pp_list
301
302
304 """
305 Use CA--CA distance to find polypeptides.
306 """
309
330
331
333 """
334 Use C--N distance to find polypeptides.
335 """
338
373
375 "Return 1 if distance between atoms<radius"
376 if (c-n)<self.radius:
377 return 1
378 else:
379 return 0
380
381
382 if __name__=="__main__":
383
384 import sys
385
386 from Bio.PDB.PDBParser import PDBParser
387
388 p=PDBParser(PERMISSIVE=1)
389
390 s=p.get_structure("scr", sys.argv[1])
391
392 ppb=PPBuilder()
393
394 print "C-N"
395 for pp in ppb.build_peptides(s):
396 print pp.get_sequence()
397 for pp in ppb.build_peptides(s[0]):
398 print pp.get_sequence()
399 for pp in ppb.build_peptides(s[0]["A"]):
400 print pp.get_sequence()
401
402 for pp in ppb.build_peptides(s):
403 for phi, psi in pp.get_phi_psi_list():
404 print phi, psi
405
406 ppb=CaPPBuilder()
407
408 print "CA-CA"
409 for pp in ppb.build_peptides(s):
410 print pp.get_sequence()
411 for pp in ppb.build_peptides(s[0]):
412 print pp.get_sequence()
413 for pp in ppb.build_peptides(s[0]["A"]):
414 print pp.get_sequence()
415