Package Bio :: Package PDB :: Module Polypeptide
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.Polypeptide

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  from Numeric import sum 
  7  from types import StringType 
  8   
  9  from Bio.Alphabet import ProteinAlphabet 
 10  from Bio.Seq import Seq 
 11  from Bio.SCOP.Raf import to_one_letter_code 
 12  from Bio.PDB.PDBExceptions import PDBException 
 13  from Bio.PDB.Residue import Residue, DisorderedResidue 
 14  from Vector import calc_dihedral, calc_angle 
 15   
 16  __doc__=""" 
 17  Polypeptide related classes (construction and representation). 
 18   
 19  Example: 
 20   
 21      >>> ppb=PPBuilder() 
 22      >>> for pp in ppb.build_peptides(structure): 
 23      >>>     print pp.get_sequence() 
 24  """ 
 25   
 26  standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",  
 27                     "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", 
 28                     "TRP", "TYR"] 
 29   
 30   
 31  aa1="ACDEFGHIKLMNPQRSTVWY" 
 32  aa3=standard_aa_names 
 33   
 34  d1_to_index={} 
 35  dindex_to_1={} 
 36  d3_to_index={} 
 37  dindex_to_3={} 
 38   
 39  # Create some lookup tables 
 40  for i in range(0, 20): 
 41      n1=aa1[i] 
 42      n3=aa3[i] 
 43      d1_to_index[n1]=i 
 44      dindex_to_1[i]=n1 
 45      d3_to_index[n3]=i 
 46      dindex_to_3[i]=n3 
 47   
48 -def index_to_one(index):
49 """ 50 Index to corresponding one letter amino acid name. 51 For example: 0 to A. 52 """ 53 return dindex_to_1[index]
54
55 -def one_to_index(s):
56 """ 57 One letter code to index. 58 For example: A to 0. 59 """ 60 return d1_to_index[s]
61
62 -def index_to_three(i):
63 """ 64 Index to corresponding three letter amino acid name. 65 For example: 0 to ALA. 66 """ 67 return dindex_to_3[i]
68
69 -def three_to_index(s):
70 """ 71 Three letter code to index. 72 For example: ALA to 0. 73 """ 74 return d3_to_index[s]
75
76 -def three_to_one(s):
77 """ 78 Three letter code to one letter code. 79 For example: ALA to A. 80 """ 81 i=d3_to_index[s] 82 return dindex_to_1[i]
83
84 -def one_to_three(s):
85 """ 86 One letter code to three letter code. 87 For example: A to ALA. 88 """ 89 i=d1_to_index[s] 90 return dindex_to_3[i]
91
92 -def is_aa(residue, standard=0):
93 """ 94 Return 1 if residue object/string is an amino acid. 95 96 @param residue: a L{Residue} object OR a three letter amino acid code 97 @type residue: L{Residue} or string 98 99 @param standard: flag to check for the 20 AA (default false) 100 @type standard: boolean 101 """ 102 if not type(residue)==StringType: 103 residue=residue.get_resname() 104 residue=residue.upper() 105 if standard: 106 return d3_to_index.has_key(residue) 107 else: 108 return to_one_letter_code.has_key(residue)
109 110
111 -class Polypeptide(list):
112 """ 113 A polypeptide is simply a list of L{Residue} objects. 114 """
115 - def get_ca_list(self):
116 """ 117 @return: the list of C-alpha atoms 118 @rtype: [L{Atom}, L{Atom}, ...] 119 """ 120 ca_list=[] 121 for res in self: 122 ca=res["CA"] 123 ca_list.append(ca) 124 return ca_list
125
126 - def get_phi_psi_list(self):
127 """ 128 Return the list of phi/psi dihedral angles 129 """ 130 ppl=[] 131 lng=len(self) 132 for i in range(0, lng): 133 res=self[i] 134 try: 135 n=res['N'].get_vector() 136 ca=res['CA'].get_vector() 137 c=res['C'].get_vector() 138 except: 139 # Some atoms are missing 140 # Phi/Psi cannot be calculated for this residue 141 ppl.append((None, None)) 142 res.xtra["PHI"]=None 143 res.xtra["PSI"]=None 144 continue 145 # Phi 146 if i>0: 147 rp=self[i-1] 148 try: 149 cp=rp['C'].get_vector() 150 phi=calc_dihedral(cp, n, ca, c) 151 except: 152 phi=None 153 else: 154 # No phi for residue 0! 155 phi=None 156 # Psi 157 if i<(lng-1): 158 rn=self[i+1] 159 try: 160 nn=rn['N'].get_vector() 161 psi=calc_dihedral(n, ca, c, nn) 162 except: 163 psi=None 164 else: 165 # No psi for last residue! 166 psi=None 167 ppl.append((phi, psi)) 168 # Add Phi/Psi to xtra dict of residue 169 res.xtra["PHI"]=phi 170 res.xtra["PSI"]=psi 171 return ppl
172
173 - def get_tau_list(self):
174 """ 175 Return list of tau torsions angles for all 4 consecutive 176 Calpha atoms. 177 """ 178 ca_list=self.get_ca_list() 179 tau_list=[] 180 for i in range(0, len(ca_list)-3): 181 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3]] 182 vector_list=map(lambda a: a.get_vector(), atom_list) 183 v1, v2, v3, v4=vector_list 184 tau=calc_dihedral(v1, v2, v3, v4) 185 tau_list.append(tau) 186 # Put tau in xtra dict of residue 187 res=ca_list[i+2].get_parent() 188 res.xtra["TAU"]=tau 189 return tau_list
190
191 - def get_theta_list(self):
192 """ 193 Return list of theta angles for all 3 consecutive 194 Calpha atoms. 195 """ 196 theta_list=[] 197 ca_list=self.get_ca_list() 198 for i in range(0, len(ca_list)-2): 199 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2]] 200 vector_list=map(lambda a: a.get_vector(), atom_list) 201 v1, v2, v3=vector_list 202 theta=calc_angle(v1, v2, v3) 203 theta_list.append(theta) 204 # Put tau in xtra dict of residue 205 res=ca_list[i+1].get_parent() 206 res.xtra["THETA"]=theta 207 return theta_list
208
209 - def get_sequence(self):
210 """ 211 Return the AA sequence. 212 213 @return: polypeptide sequence 214 @rtype: L{Seq} 215 """ 216 s="" 217 for res in self: 218 resname=res.get_resname() 219 if to_one_letter_code.has_key(resname): 220 resname=to_one_letter_code[resname] 221 else: 222 resname='X' 223 s=s+resname 224 seq=Seq(s, ProteinAlphabet) 225 return seq
226
227 - def __repr__(self):
228 """ 229 Return <Polypeptide start=START end=END>, where START 230 and END are sequence identifiers of the outer residues. 231 """ 232 start=self[0].get_id()[1] 233 end=self[-1].get_id()[1] 234 s="<Polypeptide start=%s end=%s>" % (start, end) 235 return s
236
237 -class _PPBuilder:
238 """ 239 Base class to extract polypeptides. 240 It checks if two consecutive residues in a chain 241 are connected. The connectivity test is implemented by a 242 subclass. 243 """
244 - def __init__(self, radius):
245 """ 246 @param radius: distance 247 @type radius: float 248 """ 249 self.radius=radius
250
251 - def _accept(self, residue):
252 "Check if the residue is an amino acid." 253 if is_aa(residue): 254 # not a standard AA so skip 255 return 1 256 else: 257 return 0
258
259 - def build_peptides(self, entity, aa_only=1):
260 """ 261 Build and return a list of Polypeptide objects. 262 263 @param entity: polypeptides are searched for in this object 264 @type entity: L{Structure}, L{Model} or L{Chain} 265 266 @param aa_only: if 1, the residue needs to be a standard AA 267 @type aa_only: int 268 """ 269 is_connected=self._is_connected 270 accept=self._accept 271 level=entity.get_level() 272 # Decide wich entity we are dealing with 273 if level=="S": 274 model=entity[0] 275 chain_list=model.get_list() 276 elif level=="M": 277 chain_list=entity.get_list() 278 elif level=="C": 279 chain_list=[entity] 280 else: 281 raise PDBException, "Entity should be Structure, Model or Chain." 282 pp_list=[] 283 for chain in chain_list: 284 chain_it=iter(chain) 285 prev=chain_it.next() 286 pp=None 287 for next in chain_it: 288 if aa_only and not accept(prev): 289 prev=next 290 continue 291 if is_connected(prev, next): 292 if pp is None: 293 pp=Polypeptide() 294 pp.append(prev) 295 pp_list.append(pp) 296 pp.append(next) 297 else: 298 pp=None 299 prev=next 300 return pp_list
301 302
303 -class CaPPBuilder(_PPBuilder):
304 """ 305 Use CA--CA distance to find polypeptides. 306 """
307 - def __init__(self, radius=4.3):
308 _PPBuilder.__init__(self, radius)
309
310 - def _is_connected(self, prev, next):
311 for r in [prev, next]: 312 if not r.has_id("CA"): 313 return 0 314 n=next["CA"] 315 p=prev["CA"] 316 # Unpack disordered 317 if n.is_disordered(): 318 nlist=n.disordered_get_list() 319 else: 320 nlist=[n] 321 if p.is_disordered(): 322 plist=p.disordered_get_list() 323 else: 324 plist=[p] 325 for nn in nlist: 326 for pp in plist: 327 if (nn-pp)<self.radius: 328 return 1 329 return 0
330 331
332 -class PPBuilder(_PPBuilder):
333 """ 334 Use C--N distance to find polypeptides. 335 """
336 - def __init__(self, radius=1.8):
337 _PPBuilder.__init__(self, radius)
338
339 - def _is_connected(self, prev, next):
340 if not prev.has_id("C"): 341 return 0 342 if not next.has_id("N"): 343 return 0 344 test_dist=self._test_dist 345 c=prev["C"] 346 n=next["N"] 347 # Test all disordered atom positions! 348 if c.is_disordered(): 349 clist=c.disordered_get_list() 350 else: 351 clist=[c] 352 if n.is_disordered(): 353 nlist=n.disordered_get_list() 354 else: 355 nlist=[n] 356 for nn in nlist: 357 for cc in clist: 358 # To form a peptide bond, N and C must be 359 # within radius and have the same altloc 360 # identifier or one altloc blanc 361 n_altloc=nn.get_altloc() 362 c_altloc=cc.get_altloc() 363 if n_altloc==c_altloc or n_altloc==" " or c_altloc==" ": 364 if test_dist(nn, cc): 365 # Select the disordered atoms that 366 # are indeed bonded 367 if c.is_disordered(): 368 c.disordered_select(c_altloc) 369 if n.is_disordered(): 370 n.disordered_select(n_altloc) 371 return 1 372 return 0
373
374 - def _test_dist(self, c, n):
375 "Return 1 if distance between atoms<radius" 376 if (c-n)<self.radius: 377 return 1 378 else: 379 return 0
380 381 382 if __name__=="__main__": 383 384 import sys 385 386 from Bio.PDB.PDBParser import PDBParser 387 388 p=PDBParser(PERMISSIVE=1) 389 390 s=p.get_structure("scr", sys.argv[1]) 391 392 ppb=PPBuilder() 393 394 print "C-N" 395 for pp in ppb.build_peptides(s): 396 print pp.get_sequence() 397 for pp in ppb.build_peptides(s[0]): 398 print pp.get_sequence() 399 for pp in ppb.build_peptides(s[0]["A"]): 400 print pp.get_sequence() 401 402 for pp in ppb.build_peptides(s): 403 for phi, psi in pp.get_phi_psi_list(): 404 print phi, psi 405 406 ppb=CaPPBuilder() 407 408 print "CA-CA" 409 for pp in ppb.build_peptides(s): 410 print pp.get_sequence() 411 for pp in ppb.build_peptides(s[0]): 412 print pp.get_sequence() 413 for pp in ppb.build_peptides(s[0]["A"]): 414 print pp.get_sequence() 415