Package Bio :: Package PDB :: Module FragmentMapper'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.FragmentMapper'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  from Numeric import array, Float0, zeros 
  7  import os 
  8   
  9  from Bio.SVDSuperimposer import SVDSuperimposer 
 10  from Bio.PDB import * 
 11   
 12  __doc__=""" 
 13  Classify protein backbone structure according to Kolodny et al's fragment 
 14  libraries. It can be regarded as a form of objective secondary structure 
 15  classification. Only fragments of length 5 or 7 are supported (ie. there is a 
 16  'central' residue). 
 17   
 18  Full reference: 
 19   
 20  Kolodny R, Koehl P, Guibas L, Levitt M. 
 21  Small libraries of protein fragments model native protein structures accurately. 
 22  J Mol Biol. 2002 323(2):297-307. 
 23   
 24  The definition files of the fragments can be obtained from: 
 25   
 26  U{http://csb.stanford.edu/~rachel/fragments/} 
 27   
 28  You need these files to use this module. 
 29   
 30  The following example uses the library with 10 fragments of length 5. 
 31  The library files can be found in directory 'fragment_data'. 
 32   
 33      >>> model=structure[0] 
 34      >>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data") 
 35      >>> fm.map(model) 
 36      >>> fragment=fm[residue] 
 37  """ 
 38   
 39   
 40  # fragment file (lib_SIZE_z_LENGTH.txt) 
 41  # SIZE=number of fragments 
 42  # LENGTH=length of fragment (4,5,6,7) 
 43  _FRAGMENT_FILE="lib_%s_z_%s.txt" 
 44   
 45   
46 -def _read_fragments(size, length, dir="."):
47 """ 48 Read a fragment spec file (available from 49 U{http://csb.stanford.edu/rachel/fragments/} 50 and return a list of Fragment objects. 51 52 @param size: number of fragments in the library 53 @type size: int 54 55 @param length: length of the fragments 56 @type length: int 57 58 @param dir: directory where the fragment spec files can be found 59 @type dir: string 60 """ 61 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length) 62 fp=open(filename, "r") 63 flist=[] 64 # ID of fragment=rank in spec file 65 fid=0 66 for l in fp.readlines(): 67 # skip comment and blank lines 68 if l[0]=="*" or l[0]=="\n": 69 continue 70 sl=l.split() 71 if sl[1]=="------": 72 # Start of fragment definition 73 f=Fragment(length, fid) 74 flist.append(f) 75 # increase fragment id (rank) 76 fid+=1 77 continue 78 # Add CA coord to Fragment 79 coord=array(map(float, sl[0:3])) 80 # XXX= dummy residue name 81 f.add_residue("XXX", coord) 82 fp.close() 83 return flist
84 85
86 -class Fragment:
87 """ 88 Represent a polypeptide C-alpha fragment. 89 """
90 - def __init__(self, length, fid):
91 """ 92 @param length: length of the fragment 93 @type length: int 94 95 @param fid: id for the fragment 96 @type fid: int 97 """ 98 # nr of residues in fragment 99 self.length=length 100 # nr of residues added 101 self.counter=0 102 self.resname_list=[] 103 # CA coordinate matrix 104 self.coords_ca=zeros((length, 3), "d") 105 self.fid=fid
106
107 - def get_resname_list(self):
108 """ 109 @return: the residue names 110 @rtype: [string, string,...] 111 """ 112 return self.resname_list
113
114 - def get_id(self):
115 """ 116 @return: id for the fragment 117 @rtype: int 118 """ 119 return self.fid
120
121 - def get_coords(self):
122 """ 123 @return: the CA coords in the fragment 124 @rtype: Numeric (Nx3) array 125 """ 126 return self.coords_ca
127
128 - def add_residue(self, resname, ca_coord):
129 """ 130 @param resname: residue name (eg. GLY). 131 @type resname: string 132 133 @param ca_coord: the c-alpha coorinates of the residues 134 @type ca_coord: Numeric array with length 3 135 """ 136 if self.counter>=self.length: 137 raise PDBException, "Fragment boundary exceeded." 138 self.resname_list.append(resname) 139 self.coords_ca[self.counter]=ca_coord 140 self.counter=self.counter+1
141
142 - def __len__(self):
143 """ 144 @return: length of fragment 145 @rtype: int 146 """ 147 return self.length
148
149 - def __sub__(self, other):
150 """ 151 Return rmsd between two fragments. 152 153 Example: 154 >>> rmsd=fragment1-fragment2 155 156 @return: rmsd between fragments 157 @rtype: float 158 """ 159 sup=SVDSuperimposer() 160 sup.set(self.coords_ca, other.coords_ca) 161 sup.run() 162 return sup.get_rms()
163
164 - def __repr__(self):
165 """ 166 Returns <Fragment length=L id=ID> where L=length of fragment 167 and ID the identifier (rank in the library). 168 """ 169 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
170 171
172 -def _make_fragment_list(pp, length):
173 """ 174 Dice up a peptide in fragments of length "length". 175 176 @param pp: a list of residues (part of one peptide) 177 @type pp: [L{Residue}, L{Residue}, ...] 178 179 @param length: fragment length 180 @type length: int 181 """ 182 frag_list=[] 183 for i in range(0, len(pp)-length+1): 184 f=Fragment(length, -1) 185 for j in range(0, length): 186 residue=pp[i+j] 187 resname=residue.get_resname() 188 if residue.has_id("CA"): 189 ca=residue["CA"] 190 else: 191 raise "CHAINBREAK" 192 if ca.is_disordered(): 193 raise "CHAINBREAK" 194 ca_coord=ca.get_coord() 195 f.add_residue(resname, ca_coord) 196 frag_list.append(f) 197 return frag_list
198 199
200 -def _map_fragment_list(flist, reflist):
201 """ 202 Map all frgaments in flist to the closest 203 (in RMSD) fragment in reflist. 204 205 Returns a list of reflist indices. 206 207 @param flist: list of protein fragments 208 @type flist: [L{Fragment}, L{Fragment}, ...] 209 210 @param reflist: list of reference (ie. library) fragments 211 @type reflist: [L{Fragment}, L{Fragment}, ...] 212 """ 213 mapped=[] 214 for f in flist: 215 rank=[] 216 for i in range(0, len(reflist)): 217 rf=reflist[i] 218 rms=f-rf 219 rank.append((rms, rf)) 220 rank.sort() 221 fragment=rank[0][1] 222 mapped.append(fragment) 223 return mapped
224 225
226 -class FragmentMapper:
227 """ 228 Map polypeptides in a model to lists of representative fragments. 229 """
230 - def __init__(self, model, lsize=20, flength=5, fdir="."):
231 """ 232 @param model: the model that will be mapped 233 @type model: L{Model} 234 235 @param lsize: number of fragments in the library 236 @type lsize: int 237 238 @param flength: length of fragments in the library 239 @type flength: int 240 241 @param fdir: directory where the definition files are 242 found (default=".") 243 @type fdir: string 244 """ 245 if flength==5: 246 self.edge=2 247 elif flength==7: 248 self.edge=3 249 else: 250 raise PDBException, "Fragment length should be 5 or 7." 251 self.flength=flength 252 self.lsize=lsize 253 self.reflist=_read_fragments(lsize, flength, fdir) 254 self.model=model 255 self.fd=self._map(self.model)
256
257 - def _map(self, model):
258 """ 259 @param model: the model that will be mapped 260 @type model: L{Model} 261 """ 262 ppb=PPBuilder() 263 ppl=ppb.build_peptides(model) 264 fd={} 265 for pp in ppl: 266 try: 267 # make fragments 268 flist=_make_fragment_list(pp, self.flength) 269 # classify fragments 270 mflist=_map_fragment_list(flist, self.reflist) 271 for i in range(0, len(pp)): 272 res=pp[i] 273 if i<self.edge: 274 # start residues 275 continue 276 elif i>=(len(pp)-self.edge): 277 # end residues 278 continue 279 else: 280 # fragment 281 index=i-self.edge 282 assert(index>=0) 283 fd[res]=mflist[index] 284 except "CHAINBREAK": 285 # Funny polypeptide - skip 286 pass 287 return fd
288
289 - def has_key(self, res):
290 """ 291 @type res: L{Residue} 292 """ 293 return self.fd.has_key(res)
294
295 - def __getitem__(self, res):
296 """ 297 @type res: L{Residue} 298 299 @return: fragment classification 300 @rtype: L{Fragment} 301 """ 302 return self.fd[res]
303 304 305 if __name__=="__main__": 306 307 import sys 308 309 p=PDBParser() 310 s=p.get_structure("X", sys.argv[1]) 311 312 m=s[0] 313 fm=FragmentMapper(m, 10, 5, "levitt_data") 314 315 316 for r in Selection.unfold_entities(m, "R"): 317 318 print r, 319 if fm.has_key(r): 320 print fm[r] 321 else: 322 print 323