1
2
3
4
5
6
7
8 """ASTRAL RAF (Rapid Access Format) Sequence Maps.
9
10 The ASTRAL RAF Sequence Maps record the relationship between the PDB SEQRES
11 records (representing the sequence of the molecule used in an experiment) to
12 the ATOM records (representing the atoms experimentally observed).
13
14 This data is derived from the Protein Data Bank CIF files. Known errors in the
15 CIF files are corrected manually, with the original PDB file serving as the
16 final arbiter in case of discrepancies.
17
18 Residues are referenced by residue ID. This consists of a the PDB residue
19 sequence number (upto 4 digits) and an optional PDB insertion code (an
20 ascii alphabetic character, a-z, A-Z). e.g. "1", "10A", "1010b", "-1"
21
22 See "ASTRAL RAF Sequence Maps":http://astral.stanford.edu/raf.html
23
24 to_one_letter_code -- A mapping from the 3-letter amino acid codes found
25 in PDB files to 1-letter codes. The 3-letter codes
26 include chemically modified residues.
27 """
28
29 from copy import copy
30 from types import *
31
32 from Residues import Residues
33
34
35
36 to_one_letter_code= {
37 'ALA':'A', 'VAL':'V', 'PHE':'F', 'PRO':'P', 'MET':'M',
38 'ILE':'I', 'LEU':'L', 'ASP':'D', 'GLU':'E', 'LYS':'K',
39 'ARG':'R', 'SER':'S', 'THR':'T', 'TYR':'Y', 'HIS':'H',
40 'CYS':'C', 'ASN':'N', 'GLN':'Q', 'TRP':'W', 'GLY':'G',
41 '2AS':'D', '3AH':'H', '5HP':'E', 'ACL':'R', 'AIB':'A',
42 'ALM':'A', 'ALO':'T', 'ALY':'K', 'ARM':'R', 'ASA':'D',
43 'ASB':'D', 'ASK':'D', 'ASL':'D', 'ASQ':'D', 'AYA':'A',
44 'BCS':'C', 'BHD':'D', 'BMT':'T', 'BNN':'A', 'BUC':'C',
45 'BUG':'L', 'C5C':'C', 'C6C':'C', 'CCS':'C', 'CEA':'C',
46 'CHG':'A', 'CLE':'L', 'CME':'C', 'CSD':'A', 'CSO':'C',
47 'CSP':'C', 'CSS':'C', 'CSW':'C', 'CXM':'M', 'CY1':'C',
48 'CY3':'C', 'CYG':'C', 'CYM':'C', 'CYQ':'C', 'DAH':'F',
49 'DAL':'A', 'DAR':'R', 'DAS':'D', 'DCY':'C', 'DGL':'E',
50 'DGN':'Q', 'DHA':'A', 'DHI':'H', 'DIL':'I', 'DIV':'V',
51 'DLE':'L', 'DLY':'K', 'DNP':'A', 'DPN':'F', 'DPR':'P',
52 'DSN':'S', 'DSP':'D', 'DTH':'T', 'DTR':'W', 'DTY':'Y',
53 'DVA':'V', 'EFC':'C', 'FLA':'A', 'FME':'M', 'GGL':'E',
54 'GLZ':'G', 'GMA':'E', 'GSC':'G', 'HAC':'A', 'HAR':'R',
55 'HIC':'H', 'HIP':'H', 'HMR':'R', 'HPQ':'F', 'HTR':'W',
56 'HYP':'P', 'IIL':'I', 'IYR':'Y', 'KCX':'K', 'LLP':'K',
57 'LLY':'K', 'LTR':'W', 'LYM':'K', 'LYZ':'K', 'MAA':'A',
58 'MEN':'N', 'MHS':'H', 'MIS':'S', 'MLE':'L', 'MPQ':'G',
59 'MSA':'G', 'MSE':'M', 'MVA':'V', 'NEM':'H', 'NEP':'H',
60 'NLE':'L', 'NLN':'L', 'NLP':'L', 'NMC':'G', 'OAS':'S',
61 'OCS':'C', 'OMT':'M', 'PAQ':'Y', 'PCA':'E', 'PEC':'C',
62 'PHI':'F', 'PHL':'F', 'PR3':'C', 'PRR':'A', 'PTR':'Y',
63 'SAC':'S', 'SAR':'G', 'SCH':'C', 'SCS':'C', 'SCY':'C',
64 'SEL':'S', 'SEP':'S', 'SET':'S', 'SHC':'C', 'SHR':'K',
65 'SOC':'C', 'STY':'Y', 'SVA':'S', 'TIH':'A', 'TPL':'W',
66 'TPO':'T', 'TPQ':'A', 'TRG':'K', 'TRO':'W', 'TYB':'Y',
67 'TYQ':'Y', 'TYS':'Y', 'TYY':'Y', 'AGM':'R', 'GL3':'G',
68 'SMC':'C', 'ASX':'B', 'CGU':'E', 'CSX':'C', 'GLX':'Z',
69 'UNK':'X'
70 }
71
72
74 """Convert RAF one-letter amino acid codes into IUPAC standard codes.
75
76 Letters are uppercased, and "." ("Unknown") is converted to "X".
77 """
78 if one_letter_code == '.' :
79 return 'X'
80 else :
81 return one_letter_code.upper()
82
84 """An RAF file index.
85
86 The RAF file itself is about 50 MB. This index provides rapid, random
87 access of RAF records without having to load the entire file into memory.
88
89 The index key is a concatenation of the PDB ID and chain ID. e.g
90 "2drcA", "155c_". RAF uses an underscore to indicate blank
91 chain IDs.
92 """
93
95 """
96 Arguments:
97
98 filename -- The file to index
99 """
100 dict.__init__(self)
101 self.filename = filename
102
103 f = open(self.filename)
104 try:
105 position = 0
106 while True:
107 line = f.readline()
108 if not line: break
109 key = line[0:5]
110 if key != None :
111 self[key]=position
112 position = f.tell()
113 finally :
114 f.close()
115
128
129
131 """Get the sequence map for a collection of residues.
132
133 residues -- A Residues instance, or a string that can be converted into
134 a Residues instance.
135 """
136 if type(residues) == StringType :
137 residues = Residues(residues)
138
139 pdbid = residues.pdbid
140 frags = residues.fragments
141 if not frags: frags =(('_','',''),)
142
143 seqMap = None
144 for frag in frags :
145 chainid = frag[0]
146 if chainid=='' or chainid=='-' or chainid==' ' or chainid=='_':
147 chainid = '_'
148 id = pdbid + chainid
149
150
151 sm = self[id]
152
153
154 start = 0
155 end = len(sm.res)
156 if frag[1] : start = int(sm.index(frag[1], chainid))
157 if frag[2] : end = int(sm.index(frag[2], chainid)+1)
158
159 sm = sm[start:end]
160
161 if seqMap == None :
162 seqMap = sm
163 else :
164 seqMap += sm
165
166 return seqMap
167
168
169
171 """An ASTRAL RAF (Rapid Access Format) Sequence Map.
172
173 This is a list like object; You can find the location of particular residues
174 with index(), slice this SeqMap into fragments, and glue fragments back
175 together with extend().
176
177 pdbid -- The PDB 4 character ID
178
179 pdb_datestamp -- From the PDB file
180
181 version -- The RAF format version. e.g. 0.01
182
183 flags -- RAF flags. (See release notes for more information.)
184
185 res -- A list of Res objects, one for each residue in this sequence map
186 """
187
189 self.pdbid = ''
190 self.pdb_datestamp = ''
191 self.version = ''
192 self.flags = ''
193 self.res = []
194 if line:
195 self._process(line)
196
197
199 """Parses a RAF record into a SeqMap object.
200 """
201 header_len = 38
202
203 line = line.rstrip()
204
205 if len(line)<header_len:
206 raise ValueError, "Incomplete header: "+line
207
208 self.pdbid = line[0:4]
209 chainid = line[4:5]
210
211 self.version = line[6:10]
212
213
214 if(self.version != "0.01" and self.version !="0.02") :
215 raise ValueError, "Incompatible RAF version: "+self.version
216
217 self.pdb_datestamp = line[14:20]
218 self.flags = line[21:27]
219
220 for i in range(header_len, len(line), 7) :
221 f = line[i : i+7]
222 if len(f)!=7:
223 raise ValueError, "Corrupt Field: ("+f+")"
224 r = Res()
225 r.chainid = chainid
226 r.resid = f[0:5].strip()
227 r.atom = normalize_letters(f[5:6])
228 r.seqres = normalize_letters(f[6:7])
229
230 self.res.append(r)
231
232
233 - def index(self, resid, chainid="_") :
234 for i in range(0, len(self.res)) :
235 if self.res[i].resid == resid and self.res[i].chainid == chainid :
236 return i
237 raise KeyError, "No such residue "+chainid+resid
238
240 s = copy(self)
241 s.res = s.res[i:j]
242 return s
243
245 """Append another Res object onto the list of residue mappings."""
246 self.res.append(res)
247
249 """Append another SeqMap onto the end of self.
250
251 Both SeqMaps must have the same PDB ID, PDB datestamp and
252 RAF version. The RAF flags are erased if they are inconsistent. This
253 may happen when fragments are taken from different chains.
254 """
255 if not isinstance(other, SeqMap):
256 raise TypeError, "Can only extend a SeqMap with a SeqMap."
257 if self.pdbid != other.pdbid :
258 raise TypeError, "Cannot add fragments from different proteins"
259 if self.version != other.version :
260 raise TypeError, "Incompatible rafs"
261 if self.pdb_datestamp != other.pdb_datestamp :
262 raise TypeError, "Different pdb dates!"
263 if self.flags != other.flags :
264 self.flags = ''
265 self.res += other.res
266
268 self.extend(other)
269 return self
270
275
276 - def getAtoms(self, pdb_handle, out_handle) :
277 """Extract all relevant ATOM and HETATOM records from a PDB file.
278
279 The PDB file is scanned for ATOM and HETATOM records. If the
280 chain ID, residue ID (seqNum and iCode), and residue type match
281 a residue in this sequence map, then the record is echoed to the
282 output handle.
283
284 This is typically used to find the coordinates of a domain, or other
285 residue subset.
286
287 pdb_handle -- A handle to the relevant PDB file.
288
289 out_handle -- All output is written to this file like object.
290 """
291
292
293
294 resSet = {}
295 for r in self.res :
296 if r.atom=='X' :
297 continue
298 chainid = r.chainid
299 if chainid == '_':
300 chainid = ' '
301 resid = r.resid
302 resSet[(chainid,resid)] = r
303
304 resFound = {}
305 for line in pdb_handle.xreadlines() :
306 if line.startswith("ATOM ") or line.startswith("HETATM") :
307 chainid = line[21:22]
308 resid = line[22:27].strip()
309 key = (chainid, resid)
310 if resSet.has_key(key):
311 res = resSet[key]
312 atom_aa = res.atom
313 resName = line[17:20]
314 if to_one_letter_code.has_key(resName) :
315 if to_one_letter_code[resName] == atom_aa :
316 out_handle.write(line)
317 resFound[key] = res
318
319 if len(resSet) != len(resFound) :
320
321
322
323
324 raise RuntimeError, 'I could not find at least one ATOM or ' \
325 +'HETATM record for each and every residue in this sequence map.'
326
327
328
330 """ A single residue mapping from a RAF record.
331
332 chainid -- A single character chain ID.
333
334 resid -- The residue ID.
335
336 atom -- amino acid one-letter code from ATOM records.
337
338 seqres -- amino acid one-letter code from SEQRES records.
339 """
341 self.chainid = ''
342 self.resid = ''
343 self.atom = ''
344 self.seqres = ''
345
346
348 """Iterates over a RAF file, returning a SeqMap object for each line
349 in the file.
350
351 Arguments:
352
353 handle -- file-like object.
354 """
355 for line in handle:
356 yield SeqMap(line)
357
358
360 """Iterates over a RAF file.
361 """
362 - def __init__(self, handle, parser=None):
363 """Create an object that iterates over a RAF file.
364
365 handle -- file-like object.
366
367 parser -- an optional Parser object to change the results into
368 another form. If set to None, then the raw contents
369 of the file will be returned.
370
371 """
372 import warnings
373 warnings.warn("Bio.SCOP.Raf.Iterator is deprecated. Please use Bio.SCOP.Raf.parse() instead.", DeprecationWarning)
374
375 from types import FileType, InstanceType
376 if type(handle) is not FileType and type(handle) is not InstanceType:
377 raise TypeError, "I expected a file handle or file-like object"
378 self._handle = handle
379 self._parser = parser
380
382 """Retrieve the next record."""
383 while 1:
384 line = self._handle.readline()
385 if not line: return None
386 if line[0] !='#': break
387 if self._parser is not None :
388 return self._parser.parse(line)
389 return line
390
392 return iter(self.next, None)
393
394
397 import warnings
398 warnings.warn("""Bio.SCOP.Raf.Parser is deprecated.
399 Instead of
400
401 parser = Raf.Parser()
402 record = parser.parse(entry)
403
404 please use
405
406 record = Raf.SeqMap(entry)
407 """, DeprecationWarning)
408
409
411 """Returns a SeqMap """
412 return SeqMap(line)
413