1
2
3
4
5
6
7 import sys
8 from string import split
9 from Numeric import array, Float0
10
11
12 from StructureBuilder import StructureBuilder
13 from PDBExceptions import PDBConstructionException
14 from parse_pdb_header import _parse_pdb_header_list
15
16 __doc__="Parser for PDB files."
17
18
19
20
21
23 """
24 Parse a PDB file and return a Structure object.
25 """
26
27 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
28 """
29 The PDB parser call a number of standard methods in an aggregated
30 StructureBuilder object. Normally this object is instanciated by the
31 PDBParser object itself, but if the user provides his own StructureBuilder
32 object, the latter is used instead.
33
34 Arguments:
35 o PERMISSIVE - int, if this is 0 exceptions in constructing the
36 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are
37 caught, but some residues or atoms will be missing. THESE EXCEPTIONS
38 ARE DUE TO PROBLEMS IN THE PDB FILE!.
39 o structure_builder - an optional user implemented StructureBuilder class.
40 """
41 if structure_builder!=None:
42 self.structure_builder=structure_builder
43 else:
44 self.structure_builder=StructureBuilder()
45 self.header=None
46 self.trailer=None
47 self.line_counter=0
48 self.PERMISSIVE=PERMISSIVE
49
50
51
53 """Return the structure.
54
55 Arguments:
56 o id - string, the id that will be used for the structure
57 o file - name of the PDB file OR an open filehandle
58 """
59 self.header=None
60 self.trailer=None
61
62 self.structure_builder.init_structure(id)
63 if isinstance(file, basestring):
64 file=open(file)
65 self._parse(file.readlines())
66 self.structure_builder.set_header(self.header)
67
68 return self.structure_builder.get_structure()
69
71 "Return the header."
72 return self.header
73
75 "Return the trailer."
76 return self.trailer
77
78
79
80 - def _parse(self, header_coords_trailer):
86
88 "Get the header of the PDB file, return the rest."
89 structure_builder=self.structure_builder
90 for i in range(0, len(header_coords_trailer)):
91 structure_builder.set_line_counter(i+1)
92 line=header_coords_trailer[i]
93 record_type=line[0:6]
94 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '):
95 break
96 header=header_coords_trailer[0:i]
97
98 self.line_counter=i
99 coords_trailer=header_coords_trailer[i:]
100 header_dict=_parse_pdb_header_list(header)
101 return header_dict, coords_trailer
102
104 "Parse the atomic data in the PDB file."
105 local_line_counter=0
106 structure_builder=self.structure_builder
107 current_model_id=0
108
109 model_open=0
110 current_chain_id=None
111 current_segid=None
112 current_residue_id=None
113 current_resname=None
114 for i in range(0, len(coords_trailer)):
115 line=coords_trailer[i]
116 record_type=line[0:6]
117 global_line_counter=self.line_counter+local_line_counter+1
118 structure_builder.set_line_counter(global_line_counter)
119 if(record_type=='ATOM ' or record_type=='HETATM'):
120
121 if not model_open:
122 structure_builder.init_model(current_model_id)
123 current_model_id+=1
124 model_open=1
125 fullname=line[12:16]
126
127 split_list=split(fullname)
128 if len(split_list)!=1:
129
130
131 name=fullname
132 else:
133
134 name=split_list[0]
135 altloc=line[16:17]
136 resname=line[17:20]
137 chainid=line[21:22]
138 try:
139 serial_number=int(line[6:11])
140 except:
141 serial_number=0
142 resseq=int(split(line[22:26])[0])
143 icode=line[26:27]
144 if record_type=='HETATM':
145 if resname=="HOH" or resname=="WAT":
146 hetero_flag="W"
147 else:
148 hetero_flag="H"
149 else:
150 hetero_flag=" "
151 residue_id=(hetero_flag, resseq, icode)
152
153 x=float(line[30:38])
154 y=float(line[38:46])
155 z=float(line[46:54])
156 coord=array((x, y, z), Float0)
157
158 occupancy=float(line[54:60])
159 bfactor=float(line[60:66])
160 segid=line[72:76]
161 if current_segid!=segid:
162 current_segid=segid
163 structure_builder.init_seg(current_segid)
164 if current_chain_id!=chainid:
165 current_chain_id=chainid
166 structure_builder.init_chain(current_chain_id)
167 current_residue_id=residue_id
168 current_resname=resname
169 try:
170 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
171 except PDBConstructionException, message:
172 self._handle_PDB_exception(message, global_line_counter)
173 elif current_residue_id!=residue_id or current_resname!=resname:
174 current_residue_id=residue_id
175 current_resname=resname
176 try:
177 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
178 except PDBConstructionException, message:
179 self._handle_PDB_exception(message, global_line_counter)
180
181 try:
182 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number)
183 except PDBConstructionException, message:
184 self._handle_PDB_exception(message, global_line_counter)
185 elif(record_type=='ANISOU'):
186 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70]))
187
188 anisou_array=(array(anisou, Float0)/10000.0).astype(Float0)
189 structure_builder.set_anisou(anisou_array)
190 elif(record_type=='MODEL '):
191 structure_builder.init_model(current_model_id)
192 current_model_id+=1
193 model_open=1
194 current_chain_id=None
195 current_residue_id=None
196 elif(record_type=='END ' or record_type=='CONECT'):
197
198 self.line_counter=self.line_counter+local_line_counter
199 return coords_trailer[local_line_counter:]
200 elif(record_type=='ENDMDL'):
201 model_open=0
202 current_chain_id=None
203 current_residue_id=None
204 elif(record_type=='SIGUIJ'):
205
206 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70]))
207
208 siguij_array=(array(siguij, Float0)/10000.0).astype(Float0)
209 structure_builder.set_siguij(siguij_array)
210 elif(record_type=='SIGATM'):
211
212 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
213 sigatm_array=array(sigatm, Float0)
214 structure_builder.set_sigatm(sigatm_array)
215 local_line_counter=local_line_counter+1
216
217 self.line_counter=self.line_counter+local_line_counter
218 return []
219
221 """
222 This method catches an exception that occurs in the StructureBuilder
223 object (if PERMISSIVE==1), or raises it again, this time adding the
224 PDB line number to the error message.
225 """
226 message="%s at line %i." % (message, line_counter)
227 if self.PERMISSIVE:
228
229 print "PDBConstructionException: %s" % message
230 print "Exception ignored.\nSome atoms or residues will be missing in the data structure."
231 else:
232
233 raise PDBConstructionException, message
234
235
236 if __name__=="__main__":
237
238 import sys
239
240 p=PDBParser(PERMISSIVE=1)
241
242 s=p.get_structure("scr", sys.argv[1])
243
244 for m in s:
245 p=m.get_parent()
246 assert(p is s)
247 for c in m:
248 p=c.get_parent()
249 assert(p is m)
250 for r in c:
251 print r
252 p=r.get_parent()
253 assert(p is c)
254 for a in r:
255 p=a.get_parent()
256 if not p is r:
257 print p, r
258