1
2
3
4
5
6 """
7 This module provides code to work with the prosite.doc file from
8 Prosite.
9 http://www.expasy.ch/prosite/
10
11 Tested with:
12 Release 15.0, July 1998
13 Release 16.0, July 1999
14 Release 20.22, 13 November 2007
15
16
17 Functions:
18 parse Iterates over entries in a Prodoc file.
19 index_file Index a Prodoc file for a Dictionary.
20 _extract_record Extract Prodoc data from a web page.
21
22
23 Classes:
24 Record Holds Prodoc data.
25 Reference Holds data from a Prodoc reference.
26 Dictionary Accesses a Prodoc file using a dictionary interface.
27 RecordParser Parses a Prodoc record into a Record object.
28
29 _Scanner Scans Prodoc-formatted data.
30 _RecordConsumer Consumes Prodoc data to a Record object.
31 Iterator Iterates over entries in a Prodoc file; DEPRECATED.
32 """
33
34 from types import *
35 import os
36 import sgmllib
37 from Bio import File
38 from Bio import Index
39 from Bio.ParserSupport import *
40
52
61
62
63
64
65
67 """Holds information from a Prodoc record.
68
69 Members:
70 accession Accession number of the record.
71 prosite_refs List of tuples (prosite accession, prosite name).
72 text Free format text.
73 references List of reference objects.
74
75 """
77 self.accession = ''
78 self.prosite_refs = []
79 self.text = ''
80 self.references = []
81
83 """Holds information from a Prodoc citation.
84
85 Members:
86 number Number of the reference. (string)
87 authors Names of the authors.
88 citation Describes the citation.
89
90 """
92 self.number = ''
93 self.authors = ''
94 self.citation = ''
95
97 """Returns one record at a time from a Prodoc file.
98
99 Methods:
100 next Return the next record from the stream, or None.
101
102 """
def __init__(self, handle, parser=None):
    """Create a new (deprecated) iterator over a Prodoc file.

    Arguments:
    handle   File-like object to read from; it must provide a
             readline method.
    parser   Optional Parser object used to change the results into
             another form.  If None, the raw contents of the file
             are returned.

    Emits a DeprecationWarning pointing at Bio.Prosite.Prodoc.parse.
    Raises ValueError if handle does not look like a file.

    """
    import warnings
    warnings.warn("Bio.Prosite.Prodoc.Iterator is deprecated; we recommend using the function Bio.Prosite.Prodoc.parse instead. Please contact the Biopython developers at biopython-dev@biopython.org if you cannot use Bio.Prosite.Prodoc.parse instead of Bio.Prosite.Prodoc.Iterator.",
                  DeprecationWarning)
    # Duck-type the handle instead of comparing exact types: the old
    # FileType/InstanceType test rejected perfectly usable file-like
    # objects (anything that is neither a builtin file nor an
    # old-style class instance).
    if not hasattr(handle, "readline"):
        raise ValueError("I expected a file handle or file-like object")
    self._uhandle = File.UndoHandle(handle)
    self._parser = parser
118
120 """next(self) -> object
121
122 Return the next Prodoc record from the file. If no more records,
123 return None.
124
125 """
126 lines = []
127 while 1:
128 line = self._uhandle.readline()
129 if not line:
130 break
131 lines.append(line)
132 if line[:5] == '{END}':
133 break
134
135 if not lines:
136 return None
137
138 data = "".join(lines)
139 if self._parser is not None:
140 return self._parser.parse(File.StringHandle(data))
141 return data
142
144 return iter(self.next, None)
145
147 """Accesses a Prodoc file using a dictionary interface.
148
149 """
150 __filename_key = '__filename'
151
def __init__(self, indexname, parser=None):
    """Open a Prodoc Dictionary backed by an existing index.

    indexname is the name of the index for the dictionary; it should
    have been created with the index_file function.  parser is an
    optional Parser object that changes the results into another
    form.  If it is None, the raw contents of the file are returned.

    """
    self._index = Index.Index(indexname)
    # index_file stores the path of the indexed Prodoc file under a
    # reserved key; look it up to find the file to open.
    prodoc_path = self._index[Dictionary.__filename_key]
    self._handle = open(prodoc_path)
    self._parser = parser
165
167 return len(self._index)
168
176
178 return getattr(self._index, name)
179
181 """Access PRODOC at ExPASy using a read-only dictionary interface.
182
183 """
def __init__(self, delay=5.0, parser=None):
    """Create a new (deprecated) Dictionary to access PRODOC.

    delay is the number of seconds to wait between each query.
    parser is an optional parser (e.g. Prodoc.RecordParser) object
    that changes the results into another form; if it is None, the
    raw contents of the file are returned.

    Emits a DeprecationWarning on every instantiation.

    """
    from warnings import warn
    message = "Bio.Prosite.Prodoc.ExPASyDictionary is deprecated. Please use the function Bio.ExPASy.get_prosite_raw instead."
    warn(message, DeprecationWarning)

    # No query has been made yet, so there is nothing to throttle.
    self.last_query_time = None
    self.parser = parser
    self.delay = delay
201
203 raise NotImplementedError, "Prodoc contains lots of entries"
205 raise NotImplementedError, "This is a read-only dictionary"
207 raise NotImplementedError, "This is a read-only dictionary"
209 raise NotImplementedError, "This is a read-only dictionary"
211 raise NotImplementedError, "You don't need to do this..."
213 raise NotImplementedError, "You don't really want to do this..."
215 raise NotImplementedError, "You don't really want to do this..."
217 raise NotImplementedError, "You don't really want to do this..."
218
220 """has_key(self, id) -> bool"""
221 try:
222 self[id]
223 except KeyError:
224 return 0
225 return 1
226
def get(self, id, failobj=None):
    """get(self, id, failobj=None) -> object

    Return self[id], or failobj (None by default) if looking up id
    raises a KeyError.

    """
    try:
        return self[id]
    except KeyError:
        return failobj
233
235 """__getitem__(self, id) -> object
236
237 Return a Prodoc entry. id is either the id or accession
238 for the entry. Raises a KeyError if there's an error.
239
240 """
241 import time
242 from Bio.WWW import ExPASy
243
244
245 if self.last_query_time is not None:
246 delay = self.last_query_time + self.delay - time.time()
247 if delay > 0.0:
248 time.sleep(delay)
249 self.last_query_time = time.time()
250
251 try:
252 handle = ExPASy.get_prodoc_entry(id)
253 except IOError:
254 raise KeyError, id
255 try:
256 handle = File.StringHandle(_extract_record(handle))
257 except ValueError:
258 raise KeyError, id
259
260 if self.parser is not None:
261 return self.parser.parse(handle)
262 return handle.read()
263
265 """Parses Prodoc data into a Record object.
266
267 """
271
def parse(self, handle):
    """Scan the Prodoc data in handle and return the consumer's data.

    handle is passed to the scanner's feed method together with the
    consumer; afterwards the consumer's data attribute is returned.

    """
    scanner = self._scanner
    scanner.feed(handle, self._consumer)
    parsed = self._consumer.data
    return parsed
275
277 """Scans Prodoc-formatted data.
278
279 Tested with:
280 Release 15.0, July 1998
281
282 """
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Scan Prodoc data.  handle is a file-like object containing
    prosite data; it is wrapped in a File.UndoHandle unless it
    already is one.  consumer is a Consumer object that receives
    events as the data is scanned.

    """
    if not isinstance(handle, File.UndoHandle):
        handle = File.UndoHandle(handle)
    uhandle = handle

    while True:
        upcoming = uhandle.peekline()
        if not upcoming:
            # Nothing left to read.
            break
        if is_blank_line(upcoming):
            # Consume and ignore blank lines outside of records.
            uhandle.readline()
        else:
            self._scan_record(uhandle, consumer)
306
319
322
327
def _scan_text(self, uhandle, consumer):
    """Pass free-text lines to consumer.text until the text ends.

    Lines are read from uhandle one at a time.  Reading stops at the
    first line that starts a reference ('[' at offset 0, followed by
    '] ' at offsets 3-4) or that starts with '{END}'; that line is
    pushed back onto uhandle with saveline so the caller can scan it.

    """
    while 1:
        line = safe_readline(uhandle)
        # Compare slices rather than single characters so that a
        # short line beginning with '[' is treated as text instead
        # of raising IndexError.
        starts_reference = line[:1] == '[' and line[3:5] == '] '
        if starts_reference or line[:5] == '{END}':
            uhandle.saveline(line)
            break
        consumer.text(line)
336
344
346
347
348 read_and_call_while(uhandle, consumer.noevent, blank=1)
349 if attempt_read_and_call(uhandle, consumer.noevent, start='+----'):
350 read_and_call_until(uhandle, consumer.noevent, start='+----')
351 read_and_call(uhandle, consumer.noevent, start='+----')
352 read_and_call_while(uhandle, consumer.noevent, blank=1)
353
355 """Consumer that converts a Prodoc record to a Record object.
356
357 Members:
358 data Record with Prodoc data.
359
360 """
363
366
369
371 line = line.rstrip()
372 if line[0] != '{' or line[-1] != '}':
373 raise ValueError, "I don't understand accession line\n%s" % line
374 acc = line[1:-1]
375 if acc[:4] != 'PDOC':
376 raise ValueError, "Invalid accession in line\n%s" % line
377 self.data.accession = acc
378
380 line = line.rstrip()
381 if line[0] != '{' or line[-1] != '}':
382 raise ValueError, "I don't understand accession line\n%s" % line
383 acc, name = line[1:-1].split('; ')
384 self.data.prosite_refs.append((acc, name))
385
def text(self, line):
    """Append line to the free text of the record being built."""
    record = self.data
    record.text = record.text + line
388
390 if line[0] == '[' and line[3] == ']':
391 self._ref = Reference()
392 self._ref.number = line[1:3].strip()
393 if line[1] == 'E':
394
395
396 self._ref.citation = line[4:].strip()
397 else:
398 self._ref.authors = line[4:].strip()
399 self.data.references.append(self._ref)
400 elif line[:4] == ' ':
401 if not self._ref:
402 raise ValueError, "Unnumbered reference lines\n%s" % line
403 self._ref.citation = self._ref.citation + line[5:]
404 else:
405 raise "I don't understand the reference line\n%s" % line
406
408
409 for ref in self.data.references:
410 ref.citation = ref.citation.rstrip()
411 ref.authors = ref.authors.rstrip()
412
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a Prodoc file.  filename is the name of the file.
    indexname is the name of the index to create (it is opened with
    truncate=1).  rec2key is an optional callback that takes a Record
    and generates a unique key for the record.  If not specified,
    the accession number of the record is used.

    For every record the index maps its key to a (start, length)
    tuple of file offsets; the file name itself is stored under the
    Dictionary's reserved filename key.

    Raises ValueError if filename does not exist, and KeyError if a
    record yields an empty key or a key that was already indexed.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        records = parse(handle)
        # NOTE(review): the offsets below assume parse() has consumed
        # exactly one record from handle each time it yields - confirm.
        end = long(0)
        for record in records:
            start = end
            end = long(handle.tell())
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif index.has_key(key):
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Always release the file, including when a bad key aborts
        # the indexing.
        handle.close()
449
450
451
453 """_extract_record(handle) -> str
454
455 Extract PRODOC data from a web page. Raises a ValueError if no
456 data was found in the web page.
457
458 """
459
460
461
462 class parser(sgmllib.SGMLParser):
463 def __init__(self):
464 sgmllib.SGMLParser.__init__(self)
465 self._in_pre = 0
466 self.data = []
467 def handle_data(self, data):
468 if self._in_pre:
469 self.data.append(data)
470 def do_br(self, attrs):
471 if self._in_pre:
472 self.data.append('\n')
473 def start_pre(self, attrs):
474 self._in_pre = 1
475 def end_pre(self):
476 self._in_pre = 0
477 p = parser()
478 p.feed(handle.read())
479 data = ''.join(p.data).lstrip()
480 if not data:
481 raise ValueError, "No data found in web page."
482 return data
483