Package Bio :: Module File
[hide private]
[frames | no frames]

Source Code for Module Bio.File

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code for more fancy file handles. 
  7   
  8   
  9  Classes: 
 10  UndoHandle     File object decorator with support for undo-like operations. 
 11  StringHandle   Wraps a file object around a string. 
 12  SGMLHandle     File object that automatically strips SGML tags from data. 
 13   
 14  SGMLStripper   Object that strips SGML. 
 15   
 16  """ 
 17  import os 
 18  import string 
 19  import StringIO 
 20  import sgmllib 
 21   
class UndoHandle:
    """A Python handle that adds functionality for saving lines.

    Saves lines in a LIFO fashion.

    Added methods:
    saveline    Save a line to be returned next time.
    peekline    Peek at the next line without consuming it.

    Any attribute that is not defined here is looked up on the wrapped
    handle, so the object can be used wherever the handle itself could.

    """

    def __init__(self, handle):
        """UndoHandle(handle)

        handle is the file-like object to wrap.

        """
        self._handle = handle
        # Lines pushed back by saveline(); index 0 is the next one returned.
        self._saved = []

    def readlines(self, *args, **keywds):
        """Return the saved lines followed by the handle's readlines().

        All arguments are passed to the wrapped handle unchanged.  The
        saved lines are consumed.

        """
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        """Return the next saved line, or read one from the handle.

        The arguments are only used when the line comes from the wrapped
        handle; a saved line is always returned whole.

        """
        if self._saved:
            return self._saved.pop(0)
        return self._handle.readline(*args, **keywds)

    def read(self, size=-1):
        """Read up to size characters, serving saved lines first.

        A size of None or any negative number means "everything that is
        left": all saved lines followed by the rest of the handle.

        """
        if size is None or size < 0:
            # Every "read it all" request must drain the saved lines first,
            # otherwise later data would be returned ahead of them.
            pieces = self._saved
            self._saved = []
            size = -1
        else:
            pieces = []
            while size > 0 and self._saved:
                head = self._saved[0]
                if len(head) <= size:
                    # The whole saved line fits in the request.
                    size = size - len(head)
                    pieces.append(self._saved.pop(0))
                else:
                    # Hand out a prefix and keep the remainder saved.
                    pieces.append(head[:size])
                    self._saved[0] = head[size:]
                    size = 0
        pieces.append(self._handle.read(size))
        # Join with an empty string of the same type as the data, so the
        # wrapper works for text and for byte strings alike.
        return pieces[0][:0].join(pieces)

    def saveline(self, line):
        """Push line back so that it is the next line returned.

        Empty lines are ignored.

        """
        if line:
            self._saved.insert(0, line)

    def peekline(self):
        """Return the next line without consuming it."""
        if self._saved:
            return self._saved[0]
        line = self._handle.readline()
        # saveline() ignores the empty string, so nothing is saved when the
        # handle has no more data to give.
        self.saveline(line)
        return line

    def tell(self):
        """Return the handle's position minus the length of the saved lines."""
        pending = 0
        for line in self._saved:
            pending = pending + len(line)
        return self._handle.tell() - pending

    def seek(self, *args):
        """Discard the saved lines and seek the wrapped handle."""
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        """Delegate any other attribute to the wrapped handle."""
        return getattr(self._handle, attr)
# StringHandle wraps a file object around a string; it is simply an alias
# for StringIO.StringIO.
#
# I could make this faster by using cStringIO.
# However, cStringIO (in v1.52) does not implement the
# readlines method.
StringHandle = StringIO.StringIO
class SGMLHandle:
    """A Python handle that automatically strips SGML tags from data.

    Every read method fetches data from the wrapped handle and passes it
    through an SGMLStripper before returning it.  Any attribute that is
    not defined here is looked up on the wrapped handle.

    """

    def __init__(self, handle):
        """SGMLHandle(handle)

        handle is a file handle to SGML-formatted data.

        """
        self._handle = handle
        self._stripper = SGMLStripper()

    def read(self, *args, **keywds):
        """Read from the handle and return the data with tags stripped.

        All arguments are passed to the wrapped handle unchanged.

        """
        data = self._handle.read(*args, **keywds)
        return self._stripper.strip(data)

    def readline(self, *args, **keywds):
        """Read one line from the handle and return it with tags stripped.

        All arguments are passed to the wrapped handle unchanged.

        """
        line = self._handle.readline(*args, **keywds)
        return self._stripper.strip(line)

    def readlines(self, *args, **keywds):
        """Read lines from the handle and return them with tags stripped.

        All arguments are passed to the wrapped handle unchanged.  The
        lines are stripped one at a time, in order.

        """
        lines = self._handle.readlines(*args, **keywds)
        # Strip each line itself; the stripper must receive the text, not
        # the builtin str type.
        return [self._stripper.strip(line) for line in lines]

    def __getattr__(self, attr):
        """Delegate any other attribute to the wrapped handle."""
        return getattr(self._handle, attr)
class SGMLStripper:
    """Object that strips SGML tags from strings.

    The text is fed to an sgmllib-based parser and only what the parser
    reports through handle_data is returned.

    """

    class MyParser(sgmllib.SGMLParser):
        """SGML parser that collects the data it is given in self.data."""

        def __init__(self):
            sgmllib.SGMLParser.__init__(self)
            self.data = ''

        def handle_data(self, data):
            # Append each chunk reported by the parser to the running text.
            self.data += data

    def __init__(self):
        self._parser = SGMLStripper.MyParser()

    def strip(self, str):
        """S.strip(str) -> string

        Strip the SGML tags from str.

        """
        # Nothing to do for an empty string.
        if not str:
            return ''

        # A non-empty input must not come back as an empty string when all
        # of it was markup (for instance a newline embedded within a tag),
        # so remember whether the input ended a line.
        ends_line = str[-1] in ('\n', '\r')

        parser = self._parser
        # Only the collected text is cleared; the parser itself is not
        # reset (presumably so that markup split across calls is still
        # recognised -- confirm against sgmllib).
        parser.data = ''
        parser.feed(str)

        if parser.data:
            return parser.data
        if ends_line:
            # Everything was stripped away: put a line ending back.
            return '\n'
        return ''