Package Bio :: Package Fasta
[hide private]
[frames | no frames]

Source Code for Package Bio.Fasta

  1  """Utilities for working with FASTA-formatted sequences. 
  2   
  3  This module parses FASTA records using plain string operations. 
  4   
  5  Classes: 
  6  Record             Holds FASTA sequence data. 
  7  Iterator           Iterates over sequence data in a FASTA file. 
  8  Dictionary         Accesses a FASTA file using a dictionary interface. 
  9  RecordParser       Parses FASTA sequence data into a Record object. 
 10  SequenceParser     Parses FASTA sequence data into a Sequence object. 
 11   
 12  Functions: 
 13  index_file         Index a FASTA file for a Dictionary. 
 14  """ 
 15  from Bio import Seq 
 16  from Bio import SeqRecord 
 17  from Bio import Alphabet 
 18   
 19   
class Record:
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.

    """

    def __init__(self, colwidth=60):
        """Create a new, empty Record.

        colwidth specifies the number of residues to put on each line
        when generating FASTA format with str().
        """
        self.title = ''
        self.sequence = ''
        self._colwidth = colwidth

    def __str__(self):
        """Return the record as FASTA text.

        The output is a '>' title line followed by the sequence wrapped
        at colwidth residues per line.  Raises ValueError if there is a
        sequence to wrap and colwidth is not positive (previously this
        case looped forever).
        """
        lines = ['>%s' % self.title]
        if self.sequence:
            width = self._colwidth
            if width < 1:
                raise ValueError(
                    "colwidth must be a positive integer, got %r" % (width,))
            for start in range(0, len(self.sequence), width):
                lines.append(self.sequence[start:start + width])
        # Always join with "\n" rather than os.linesep: using the platform
        # line separator caused test failures on Windows.
        return "\n".join(lines)
49
class Iterator:
    """Returns one record at a time from a FASTA file.

    Each record is returned as the raw record text (title line plus
    sequence lines joined with "\\n") or, if a parser was supplied, as
    whatever parser.parse_string() returns for that text.
    """

    def __init__(self, handle, parser=None, debug=0):
        """Initialize a new iterator.

        Arguments:
        o handle - File-like object with a readline() method.
        o parser - Optional object with a parse_string(text) method used
          to convert each record's text; if None the text is returned.
        o debug - If true, print diagnostic messages while reading.
        """
        self.handle = handle
        self._parser = parser
        self._debug = debug

        # Skip any text before the first record (e.g. blank lines).
        # readline() returns '' at end of file, so stop there as well;
        # otherwise an empty file (or one with no '>' line) would fail
        # with an IndexError on line[0].
        while True:
            line = handle.readline()
            if not line or line[0] == ">":
                break
            if debug:
                print("Skipping: " + line)
        self._lookahead = line

    def __iter__(self):
        """Iterate over the records, stopping when next() returns None."""
        return iter(self.next, None)

    def next(self):
        """Return the next record in the file, or None when exhausted."""
        line = self._lookahead
        if not line:
            return None
        assert line[0] == ">", line
        lines = [line.rstrip()]
        line = self.handle.readline()
        while line:
            if line[0] == ">":
                break
            if line[0] == "#":
                # Lines starting with '#' inside a record are dropped.
                if self._debug:
                    print("Ignoring comment line")
            else:
                lines.append(line.rstrip())
            line = self.handle.readline()
        # Keep the title line of the following record (or '' at EOF).
        self._lookahead = line
        if self._debug:
            # lines[0] is the title line; the rest are sequence lines.
            print("Debug: '%s' and '%s'" % (lines[0], "".join(lines[1:])))
        if self._parser is None:
            return "\n".join(lines)
        else:
            return self._parser.parse_string("\n".join(lines))
93
class RecordParser:
    """Parses FASTA sequence data into a Fasta.Record object.
    """

    def __init__(self, debug=0):
        """Create the parser.  The debug argument is accepted but unused."""
        pass

    def parse_string(self, text):
        """Parse the first FASTA record in text and return it as a Record.

        The text must start with '>'; otherwise an AssertionError is
        raised (this now includes empty text).  If text holds several
        records only the first is parsed.  A record consisting of just a
        title line yields an empty sequence.
        """
        text = text.replace("\r\n", "\n")  # Crude way of dealing with \r\n
        # Slice rather than index so empty text fails the assertion
        # instead of raising IndexError.
        assert text[:1] == ">", text
        text = text.split("\n>", 1)[0]  # Only do the first record if more than one
        fields = text.split("\n", 1)
        title = fields[0][1:]  # Drop the leading '>'
        if len(fields) > 1:
            sequence = fields[1]
        else:
            # Title line only: no sequence lines followed it.
            sequence = ""
        rec = Record()
        rec.title = title
        rec.sequence = sequence.replace("\n", "")
        return rec

    def parse(self, handle):
        """Read everything from handle and parse its first record."""
        return self.parse_string(handle.read())
113
class SequenceParser:
    """Parses FASTA sequence data into a SeqRecord object.
    """

    def __init__(self, alphabet=Alphabet.generic_alphabet, title2ids=None,
                 debug=0):
        """Initialize the parser.

        Arguments:
        o alphabet - The alphabet of the sequences to be parsed. If not
        passed, this will be set as generic_alphabet.
        o title2ids - A function that, when given the title of the FASTA
        file (without the beginning >), will return the id, name and
        description (in that order) for the record. If this is not given,
        then the entire title line will be used as the description.
        o debug - Accepted but unused.
        """
        self.alphabet = alphabet
        self.title2ids = title2ids

    def parse_string(self, text):
        """Parse the first FASTA record in text and return a SeqRecord.

        The text must start with '>'; otherwise an AssertionError is
        raised (this now includes empty text).  If text holds several
        records only the first is parsed.  A record consisting of just a
        title line yields an empty sequence.
        """
        text = text.replace("\r\n", "\n")  # Crude way of dealing with \r\n
        # Slice rather than index so empty text fails the assertion
        # instead of raising IndexError.
        assert text[:1] == ">", text
        text = text.split("\n>", 1)[0]  # Only do the first record if more than one
        fields = text.split("\n", 1)
        title = fields[0][1:]  # Drop the leading '>'
        if len(fields) > 1:
            sequence = fields[1]
        else:
            # Title line only: no sequence lines followed it.
            sequence = ""

        seq = Seq.Seq(sequence.replace("\n", ""), self.alphabet)
        rec = SeqRecord.SeqRecord(seq)

        if self.title2ids:
            seq_id, name, descr = self.title2ids(title)
            rec.id = seq_id
            rec.name = name
            rec.description = descr
        else:
            rec.description = title

        return rec

    def parse(self, handle):
        """Read everything from handle and parse its first record."""
        return self.parse_string(handle.read())
154