1
2
3
4
5 """
6 Implementation of sequence motifs.
7
8 Changes:
9 9.2007 (BW) : added the to_faste() and .weblogo() methods allowing to use the Berkeley weblogo server at http://weblogo.berkeley.edu/
10 """
11
12 from __future__ import generators
13 from Bio.SubsMat import FreqTable
14
16 """
17 A class representing sequence motifs.
18 """
20 self.instances = []
21 self.score = 0.0
22 self.mask = []
23 self._pwm_is_current = 0
24 self._pwm = []
25 self.alphabet=None
26 self.length=None
27
29 if self.length==None:
30 self.length = len
31 elif self.length != len:
32 raise ValueError, "You can't change the length of the motif"
33
39
48
50 """
51 sets the mask for the motif
52
53 The mask should be a string containing asterisks in the position of significant columns and spaces in other columns
54 """
55 self._check_length(len(mask))
56 self.mask=[]
57 for char in mask:
58 if char=="*":
59 self.mask.append(1)
60 elif char==" ":
61 self.mask.append(0)
62 else:
63 raise ValueError("Mask should contain only '*' or ' ' and not a '%s'"%char)
64
66 """
67 returns the PWM computed for the set of instances
68 """
69
70 if self._pwm_is_current:
71 return self._pwm
72
73 self._pwm = []
74 for i in xrange(len(self.mask)):
75 dict = {}
76
77 for letter in self.alphabet.letters:
78 dict[letter]=0
79
80 for seq in self.instances:
81 dict[seq[i]]=dict[seq[i]]+1
82 self._pwm.append(FreqTable.FreqTable(dict,FreqTable.COUNT,self.alphabet))
83 self._pwm_is_current=True
84 return self._pwm
85
87 """
88 a generator function, returning found positions of instances of the motif in a given sequence
89 """
90 for pos in xrange(0,len(sequence)-self.length+1):
91 for instance in self.instances:
92 if instance.tostring()==sequence[pos:pos+self.length].tostring():
93 yield(pos,instance)
94 break
95
96 - def score_hit(self,sequence,position,normalized=1,masked=0):
110
111 - def search_pwm(self,sequence,threshold=0.0,normalized=1,masked=1):
120
121
122 - def sim(self, motif, masked = 0):
123 """
124 return the similarity score for the given motif against self.
125
126 We use the Pearson's correlation of the respective probabilities.
127 If the motifs have different length or mask raise the ValueError.
128 """
129
130 from math import sqrt
131
132 if self.alphabet != motif.alphabet:
133 raise ValueError("Wrong alphabet")
134
135 if self.length != motif.length:
136 raise ValueError("Wrong length")
137
138 if masked and self.mask!=motif.mask:
139 raise ValueError("Wrong mask")
140
141 sxx = 0
142 sxy = 0
143 sx = 0
144 sy = 0
145 syy = 0
146
147 for pos in xrange(self.length):
148 if not masked or self.mask:
149 for l in self.alphabet.letters:
150 xi = self.pwm()[pos][l]
151 yi = motif.pwm()[pos][l]
152 sx = sx + xi
153 sy = sy + yi
154 sxx = sxx + xi * xi
155 syy = syy + yi * yi
156 sxy = sxy + xi * yi
157
158 if masked:
159 norm = len(filter(lambda x: x,self.mask))
160 else:
161 norm = self.length
162
163 norm *= len(self.alphabet.letters)
164 s1 = (sxy - sx*sy*1.0/norm)
165 s2 = (sxx - sx*sx*1.0/norm)*(syy- sy*sy*1.0/norm)
166
167 return s1/sqrt(s2)
168
169 - def read(self,stream):
170 """
171 reads the motif from the stream
172
173 the self.alphabet variable must be set before
174 """
175 from Bio.Seq import Seq
176 while 1:
177 ln = stream.readline()
178 if "*" in ln:
179 self.set_mask(ln.strip("\n\c"))
180 break
181 self.add_instance(Seq(ln.strip(),self.alphabet))
182
184 """
185 string representation of motif
186 """
187 str = ""
188 for inst in self.instances:
189 str = str + inst.tostring() + "\n"
190
191 for i in xrange(self.length):
192 if self.mask[i]:
193 str = str + "*"
194 else:
195 str = str + " "
196 str = str + "\n"
197
198 return str
199
201 """
202 writes the motif to the stream
203 """
204
205 stream.write(self.__str__())
206
207
208
210 """
211 FASTA representation of motif
212 """
213 str = ""
214 for i,inst in enumerate(self.instances):
215 str = str + "> instance %d\n"%i + inst.tostring() + "\n"
216
217 return str
218
219 - def weblogo(self,fname,format="PNG",**kwds):
220 """
221 uses the Berkeley weblogo service to download and save a weblogo of itself
222
223 requires an internet connection.
224 The parameters from **kwds are passed directly to the weblogo server.
225 """
226 import urllib
227 import urllib2
228
229 al= self.to_fasta()
230
231 url = 'http://weblogo.berkeley.edu/logo.cgi'
232 values = {'sequence' : al,
233 'format' : format,
234 'logowidth' : '18',
235 'logoheight' : '5',
236 'logounits' : 'cm',
237 'kind' : 'AUTO',
238 'firstnum' : "1",
239 'command' : 'Create Logo',
240 'smallsamplecorrection' : "on",
241 'symbolsperline' : 32,
242 'res' : '96',
243 'res_units' : 'ppi',
244 'antialias' : 'on',
245 'title' : '',
246 'barbits' : '',
247 'xaxis': 'on',
248 'xaxis_label' : '',
249 'yaxis': 'on',
250 'yaxis_label' : '',
251 'showends' : 'on',
252 'shrink' : '0.5',
253 'fineprint' : 'on',
254 'ticbits' : '1',
255 'colorscheme' : 'DEFAULT',
256 'color1' : 'green',
257 'color2' : 'blue',
258 'color3' : 'red',
259 'color4' : 'black',
260 'color5' : 'purple',
261 'color6' : 'orange',
262 'color1' : 'black',
263 }
264 for k,v in kwds.items():
265 values[k]=str(v)
266
267 data = urllib.urlencode(values)
268 req = urllib2.Request(url, data)
269 response = urllib2.urlopen(req)
270 f=open(fname,"w")
271 im=response.read()
272
273 f.write(im)
274 f.close()
275