1 """Dynamic Programming algorithms for general usage.
2
3 This module contains classes which implement Dynamic Programming
4 algorithms that can be used generally.
5 """
6
8 """An abstract class to calculate forward and backward probabiliies.
9
10 This class should not be instantiated directly, but should be used
11 through a derived class which implements proper scaling of variables.
12
13 This class is just meant to encapsulate the basic foward and backward
14 algorithms, and allow derived classes to deal with the problems of
15 multiplying probabilities.
16
17 Derived class of this must implement:
18
19 o _forward_recursion -- Calculate the forward values in the recursion
20 using some kind of technique for preventing underflow errors.
21
22 o _backward_recursion -- Calculate the backward values in the recursion
23 step using some technique to prevent underflow errors.
24 """
25 - def __init__(self, markov_model, sequence):
26 """Initialize to calculate foward and backward probabilities.
27
28 Arguments:
29
30 o markov_model -- The current Markov model we are working with.
31
32 o sequence -- A training sequence containing a set of emissions.
33 """
34 self._mm = markov_model
35 self._seq = sequence
36
38 """Calculate the forward recursion value.
39 """
40 raise NotImplementedError("Subclasses must implement")
41
43 """Calculate sequence probability using the forward algorithm.
44
45 This implements the foward algorithm, as described on p57-58 of
46 Durbin et al.
47
48 Returns:
49
50 o A dictionary containing the foward variables. This has keys of the
51 form (state letter, position in the training sequence), and values
52 containing the calculated forward variable.
53
54 o The calculated probability of the sequence.
55 """
56
57 state_letters = self._seq.states.alphabet.letters
58
59
60
61
62
63
64
65 forward_var = {}
66
67 forward_var[(state_letters[0], -1)] = 1
68
69 for k in range(1, len(state_letters)):
70 forward_var[(state_letters[k], -1)] = 0
71
72
73
74
75 for i in range(len(self._seq.emissions)):
76
77 for main_state in state_letters:
78
79
80 forward_value = self._forward_recursion(main_state, i,
81 forward_var)
82
83 if forward_value is not None:
84 forward_var[(main_state, i)] = forward_value
85
86
87 first_state = state_letters[0]
88 seq_prob = 0
89
90 for state_item in state_letters:
91
92 forward_value = forward_var[(state_item,
93 len(self._seq.emissions) - 1)]
94
95 transition_value = self._mm.transition_prob[(state_item,
96 first_state)]
97
98 seq_prob += forward_value * transition_value
99
100 return forward_var, seq_prob
101
103 """Calculate the backward recursion value.
104 """
105 raise NotImplementedError("Subclasses must implement")
106
108 """Calculate sequence probability using the backward algorithm.
109
110 This implements the backward algorithm, as described on p58-59 of
111 Durbin et al.
112
113 Returns:
114
115 o A dictionary containing the backwards variables. This has keys
116 of the form (state letter, position in the training sequence),
117 and values containing the calculated backward variable.
118 """
119
120 state_letters = self._seq.states.alphabet.letters
121
122
123
124
125
126
127
128 backward_var = {}
129
130 first_letter = state_letters[0]
131
132 for state in state_letters:
133 backward_var[(state, len(self._seq.emissions) - 1)] = \
134 self._mm.transition_prob[(state, state_letters[0])]
135
136
137
138
139 all_indexes = range(len(self._seq.emissions) - 1)
140 all_indexes.reverse()
141 for i in all_indexes:
142
143 for main_state in state_letters:
144
145
146 backward_value = self._backward_recursion(main_state, i,
147 backward_var)
148
149 if backward_value is not None:
150 backward_var[(main_state, i)] = backward_value
151
152
153
154
155 return backward_var
156
158 """Implement forward and backward algorithms using a rescaling approach.
159
160 This scales the f and b variables, so that they remain within a
161 manageable numerical interval during calculations. This approach is
162 described in Durbin et al. on p 78.
163
164 This approach is a little more straightfoward then log transformation
165 but may still give underflow errors for some types of models. In these
166 cases, the LogDPAlgorithms class should be used.
167 """
168 - def __init__(self, markov_model, sequence):
169 """Initialize the scaled approach to calculating probabilities.
170 Arguments:
171
172 o markov_model -- The current Markov model we are working with.
173
174 o sequence -- A TrainingSequence object that must have a
175 set of emissions to work with.
176 """
177 AbstractDPAlgorithms.__init__(self, markov_model, sequence)
178
179 self._s_values = {}
180
182 """Calculate the next scaling variable for a sequence position.
183
184 This utilizes the approach of choosing s values such that the
185 sum of all of the scaled f values is equal to 1.
186
187 Arguments:
188
189 o seq_pos -- The current position we are at in the sequence.
190
191 o previous_vars -- All of the forward or backward variables
192 calculated so far.
193
194 Returns:
195
196 o The calculated scaling variable for the sequence item.
197 """
198
199 state_letters = self._seq.states.alphabet.letters
200
201
202 s_value = 0
203 for main_state in state_letters:
204 emission = self._mm.emission_prob[(main_state,
205 self._seq.emissions[seq_pos])]
206
207
208 trans_and_var_sum = 0
209 for second_state in self._mm.transitions_from(main_state):
210
211 var_value = previous_vars[(second_state, seq_pos - 1)]
212
213
214 trans_value = self._mm.transition_prob[(second_state,
215 main_state)]
216
217 trans_and_var_sum += (var_value * trans_value)
218
219 s_value += (emission * trans_and_var_sum)
220
221 return s_value
222
224 """Calculate the value of the forward recursion.
225
226 Arguments:
227
228 o cur_state -- The letter of the state we are calculating the
229 forward variable for.
230
231 o sequence_pos -- The position we are at in the training seq.
232
233 o forward_vars -- The current set of forward variables
234 """
235
236
237 if not(self._s_values.has_key(sequence_pos)):
238 self._s_values[sequence_pos] = \
239 self._calculate_s_value(sequence_pos, forward_vars)
240
241
242 seq_letter = self._seq.emissions[sequence_pos]
243 cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)]
244
245 scale_emission_prob = (float(cur_emission_prob) /
246 float(self._s_values[sequence_pos]))
247
248
249 state_pos_sum = 0
250 have_transition = 0
251 for second_state in self._mm.transitions_from(cur_state):
252 have_transition = 1
253
254
255
256 prev_forward = forward_vars[(second_state, sequence_pos - 1)]
257
258
259 cur_trans_prob = self._mm.transition_prob[(second_state,
260 cur_state)]
261 state_pos_sum += prev_forward * cur_trans_prob
262
263
264
265 if have_transition:
266 return (scale_emission_prob * state_pos_sum)
267 else:
268 return None
269
271 """Calculate the value of the backward recursion
272
273 Arguments:
274
275 o cur_state -- The letter of the state we are calculating the
276 forward variable for.
277
278 o sequence_pos -- The position we are at in the training seq.
279
280 o backward_vars -- The current set of backward variables
281 """
282
283
284 if not(self._s_values.has_key(sequence_pos)):
285 self._s_values[sequence_pos] = \
286 self._calculate_s_value(sequence_pos, backward_vars)
287
288
289 state_pos_sum = 0
290 have_transition = 0
291 for second_state in self._mm.transitions_from(cur_state):
292 have_transition = 1
293
294 seq_letter = self._seq.emissions[sequence_pos + 1]
295 cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)]
296
297
298
299 prev_backward = backward_vars[(second_state, sequence_pos + 1)]
300
301
302 cur_transition_prob = self._mm.transition_prob[(cur_state,
303 second_state)]
304
305 state_pos_sum += (cur_emission_prob * prev_backward *
306 cur_transition_prob)
307
308
309 if have_transition:
310 return (state_pos_sum / float(self._s_values[sequence_pos]))
311
312
313 else:
314 return None
315
317 """Implement forward and backward algorithms using a log approach.
318
319 This uses the approach of calculating the sum of log probabilities
320 using a lookup table for common values.
321
322 XXX This is not implemented yet!
323 """
324 - def __init__(self, markov_model, sequence):
325 raise NotImplementedError("Haven't coded this yet...")
326