Package Bio :: Module MaxEntropy

Source Code for Module Bio.MaxEntropy

# Copyright 2001 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
Maximum Entropy code.

Uses Improved Iterative Scaling:
XXX ref

# XXX need to define terminology

"""
import math
from Numeric import *
from Bio import listfns

# XXX typecodes for Numeric
# XXX multiprocessor

MAX_IIS_ITERATIONS = 10000    # Maximum iterations for IIS.
IIS_CONVERGE = 1E-5           # Convergence criterion for IIS.
MAX_NEWTON_ITERATIONS = 100   # Maximum iterations for Newton's method.
NEWTON_CONVERGE = 1E-10       # Convergence criterion for Newton's method.

class MaxEntropy:
    """Holds information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    """
    def __init__(self):
        self.classes = []
        self.alphas = []
        self.feature_fns = []

def calculate(me, observation):
    """calculate(me, observation) -> list of log probs

    Calculate the log of the probability for each class.  me is a
    MaxEntropy object that has been trained.  observation is a vector
    representing the observed data.  The return value is a list of
    unnormalized log probabilities for each class, parallel to
    me.classes.

    """
    scores = []
    for klass in me.classes:
        lprob = 0.0
        for fn, alpha in zip(me.feature_fns, me.alphas):
            lprob += fn(observation, klass) * alpha
        scores.append(lprob)
    return scores

def classify(me, observation):
    """classify(me, observation) -> class

    Classify an observation into a class.

    """
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass

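Since calculate() returns unnormalized log probabilities, normalized posteriors can be recovered with a softmax over the scores.  A minimal sketch (posterior_probs is a hypothetical helper, not part of this module):

import math
from Bio.MaxEntropy import calculate

def posterior_probs(me, observation):
    # Hypothetical helper: turn the unnormalized log probabilities from
    # calculate() into normalized posteriors P(class|observation),
    # parallel to me.classes.
    scores = calculate(me, observation)
    # Subtract the max score before exponentiating, for numerical stability.
    m = max(scores)
    exps = [math.exp(s - m) for s in scores]
    z = 0.0   # explicit loop; Numeric's star import shadows builtin sum()
    for e in exps:
        z += e
    return [e / z for e in exps]
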
def _eval_feature_fn(fn, xs, classes):
    """_eval_feature_fn(fn, xs, classes) -> dict of values

    Evaluate a feature function on every instance of the training set
    and class.  fn is a callback function that takes two parameters: a
    training instance and a class.  Return a dictionary of (training
    set index, class index) -> non-zero value.  Values of 0 are not
    stored in the dictionary.

    """
    values = {}
    for i in range(len(xs)):
        for j in range(len(classes)):
            f = fn(xs[i], classes[j])
            if f != 0:
                values[(i, j)] = f
    return values

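As an illustration, a feature function is any callable taking (observation, class) and returning a number, typically 0 or 1.  The data and feature below are hypothetical:

from Bio.MaxEntropy import _eval_feature_fn

# Hypothetical training data: each observation is a (color, type) tuple.
xs = [('Red', 'Sports'), ('Red', 'SUV'), ('Gray', 'Sports')]
classes = ['No', 'Yes']

def is_red_and_yes(x, klass):
    # Fires only when a red car is paired with the 'Yes' class.
    if x[0] == 'Red' and klass == 'Yes':
        return 1
    return 0

print(_eval_feature_fn(is_red_and_yes, xs, classes))
# -> {(0, 1): 1, (1, 1): 1}   (sparse: zero-valued entries are omitted)
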
def _calc_empirical_expects(xs, ys, classes, features):
    """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations

    Calculate the expectation of each feature function over the
    training data.  These are the constraints for the maximum entropy
    distribution.  Return a list of expectations, parallel to the
    list of features.

    """
    # E~[f_i] = SUM_x,y P~(x, y) f_i(x, y)
    #         = 1/N SUM_j f_i(x_j, y_j)
    class2index = listfns.itemindex(classes)
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(float(s) / N)
    return expect

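A worked check, continuing the hypothetical feature above (data and labels are made up for illustration):

from Bio.MaxEntropy import _eval_feature_fn, _calc_empirical_expects

xs = [('Red', 'Sports'), ('Red', 'SUV'), ('Gray', 'Sports')]
ys = ['Yes', 'No', 'No']
classes = ['No', 'Yes']

def is_red_and_yes(x, klass):
    if x[0] == 'Red' and klass == 'Yes':
        return 1
    return 0

feature = _eval_feature_fn(is_red_and_yes, xs, classes)
# The feature fires on an observed (x, y) pair only for instance 0,
# the one red car actually labeled 'Yes', so E~[f] = 1/3.
print(_calc_empirical_expects(xs, ys, classes, [feature]))   # -> [0.3333...]
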
def _calc_model_expects(xs, classes, features, alphas):
    """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.

    Calculate the expectation of each feature from the model.  This is
    not used in maximum entropy training, but provides a good function
    for debugging.

    """
    # E[f_i] = SUM_X P~(x) SUM_Y P(y|x) f_i(x, y)
    #        = 1/N SUM_X SUM_Y P(y|x) f_i(x, y)
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        s = 0.0
        for (i, j), f in feature.items():
            s += p_yx[i][j] * f
        expects.append(s / len(xs))
    return expects

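Because the maximum entropy solution is exactly the model whose feature expectations match the empirical ones, this function gives a direct convergence check.  A sketch (check_convergence is a hypothetical helper; the tolerance is arbitrary):

import math
from Bio.MaxEntropy import _calc_empirical_expects, _calc_model_expects

def check_convergence(xs, ys, classes, features, alphas, tol=1E-3):
    # Hypothetical debugging helper: compare the model expectations
    # against the empirical expectations they are constrained to match.
    e_emp = _calc_empirical_expects(xs, ys, classes, features)
    e_mod = _calc_model_expects(xs, classes, features, alphas)
    for emp, mod in zip(e_emp, e_mod):
        assert math.fabs(emp - mod) < tol, (emp, mod)
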
def _calc_p_class_given_x(xs, classes, features, alphas):
    """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix

    Calculate P(y|x), where y is the class and x is an instance from
    the training set.  Return a len(xs) x len(classes) matrix of
    probabilities.

    """
    prob_yx = zeros((len(xs), len(classes)), Float32)

    # Calculate log P(y, x).
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Exponentiate to get P(y, x).
    prob_yx = exp(prob_yx)
    # Divide out the total probability over each class, so we get P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z

    # Equivalent pure-Python implementation, kept for reference:
    #prob_yx = []
    #for i in range(len(xs)):
    #    z = 0.0   # Normalization factor for this x, over all classes.
    #    probs = [0.0] * len(classes)
    #    for j in range(len(classes)):
    #        log_p = 0.0   # log of the probability of f(x, y)
    #        for k in range(len(features)):
    #            log_p += alphas[k] * features[k].get((i, j), 0.0)
    #        probs[j] = math.exp(log_p)
    #        z += probs[j]
    #    # Normalize the probabilities for this x.
    #    probs = map(lambda x, z=z: x/z, probs)
    #    prob_yx.append(probs)
    return prob_yx

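In standard notation, the conditional model computed here is the log-linear form (this restates the comments in the code):

P(y \mid x) = \frac{\exp\left(\sum_i \alpha_i f_i(x, y)\right)}{\sum_{y'} \exp\left(\sum_i \alpha_i f_i(x, y')\right)}
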
def _calc_f_sharp(N, nclasses, features):
    """_calc_f_sharp(N, nclasses, features) -> matrix of f# values."""
    # f#(x, y) = SUM_i f_i(x, y)
    f_sharp = zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp

def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx):
    # Use Newton's method to solve for delta_i in:
    #   SUM_x P~(x) SUM_c P(c|x) f_i(x, c) e^[delta_i * f#(x, c)] = E~[f_i]
    # i.e. find the root of g(delta) = empirical - model term.
    delta = 0.0
    iters = 0
    while iters < MAX_NEWTON_ITERATIONS:    # iterate for Newton's method
        f_newton = df_newton = 0.0          # evaluate the function and derivative
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * math.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if math.fabs(ratio) < NEWTON_CONVERGE:   # converged
            break
        iters = iters + 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta

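Written out, the root-finding problem above applies the standard Newton update to g(delta), where \tilde{E}[f_i] is the empirical expectation and f^{\#} is the feature sum from _calc_f_sharp:

g(\delta) = \tilde{E}[f_i] - \frac{1}{N} \sum_{x} \sum_{c} P(c \mid x)\, f_i(x, c)\, e^{\delta\, f^{\#}(x, c)},
\qquad
\delta \leftarrow \delta - \frac{g(\delta)}{g'(\delta)}
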
def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical):
    # Do one iteration of hill climbing to find better alphas.
    # This is a good function to parallelize.

    # Pre-calculate P(y|x).
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx)
        newalphas[i] += delta
    return newalphas

def train(training_set, results, feature_fns, update_fn=None):
    """train(training_set, results, feature_fns[, update_fn]) -> MaxEntropy object

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that's called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    """
    if not len(training_set):
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    # Rename variables for convenience.
    xs, ys = training_set, results

    # Get a list of all the classes that need to be trained.
    classes = listfns.items(results)
    classes.sort()

    # Cache values for all features.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]
    # Cache values for f#.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Now train the alpha parameters to weigh each feature.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < MAX_IIS_ITERATIONS:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical)
        diff = 0.0
        for old, new in zip(alphas, nalphas):
            diff += math.fabs(old - new)
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < IIS_CONVERGE:   # converged
            break
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")

    return me
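
A minimal end-to-end sketch with hypothetical data.  (A feature that perfectly predicts its class would drive its weight toward infinity and keep IIS from converging, so the toy labels below are deliberately noisy.)

from Bio.MaxEntropy import train, classify

# Hypothetical data: classify cars as purchased ('Yes') or not ('No').
xs = [('Red', 'Sports'), ('Red', 'Sports'), ('Red', 'Sports'),
      ('Gray', 'SUV'), ('Gray', 'SUV'), ('Gray', 'SUV')]
ys = ['Yes', 'Yes', 'No', 'No', 'No', 'Yes']

def feature_sports_yes(x, klass):
    # Fires when a sports car is paired with the 'Yes' class.
    if x[1] == 'Sports' and klass == 'Yes':
        return 1
    return 0

def feature_suv_no(x, klass):
    # Fires when an SUV is paired with the 'No' class.
    if x[1] == 'SUV' and klass == 'No':
        return 1
    return 0

me = train(xs, ys, [feature_sports_yes, feature_suv_no])
print(classify(me, ('Red', 'Sports')))   # 'Yes' (sports cars are mostly bought)
print(classify(me, ('Gray', 'SUV')))     # 'No'  (SUVs are mostly not)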