Home | Trees | Indices | Help |
|
---|
|
1 # Natural Language Toolkit - Decision Stump 2 # Understands the procedure of creating a decision stump and 3 # calculating the number of errors 4 # Is generally created at the attribute level 5 # ie. each attribute will have a decision stump of its own 6 # 7 # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 8 # 9 # URL: <http://nltk.sf.net> 10 # This software is distributed under GPL, for license information see LICENSE.TXT 11 12 from math import log 13 from nltk_lite.probability import FreqDist 1489 95 10117 self.attribute = attribute 18 """ 19 counts is a dictionary in which 20 each key is an attribute value 21 and each value is a dictionary of class frequencies for that attribute value 22 """ 23 self.counts, self.children = {}, {} #it has children only in decision trees 24 self.root = dictionary_of_values(klass) 25 for value in attribute.values: 26 self.counts[value] = dictionary_of_values(klass)2729 attr_value = instance.value(self.attribute) 30 self.counts[attr_value][instance.klass_value] += 1 31 self.root[instance.klass_value] += 13234 count_for_each_attr_value = self.counts.values() 35 total, errors = 0, 0 36 for class_count in count_for_each_attr_value: 37 subtotal, counts = 0, class_count.values() 38 counts.sort() 39 for count in counts: subtotal += count 40 errors += (subtotal - counts[-1]) 41 total += subtotal 42 return float(errors)/ total4345 attr_value = instance.value(self.attribute) 46 if not self.children.has_key(attr_value): 47 return self.majority_klass(attr_value) 48 return self.children[attr_value].klass(instance)4951 klass_values_with_count = self.counts[attr_value] 52 _max, klass_value = 0, None 53 for klass, count in klass_values_with_count.items(): 54 if count > _max: 55 _max, klass_value = count, klass 56 return klass_value5759 """ 60 Returns the entropy of class distribution for a particular attribute value 61 """ 62 from nltk_lite.contrib.classifier import entropy_of_key_counts 63 return 
entropy_of_key_counts(self.counts[attr_value])6466 total, total_num_of_instances = 0, 0 67 for attr_value in self.attribute.values: 68 instance_count = total_counts(self.counts[attr_value]) 69 if instance_count == 0: 70 continue 71 total += (instance_count * self.entropy(attr_value)) 72 total_num_of_instances += instance_count 73 return float(total) / total_num_of_instances7476 from nltk_lite.contrib.classifier import entropy_of_key_counts 77 return entropy_of_key_counts(self.root) - self.mean_information()78 81
Home | Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0beta1 on Wed May 16 22:47:42 2007 | http://epydoc.sourceforge.net |