Package nltk_lite :: Package contrib :: Package classifier :: Module decisiontree
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.classifier.decisiontree

 1  # Natural Language Toolkit - Decision Tree 
 2  #  Creates a Decision Tree Classifier 
 3  # 
 4  # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 
 5  # 
 6  # URL: <http://nltk.sf.net> 
 7  # This software is distributed under GPL, for license information see LICENSE.TXT 
 8   
 9  from nltk_lite.contrib.classifier import oner 
10   
11 -class DecisionTree(oner.OneR):
def __init__(self, training, attributes, klass):
    """Initialise the underlying OneR classifier and build the tree.

    The three arguments are forwarded unchanged to ``oner.OneR.__init__``.
    Afterwards the tree is grown from ``self.training`` and its top node is
    stored in ``self.root``.
    """
    oner.OneR.__init__(self, training, attributes, klass)
    # NOTE(review): self.training is presumably set by OneR.__init__ - confirm.
    # No attribute has been used yet when construction starts at the root.
    attributes_used_so_far = []
    self.root = self.build_tree(self.training, attributes_used_so_far)
15
def build_tree(self, instances, used_attributes):
    """Recursively grow the decision (sub)tree for ``instances``.

    The stump returned by ``self.best_decision_stump`` (asked for the
    'maximum_information_gain' criterion) becomes the node for this level.
    For every value of that stump's attribute whose entropy is non-zero,
    ``instances`` is filtered on the value and a child subtree is built and
    stored in the stump's ``children`` mapping under that value.

    ``used_attributes`` holds the attributes already chosen on the path from
    the root down to this node.  It is never modified: each level passes a
    new list (path + its own attribute) to its children, so an attribute
    picked inside one branch stays available to sibling branches.

    Returns the stump for this node, or None when no stump is available.
    """
    decision_stump = self.best_decision_stump(instances, used_attributes, 'maximum_information_gain')
    if decision_stump is None:
        # NOTE(review): best_decision_stump is defined outside this block;
        # presumably it yields None once no unused attribute is left.
        # Returning None here is what makes the "is not None" check on the
        # recursive result below meaningful instead of crashing on
        # ``None.attribute``.
        return None

    # Extend a copy rather than appending in place, so the exclusion list
    # stays specific to this root-to-node path.
    path_attributes = list(used_attributes) + [decision_stump.attribute]

    for attr_value in decision_stump.attribute.values:
        # Values with zero entropy get no subtree.
        if decision_stump.entropy(attr_value) == 0:
            continue
        subset = instances.filter(decision_stump.attribute, attr_value)
        child = self.build_tree(subset, path_attributes)
        if child is not None:
            decision_stump.children[attr_value] = child
    return decision_stump
26
27 - def classify(self, instances):
31
33 return self.higher_value_preferred(lambda decision_stump: decision_stump.information_gain())
34
def maximum_gain_ratio(self):
    """Return the stump picked by ``higher_value_preferred`` when each
    candidate is scored by its own ``gain_ratio()``."""
    def score_by_gain_ratio(stump):
        return stump.gain_ratio()

    return self.higher_value_preferred(score_by_gain_ratio)
37
def higher_value_preferred(self, method):
    """Return the stump in ``self.decision_stumps`` with the largest score.

    ``method`` is called once per stump, in order, and must return a value
    comparable with numbers.  A stump is only selected if its score is
    strictly greater than the best seen so far, starting from -1.  Ties
    therefore keep the earliest stump, and None is returned when there are
    no stumps or none scores above -1.
    """
    best_score = -1
    best_stump = None
    for candidate in self.decision_stumps:
        score = method(candidate)
        if not score > best_score:
            continue
        best_score = score
        best_stump = candidate
    return best_stump
44