Package nltk_lite :: Package contrib :: Package classifier_tests :: Module featureselecttests
[hide private]
[frames] | [no frames]

Source Code for Module nltk_lite.contrib.classifier_tests.featureselecttests

  1  # Natural Language Toolkit - Feature Select tests 
  2  # 
  3  # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 
  4  # 
  5  # URL: <http://nltk.sf.net> 
  6  # This software is distributed under GPL, for license information see LICENSE.TXT 
  7  from nltk_lite.contrib.classifier_tests import * 
  8  from nltk_lite.contrib.classifier import featureselect as fs, decisionstump as ds, format 
  9  from nltk_lite.contrib.classifier.exceptions import invaliddataerror as inv 
 10   
11 -class FeatureSelectTestCase(unittest.TestCase):
12 - def test_decodes_parameters(self):
13 feature_select = fs.FeatureSelect() 14 feature_select.parse(['-a', 'RNK', '-t', 'path', '-T', 'path1,path2', '-o', 'IG,4']) 15 algorithm = feature_select.values.ensure_value('algorithm', None) 16 training = feature_select.values.ensure_value('training', None) 17 test = feature_select.values.ensure_value('test', None) 18 options = feature_select.values.ensure_value('options', None) 19 20 self.assertEqual('RNK', algorithm) 21 self.assertEqual('path', training) 22 self.assertEqual('path1,path2', test) 23 self.assertEqual('IG,4', options)
24
25 - def test_validates_algorithm(self):
26 feat_sel = FeatureSelectStub() 27 self.assertFalse(feat_sel.error_called) 28 feat_sel.parse(['-a', 'RNL', '-t', 'path', '-T', 'path1,path2', '-o', 'IG,4']) 29 self.assertTrue(feat_sel.error_called) 30 self.assertEqual('option -a: invalid choice: \'RNL\' (choose from \'RNK\')', feat_sel.message)
31
33 feat_sel = FeatureSelectStub() 34 self.assertFalse(feat_sel.error_called) 35 feat_sel.run(['-a', 'RNK', '-t', 'path', '-o', 'IG,4']) 36 self.assertTrue(feat_sel.error_called) 37 self.assertEqual('Invalid arguments. One or more required arguments are not present.', feat_sel.message) 38 39 feat_sel = FeatureSelectStub() 40 self.assertFalse(feat_sel.error_called) 41 feat_sel.run(['-a', 'RNK', '-T', 'path1,path2', '-o', 'IG,4']) 42 self.assertTrue(feat_sel.error_called) 43 self.assertEqual('Invalid arguments. One or more required arguments are not present.', feat_sel.message) 44 45 #Takes in the default attribute 46 feat_sel = FeatureSelectStub() 47 self.assertFalse(feat_sel.error_called) 48 feat_sel.run(['-t', 'path', '-T', 'path1,path2', '-o', 'IG,4']) 49 self.assertFalse(feat_sel.error_called) 50 51 feat_sel = FeatureSelectStub() 52 self.assertFalse(feat_sel.error_called) 53 try: 54 feat_sel.run(['-a', 'RNK', '-t', 'path', '-T', 'path1,path2']) 55 except AttributeError: 56 #When not running on the stub will return as soon as it encounters the error 57 pass 58 self.assertTrue(feat_sel.error_called) 59 self.assertEqual('Invalid arguments. One or more required arguments are not present.', feat_sel.message)
60
62 path = datasetsDir(self) + 'numerical' + SEP + 'person' 63 training = format.C45_FORMAT.get_training_instances(path) 64 attributes = format.C45_FORMAT.get_attributes(path) 65 klass = format.C45_FORMAT.get_klass(path) 66 test = format.C45_FORMAT.get_test_instances(path) 67 feature_selection = fs.FeatureSelection(training, attributes, klass, test, None, ['IG','2']) 68 try: 69 feature_selection.by_rank() 70 self.fail('should throw error as path points to continuous attributes') 71 except inv.InvalidDataError: 72 pass
73
75 path = datasetsDir(self) + 'minigolf' + SEP + 'weather' 76 training = format.C45_FORMAT.get_training_instances(path) 77 attributes = format.C45_FORMAT.get_attributes(path) 78 klass = format.C45_FORMAT.get_klass(path) 79 test = format.C45_FORMAT.get_test_instances(path) 80 gold = format.C45_FORMAT.get_gold_instances(path) 81 82 feature_selection = fs.FeatureSelection(training, attributes, klass, test, gold, ['IG','3']) 83 84 ig_for_attr1 = information_gain(attributes[0], klass, training) 85 self.assertAlmostEqual(0.324409, ig_for_attr1, 6) 86 self.assertEqual('outlook', attributes[0].name) 87 ig_for_attr2 = information_gain(attributes[1], klass, training) 88 self.assertAlmostEqual(0.102187, ig_for_attr2, 6) 89 self.assertEqual('temperature', attributes[1].name) 90 ig_for_attr3 = information_gain(attributes[2], klass, training) 91 self.assertAlmostEqual(0.091091, ig_for_attr3, 6) 92 self.assertEqual('humidity', attributes[2].name) 93 ig_for_attr4 = information_gain(attributes[3], klass, training) 94 self.assertAlmostEqual(0.072780, ig_for_attr4, 6) 95 self.assertEqual('windy', attributes[3].name) 96 attributes_to_remove = feature_selection.find_attributes_by_ranking('information_gain', 3) 97 self.assertEqual(1, len(attributes_to_remove)) 98 self.assertEqual('windy', attributes_to_remove[0].name)
99
def information_gain(attribute, klass, instances):
    """Return the information gain reported by a decision stump.

    A ``ds.DecisionStump`` is created for ``attribute`` and ``klass``, its
    counts are updated once for every item in ``instances``, and the value
    of its ``information_gain()`` method is returned.
    """
    decision_stump = ds.DecisionStump(attribute, klass)
    for each in instances:
        decision_stump.update_count(each)
    gain = decision_stump.information_gain()
    return gain
105
106 -class FeatureSelectStub(fs.FeatureSelect):
107 - def __init__(self):
108 fs.FeatureSelect.__init__(self) 109 self.error_called = False 110 self.message = None
111
112 - def error(self, message):
113 #in reality error will display usage and quit 114 self.message = message 115 self.error_called = True
116
118 pass
119