Package nltk_lite :: Package contrib :: Package classifier :: Module attribute
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.classifier.attribute

 1  # Natural Language Toolkit - Attribute 
 2  #  can extract the name and values from a line and operate on them 
 3  # 
 4  # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 
 5  # 
 6  # URL: <http://nltk.sf.net> 
 7  # This software is distributed under GPL, for license information see LICENSE.TXT 
 8  from nltk_lite.contrib.classifier.exceptions import systemerror as se 
 9  from nltk_lite.contrib.classifier import autoclass as ac, cfile, decisionstump as ds 
10  import UserList 
11   
# Type tag given to an attribute whose only declared value is the literal
# string 'continuous'.
CONTINUOUS = 'continuous'
# Type tag given to every other attribute.
DISCRETE = 'discrete'
14   
class Attribute:
    """A named attribute with its declared values and its position.

    The attribute's type is derived from ``values``: it is CONTINUOUS when
    ``values`` holds exactly one element equal to CONTINUOUS, and DISCRETE
    otherwise.
    """

    def __init__(self, name, values, index):
        """Store ``name``, ``values`` and ``index`` and derive ``type``."""
        self.name = name
        self.values = values
        self.type = self.__get_type()
        self.index = index

    def __get_type(self):
        """Return CONTINUOUS or DISCRETE depending on ``self.values``."""
        if len(self.values) == 1 and self.values[0] == CONTINUOUS:
            return CONTINUOUS
        return DISCRETE

    def has_value(self, to_test):
        """Return True when ``to_test`` is one of the declared values."""
        return to_test in self.values

    def is_continuous(self):
        """Return True when this attribute's type is CONTINUOUS."""
        return self.type == CONTINUOUS

    def split_info(self):
        """Return ``entropy(self.values)``.

        ``entropy`` is imported from ``nltk_lite.contrib.classifier`` at call
        time rather than at module level.
        NOTE(review): presumably this avoids a circular import -- confirm.
        """
        from nltk_lite.contrib.classifier import entropy
        return entropy(self.values)

    def __eq__(self, other):
        """Two attributes are equal when they are of the same class and have
        the same name, values and index."""
        if other is None:
            return False
        if self.__class__ != other.__class__:
            return False
        return (self.name == other.name and
                self.values == other.values and
                self.index == other.index)

    def __ne__(self, other):
        """Inverse of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        two equal attributes would still compare as different under ``!=``.
        """
        return not self.__eq__(other)

    def __str__(self):
        """Return ``'<name>:<values> index:<index>'``."""
        return self.name + ':' + str(self.values) + ' index:' + str(self.index)

    def values_as_str(self):
        """Return the declared values joined by commas (values must be strings)."""
        return ','.join(self.values)
54
55 -class Attributes(UserList.UserList):
def __init__(self, attributes=None):
    """Wrap ``attributes`` as this collection's backing list.

    A list passed in is stored as-is (not copied), exactly as before.  When
    no list is given a fresh empty list is created for this instance; the
    previous ``attributes=[]`` default was a single list shared by every
    instance built without arguments, so mutating one mutated them all.
    """
    if attributes is None:
        attributes = []
    self.data = attributes
58
def has_values(self, test_values):
    """Check ``test_values`` position by position against the attributes.

    Returns False when the number of values differs from the number of
    attributes, or when any non-continuous attribute does not declare the
    value at its position; returns True otherwise.
    """
    if len(test_values) != len(self):
        return False
    for attribute, test_value in zip(self.data, test_values):
        if attribute.is_continuous():
            continue  # do not test continuous attributes
        if not attribute.has_value(test_value):
            return False
    return True
66
68 for attribute in self.data: 69 if attribute.is_continuous(): 70 return True 71 return False
72
def subset(self, indices):
    """Return a plain list of the attributes found at ``indices``.

    The result follows the order of ``indices``.  An out-of-range index
    raises IndexError from the underlying list lookup.
    """
    return [self.data[index] for index in indices]
78
def discretise(self, discretised_attributes):
    """Overwrite stored attributes with their discretised replacements.

    Each replacement is written to the slot named by its own ``index``.
    """
    slots = self.data
    for replacement in discretised_attributes:
        position = replacement.index
        slots[position] = replacement
82
def empty_decision_stumps(self, ignore_attributes, klass):
    """Build one ``ds.DecisionStump(attribute, klass)`` per attribute that
    is not contained in ``ignore_attributes``, in stored order."""
    return [ds.DecisionStump(candidate, klass)
            for candidate in self.data
            if candidate not in ignore_attributes]
90
def remove_attributes(self, attributes):
    """Remove each of ``attributes`` and renumber the survivors.

    Removal goes through ``self.remove``, so an attribute that is not
    present raises ValueError.  Afterwards every remaining attribute's
    ``index`` is reset to its new position.
    """
    # Iterate over a snapshot: if the caller passes this collection (or its
    # backing list) itself, removing while iterating would skip elements.
    for attribute in list(attributes):
        self.remove(attribute)
    # reset indices
    for position, attribute in enumerate(self.data):
        attribute.index = position
97