Package nltk_lite :: Package contrib :: Module lex
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.lex

  1  """ 
  2  Ewan Klein, March 2007 
  3   
  4  Experimental module to provide support for implementing English morphology by 
  5  feature unification. 
  6   
  7  Main challenge is to find a way of encoding morphosyntactic rules. The current idea is to let a concatenated form such as 'walk + s' be encoded as a dictionary C{'stem': 'walk', 'affix': 's'}. This allows the morpho-phonological representation to undergo unification in the normal way. 
  8  """ 
  9   
 10  from nltk_lite.contrib.featurelite import * 
 11  import re 
 12   
class Phon(dict):
    """
    A Phon object is just a stem and an affix.

    It is a dictionary initialised with two keys, C{'stem'} and
    C{'affix'}, and it is displayed in C{stem + affix} notation.

    >>> print(Phon('a', 'b'))
    a + b
    """

    def __init__(self, stem=None, affix=None):
        """
        @param stem: value stored under the C{'stem'} key.
        @param affix: value stored under the C{'affix'} key.
        """
        super(Phon, self).__init__()
        self['stem'] = stem
        self['affix'] = affix

    def __repr__(self):
        """Return the C{stem + affix} notation for this object."""
        return "%s + %s" % (self['stem'], self['affix'])
def phon_representer(dumper, data):
    """
    Output 'phon' values in 'stem + affix' notation.

    @param dumper: the YAML dumper; its C{represent_scalar} method is
        called to build the node.
    @param data: a mapping with C{'stem'} and C{'affix'} keys.
    @return: whatever C{dumper.represent_scalar} returns for the
        C{!phon} tag and the formatted text.
    """
    text = u'%s + %s' % (data['stem'], data['affix'])
    return dumper.represent_scalar(u'!phon', text)
# Register the '!phon' output format for Phon objects with YAML.
yaml.add_representer(Phon, phon_representer)

# NOTE: the string below is a bare expression, not a docstring, so
# doctest.testmod() does not collect the example in it.
"""
>>> print yaml.dump({'phon': Phon('a', 'b')})
{phon: !phon 'a + b'}
"""
# Matches a whole string of the form '?name'. Compiled once at import
# time rather than on every call to normalize().
_VARIABLE_PATTERN = re.compile(r'^\?\w+$')


def normalize(s):
    """
    Turn input into a plain C{str}, stripped of leading and trailing
    whitespace.

    Return a Variable if input is of the form '?name'.

    @param s: the text to clean up; it must provide a C{strip} method.
    @return: C{Variable(name)} when the stripped text has the form
        C{'?name'}, otherwise the stripped text converted with C{str}.
    """
    text = str(s.strip())
    if _VARIABLE_PATTERN.match(text):
        # Drop the leading '?' to obtain the variable's name.
        return Variable(text[1:])
    return text
55
def phon_constructor(loader, node):
    """
    Recognize 'stem + affix' as Phon objects in YAML.

    @param loader: the YAML loader; its C{construct_scalar} method is
        used to read the text of C{node}.
    @param node: the YAML node carrying the C{!phon} tag.
    @return: a C{Phon} whose stem and affix are the two C{'+'}-separated
        parts of the scalar, each passed through C{normalize}.
    @raise ValueError: if the scalar does not consist of exactly two
        parts separated by a single C{'+'}.
    """
    value = loader.construct_scalar(node)
    parts = value.split('+')
    if len(parts) != 2:
        # Unpacking would raise ValueError anyway; fail early with a
        # message that shows the offending text instead.
        raise ValueError(
            "expected a '!phon' value of the form 'stem + affix', got %r"
            % (value,))
    stem, affix = [normalize(part) for part in parts]
    return Phon(stem, affix)
# Register the constructor that turns '!phon' scalars into Phon objects.
yaml.add_constructor(u'!phon', phon_constructor)

#following causes YAML to barf for some reason:
#pattern = re.compile(r'^(\?)?\w+\s*\+\s*(\?)?\w+$')
#yaml.add_implicit_resolver(u'phon', pattern)

# NOTE: the string below is a bare expression, not a docstring, so
# doctest.testmod() does not collect the examples in it.
"""
We have to specify the input using the '!phon' constructor.

>>> print yaml.load('''
... form: !phon 'walk + s'
... ''')
{'form': walk + s}

Unifying a stem and a phonological output:

>>> f1 = yaml.load('''
... form: !phon ?x + s
... stem: ?x
... ''')

>>> f2 = yaml.load('''
... stem: walk
... ''')

>>> f3 = unify(f1, f2)
>>> print f3
{'form': walk + s, 'stem': 'walk'}

In the next example, we follow B&B in using 'sym' as the name of the
semantic constant in the lexical entry. We might want to have a semantic
constructor like Phon so that we could write things like
'\\x. (?sem x)'. Or perhaps not.

>>> lex_walk = yaml.load('''
... sym: 'walk'
... stem: 'walk'
... ''')

>>> thirdsg = yaml.load('''
... sym: ?x
... sem: ?x
... stem: ?y
... phon: !phon ?x + s
... ''')


>>> walks = unify(lex_walk, thirdsg)
>>> print walks
{'sem': 'walk', 'phon': walk + s, 'sym': 'walk', 'stem': 'walk'}
"""
def test():
    """
    Run the doctests of the C{__main__} module.

    C{doctest.testmod()} is called without arguments, so it examines
    C{__main__}, which is this module when the file is run as a script.
    """
    import doctest
    doctest.testmod()


if __name__ == "__main__":
    test()