1 """
2 Ewan Klein, March 2007
3
4 Experimental module to provide support for implementing English morphology by
5 feature unification.
6
7 The main challenge is to find a way of encoding morphosyntactic rules. The current idea is to let a concatenated form such as 'walk + s' be encoded as a dictionary C{'stem': 'walk', 'affix': 's'}. This allows the morpho-phonological representation to undergo unification in the normal way.
8 """
9
10 from nltk_lite.contrib.featurelite import *
11 import re
12
14 """
15 A Phon object is just a stem and an affix.
16 """
17 - def __init__(self, stem=None, affix=None):
21
23 return "%s + %s" % (self['stem'] , self['affix'] )
24
25 """
26 >>> print Phon('a', 'b')
27 a + b
28 """
29
31 """
32 Output 'phon' values in 'stem + affix' notation.
33 """
34 return dumper.represent_scalar(u'!phon', u'%s + %s' % \
35 (data['stem'], data['affix']))
36
37 yaml.add_representer(Phon, phon_representer)
38
39 """
40 >>> print yaml.dump({'phon': Phon('a', 'b')})
41 {phon: !phon 'a + b'}
42 """
43
45 """
46 Turn input into a non-Unicode string with leading and trailing whitespace stripped.
47 Return a Variable if input is of the form '?name'.
48 """
49 s = str(s.strip())
50 patt = re.compile(r'^\?\w+$')
51 if patt.match(s):
52 name = s[1:]
53 return Variable(name)
54 return s
55
63
64 yaml.add_constructor(u'!phon', phon_constructor)
65
66
67
68
69
70 """
71 We have to specify the input using the '!phon' constructor.
72
73 >>> print yaml.load('''
74 ... form: !phon 'walk + s'
75 ... ''')
76 {'form': 'walk + s'}
77
78 Unifying a stem and a phonological output:
79
80 >>> f1 = yaml.load('''
81 ... form: !phon ?x + s
82 ... stem: ?x
83 ... ''')
84
85 >>> f2 = yaml.load('''
86 ... stem: walk
87 ... ''')
88
89 >>> f3 = unify(f1, f2)
90 >>> print f3
91 {'form': walk + s, 'stem': 'walk'}
92
93 In the next example, we follow B&B in using 'sym' as the name of the semantic constant in the lexical entry. We might want to have a semantic constructor like Phon so that we could write things like '\\x. (?sem x)'. Or perhaps not.
94
95 >>> lex_walk = yaml.load('''
96 ... sym: 'walk'
97 ... stem: 'walk'
98 ... ''')
99
100 >>> thirdsg = yaml.load('''
101 ... sym: ?x
102 ... sem: ?x
103 ... stem: ?y
104 ... phon: !phon ?x + s
105 ... ''')
106
107
108 >>> walks = unify(lex_walk, thirdsg)
109 >>> print walks
110 {'sem': 'walk', 'phon': walk + s, 'sym': 'walk', 'stem': 'walk'}
111 """
112
114 "Run the doctests embedded in this module."
115 import doctest
116 doctest.testmod()
117
118 if __name__ == "__main__":
119 test()
120