Package nltk_lite :: Package corpora :: Module inaugural
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.corpora.inaugural

 1  # Natural Language Toolkit: Presidential Inaugural Address Corpus Reader 
 2  # 
 3  # Copyright (C) 2001-2007 University of Pennsylvania 
 4  # Author: Steven Bird <sb@ldc.upenn.edu> 
 5  # URL: <http://nltk.sf.net> 
 6  # For license information, see LICENSE.TXT 
 7   
 8  """ 
 9  C-Span Inaugural Address Corpus 
10   
11  US presidential inaugural addresses 1789-2005 
12  """        
13   
14  from nltk_lite.corpora import get_basedir 
15  from nltk_lite import tokenize 
16  import os, re 
17   
# Names of the addresses in the corpus, in chronological order.  Each name
# has the form '<year>-<president>'; raw() appends ".txt" to a name to
# locate the corresponding file.
items = [
    # 18th century
    '1789-Washington', '1793-Washington', '1797-Adams',
    # 19th century
    '1801-Jefferson', '1805-Jefferson', '1809-Madison', '1813-Madison',
    '1817-Monroe', '1821-Monroe', '1825-Adams', '1829-Jackson',
    '1833-Jackson', '1837-VanBuren', '1841-Harrison', '1845-Polk',
    '1849-Taylor', '1853-Pierce', '1857-Buchanan', '1861-Lincoln',
    '1865-Lincoln', '1869-Grant', '1873-Grant', '1877-Hayes',
    '1881-Garfield', '1885-Cleveland', '1889-Harrison', '1893-Cleveland',
    '1897-McKinley',
    # 20th century
    '1901-McKinley', '1905-Roosevelt', '1909-Taft', '1913-Wilson',
    '1917-Wilson', '1921-Harding', '1925-Coolidge', '1929-Hoover',
    '1933-Roosevelt', '1937-Roosevelt', '1941-Roosevelt', '1945-Roosevelt',
    '1949-Truman', '1953-Eisenhower', '1957-Eisenhower', '1961-Kennedy',
    '1965-Johnson', '1969-Nixon', '1973-Nixon', '1977-Carter',
    '1981-Reagan', '1985-Reagan', '1989-Bush', '1993-Clinton',
    '1997-Clinton',
    # 21st century
    '2001-Bush', '2005-Bush',
]
75   
def raw(files=items):
    """
    Generate the tokens of one or more inaugural addresses.

    @param files: a single item name (e.g. C{'1861-Lincoln'}) or a
        sequence of item names; defaults to every entry in C{items}.
    @return: a generator yielding, for each requested file in order, the
        tokens that C{tokenize.wordpunct} produces from that file's text.

    Any error raised by C{open} for a missing or unreadable file is
    propagated to the caller when the generator reaches that file.
    """
    # Accept a bare item name as shorthand for a one-element sequence.
    if isinstance(files, str):
        files = (files,)

    for name in files:
        path = os.path.join(get_basedir(), "inaugural", name + ".txt")
        # Read the whole address and release the handle before yielding,
        # so no file stays open while the consumer iterates (or if the
        # consumer abandons the generator part-way through).
        f = open(path)
        try:
            text = f.read()
        finally:
            f.close()
        for t in tokenize.wordpunct(text):
            yield t
86
def demo():
    """
    Print one line per inaugural address: the year of the address followed
    by the number of times the token 'men' occurs in it.
    """
    from nltk_lite.corpora import inaugural

    for speech in inaugural.items:
        # The first four characters of an item name are taken as the year.
        year = speech[:4]
        # Count matches while streaming the tokens instead of building the
        # complete token list just to call .count() on it.
        freq = sum(1 for token in inaugural.raw(speech) if token == 'men')
        # A single pre-formatted argument prints "<year> <freq>" both as a
        # Python 2 print statement and as a Python 3 print() call.
        print("%s %s" % (year, freq))
# Run the demonstration only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo()