
Commit 67a5536

Manually apply pull request aimacode#234.
Trivial update to test suites to ensure all new tests included.
1 parent f64fd47 commit 67a5536

20 files changed: +1517 -1 lines changed
aima/core/nlp/data/grammars/ProbCNFGrammarExamples.java (new file): 83 additions & 0 deletions
@@ -0,0 +1,83 @@
package aima.core.nlp.data.grammars;

import java.util.ArrayList;

import aima.core.nlp.data.lexicons.LexiconExamples;
import aima.core.nlp.parsing.Lexicon;
import aima.core.nlp.parsing.grammars.ProbCNFGrammar;
import aima.core.nlp.parsing.grammars.Rule;

/**
 * A store of example Probabilistic Chomsky-Normal-Form grammars for testing and
 * demonstrating the CYK algorithm.
 * @author Jonathon
 */
public class ProbCNFGrammarExamples {

    /**
     * An elementary Chomsky-Normal-Form grammar for simple testing and
     * demonstration. This type of grammar is seen more often in computing theory
     * classes, and does not mimic a subset of English phrase structure.
     * @return the example grammar, or null if the rules could not be added
     */
    public static ProbCNFGrammar buildExampleGrammarOne() {
        ProbCNFGrammar g = new ProbCNFGrammar();
        ArrayList<Rule> rules = new ArrayList<Rule>();
        // Start Rules
        rules.add(new Rule("S", "Y,Z", (float) 0.10));
        rules.add(new Rule("B", "B,D", (float) 0.10));
        rules.add(new Rule("B", "G,D", (float) 0.10));
        rules.add(new Rule("C", "E,C", (float) 0.10));
        rules.add(new Rule("C", "E,H", (float) 0.10));
        rules.add(new Rule("E", "M,N", (float) 0.10));
        rules.add(new Rule("D", "M,N", (float) 0.10));
        rules.add(new Rule("Y", "E,C", (float) 0.10));
        rules.add(new Rule("Z", "E,C", (float) 0.10));

        // Terminal Rules
        rules.add(new Rule("M", "m", (float) 1.0));
        rules.add(new Rule("N", "n", (float) 1.0));
        rules.add(new Rule("B", "a", (float) 0.25));
        rules.add(new Rule("B", "b", (float) 0.25));
        rules.add(new Rule("B", "c", (float) 0.25));
        rules.add(new Rule("B", "d", (float) 0.25));
        rules.add(new Rule("G", "a", (float) 0.50));
        rules.add(new Rule("G", "d", (float) 0.50));
        rules.add(new Rule("C", "x", (float) 0.20));
        rules.add(new Rule("C", "y", (float) 0.20));
        rules.add(new Rule("C", "z", (float) 0.60));
        rules.add(new Rule("H", "u", (float) 0.50));
        rules.add(new Rule("H", "z", (float) 0.50));

        // Add all of these rules to the grammar
        if (!g.addRules(rules)) {
            return null;
        }
        return g;
    }

    /**
     * A more restrictive phrase-structure grammar, used in testing and demonstrating
     * the CYK algorithm.
     * Note: it is complemented by the "trivial lexicon" in LexiconExamples.java.
     * @return the trivial grammar, or null if the rules could not be added
     */
    public static ProbCNFGrammar buildTrivialGrammar() {
        ProbCNFGrammar g = new ProbCNFGrammar();
        ArrayList<Rule> rules = new ArrayList<Rule>();
        rules.add(new Rule("S", "NP,VP", (float) 1.0));
        rules.add(new Rule("NP", "ARTICLE,NOUN", (float) 0.50));
        rules.add(new Rule("NP", "PRONOUN,ADVERB", (float) 0.50));
        rules.add(new Rule("VP", "VERB,NP", (float) 1.0));
        // Add terminal rules from the trivial lexicon
        Lexicon trivLex = LexiconExamples.buildTrivialLexicon();
        ArrayList<Rule> terminalRules = new ArrayList<Rule>(trivLex.getAllTerminalRules());
        rules.addAll(terminalRules);
        // Add all of these rules to the grammar
        if (!g.addRules(rules)) {
            return null;
        }
        return g;
    }
}
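ProbCNFGrammarExamples only builds the grammars; a minimal sketch of how a test might consume buildTrivialGrammar() is shown below. The test class name and package are hypothetical, and the CYK parser invocation is omitted because the parser's API is not part of this commit.

package aima.test.core.unit.nlp.parse; // hypothetical test package, not part of this commit

import aima.core.nlp.data.grammars.ProbCNFGrammarExamples;
import aima.core.nlp.parsing.grammars.ProbCNFGrammar;

public class TrivialGrammarSmokeTest {

    public static void main(String[] args) {
        // buildTrivialGrammar() returns null if ProbCNFGrammar.addRules(...) rejects any rule
        ProbCNFGrammar grammar = ProbCNFGrammarExamples.buildTrivialGrammar();
        if (grammar == null) {
            throw new IllegalStateException("trivial CNF grammar could not be built");
        }
        // A CYK parser would now be handed this grammar together with a tokenised
        // sentence such as ["the", "man", "liked", "the", "woman"], which the trivial
        // grammar and lexicon can derive; that call is omitted here because the parser
        // API is not shown in this commit.
        System.out.println("Trivial CNF grammar built successfully");
    }
}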
aima/core/nlp/data/grammars/ProbContextFreeExamples.java (new file): 58 additions & 0 deletions
@@ -0,0 +1,58 @@
package aima.core.nlp.data.grammars;

import java.util.ArrayList;

import aima.core.nlp.data.lexicons.LexiconExamples;
import aima.core.nlp.parsing.Lexicon;
import aima.core.nlp.parsing.grammars.ProbContextFreeGrammar;
import aima.core.nlp.parsing.grammars.Rule;

/**
 * A store of example probabilistic context-free grammars, including the
 * wumpus-world grammar adapted from the text, for testing and demonstration.
 */
public class ProbContextFreeExamples {

    public static ProbContextFreeGrammar buildWumpusGrammar() {
        ProbContextFreeGrammar g = new ProbContextFreeGrammar();
        ArrayList<Rule> rules = new ArrayList<Rule>();
        // Start rules
        rules.add(new Rule("S", "NP,VP", (float) 0.90));
        rules.add(new Rule("S", "CONJ,S", (float) 0.10));
        // Noun phrase rules
        rules.add(new Rule("NP", "PRONOUN", (float) 0.30));
        rules.add(new Rule("NP", "NAME", (float) 0.10));
        rules.add(new Rule("NP", "NOUN", (float) 0.10));
        rules.add(new Rule("NP", "ARTICLE,NOUN", (float) 0.25));
        rules.add(new Rule("NP", "AP,NOUN", (float) 0.05));
        rules.add(new Rule("NP", "DIGIT,DIGIT", (float) 0.05));
        rules.add(new Rule("NP", "NP,PP", (float) 0.10));
        rules.add(new Rule("NP", "NP,RELCLAUSE", (float) 0.05));
        // Verb phrase rules
        rules.add(new Rule("VP", "VERB", (float) 0.40));
        rules.add(new Rule("VP", "VP,NP", (float) 0.35));
        rules.add(new Rule("VP", "VP,ADJS", (float) 0.05));
        rules.add(new Rule("VP", "VP,PP", (float) 0.10));
        rules.add(new Rule("VP", "VP,ADVERB", (float) 0.10));
        // Adjective rules
        rules.add(new Rule("AJD", "AJDS", (float) 0.80));
        rules.add(new Rule("AJD", "AJD,AJDS", (float) 0.20));
        // Article phrase rule.
        // This deviates from the text because the text provides the rule
        // NP -> Article Adjs Noun, which is NOT in Chomsky Normal Form.
        // We instead define AP (Article Phrase), AP -> Article Adjs, to get around this.
        rules.add(new Rule("AP", "ARTICLE,ADJS", (float) 1.0));
        // Prepositional phrase rule
        rules.add(new Rule("PP", "PREP,NP", (float) 1.00));
        // Relative clause rule
        rules.add(new Rule("RELCLAUSE", "RELPRO,VP", (float) 1.00));

        // Now we can add all rules that derive terminal symbols, which in
        // this case are words.
        Lexicon wumpusLex = LexiconExamples.buildWumpusLex();
        ArrayList<Rule> terminalRules = new ArrayList<Rule>(wumpusLex.getAllTerminalRules());
        rules.addAll(terminalRules);
        // Add all of these rules to the grammar
        if (!g.addRules(rules)) {
            return null;
        }
        return g;
    }
}
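As with the CNF examples, a short sketch of how buildWumpusGrammar() and the lexicon it draws on might be exercised is given below. The demo class and its placement are assumptions, but every call it makes appears elsewhere in this commit.

package aima.core.nlp.data.grammars; // hypothetical demo placement, not part of this commit

import java.util.ArrayList;

import aima.core.nlp.data.lexicons.LexiconExamples;
import aima.core.nlp.parsing.Lexicon;
import aima.core.nlp.parsing.grammars.ProbContextFreeGrammar;
import aima.core.nlp.parsing.grammars.Rule;

public class WumpusGrammarDemo {

    public static void main(String[] args) {
        // The grammar bundles the phrase-structure rules above with the
        // terminal (word) rules taken from the wumpus lexicon.
        ProbContextFreeGrammar grammar = ProbContextFreeExamples.buildWumpusGrammar();
        if (grammar == null) {
            throw new IllegalStateException("wumpus grammar could not be built");
        }

        // The terminal rules can also be inspected directly from the lexicon,
        // mirroring what buildWumpusGrammar() does internally.
        Lexicon wumpusLex = LexiconExamples.buildWumpusLex();
        ArrayList<Rule> terminalRules = new ArrayList<Rule>(wumpusLex.getAllTerminalRules());
        System.out.println("Wumpus lexicon contributes " + terminalRules.size() + " terminal rules");
    }
}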
aima/core/nlp/data/lexicons/LexiconExamples.java (new file): 129 additions & 0 deletions
@@ -0,0 +1,129 @@
package aima.core.nlp.data.lexicons;

import java.util.ArrayList;

import aima.core.nlp.parsing.LexWord;
import aima.core.nlp.parsing.Lexicon;

/**
 * A store of simple lexicons for the purpose of testing and demonstrating the CYK algorithm.
 * @author Jonathon
 */
public class LexiconExamples {

    /**
     * Builds an expanded version of the 'wumpus lexicon' found on page 891 of AIMA (3rd edition).
     * @return the wumpus lexicon
     */
    public static Lexicon buildWumpusLex() {
        Lexicon l = new Lexicon();
        ArrayList<LexWord> list = new ArrayList<LexWord>();
        // Noun list
        list.add(new LexWord("stench", (float) 0.05));
        list.add(new LexWord("breeze", (float) 0.10));
        list.add(new LexWord("wumpus", (float) 0.15));
        list.add(new LexWord("pits", (float) 0.05));
        list.add(new LexWord("friend", (float) 0.10)); // not in textbook
        list.add(new LexWord("enemy", (float) 0.10)); // not in textbook
        list.add(new LexWord("dog", (float) 0.10)); // not in textbook
        list.add(new LexWord("superhero", (float) 0.20)); // not in textbook
        list.add(new LexWord("virus", (float) 0.15)); // not in textbook
        l.put("NOUN", list);
        // Verb list
        ArrayList<LexWord> verbList = new ArrayList<LexWord>();
        verbList.add(new LexWord("is", (float) 0.10));
        verbList.add(new LexWord("feel", (float) 0.10));
        verbList.add(new LexWord("smells", (float) 0.10));
        verbList.add(new LexWord("stinks", (float) 0.05));
        verbList.add(new LexWord("wants", (float) 0.20)); // not in textbook
        verbList.add(new LexWord("flies", (float) 0.10)); // not in textbook
        verbList.add(new LexWord("keeps", (float) 0.05)); // not in textbook
        verbList.add(new LexWord("leaves", (float) 0.10)); // not in textbook
        verbList.add(new LexWord("throws", (float) 0.20)); // not in textbook
        l.put("VERB", verbList);
        // Adjective list
        ArrayList<LexWord> adjList = new ArrayList<LexWord>();
        adjList.add(new LexWord("right", (float) 0.10));
        adjList.add(new LexWord("dead", (float) 0.05));
        adjList.add(new LexWord("smelly", (float) 0.02));
        adjList.add(new LexWord("breezy", (float) 0.02));
        adjList.add(new LexWord("foul", (float) 0.10));
        adjList.add(new LexWord("black", (float) 0.05));
        adjList.add(new LexWord("white", (float) 0.05));
        adjList.add(new LexWord("callous", (float) 0.10));
        adjList.add(new LexWord("proud", (float) 0.10));
        adjList.add(new LexWord("right", (float) 0.10));
        adjList.add(new LexWord("gold", (float) 0.06));
        adjList.add(new LexWord("normal", (float) 0.25));
        l.put("ADJS", adjList);
        // Adverb list
        l.addLexWords("ADVERB", "here", "0.05", "ahead", "0.05", "nearby", "0.02",
                "quickly", "0.05", "badly", "0.05", "slowly", "0.08",
                "sadly", "0.10", "silently", "0.10", "easily", "0.10",
                "seldom", "0.10", "sometimes", "0.10", "loudly", "0.10",
                "cordially", "0.05", "frequently", "0.05");
        // Pronoun list
        l.addLexWords("PRONOUN", "me", "0.10", "you", "0.03", "i", "0.10", "it", "0.10", // remember "I" has to be lowercase "i"
                "us", "0.07", "they", "0.20", "he", "0.20", "she", "0.20");
        // Relative pronoun list
        l.addLexWords("RELPRO", "that", "0.40", "which", "0.15", "who", "0.20", "whom", "0.02",
                "whose", "0.08", "what", "0.15");
        // Name list
        l.addLexWords(buildNameLexicon());

        // Article list
        l.addLexWords("ARTICLE", "the", "0.40", "a", "0.30", "an", "0.10", "every", "0.05", "some", "0.15");

        // Preposition list
        l.addLexWords("PREP", "to", "0.20", "in", "0.10", "on", "0.05", "near", "0.10", "after", "0.10",
                "among", "0.05", "around", "0.20", "against", "0.10", "across", "0.10");

        // Conjunction list
        l.addLexWords("CONJ", "and", "0.50", "or", "0.10", "but", "0.20", "yet", "0.02", "since", "0.08",
                "unless", "0.10");

        // Digit list
        l.addLexWords("DIGIT", "0", "0.20", "1", "0.20", "2", "0.20", "3", "0.20", "4", "0.20");

        return l;
    }

    /**
     * A lexicon of names that complements the 'wumpus lexicon' above. There are 50 names,
     * each with an equal derivation probability of 2%.
     * @return the name lexicon
     */
    public static Lexicon buildNameLexicon() {
        Lexicon l = new Lexicon();
        String[] names = {"John", "Mary", "Boston", "Xiao", "Hollie", "Kendrick", "Beverly",
                "Garnet", "Zora", "Shavonda", "Peg", "Katherin", "Beatriz", "Deirdre", "Gaylord",
                "Desirae", "Tresa", "Gwyneth", "Rashida", "Garfield", "Pinkie", "Claretta", "Teressa",
                "Andy", "Eugena", "Carie", "Dinorah", "Tess", "Johnie", "Keely", "Antonetta", "Darcey",
                "Bud", "Veta", "Janey", "Rosalina", "Frederica", "Lou", "Essie", "Marinda", "Elene",
                "Juliana", "Marilyn", "Maxima", "Branden", "Ethan", "Donovan", "Erinn", "Ramon", "Jacquiline"};

        for (int i = 0; i < names.length; i++) {
            l.addLexWords("NAME", names[i], "0.02");
        }

        return l;
    }

    /**
     * A more restrictive lexicon for simple testing and demonstration.
     * @return the trivial lexicon
     */
    public static Lexicon buildTrivialLexicon() {
        Lexicon l = new Lexicon();
        l.addLexWords("ARTICLE", "the", "0.50", "a", "0.50");
        l.addLexWords("NOUN", "man", "0.20", "woman", "0.20", "table", "0.20", "shoelace", "0.20", "saw", "0.20");
        l.addLexWords("PRONOUN", "i", "0.40", "you", "0.40", "it", "0.20"); // remember "I" has to be lowercase "i"
        l.addLexWords("VERB", "saw", "0.30", "liked", "0.30", "feel", "0.40");
        l.addLexWords("ADVERB", "happily", "0.30", "sadly", "0.20", "morosely", "0.50");
        return l;
    }
}
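For reference, the two addLexWords(...) usage patterns seen above (alternating word/probability string pairs, and merging an existing Lexicon) can be reused to assemble a custom lexicon. The sketch below is illustrative only; the class name, method name, and the colour words are assumptions, but every library call mirrors one already made in this commit.

package aima.core.nlp.data.lexicons; // hypothetical placement, not part of this commit

import aima.core.nlp.parsing.Lexicon;

public class CustomLexiconSketch {

    public static Lexicon buildColourLexicon() {
        Lexicon l = new Lexicon();
        // Alternating word/probability pairs, as in buildTrivialLexicon() above;
        // probabilities within each category sum to 1.0, as in the examples.
        l.addLexWords("ADJS", "red", "0.50", "blue", "0.30", "green", "0.20");
        // Merge another lexicon, as buildWumpusLex() does with the name lexicon.
        l.addLexWords(LexiconExamples.buildNameLexicon());
        return l;
    }
}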
