vinayb21
diff --git a/‎aima-core/src/main/java/aima/core/nlp/data/grammars/ProbCNFGrammarExamples.java‎
Lines changed: 83 additions & 0 deletions b/‎aima-core/src/main/java/aima/core/nlp/data/grammars/ProbCNFGrammarExamples.java‎
Lines changed: 83 additions & 0 deletions
diff --git a/‎aima-core/src/main/java/aima/core/nlp/data/grammars/ProbContextFreeExamples.java‎
Lines changed: 58 additions & 0 deletions b/‎aima-core/src/main/java/aima/core/nlp/data/grammars/ProbContextFreeExamples.java‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎aima-core/src/main/java/aima/core/nlp/data/lexicons/LexiconExamples.java‎
Lines changed: 129 additions & 0 deletions b/‎aima-core/src/main/java/aima/core/nlp/data/lexicons/LexiconExamples.java‎
Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,83 @@
+package aima.core.nlp.data.grammars;
+
+import java.util.ArrayList;
+
+import aima.core.nlp.data.lexicons.LexiconExamples;
+import aima.core.nlp.parsing.Lexicon;
+import aima.core.nlp.parsing.grammars.ProbCNFGrammar;
+import aima.core.nlp.parsing.grammars.Rule;
+
+/**
+ * A store of example Probabilistic Chomsky-Normal-Form grammars for testing and 
+ * demonstrating CYK.
+ * @author Jonathon
+ *
+ */
+public class ProbCNFGrammarExamples {
+
+	/**
+	 * Builds a small abstract grammar whose variables are capital letters and
+	 * whose terminals are single lowercase letters. It is the kind of grammar
+	 * seen in Computing Theory classes and does not imitate any subset of
+	 * English phrase structure.
+	 *
+	 * @return the populated grammar, or {@code null} when
+	 *         {@code addRules} reports failure for the rule list
+	 */
+	public static ProbCNFGrammar buildExampleGrammarOne() {
+		ProbCNFGrammar grammar = new ProbCNFGrammar();
+		ArrayList<Rule> ruleList = new ArrayList<Rule>();
+
+		// Rules that rewrite a variable into a pair of variables
+		ruleList.add(new Rule("S", "Y,Z", 0.10f));
+		ruleList.add(new Rule("B", "B,D", 0.10f));
+		ruleList.add(new Rule("B", "G,D", 0.10f));
+		ruleList.add(new Rule("C", "E,C", 0.10f));
+		ruleList.add(new Rule("C", "E,H", 0.10f));
+		ruleList.add(new Rule("E", "M,N", 0.10f));
+		ruleList.add(new Rule("D", "M,N", 0.10f));
+		ruleList.add(new Rule("Y", "E,C", 0.10f));
+		ruleList.add(new Rule("Z", "E,C", 0.10f));
+
+		// Rules that rewrite a variable into a single terminal
+		ruleList.add(new Rule("M", "m", 1.0f));
+		ruleList.add(new Rule("N", "n", 1.0f));
+		ruleList.add(new Rule("B", "a", 0.25f));
+		ruleList.add(new Rule("B", "b", 0.25f));
+		ruleList.add(new Rule("B", "c", 0.25f));
+		ruleList.add(new Rule("B", "d", 0.25f));
+		ruleList.add(new Rule("G", "a", 0.50f));
+		ruleList.add(new Rule("G", "d", 0.50f));
+		ruleList.add(new Rule("C", "x", 0.20f));
+		ruleList.add(new Rule("C", "y", 0.20f));
+		ruleList.add(new Rule("C", "z", 0.60f));
+		ruleList.add(new Rule("H", "u", 0.50f));
+		ruleList.add(new Rule("H", "z", 0.50f));
+
+		// Hand the grammar back only if it accepted every rule
+		return grammar.addRules(ruleList) ? grammar : null;
+	}
+
+	/**
+	 * Builds a tightly restricted phrase-structure grammar used for testing and
+	 * demonstrating the CYK Algorithm. Its word-level rules are taken from the
+	 * "trivial lexicon" in LexiconExamples.java.
+	 *
+	 * @return the populated grammar, or {@code null} when
+	 *         {@code addRules} reports failure for the rule list
+	 */
+	public static ProbCNFGrammar buildTrivialGrammar() {
+		ProbCNFGrammar grammar = new ProbCNFGrammar();
+		ArrayList<Rule> ruleList = new ArrayList<Rule>();
+
+		// Phrase-level rules
+		ruleList.add(new Rule("S", "NP,VP", 1.0f));
+		ruleList.add(new Rule("NP", "ARTICLE,NOUN", 0.50f));
+		ruleList.add(new Rule("NP", "PRONOUN,ADVERB", 0.5f));
+		ruleList.add(new Rule("VP", "VERB,NP", 1.0f));
+
+		// Word-level rules supplied by the companion lexicon
+		Lexicon lexicon = LexiconExamples.buildTrivialLexicon();
+		ruleList.addAll(lexicon.getAllTerminalRules());
+
+		// Hand the grammar back only if it accepted every rule
+		return grammar.addRules(ruleList) ? grammar : null;
+	}
+}
@@ -0,0 +1,58 @@
+package aima.core.nlp.data.grammars;
+
+import java.util.ArrayList;
+
+import aima.core.nlp.data.lexicons.LexiconExamples;
+import aima.core.nlp.parsing.Lexicon;
+import aima.core.nlp.parsing.grammars.ProbContextFreeGrammar;
+import aima.core.nlp.parsing.grammars.Rule;
+
+/**
+ * A store of example probabilistic context-free grammars for testing and
+ * demonstration.
+ */
+public class ProbContextFreeExamples {
+
+	/**
+	 * Builds the phrase-structure rules of the wumpus-world grammar and appends
+	 * the terminal (word) rules obtained from
+	 * {@link LexiconExamples#buildWumpusLex()}.
+	 *
+	 * @return the populated grammar, or {@code null} when
+	 *         {@code addRules} reports failure for the rule list
+	 */
+	public static ProbContextFreeGrammar buildWumpusGrammar() {
+		ProbContextFreeGrammar g = new ProbContextFreeGrammar();
+		ArrayList<Rule> rules = new ArrayList<Rule>();
+		// Start Rules
+		rules.add(new Rule("S", "NP,VP", 0.90f));
+		rules.add(new Rule("S", "CONJ,S", 0.10f));
+		// Noun Phrase Rules
+		rules.add(new Rule("NP", "PRONOUN", 0.30f));
+		rules.add(new Rule("NP", "NAME", 0.10f));
+		rules.add(new Rule("NP", "NOUN", 0.10f));
+		rules.add(new Rule("NP", "ARTICLE,NOUN", 0.25f));
+		rules.add(new Rule("NP", "AP,NOUN", 0.05f));
+		rules.add(new Rule("NP", "DIGIT,DIGIT", 0.05f));
+		rules.add(new Rule("NP", "NP,PP", 0.10f));
+		rules.add(new Rule("NP", "NP,RELCLAUSE", 0.05f));
+		// Verb Phrase Rules
+		rules.add(new Rule("VP", "VERB", 0.40f));
+		rules.add(new Rule("VP", "VP,NP", 0.35f));
+		rules.add(new Rule("VP", "VP,ADJS", 0.05f));
+		rules.add(new Rule("VP", "VP,PP", 0.10f));
+		rules.add(new Rule("VP", "VP,ADVERB", 0.10f));
+		// Adjective Rules
+		// These symbols used to be spelled "AJD"/"AJDS", which matched neither the
+		// "ADJS" category of the wumpus lexicon nor any other rule in this grammar.
+		// NOTE(review): no rule visible here refers to "ADJ"; confirm whether the
+		// AP and VP rules should use it where they currently use "ADJS".
+		rules.add(new Rule("ADJ", "ADJS", 0.80f));
+		rules.add(new Rule("ADJ", "ADJ,ADJS", 0.20f));
+		// Article Phrase
+		// This deviates from the text because the text provides the rule:
+		// NP -> Article Adjs Noun, which is NOT in Chomsky Normal Form
+		//
+		// We instead define AP (Article Phrase) AP -> Article Adjs, to get around this
+		rules.add(new Rule("AP", "ARTICLE,ADJS", 1.0f));
+		// Preposition Phrase
+		rules.add(new Rule("PP", "PREP,NP", 1.00f));
+		// Relative Clause
+		rules.add(new Rule("RELCLAUSE", "RELPRO,VP", 1.00f));
+
+		// Now we can add all rules that derive terminal symbols, which are in
+		// this case words.
+		Lexicon wumpusLex = LexiconExamples.buildWumpusLex();
+		rules.addAll(wumpusLex.getAllTerminalRules());
+
+		// Hand the grammar back only if it accepted every rule
+		return g.addRules(rules) ? g : null;
+	}
+}
@@ -0,0 +1,129 @@
+package aima.core.nlp.data.lexicons;
+
+import java.util.ArrayList;
+
+import aima.core.nlp.parsing.LexWord;
+import aima.core.nlp.parsing.Lexicon;
+
+/**
+ * A store of simple lexicons for the purpose of testing and demonstrating the CYK Algorithm.
+ * @author Jonathon
+ *
+ */
+public class LexiconExamples {
+
+	/**
+	 * Builds an expanded version of the 'wumpus lexicon' found on page 891 of AIMA V3.
+	 * Entries tagged "not in textbook" are additions to that lexicon.
+	 *
+	 * @return a lexicon holding the NOUN, VERB, ADJS, ADVERB, PRONOUN, RELPRO,
+	 *         ARTICLE, PREP, CONJ and DIGIT categories, plus whatever
+	 *         {@link #buildNameLexicon()} contributes
+	 */
+	public static Lexicon buildWumpusLex() {
+		Lexicon lexicon = new Lexicon();
+
+		// Nouns
+		ArrayList<LexWord> nouns = new ArrayList<LexWord>();
+		nouns.add(new LexWord("stench", 0.05f));
+		nouns.add(new LexWord("breeze", 0.10f));
+		nouns.add(new LexWord("wumpus", 0.15f));
+		nouns.add(new LexWord("pits", 0.05f));
+		nouns.add(new LexWord("friend", 0.10f)); // not in textbook
+		nouns.add(new LexWord("enemy", 0.10f)); // not in textbook
+		nouns.add(new LexWord("dog", 0.10f)); // not in textbook
+		nouns.add(new LexWord("superhero", 0.20f)); // not in textbook
+		nouns.add(new LexWord("virus", 0.15f)); // not in textbook
+		lexicon.put("NOUN", nouns);
+
+		// Verbs
+		ArrayList<LexWord> verbs = new ArrayList<LexWord>();
+		verbs.add(new LexWord("is", 0.10f));
+		verbs.add(new LexWord("feel", 0.10f));
+		verbs.add(new LexWord("smells", 0.10f));
+		verbs.add(new LexWord("stinks", 0.05f));
+		verbs.add(new LexWord("wants", 0.20f)); // not in textbook
+		verbs.add(new LexWord("flies", 0.10f)); // not in textbook
+		verbs.add(new LexWord("keeps", 0.05f)); // not in textbook
+		verbs.add(new LexWord("leaves", 0.10f)); // not in textbook
+		verbs.add(new LexWord("throws", 0.20f)); // not in textbook
+		lexicon.put("VERB", verbs);
+
+		// Adjectives
+		// NOTE(review): "right" is listed twice below. The twelve probabilities
+		// only add up to 1.0 with both entries present, so the duplicate is kept;
+		// confirm which word the second entry was meant to be.
+		ArrayList<LexWord> adjectives = new ArrayList<LexWord>();
+		adjectives.add(new LexWord("right", 0.10f));
+		adjectives.add(new LexWord("dead", 0.05f));
+		adjectives.add(new LexWord("smelly", 0.02f));
+		adjectives.add(new LexWord("breezy", 0.02f));
+		adjectives.add(new LexWord("foul", 0.10f));
+		adjectives.add(new LexWord("black", 0.05f));
+		adjectives.add(new LexWord("white", 0.05f));
+		adjectives.add(new LexWord("callous", 0.10f));
+		adjectives.add(new LexWord("proud", 0.10f));
+		adjectives.add(new LexWord("right", 0.10f));
+		adjectives.add(new LexWord("gold", 0.06f));
+		adjectives.add(new LexWord("normal", 0.25f));
+		lexicon.put("ADJS", adjectives);
+
+		// Adverbs
+		lexicon.addLexWords("ADVERB",
+				"here", "0.05", "ahead", "0.05", "nearby", "0.02",
+				"quickly", "0.05", "badly", "0.05", "slowly", "0.08",
+				"sadly", "0.10", "silently", "0.10", "easily", "0.10",
+				"seldom", "0.10", "sometimes", "0.10", "loudly", "0.10",
+				"cordially", "0.05", "frequently", "0.05");
+
+		// Pronouns (remember "I" has to be lowercase "i")
+		lexicon.addLexWords("PRONOUN",
+				"me", "0.10", "you", "0.03", "i", "0.10", "it", "0.10",
+				"us", "0.07", "they", "0.20", "he", "0.20", "she", "0.20");
+
+		// Relative pronouns ("what" was previously misspelled "whabt")
+		lexicon.addLexWords("RELPRO",
+				"that", "0.40", "which", "0.15", "who", "0.20", "whom", "0.02",
+				"whose", "0.08", "what", "0.15");
+
+		// Names
+		lexicon.addLexWords(buildNameLexicon());
+
+		// Articles
+		lexicon.addLexWords("ARTICLE",
+				"the", "0.40", "a", "0.30", "an", "0.10", "every", "0.05", "some", "0.15");
+
+		// Prepositions
+		lexicon.addLexWords("PREP",
+				"to", "0.20", "in", "0.10", "on", "0.05", "near", "0.10", "after", "0.10",
+				"among", "0.05", "around", "0.20", "against", "0.10", "across", "0.10");
+
+		// Conjunctions
+		lexicon.addLexWords("CONJ",
+				"and", "0.50", "or", "0.10", "but", "0.20", "yet", "0.02", "since", "0.08",
+				"unless", "0.10");
+
+		// Digits
+		lexicon.addLexWords("DIGIT",
+				"0", "0.20", "1", "0.20", "2", "0.20", "3", "0.20", "4", "0.20");
+
+		return lexicon;
+	}
+
+	/**
+	 * A lexicon of names that complements the 'wumpus lexicon' above. There are 50 names,
+	 * each added under the NAME category with the same probability string "0.02" (2%).
+	 *
+	 * @return a lexicon to which only NAME entries have been added
+	 */
+	public static Lexicon buildNameLexicon() {
+		Lexicon lexicon = new Lexicon();
+		String[] names = {
+				"John", "Mary", "Boston", "Xiao", "Hollie", "Kendrick", "Beverly",
+				"Garnet", "Zora", "Shavonda", "Peg", "Katherin", "Beatriz", "Deirdre", "Gaylord",
+				"Desirae", "Tresa", "Gwyneth", "Rashida", "Garfield", "Pinkie", "Claretta", "Teressa",
+				"Andy", "Eugena", "Carie", "Dinorah", "Tess", "Johnie", "Keely", "Antonetta", "Darcey",
+				"Bud", "Veta", "Janey", "Rosalina", "Frederica", "Lou", "Essie", "Marinda", "Elene",
+				"Juliana", "Marilyn", "Maxima", "Branden", "Ethan", "Donovan", "Erinn", "Ramon", "Jacquiline"};
+
+		// One call per name, in array order
+		for (String name : names) {
+			lexicon.addLexWords("NAME", name, "0.02");
+		}
+
+		return lexicon;
+	}
+
+	/**
+	 * A more restrictive lexicon for simple testing and demonstration.
+	 * Note that "saw" is listed both as a NOUN and as a VERB.
+	 *
+	 * @return a lexicon to which ARTICLE, NOUN, PRONOUN, VERB and ADVERB entries
+	 *         have been added
+	 */
+	public static Lexicon buildTrivialLexicon() {
+		Lexicon lexicon = new Lexicon();
+		lexicon.addLexWords("ARTICLE", "the", "0.50", "a", "0.50");
+		lexicon.addLexWords("NOUN", "man", "0.20", "woman", "0.20", "table", "0.20", "shoelace", "0.20", "saw", "0.20");
+		lexicon.addLexWords("PRONOUN", "i", "0.40", "you", "0.40", "it", "0.20"); // remember "I" has to be lowercase "i"
+		lexicon.addLexWords("VERB", "saw", "0.30", "liked", "0.30", "feel", "0.40");
+		lexicon.addLexWords("ADVERB", "happily", "0.30", "sadly", "0.20", "morosely", "0.50");
+		return lexicon;
+	}
+
+}