|
| 1 | +package aima.core.nlp.data.lexicons; |
| 2 | + |
| 3 | +import java.util.ArrayList; |
| 4 | + |
| 5 | +import aima.core.nlp.parsing.LexWord; |
| 6 | +import aima.core.nlp.parsing.Lexicon; |
| 7 | + |
| 8 | +/** |
| 9 | + * A store of simple lexicon's for the purpose of testing and demonstrating the CYK Algorithm. |
| 10 | + * @author Jonathon |
| 11 | + * |
| 12 | + */ |
| 13 | +public class LexiconExamples { |
| 14 | + |
| 15 | + |
| 16 | + /** |
| 17 | + * Builds an expanded version of the 'wumpus lexicon' found on page 891 of AIMA V3 |
| 18 | + * @return |
| 19 | + */ |
| 20 | + public static Lexicon buildWumpusLex() { |
| 21 | + Lexicon l = new Lexicon(); |
| 22 | + ArrayList<LexWord> list = new ArrayList<LexWord>(); |
| 23 | + // noun list |
| 24 | + list.add( new LexWord("stench", (float)0.05)); |
| 25 | + list.add( new LexWord("breeze", (float)0.10)); |
| 26 | + list.add( new LexWord("wumpus", (float)0.15)); |
| 27 | + list.add( new LexWord("pits", (float)0.05)); |
| 28 | + list.add( new LexWord("friend", (float)0.10)); // not in textbook |
| 29 | + list.add( new LexWord("enemy", (float)0.10)); // not in textbook |
| 30 | + list.add( new LexWord("dog", (float)0.10)); // not in textbook |
| 31 | + list.add( new LexWord("superhero", (float)0.20)); // not in textbook |
| 32 | + list.add( new LexWord("virus", (float)0.15)); // not in textbook |
| 33 | + l.put("NOUN", list ); |
| 34 | + // verb list |
| 35 | + ArrayList<LexWord> verbList = new ArrayList<LexWord>(); |
| 36 | + verbList.add( new LexWord( "is", (float)0.10)); |
| 37 | + verbList.add( new LexWord( "feel", (float)0.10)); |
| 38 | + verbList.add( new LexWord( "smells", (float)0.10)); |
| 39 | + verbList.add( new LexWord( "stinks", (float)0.05)); |
| 40 | + verbList.add( new LexWord( "wants", (float)0.20)); // not in textbook |
| 41 | + verbList.add( new LexWord( "flies", (float)0.10)); // not in textbook |
| 42 | + verbList.add( new LexWord( "keeps", (float)0.05)); // not in textbook |
| 43 | + verbList.add( new LexWord( "leaves", (float)0.10)); // not in textbook |
| 44 | + verbList.add( new LexWord( "throws", (float)0.20)); // not in textbook |
| 45 | + l.put("VERB", verbList); |
| 46 | + // adjective list |
| 47 | + ArrayList<LexWord> adjList = new ArrayList<LexWord>(); |
| 48 | + adjList.add( new LexWord( "right", (float)0.10)); |
| 49 | + adjList.add( new LexWord( "dead", (float)0.05)); |
| 50 | + adjList.add( new LexWord( "smelly", (float)0.02)); |
| 51 | + adjList.add( new LexWord( "breezy", (float)0.02)); |
| 52 | + adjList.add( new LexWord( "foul", (float)0.10)); |
| 53 | + adjList.add( new LexWord( "black", (float)0.05)); |
| 54 | + adjList.add( new LexWord( "white", (float)0.05)); |
| 55 | + adjList.add( new LexWord( "callous", (float)0.10)); |
| 56 | + adjList.add( new LexWord( "proud", (float)0.10)); |
| 57 | + adjList.add( new LexWord( "right", (float)0.10)); |
| 58 | + adjList.add( new LexWord( "gold", (float)0.06)); |
| 59 | + adjList.add( new LexWord( "normal", (float)0.25)); |
| 60 | + l.put("ADJS", adjList); |
| 61 | + // Adverb list |
| 62 | + l.addLexWords("ADVERB","here","0.05","ahead","0.05","nearby","0.02", |
| 63 | + "quickly","0.05", "badly", "0.05", "slowly", "0.08", |
| 64 | + "sadly","0.10", "silently","0.10","easily","0.10", |
| 65 | + "seldom","0.10","sometimes","0.10","loudly","0.10", |
| 66 | + "cordially","0.05", "frequently","0.05"); |
| 67 | + // Pronoun list |
| 68 | + l.addLexWords("PRONOUN","me","0.10","you","0.03","i","0.10","it","0.10", // remember "I" has to be lowercase "i" |
| 69 | + "us","0.07","they","0.20","he","0.20","she","0.20"); |
| 70 | + // RelPro |
| 71 | + l.addLexWords("RELPRO", "that","0.40","which","0.15","who","0.20","whom","0.02", |
| 72 | + "whose","0.08","whabt","0.15"); |
| 73 | + // Name list |
| 74 | + l.addLexWords( buildNameLexicon() ); |
| 75 | + |
| 76 | + // Article list |
| 77 | + l.addLexWords("ARTICLE", "the","0.40","a","0.30","an","0.10","every","0.05","some","0.15"); |
| 78 | + |
| 79 | + // Prepositions list |
| 80 | + l.addLexWords("PREP", "to","0.20","in","0.10","on","0.05","near","0.10","after","0.10", |
| 81 | + "among","0.05","around","0.20","against","0.10","across","0.10"); |
| 82 | + |
| 83 | + // Conjugations list |
| 84 | + l.addLexWords("CONJ", "and","0.50","or","0.10","but","0.20","yet","0.02","since","0.08", |
| 85 | + "unless","0.10"); |
| 86 | + |
| 87 | + // Digits list |
| 88 | + l.addLexWords("DIGIT", "0","0.20","1","0.20","2","0.20","3","0.20","4","0.20"); |
| 89 | + |
| 90 | + return l; |
| 91 | + } |
| 92 | + |
| 93 | + /** |
| 94 | + * A lexicon of names that complements the 'wumpus lexicon' above. There are 50 names |
| 95 | + * of equal derivation likelihood (%2) |
| 96 | + * @return |
| 97 | + */ |
| 98 | + public static Lexicon buildNameLexicon() { |
| 99 | + Lexicon l = new Lexicon(); |
| 100 | + String[] names = {"John","Mary","Boston","Xiao","Hollie","Kendrick","Beverly" |
| 101 | + ,"Garnet","Zora","Shavonda","Peg","Katherin","Beatriz","Deirdre","Gaylord" |
| 102 | + ,"Desirae","Tresa","Gwyneth","Rashida","Garfield","Pinkie","Claretta","Teressa" |
| 103 | + ,"Andy","Eugena","Carie","Dinorah","Tess","Johnie","Keely","Antonetta","Darcey" |
| 104 | + ,"Bud","Veta","Janey","Rosalina","Frederica","Lou","Essie","Marinda","Elene" |
| 105 | + ,"Juliana","Marilyn","Maxima","Branden","Ethan","Donovan","Erinn","Ramon","Jacquiline"}; |
| 106 | + |
| 107 | + for(int i=0; i < names.length; i++ ) { |
| 108 | + l.addLexWords("NAME", names[i], "0.02"); |
| 109 | + } |
| 110 | + |
| 111 | + return l; |
| 112 | + |
| 113 | + } |
| 114 | + |
| 115 | + /** |
| 116 | + * A more restraining lexicon for simple testing and demonstration. |
| 117 | + * @return |
| 118 | + */ |
| 119 | + public static Lexicon buildTrivialLexicon() { |
| 120 | + Lexicon l = new Lexicon(); |
| 121 | + l.addLexWords("ARTICLE", "the", "0.50","a","0.50"); |
| 122 | + l.addLexWords("NOUN", "man","0.20","woman","0.20","table","0.20","shoelace","0.20","saw","0.20"); |
| 123 | + l.addLexWords("PRONOUN","i","0.40","you","0.40","it","0.20"); // remember "I" has to be lowercase "i" |
| 124 | + l.addLexWords("VERB","saw","0.30","liked","0.30","feel","0.40"); |
| 125 | + l.addLexWords("ADVERB", "happily","0.30","sadly","0.20","morosely","0.50"); |
| 126 | + return l; |
| 127 | + } |
| 128 | + |
| 129 | +} |
0 commit comments