Skip to content

Commit c57e56f

Browse files
committed
* forest.js, index.html: disambiguation.
Also refactored earley.js to pre-generate all LR(0) items.
1 parent fc9a7f7 commit c57e56f

File tree

4 files changed

+236
-107
lines changed

4 files changed

+236
-107
lines changed

earley.js

Lines changed: 51 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ var Rule, Grammar;
88
this.S = startSymbol;
99
this.rules = {};
1010
for(var i=0; i<rules.length; ++i) {
11+
instantiate_rule(rules[i], rules.length - i);
1112
var sym = rules[i].symbol;
12-
if(!this.rules.hasOwnProperty(sym)) this.rules[sym] = [];
13+
this.rules[sym] = this.rules[sym] || [];
1314
this.rules[sym].push(rules[i]);
1415
}
1516
}
@@ -20,17 +21,13 @@ var Rule, Grammar;
2021
return find_item(this.S, s0, sN);
2122
}
2223

23-
Grammar.prototype.rules_for = function(sym) {
24-
if(this.rules.hasOwnProperty(sym)) return this.rules[sym];
25-
}
26-
2724
// Feed one input symbol to the parser: `scan` is applied to (set, sym)
// and whatever it returns is handed to `process`, whose result is returned.
function parse_symbol(set, sym) {
  var scanned = scan(set, sym);
  return process(scanned);
}
2825

2926
function process(set) {
3027
do { var len = set.items.length;
3128
for(var i=0; i<set.items.length; ++i) {
3229
var item = set.items[i];
33-
if(item.tag.nextSymbol) predict(set, item.tag.nextSymbol());
30+
if(item.tag.nextSymbol) predict(set, item.tag.nextSymbol);
3431
else complete(item);
3532
}
3633
} while(set.items.length > len); // cheesy nullable-rule handling
@@ -44,17 +41,22 @@ var Rule, Grammar;
4441
}
4542

4643
function complete(c) {
47-
var items = c.start.items_waiting_for(c.tag);
44+
var items = c.start.wants[c.tag];
4845
if(items) for(var i=0; i<items.length; ++i) {
49-
var a = add_item(items[i].tag.advance(), items[i].start, c.end);
50-
add_derivation(a, items[i], c);
46+
var item = items[i], tag = item.tag;
47+
add_derivation(add_item(tag.advance, item.start, c.end),
48+
item, c, item.rule);
5149
}
5250
}
5351

5452
function predict(set, sym) {
55-
var rules = set.grammar.rules_for(sym);
53+
var rules = set.grammar.rules[sym];
5654
if(rules) for(var i=0; i<rules.length; ++i) {
57-
add_item(rules[i], set, set);
55+
var item = add_item(rules[i].advance, set, set, rules[i]);
56+
if(!item.tag.production) {
57+
var empty = add_item('', set, set);
58+
add_derivation(item, undefined, empty, item.rule);
59+
}
5860
}
5961
return set;
6062
}
@@ -71,27 +73,24 @@ var Rule, Grammar;
7173
this.wants = {}; // incomplete items by next symbol
7274
}
7375

74-
Set.prototype.items_waiting_for = function(sym) {
75-
if(this.wants.hasOwnProperty(sym+'')) return this.wants[sym];
76-
}
77-
7876

7977
// -------------------------------------------------------------------
8078
// An Item represents a step in the process of matching a piece of the
8179
// input: an input symbol, a complete derivation for a symbol, or a
8280
// partial match for a rule.
8381

84-
function Item(tag, start, end) {
82+
function Item(tag, start, end, rule) {
8583
this.tag = tag;
8684
this.start = start;
8785
this.end = end;
86+
this.rule = rule;
8887

8988
end.items.push(this);
9089

9190
if(!end.idx.hasOwnProperty(tag)) end.idx[tag] = {};
9291
end.idx[tag][start.position] = this;
9392

94-
var sym; if(sym = tag.nextSymbol && tag.nextSymbol()) {
93+
var sym; if(sym = tag.nextSymbol) {
9594
if(!end.wants.hasOwnProperty(sym)) end.wants[sym] = [];
9695
end.wants[sym].push(this);
9796
}
@@ -101,21 +100,12 @@ var Rule, Grammar;
101100
return this.start.position + '..' + this.end.position + ': ' + this.tag;
102101
}
103102

104-
function add_item(tag, start, end) {
105-
// Rules get promoted to LR(0) items with the dot at the left.
106-
if(tag.symbol) tag = new LR0Item(tag, 0);
107-
108-
// Completed LR(0) items are filed under the rule's symbol so that
109-
// they will be combined with other derivations for that symbol.
110-
if(tag.isComplete && tag.isComplete()) tag = tag.rule.symbol;
111-
112-
return find_item(tag, start, end) || new Item(tag, start, end);
103+
function add_item(tag, start, end, rule) {
104+
return find_item(tag, start, end) || new Item(tag, start, end, rule);
113105
}
114106

115107
function find_item(tag, start, end) {
116-
if(end.idx.hasOwnProperty(tag)
117-
&& end.idx[tag].hasOwnProperty(start.position))
118-
return end.idx[tag][start.position];
108+
return end.idx.hasOwnProperty(tag) && end.idx[tag][start.position];
119109
}
120110

121111

@@ -129,39 +119,41 @@ var Rule, Grammar;
129119
// - deriv/null: multiple derivations
130120

131121

132-
function Derivation(left, right, next) {
122+
function Derivation(left, right, next, rule) {
133123
this.left = left;
134124
this.right = right;
135125
this.next = next;
126+
this.rule = rule;
136127
}
137128

138-
function add_derivation(item, left, right) {
129+
function add_derivation(item, left, right, rule) {
139130
if(!(left || right)) return;
140131

141-
// remove trivial nodes on the left.
142-
if(left && left.tag && left.tag.hasOwnProperty('dot')
143-
&& left.tag.dot <= 1) left = left.right;
132+
// remove trivial nodes on left
133+
if(left && left.tag && left.tag.production && left.tag.dot <= 1) {
134+
if(left.tag.dot <= 1) left = left.right;
135+
}
144136

145-
if(!(item.left || item.right)) set_derivation(item, left, right);
146-
else if(item.right) add_second_derivation(item, left, right);
147-
else add_another_derivation(item, left, right);
137+
if(!(item.left || item.right)) set_derivation(item, left, right, rule);
138+
else if(item.right) add_second_derivation(item, left, right, rule);
139+
else add_another_derivation(item, left, right, rule);
148140
}
149141

150-
function set_derivation(i, l, r) { i.left = l; i.right = r; }
142+
// Store a single derivation directly on `i` by overwriting its `left`,
// `right` and `rule` properties with the given values. Returns nothing.
function set_derivation(i, l, r, rule) {
  i.left = l;
  i.right = r;
  i.rule = rule;
}
151143

152144
// Report whether `i` already holds the derivation (l, r).
// Both sides are compared by their string form (`x + ''`), not by object
// identity; the right-hand parts are only coerced when the left-hand
// parts already match.
function same_derivation(i, l, r) {
  if (i.left + '' !== l + '') return false;
  return i.right + '' === r + '';
}
153145

154-
function add_second_derivation(i, l, r) {
146+
function add_second_derivation(i, l, r, rule) {
155147
if(!same_derivation(i, l, r)) {
156-
var old = new Derivation(i.left, i.right, null);
157-
i.left = new Derivation(l, r, old);
158-
delete(i.right);
148+
var old = new Derivation(i.left, i.right, null, i.rule);
149+
i.left = new Derivation(l, r, old, rule);
150+
delete(i.right); delete(i.rule);
159151
}
160152
}
161153

162-
function add_another_derivation(i, l, r) {
154+
function add_another_derivation(i, l, r, rule) {
163155
var d=i; while(d=d.next || d.left) if(same_derivation(d, l, r)) return;
164-
i.left = new Derivation(l, r, i.left);
156+
i.left = new Derivation(l, r, i.left, rule);
165157
}
166158

167159

@@ -172,39 +164,25 @@ var Rule, Grammar;
172164
this.production = production;
173165
}
174166

175-
Rule.prototype.toString = function(dot) {
176-
var s = this.symbol + ' ->';
177-
for(var i=0; i<this.production.length; ++i) {
178-
s += (i===dot ? ' * ' : ' ') + this.production[i];
167+
function instantiate_rule(rule, priority) {
168+
rule.priority = priority;
169+
var n = rule.production.length;
170+
for(var i=0; i<n; ++i) {
171+
rule.advance = new Rule(rule.symbol, rule.production);
172+
rule = rule.advance;
173+
rule.priority = priority;
174+
rule.dot = i;
175+
rule.nextSymbol = rule.production[i];
179176
}
180-
return s;
177+
rule.advance = rule.symbol;
181178
}
182179

183-
// An LR(0) item represents a rule which has been partially matched.
184-
// The "dot" indicates how many of its symbols have been recognized.
185-
function LR0Item(rule, nParsed) {
186-
this.rule = rule;
187-
this.dot = nParsed; // [0 .. production.length]
188-
}
189-
190-
LR0Item.prototype.toString = function() {
191-
return this.rule.toString(this.dot);
192-
}
193-
194-
LR0Item.prototype.isComplete = function() {
195-
return this.dot >= this.rule.production.length;
196-
}
197-
198-
LR0Item.prototype.advance = function() {
199-
if(!this.advanced) {
200-
if(this.isComplete()) this.advanced = this;
201-
else this.advanced = new LR0Item(this.rule, 1 + this.dot);
180+
Rule.prototype.toString = function() {
181+
var s = this.symbol + ' -> ' + (this.dot===0 ? ' * ' : '');
182+
for(var i=0; i<this.production.length; ++i) {
183+
s += this.production[i] + (i+1===this.dot ? ' * ' : ' ');
202184
}
203-
return this.advanced;
204-
}
205-
206-
LR0Item.prototype.nextSymbol = function() {
207-
if(!this.isComplete()) return this.rule.production[this.dot];
185+
return s;
208186
}
209187

210188
})();

forest.js

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,73 @@
1-
var forest_to_html, process_forest;
1+
var disambiguate, prioritized_tree;
2+
var process_forest, forest_to_html;
23
(function(){
34
'use strict';
45

5-
function is_intermediate(f) { return f && f.tag && f.tag.rule; }
6-
function is_ambiguous(f) { return f && f.hasOwnProperty('next'); }
6+
7+
// -------------------------------------------------------------------
8+
// Choose parse tree from forest by rule priority
9+
10+
// A node is treated as ambiguous when it has a `left` but no `right`
// (add_second_derivation in earley.js moves the alternatives into `left`
// and deletes `right`).
// Returns `true`/`false` when both `f` and `f.left` are truthy; otherwise
// the falsy `f` or falsy `f.left` is returned unchanged.
function is_ambiguous(f) {
  if (!f) return f;
  var left = f.left;
  if (!left) return left;
  return !f.right;
}
11+
12+
// Resolve every ambiguous node in the forest rooted at `f`, in place.
//
//   f   - forest node (item or derivation); may be missing.
//   gt  - gt(ruleA, ruleB) returns truthy when ruleA is preferred over
//         ruleB; it is only consulted through cmp().
//
// For an ambiguous node the derivation list in `f.left` is walked via
// `next`, the candidate that cmp() ranks highest is kept, and its
// left/right/rule are copied onto `f`. The children of the surviving
// derivation are then disambiguated recursively.
//
// Returns `f` itself. A missing node (undefined, null, or the `false`
// produced by a failed lookup) is returned unchanged instead of being
// dereferenced.
disambiguate = function disambiguate(f, gt) {
  if(!f) return f;

  if(is_ambiguous(f)) {
    var best = f.left;
    for(var d = best.next; d; d = d.next) {
      // strict '>' keeps the earlier candidate when two compare equal
      if(cmp(d, best, gt) > 0) best = d;
    }
    f.left = best.left;
    f.right = best.right;
    f.rule = best.rule;
  }

  // disambiguate child derivations
  if(f.left) disambiguate(f.left, gt);
  if(f.right) disambiguate(f.right, gt);
  return f;
};
27+
28+
// Collapse the derivation list stored in `f.left` down to one derivation.
// The list is walked through `next`; a later candidate replaces the
// current winner only when cmp() ranks it strictly higher. The winner's
// left/right/rule are then copied onto `f`.
// Side effect: cmp() disambiguates both derivations it is given, so the
// children of every derivation that takes part in a comparison are
// disambiguated along the way.
function choose_derivation_for(f, gt) {
  var winner = f.left;
  for(var candidate = winner.next; candidate; candidate = candidate.next) {
    if(cmp(candidate, winner, gt) > 0) winner = candidate;
  }
  f.left = winner.left;
  f.right = winner.right;
  f.rule = winner.rule;
}
39+
40+
// Three-way comparison of two derivations (or items) under rule order `gt`.
//
//   a, b - nodes to compare; either may be missing.
//   gt   - gt(ruleA, ruleB) returns truthy when ruleA is preferred.
//
// Returns 1 when `a` is preferred, -1 when `b` is preferred, 0 when no
// difference is found. Both nodes are disambiguated first (a side effect
// on the forest). Nodes with the same rule are ordered by their left
// children and, if those tie, by their right children.
//
// When only one side is present there is no rule to consult on the other
// side, so the present node is ranked above the missing one rather than
// dereferencing `undefined`.
function cmp(a, b, gt) {
  if(!a && !b) return 0;
  if(!a) return -1;
  if(!b) return 1;

  disambiguate(a, gt);
  disambiguate(b, gt);

  if(a.rule == b.rule) {
    return cmp(a.left, b.left, gt)
        || cmp(a.right, b.right, gt);
  }
  return gt(a.rule, b.rule) ? 1 : -1;
}
52+
53+
// Pick a single parse tree out of forest `f` by running disambiguate()
// with higher_priority as the rule ordering. Returns what disambiguate()
// returns.
prioritized_tree = function prioritized_tree(f) {
  var tree = disambiguate(f, higher_priority);
  return tree;
};
56+
57+
// Rule ordering used by prioritized_tree: true when rule `a` has a
// strictly greater `priority` than rule `b`. (earley.js assigns
// priority = rules.length - index, so earlier rules rank higher.)
function higher_priority(a, b) {
  var priorityA = a.priority;
  var priorityB = b.priority;
  return priorityA > priorityB;
}
58+
59+
60+
61+
// -------------------------------------------------------------------
62+
// Forest walker
63+
64+
// Truthy when `f` has a tag that carries a `symbol` property; presumably
// that means the tag is a (dotted) Rule object rather than a plain symbol
// string - confirm against earley.js.
// The value returned is the first falsy operand of
// `f`, `f.tag`, `f.tag.symbol`, or else `f.tag.symbol` itself.
function is_intermediate(f) {
  if(!f) return f;
  var tag = f.tag;
  return tag && tag.symbol;
}
65+
// Tells a derivation-list node from other forest nodes by checking for an
// own `next` property (the Derivation constructor in earley.js always
// assigns one, even when it is null).
// A falsy `f` is returned unchanged; otherwise the result is a boolean.
function is_derivation_list(f) {
  if(!f) return f;
  return f.hasOwnProperty('next');
}
766

867
process_forest = function process_forest(f, fns) {
968
if(!f) return;
1069
var result;
11-
if(is_ambiguous(f)) {
70+
if(is_derivation_list(f)) {
1271
result = [];
1372
while(f) {
1473
result.push(fns.derivation(collect_children(f, fns)));

0 commit comments

Comments
 (0)