Skip to content

Commit 3497f34

Browse files
committed
Fix parser-generation bugs introduced in tree-sitter#782
1 parent cced66c commit 3497f34

File tree

2 files changed: 62 additions & 64 deletions

cli/src/generate/node_types.rs

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl ChildQuantity {
146146
pub(crate) fn get_variable_info(
147147
syntax_grammar: &SyntaxGrammar,
148148
lexical_grammar: &LexicalGrammar,
149-
simple_aliases: &AliasMap,
149+
default_aliases: &AliasMap,
150150
) -> Result<Vec<VariableInfo>> {
151151
let child_type_is_visible = |t: &ChildType| {
152152
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
@@ -185,7 +185,7 @@ pub(crate) fn get_variable_info(
185185
let child_symbol = step.symbol;
186186
let child_type = if let Some(alias) = &step.alias {
187187
ChildType::Aliased(alias.clone())
188-
} else if let Some(alias) = simple_aliases.get(&step.symbol) {
188+
} else if let Some(alias) = default_aliases.get(&step.symbol) {
189189
ChildType::Aliased(alias.clone())
190190
} else {
191191
ChildType::Normal(child_symbol)
@@ -358,7 +358,7 @@ pub(crate) fn get_variable_info(
358358
pub(crate) fn generate_node_types_json(
359359
syntax_grammar: &SyntaxGrammar,
360360
lexical_grammar: &LexicalGrammar,
361-
simple_aliases: &AliasMap,
361+
default_aliases: &AliasMap,
362362
variable_info: &Vec<VariableInfo>,
363363
) -> Vec<NodeInfoJSON> {
364364
let mut node_types_json = BTreeMap::new();
@@ -369,7 +369,7 @@ pub(crate) fn generate_node_types_json(
369369
named: alias.is_named,
370370
},
371371
ChildType::Normal(symbol) => {
372-
if let Some(alias) = simple_aliases.get(&symbol) {
372+
if let Some(alias) = default_aliases.get(&symbol) {
373373
NodeTypeJSON {
374374
kind: alias.value.clone(),
375375
named: alias.is_named,
@@ -417,15 +417,15 @@ pub(crate) fn generate_node_types_json(
417417
};
418418

419419
let mut aliases_by_symbol = HashMap::new();
420-
for (symbol, alias) in simple_aliases {
420+
for (symbol, alias) in default_aliases {
421421
aliases_by_symbol.insert(*symbol, {
422422
let mut aliases = HashSet::new();
423423
aliases.insert(Some(alias.clone()));
424424
aliases
425425
});
426426
}
427427
for extra_symbol in &syntax_grammar.extra_symbols {
428-
if !simple_aliases.contains_key(extra_symbol) {
428+
if !default_aliases.contains_key(extra_symbol) {
429429
aliases_by_symbol
430430
.entry(*extra_symbol)
431431
.or_insert(HashSet::new())
@@ -435,12 +435,15 @@ pub(crate) fn generate_node_types_json(
435435
for variable in &syntax_grammar.variables {
436436
for production in &variable.productions {
437437
for step in &production.steps {
438-
if !simple_aliases.contains_key(&step.symbol) {
439-
aliases_by_symbol
440-
.entry(step.symbol)
441-
.or_insert(HashSet::new())
442-
.insert(step.alias.clone());
443-
}
438+
aliases_by_symbol
439+
.entry(step.symbol)
440+
.or_insert(HashSet::new())
441+
.insert(
442+
step.alias
443+
.as_ref()
444+
.or_else(|| default_aliases.get(&step.symbol))
445+
.cloned(),
446+
);
444447
}
445448
}
446449
}
@@ -1808,14 +1811,14 @@ mod tests {
18081811
}
18091812

18101813
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
1811-
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
1814+
let (syntax_grammar, lexical_grammar, _, default_aliases) =
18121815
prepare_grammar(&grammar).unwrap();
18131816
let variable_info =
1814-
get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases).unwrap();
1817+
get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
18151818
generate_node_types_json(
18161819
&syntax_grammar,
18171820
&lexical_grammar,
1818-
&simple_aliases,
1821+
&default_aliases,
18191822
&variable_info,
18201823
)
18211824
}

cli/src/generate/render.rs

Lines changed: 44 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -143,49 +143,6 @@ impl Generator {
143143
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
144144
}
145145

146-
let mut field_names = Vec::new();
147-
for production_info in &self.parse_table.production_infos {
148-
for field_name in production_info.field_map.keys() {
149-
field_names.push(field_name);
150-
}
151-
152-
for alias in &production_info.alias_sequence {
153-
if let Some(alias) = &alias {
154-
let alias_kind = alias.kind();
155-
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
156-
let (name, kind) = self.metadata_for_symbol(*symbol);
157-
name == alias.value && kind == alias_kind
158-
});
159-
let alias_id = if let Some(symbol) = matching_symbol {
160-
self.symbol_ids[&symbol].clone()
161-
} else if alias.is_named {
162-
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
163-
} else {
164-
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
165-
};
166-
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
167-
}
168-
}
169-
}
170-
171-
self.unique_aliases = self
172-
.alias_ids
173-
.keys()
174-
.filter(|alias| {
175-
self.parse_table
176-
.symbols
177-
.iter()
178-
.cloned()
179-
.find(|symbol| {
180-
let (name, kind) = self.metadata_for_symbol(*symbol);
181-
name == alias.value && kind == alias.kind()
182-
})
183-
.is_none()
184-
})
185-
.cloned()
186-
.collect();
187-
self.unique_aliases.sort_unstable();
188-
189146
self.symbol_map = self
190147
.parse_table
191148
.symbols
@@ -230,13 +187,51 @@ impl Generator {
230187
})
231188
.collect();
232189

233-
field_names.sort_unstable();
234-
field_names.dedup();
235-
self.field_names = field_names.into_iter().cloned().collect();
190+
for production_info in &self.parse_table.production_infos {
191+
// Build a list of all field names
192+
for field_name in production_info.field_map.keys() {
193+
if let Err(i) = self.field_names.binary_search(&field_name) {
194+
self.field_names.insert(i, field_name.clone());
195+
}
196+
}
197+
198+
for alias in &production_info.alias_sequence {
199+
// Generate a mapping from aliases to C identifiers.
200+
if let Some(alias) = &alias {
201+
let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
202+
if let Some(default_alias) = self.default_aliases.get(symbol) {
203+
default_alias == alias
204+
} else {
205+
let (name, kind) = self.metadata_for_symbol(*symbol);
206+
name == alias.value && kind == alias.kind()
207+
}
208+
});
209+
210+
// Some aliases match an existing symbol in the grammar.
211+
let alias_id;
212+
if let Some(existing_symbol) = existing_symbol {
213+
alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
214+
}
215+
// Other aliases don't match any existing symbol, and need their own identifiers.
216+
else {
217+
if let Err(i) = self.unique_aliases.binary_search(alias) {
218+
self.unique_aliases.insert(i, alias.clone());
219+
}
220+
221+
alias_id = if alias.is_named {
222+
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
223+
} else {
224+
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
225+
};
226+
}
227+
228+
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
229+
}
230+
}
231+
}
236232

237-
// If we are opting in to the new unstable language ABI, then use the concept of
238-
// "small parse states". Otherwise, use the same representation for all parse
239-
// states.
233+
// Determine which states should use the "small state" representation, and which should
234+
// use the normal array representation.
240235
let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
241236
self.large_state_count = self
242237
.parse_table

0 commit comments

Comments
 (0)