Skip to content

Commit c697ebf

Browse files
committed
Add explicit unit test for error detection lookahead bug
1 parent 0fb864c commit c697ebf

File tree

1 file changed

+47
-4
lines changed

1 file changed

+47
-4
lines changed

cli/src/tests/parser_test.rs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
use super::helpers::{
22
allocations,
3+
edits::invert_edit,
34
edits::ReadRecorder,
45
fixtures::{get_language, get_test_grammar, get_test_language},
56
};
6-
use crate::generate::generate_parser_for_grammar;
7-
use crate::parse::{perform_edit, Edit};
8-
use std::sync::atomic::{AtomicUsize, Ordering};
9-
use std::{thread, time};
7+
use crate::{
8+
generate::generate_parser_for_grammar,
9+
parse::{perform_edit, Edit},
10+
};
11+
use std::{
12+
sync::atomic::{AtomicUsize, Ordering},
13+
thread, time,
14+
};
1015
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
1116

1217
#[test]
@@ -491,6 +496,44 @@ h + i
491496
);
492497
}
493498

499+
#[test]
500+
fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
501+
let mut parser = Parser::new();
502+
parser.set_language(get_language("python")).unwrap();
503+
504+
let mut source = b"a = b, 'c, d'".to_vec();
505+
let tree = parser.parse(&source, None).unwrap();
506+
assert_eq!(
507+
tree.root_node().to_sexp(),
508+
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string)))))"
509+
);
510+
511+
// Delete a suffix of the source code, starting in the middle of the string
512+
// literal, after some whitespace. With this deletion, the remaining string
513+
// content: "c, " looks like two valid python tokens: an identifier and a comma.
514+
// When this edit is undone, in order to correctly recover the original tree, the
515+
// parser needs to remember that before matching the `c` as an identifier, it
516+
// looked ahead several bytes, trying to find the closing quotation mark in
517+
// order to match the "string content" node.
518+
let edit_ix = std::str::from_utf8(&source).unwrap().find("d'").unwrap();
519+
let edit = Edit {
520+
position: edit_ix,
521+
deleted_length: source.len() - edit_ix,
522+
inserted_text: Vec::new(),
523+
};
524+
let undo = invert_edit(&source, &edit);
525+
526+
let mut tree2 = tree.clone();
527+
perform_edit(&mut tree2, &mut source, &edit);
528+
tree2 = parser.parse(&source, Some(&tree2)).unwrap();
529+
assert!(tree2.root_node().has_error());
530+
531+
let mut tree3 = tree2.clone();
532+
perform_edit(&mut tree3, &mut source, &undo);
533+
tree3 = parser.parse(&source, Some(&tree3)).unwrap();
534+
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),);
535+
}
536+
494537
// Thread safety
495538

496539
#[test]

0 commit comments

Comments
 (0)