Skip to content

Commit 1b86cce

Browse files
authored
Merge pull request tree-sitter#1845 from tree-sitter/root-node-with-offset
Add API for applying a positional offset when accessing a tree's nodes
2 parents 20d44ed + 53ed4cf commit 1b86cce

File tree

9 files changed

+179
-46
lines changed

9 files changed

+179
-46
lines changed

cli/src/tests/corpus_test.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,9 +384,26 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
384384
}
385385

386386
fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> {
387-
let changed_ranges = old_tree.changed_ranges(new_tree).collect();
387+
let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
388388
let old_scope_sequence = ScopeSequence::new(old_tree);
389389
let new_scope_sequence = ScopeSequence::new(new_tree);
390+
391+
let old_range = old_tree.root_node().range();
392+
let new_range = new_tree.root_node().range();
393+
let byte_range =
394+
old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
395+
let point_range = old_range.start_point.min(new_range.start_point)
396+
..old_range.end_point.max(new_range.end_point);
397+
398+
for range in &changed_ranges {
399+
if range.end_byte > byte_range.end || range.end_point > point_range.end {
400+
return Err(format!(
401+
"changed range extends outside of the old and new trees {:?}",
402+
range
403+
));
404+
}
405+
}
406+
390407
old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
391408
}
392409

cli/src/tests/node_test.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,34 @@ fn test_node_edit() {
529529
}
530530
}
531531

532+
#[test]
533+
fn test_root_node_with_offset() {
534+
let mut parser = Parser::new();
535+
parser.set_language(get_language("javascript")).unwrap();
536+
let tree = parser.parse("  if (a) b", None).unwrap();
537+
538+
let node = tree.root_node_with_offset(6, Point::new(2, 2));
539+
assert_eq!(node.byte_range(), 8..16);
540+
assert_eq!(node.start_position(), Point::new(2, 4));
541+
assert_eq!(node.end_position(), Point::new(2, 12));
542+
543+
let child = node.child(0).unwrap().child(2).unwrap();
544+
assert_eq!(child.kind(), "expression_statement");
545+
assert_eq!(child.byte_range(), 15..16);
546+
assert_eq!(child.start_position(), Point::new(2, 11));
547+
assert_eq!(child.end_position(), Point::new(2, 12));
548+
549+
let mut cursor = node.walk();
550+
cursor.goto_first_child();
551+
cursor.goto_first_child();
552+
cursor.goto_next_sibling();
553+
let child = cursor.node();
554+
assert_eq!(child.kind(), "parenthesized_expression");
555+
assert_eq!(child.byte_range(), 11..14);
556+
assert_eq!(child.start_position(), Point::new(2, 7));
557+
assert_eq!(child.end_position(), Point::new(2, 10));
558+
}
559+
532560
#[test]
533561
fn test_node_is_extra() {
534562
let mut parser = Parser::new();

cli/src/tests/tree_test.rs

Lines changed: 68 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,11 @@ fn test_tree_edit() {
8989
let child2 = expr.child(1).unwrap();
9090

9191
assert!(expr.has_changes());
92-
assert_eq!(expr.start_byte(), 4);
93-
assert_eq!(expr.end_byte(), 17);
92+
assert_eq!(expr.byte_range(), 4..17);
9493
assert!(child1.has_changes());
95-
assert_eq!(child1.start_byte(), 4);
96-
assert_eq!(child1.end_byte(), 7);
94+
assert_eq!(child1.byte_range(), 4..7);
9795
assert!(!child2.has_changes());
98-
assert_eq!(child2.start_byte(), 9);
99-
assert_eq!(child2.end_byte(), 12);
96+
assert_eq!(child2.byte_range(), 9..12);
10097
}
10198

10299
// replacement starting at the edge of the tree's padding:
@@ -117,14 +114,11 @@ fn test_tree_edit() {
117114
let child2 = expr.child(1).unwrap();
118115

119116
assert!(expr.has_changes());
120-
assert_eq!(expr.start_byte(), 4);
121-
assert_eq!(expr.end_byte(), 17);
117+
assert_eq!(expr.byte_range(), 4..17);
122118
assert!(child1.has_changes());
123-
assert_eq!(child1.start_byte(), 4);
124-
assert_eq!(child1.end_byte(), 7);
119+
assert_eq!(child1.byte_range(), 4..7);
125120
assert!(!child2.has_changes());
126-
assert_eq!(child2.start_byte(), 9);
127-
assert_eq!(child2.end_byte(), 12);
121+
assert_eq!(child2.byte_range(), 9..12);
128122
}
129123

130124
// deletion that spans more than one child node:
@@ -146,17 +140,13 @@ fn test_tree_edit() {
146140
let child3 = expr.child(2).unwrap();
147141

148142
assert!(expr.has_changes());
149-
assert_eq!(expr.start_byte(), 4);
150-
assert_eq!(expr.end_byte(), 8);
143+
assert_eq!(expr.byte_range(), 4..8);
151144
assert!(child1.has_changes());
152-
assert_eq!(child1.start_byte(), 4);
153-
assert_eq!(child1.end_byte(), 4);
145+
assert_eq!(child1.byte_range(), 4..4);
154146
assert!(child2.has_changes());
155-
assert_eq!(child2.start_byte(), 4);
156-
assert_eq!(child2.end_byte(), 4);
147+
assert_eq!(child2.byte_range(), 4..4);
157148
assert!(child3.has_changes());
158-
assert_eq!(child3.start_byte(), 5);
159-
assert_eq!(child3.end_byte(), 8);
149+
assert_eq!(child3.byte_range(), 5..8);
160150
}
161151

162152
// insertion at the end of the tree:
@@ -178,14 +168,67 @@ fn test_tree_edit() {
178168
let child3 = expr.child(2).unwrap();
179169

180170
assert!(expr.has_changes());
181-
assert_eq!(expr.start_byte(), 2);
182-
assert_eq!(expr.end_byte(), 16);
171+
assert_eq!(expr.byte_range(), 2..16);
183172
assert!(!child1.has_changes());
184-
assert_eq!(child1.end_byte(), 5);
173+
assert_eq!(child1.byte_range(), 2..5);
185174
assert!(!child2.has_changes());
186-
assert_eq!(child2.end_byte(), 10);
175+
assert_eq!(child2.byte_range(), 7..10);
176+
assert!(child3.has_changes());
177+
assert_eq!(child3.byte_range(), 12..16);
178+
}
179+
180+
// replacement that starts within a token and extends beyond the end of the tree:
181+
// resize the token and empty out any subsequent child nodes.
182+
{
183+
let mut tree = tree.clone();
184+
tree.edit(&InputEdit {
185+
start_byte: 3,
186+
old_end_byte: 90,
187+
new_end_byte: 4,
188+
start_position: Point::new(0, 3),
189+
old_end_position: Point::new(0, 90),
190+
new_end_position: Point::new(0, 4),
191+
});
192+
193+
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
194+
let child1 = expr.child(0).unwrap();
195+
let child2 = expr.child(1).unwrap();
196+
let child3 = expr.child(2).unwrap();
197+
assert_eq!(expr.byte_range(), 2..4);
198+
assert!(expr.has_changes());
199+
assert_eq!(child1.byte_range(), 2..4);
200+
assert!(child1.has_changes());
201+
assert_eq!(child2.byte_range(), 4..4);
202+
assert!(child2.has_changes());
203+
assert_eq!(child3.byte_range(), 4..4);
204+
assert!(child3.has_changes());
205+
}
206+
207+
// replacement that starts in whitespace and extends beyond the end of the tree:
208+
// shift the token's start position and empty out its content.
209+
{
210+
let mut tree = tree.clone();
211+
tree.edit(&InputEdit {
212+
start_byte: 6,
213+
old_end_byte: 90,
214+
new_end_byte: 8,
215+
start_position: Point::new(0, 6),
216+
old_end_position: Point::new(0, 90),
217+
new_end_position: Point::new(0, 8),
218+
});
219+
220+
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
221+
let child1 = expr.child(0).unwrap();
222+
let child2 = expr.child(1).unwrap();
223+
let child3 = expr.child(2).unwrap();
224+
assert_eq!(expr.byte_range(), 2..8);
225+
assert!(expr.has_changes());
226+
assert_eq!(child1.byte_range(), 2..5);
227+
assert!(!child1.has_changes());
228+
assert_eq!(child2.byte_range(), 8..8);
229+
assert!(child2.has_changes());
230+
assert_eq!(child3.byte_range(), 8..8);
187231
assert!(child3.has_changes());
188-
assert_eq!(child3.end_byte(), 16);
189232
}
190233
}
191234

lib/binding_rust/bindings.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,15 @@ extern "C" {
332332
#[doc = " Get the root node of the syntax tree."]
333333
pub fn ts_tree_root_node(self_: *const TSTree) -> TSNode;
334334
}
335+
extern "C" {
336+
#[doc = " Get the root node of the syntax tree, but with its position"]
337+
#[doc = " shifted forward by the given offset."]
338+
pub fn ts_tree_root_node_with_offset(
339+
self_: *const TSTree,
340+
offset_bytes: u32,
341+
offset_point: TSPoint,
342+
) -> TSNode;
343+
}
335344
extern "C" {
336345
#[doc = " Get the language that was used to parse the syntax tree."]
337346
pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage;

lib/binding_rust/lib.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,20 @@ impl Tree {
708708
Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap()
709709
}
710710

711+
/// Get the root node of the syntax tree, but with its position shifted
712+
/// forward by the given offset.
713+
#[doc(alias = "ts_tree_root_node_with_offset")]
714+
pub fn root_node_with_offset(&self, offset_bytes: usize, offset_extent: Point) -> Node {
715+
Node::new(unsafe {
716+
ffi::ts_tree_root_node_with_offset(
717+
self.0.as_ptr(),
718+
offset_bytes as u32,
719+
offset_extent.into(),
720+
)
721+
})
722+
.unwrap()
723+
}
724+
711725
/// Get the language that was used to parse the syntax tree.
712726
#[doc(alias = "ts_tree_language")]
713727
pub fn language(&self) -> Language {

lib/include/tree_sitter/api.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,16 @@ void ts_tree_delete(TSTree *self);
366366
*/
367367
TSNode ts_tree_root_node(const TSTree *self);
368368

369+
/**
370+
* Get the root node of the syntax tree, but with its position
371+
* shifted forward by the given offset.
372+
*/
373+
TSNode ts_tree_root_node_with_offset(
374+
const TSTree *self,
375+
uint32_t offset_bytes,
376+
TSPoint offset_point
377+
);
378+
369379
/**
370380
* Get the language that was used to parse the syntax tree.
371381
*/

lib/src/length.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,12 @@ static inline Length length_zero(void) {
4141
return result;
4242
}
4343

44+
static inline Length length_saturating_sub(Length len1, Length len2) {
45+
if (len1.bytes > len2.bytes) {
46+
return length_sub(len1, len2);
47+
} else {
48+
return length_zero();
49+
}
50+
}
51+
4452
#endif

lib/src/subtree.c

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -666,8 +666,9 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
666666

667667
Length size = ts_subtree_size(*entry.tree);
668668
Length padding = ts_subtree_padding(*entry.tree);
669+
Length total_size = length_add(padding, size);
669670
uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
670-
uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes;
671+
uint32_t end_byte = total_size.bytes + lookahead_bytes;
671672
if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
672673

673674
// If the edit is entirely within the space before this subtree, then shift this
@@ -679,7 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
679680
// If the edit starts in the space before this subtree and extends into this subtree,
680681
// shrink the subtree's content to compensate for the change in the space before it.
681682
else if (edit.start.bytes < padding.bytes) {
682-
size = length_sub(size, length_sub(edit.old_end, padding));
683+
size = length_saturating_sub(size, length_sub(edit.old_end, padding));
683684
padding = edit.new_end;
684685
}
685686

@@ -690,15 +691,14 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
690691
}
691692

692693
// If the edit is within this subtree, resize the subtree to reflect the edit.
693-
else {
694-
uint32_t total_bytes = padding.bytes + size.bytes;
695-
if (edit.start.bytes < total_bytes ||
696-
(edit.start.bytes == total_bytes && is_pure_insertion)) {
697-
size = length_add(
698-
length_sub(edit.new_end, padding),
699-
length_sub(size, length_sub(edit.old_end, padding))
700-
);
701-
}
694+
else if (
695+
edit.start.bytes < total_size.bytes ||
696+
(edit.start.bytes == total_size.bytes && is_pure_insertion)
697+
) {
698+
size = length_add(
699+
length_sub(edit.new_end, padding),
700+
length_saturating_sub(total_size, edit.old_end)
701+
);
702702
}
703703

704704
MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
@@ -764,17 +764,11 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool
764764

765765
// Transform edit into the child's coordinate space.
766766
Edit child_edit = {
767-
.start = length_sub(edit.start, child_left),
768-
.old_end = length_sub(edit.old_end, child_left),
769-
.new_end = length_sub(edit.new_end, child_left),
767+
.start = length_saturating_sub(edit.start, child_left),
768+
.old_end = length_saturating_sub(edit.old_end, child_left),
769+
.new_end = length_saturating_sub(edit.new_end, child_left),
770770
};
771771

772-
// Clamp child_edit to the child's bounds.
773-
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
774-
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
775-
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
776-
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
777-
778772
// Interpret all inserted text as applying to the *first* child that touches the edit.
779773
// Subsequent children never have any text inserted into them; they are only
780774
// shrunk to compensate for the edit.

lib/src/tree.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "tree_sitter/api.h"
22
#include "./array.h"
33
#include "./get_changed_ranges.h"
4+
#include "./length.h"
45
#include "./subtree.h"
56
#include "./tree_cursor.h"
67
#include "./tree.h"
@@ -37,6 +38,15 @@ TSNode ts_tree_root_node(const TSTree *self) {
3738
return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
3839
}
3940

41+
TSNode ts_tree_root_node_with_offset(
42+
const TSTree *self,
43+
uint32_t offset_bytes,
44+
TSPoint offset_extent
45+
) {
46+
Length offset = {offset_bytes, offset_extent};
47+
return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0);
48+
}
49+
4050
const TSLanguage *ts_tree_language(const TSTree *self) {
4151
return self->language;
4252
}

0 commit comments

Comments
 (0)