Skip to content

Commit 16376c4

Browse files
author
Max Brunsfeld
authored
Merge pull request tree-sitter#183 from tree-sitter/detect-included-range-boundaries
Add lexer API for detecting boundaries of included ranges
2 parents d544122 + 9ecb206 commit 16376c4

File tree

11 files changed

+95
-50
lines changed

11 files changed

+95
-50
lines changed

include/tree_sitter/parser.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,16 @@ typedef struct {
2525
bool named : 1;
2626
} TSSymbolMetadata;
2727

28-
typedef struct {
29-
void (*advance)(void *, bool);
30-
void (*mark_end)(void *);
31-
uint32_t (*get_column)(void *);
28+
typedef struct TSLexer TSLexer;
29+
30+
struct TSLexer {
3231
int32_t lookahead;
3332
TSSymbol result_symbol;
34-
} TSLexer;
33+
void (*advance)(TSLexer *, bool);
34+
void (*mark_end)(TSLexer *);
35+
uint32_t (*get_column)(TSLexer *);
36+
bool (*is_at_included_range_start)(TSLexer *);
37+
};
3538

3639
typedef enum {
3740
TSParseActionTypeShift,

include/tree_sitter/runtime.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ extern "C" {
1010
#include <stdint.h>
1111
#include <stdbool.h>
1212

13-
#define TREE_SITTER_LANGUAGE_VERSION 8
13+
#define TREE_SITTER_LANGUAGE_VERSION 9
1414

1515
typedef uint16_t TSSymbol;
1616
typedef struct TSLanguage TSLanguage;

script/fetch-fixtures

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ fetch_grammar() {
1515
fi
1616

1717
(
18-
cd $grammar_dir;
18+
cd $grammar_dir
1919
git fetch origin $ref --depth=1
20-
git reset --hard origin/$ref;
20+
git reset --hard FETCH_HEAD
2121
)
2222
}
2323

24-
fetch_grammar javascript master
24+
fetch_grammar javascript included-range-boundaries
2525
fetch_grammar json master
2626
fetch_grammar c master
2727
fetch_grammar cpp master

script/fetch-fixtures.cmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
@echo off
22

3-
call:fetch_grammar javascript master
3+
call:fetch_grammar javascript included-range-boundaries
44
call:fetch_grammar json master
55
call:fetch_grammar c master
66
call:fetch_grammar cpp master
@@ -22,6 +22,6 @@ SET grammar_branch=%~2
2222
)
2323
pushd %grammar_dir%
2424
git fetch origin %2 --depth=1
25-
git reset --hard origin/%grammar_branch%
25+
git reset --hard FETCH_HEAD
2626
popd
2727
EXIT /B 0

src/runtime/lexer.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ static void ts_lexer__get_lookahead(Lexer *self) {
5050
}
5151
}
5252

53-
static void ts_lexer__advance(void *payload, bool skip) {
53+
static void ts_lexer__advance(TSLexer *payload, bool skip) {
5454
Lexer *self = (Lexer *)payload;
5555
if (self->chunk == empty_chunk)
5656
return;
@@ -95,7 +95,7 @@ static void ts_lexer__advance(void *payload, bool skip) {
9595
ts_lexer__get_lookahead(self);
9696
}
9797

98-
static void ts_lexer__mark_end(void *payload) {
98+
static void ts_lexer__mark_end(TSLexer *payload) {
9999
Lexer *self = (Lexer *)payload;
100100
TSRange *current_included_range = &self->included_ranges[self->current_included_range_index];
101101
if (self->current_included_range_index > 0 &&
@@ -110,7 +110,7 @@ static void ts_lexer__mark_end(void *payload) {
110110
}
111111
}
112112

113-
static uint32_t ts_lexer__get_column(void *payload) {
113+
static uint32_t ts_lexer__get_column(TSLexer *payload) {
114114
Lexer *self = (Lexer *)payload;
115115
uint32_t goal_byte = self->current_position.bytes;
116116

@@ -123,13 +123,19 @@ static uint32_t ts_lexer__get_column(void *payload) {
123123

124124
uint32_t result = 0;
125125
while (self->current_position.bytes < goal_byte) {
126-
ts_lexer__advance(self, false);
126+
ts_lexer__advance(payload, false);
127127
result++;
128128
}
129129

130130
return result;
131131
}
132132

133+
// Reports whether the lexer's current byte offset equals the starting byte
// of the included range selected by `current_included_range_index`.
//
// `payload` is the `TSLexer` handed to generated parsers; it is cast back to
// the enclosing `Lexer` to reach the position and range bookkeeping.
//
// NOTE(review): the range array is indexed without a bounds check. Confirm
// that `current_included_range_index` always refers to a valid range when
// this is called (e.g. after the lexer has advanced past the final range).
static bool ts_lexer__is_at_included_range_start(TSLexer *payload) {
  const Lexer *lexer = (const Lexer *)payload;
  const TSRange *active_range = lexer->included_ranges + lexer->current_included_range_index;
  return active_range->start_byte == lexer->current_position.bytes;
}
138+
133139
// The lexer's methods are stored as a struct field so that generated
134140
// parsers can call them without needing to be linked against this library.
135141

@@ -139,6 +145,7 @@ void ts_lexer_init(Lexer *self) {
139145
.advance = ts_lexer__advance,
140146
.mark_end = ts_lexer__mark_end,
141147
.get_column = ts_lexer__get_column,
148+
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
142149
.lookahead = 0,
143150
.result_symbol = 0,
144151
},
@@ -227,7 +234,9 @@ void ts_lexer_start(Lexer *self) {
227234
}
228235

229236
void ts_lexer_advance_to_end(Lexer *self) {
230-
while (self->data.lookahead != 0) ts_lexer__advance(self, false);
237+
while (self->data.lookahead != 0) {
238+
ts_lexer__advance((TSLexer *)self, false);
239+
}
231240
}
232241

233242
static const TSRange DEFAULT_RANGES[] = {

src/runtime/parser.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
327327
valid_external_tokens
328328
)) {
329329
if (length_is_undefined(self->lexer.token_end_position)) {
330-
self->lexer.token_end_position = self->lexer.current_position;
330+
self->lexer.data.mark_end(&self->lexer.data);
331331
}
332332

333333
if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) {
@@ -380,7 +380,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
380380
self->lexer.data.result_symbol = ts_builtin_sym_error;
381381
break;
382382
}
383-
self->lexer.data.advance(&self->lexer, false);
383+
self->lexer.data.advance(&self->lexer.data, false);
384384
}
385385

386386
error_end_position = self->lexer.current_position;

test/helpers/point_helpers.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "./point_helpers.h"
22
#include <string>
33
#include <ostream>
4+
#include <cassert>
45
#include "runtime/length.h"
56
#include "tree_sitter/runtime.h"
67

@@ -45,3 +46,29 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range) {
4546
ostream &operator<<(ostream &stream, const Length &length) {
4647
return stream << "{bytes:" << length.bytes << ", extent:" << length.extent << "}";
4748
}
49+
50+
// Computes the row/column position reached after scanning the first
// `end_index` characters of `text`.
//
// Every '\n' starts a new row and resets the column to zero; any other
// character advances the column by one. An `end_index` larger than the
// string (such as the `npos` default declared in the header) is clamped to
// the string's length, so the whole string is measured.
TSPoint extent_for_string(const string &text, size_t end_index) {
  const size_t limit = end_index > text.size() ? text.size() : end_index;
  TSPoint position = {0, 0};
  for (size_t offset = 0; offset < limit; ++offset) {
    if (text[offset] != '\n') {
      position.column++;
      continue;
    }
    position.row++;
    position.column = 0;
  }
  return position;
}
63+
64+
// Builds the TSRange covered by the first occurrence of `substring` inside
// `text`.
//
// The range is initialized with the point extents of the match's start and
// end (computed via `extent_for_string`) followed by the corresponding byte
// offsets, narrowed to uint32_t. The substring is required to be present:
// the lookup result is checked with `assert`, so a missing substring aborts
// in builds where assertions are enabled.
TSRange range_for_substring(const string &text, const string &substring) {
  size_t start = text.find(substring);
  assert(start != string::npos);
  size_t end = start + substring.size();
  return TSRange {
    extent_for_string(text, start),
    extent_for_string(text, end),
    static_cast<uint32_t>(start),
    static_cast<uint32_t>(end),
  };
}

test/helpers/point_helpers.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,8 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range);
2020

2121
std::ostream &operator<<(std::ostream &stream, const Length &length);
2222

23+
TSPoint extent_for_string(const std::string &text, size_t end_index = std::string::npos);
24+
25+
TSRange range_for_substring(const std::string &text, const std::string &substring);
26+
2327
#endif // HELPERS_POINT_HELPERS_H_

test/helpers/spy_input.cc

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "helpers/spy_input.h"
22
#include "helpers/encoding_helpers.h"
3+
#include "helpers/point_helpers.h"
34
#include "runtime/point.h"
45
#include <string.h>
56
#include <algorithm>
@@ -95,19 +96,6 @@ TSInput SpyInput::input() {
9596
return result;
9697
}
9798

98-
static TSPoint get_extent(string text) {
99-
TSPoint result = {0, 0};
100-
for (auto i = text.begin(); i != text.end(); i++) {
101-
if (*i == '\n') {
102-
result.row++;
103-
result.column = 0;
104-
} else {
105-
result.column++;
106-
}
107-
}
108-
return result;
109-
}
110-
11199
TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) {
112100
auto swap = swap_substr(start_byte, bytes_removed, text);
113101
size_t bytes_added = text.size();
@@ -117,8 +105,8 @@ TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string te
117105
result.old_end_byte = start_byte + bytes_removed;
118106
result.new_end_byte = start_byte + bytes_added;
119107
result.start_point = swap.second;
120-
result.old_end_point = result.start_point + get_extent(swap.first);
121-
result.new_end_point = result.start_point + get_extent(text);
108+
result.old_end_point = result.start_point + extent_for_string(swap.first);
109+
result.new_end_point = result.start_point + extent_for_string(text);
122110
return result;
123111
}
124112

@@ -131,8 +119,8 @@ TSInputEdit SpyInput::undo() {
131119
result.old_end_byte = entry.start_byte + entry.bytes_removed;
132120
result.new_end_byte = entry.start_byte + entry.text_inserted.size();
133121
result.start_point = swap.second;
134-
result.old_end_point = result.start_point + get_extent(swap.first);
135-
result.new_end_point = result.start_point + get_extent(entry.text_inserted);
122+
result.old_end_point = result.start_point + extent_for_string(swap.first);
123+
result.new_end_point = result.start_point + extent_for_string(entry.text_inserted);
136124
return result;
137125
}
138126

test/runtime/parser_test.cc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,29 @@ describe("Parser", [&]() {
918918

919919
assert_root_node("(program (ERROR (identifier)))");
920920
});
921+
922+
it("allows external scanners to detect the boundaries of included ranges", [&]() {
  // Template-style input: only the two embedded call expressions are handed
  // to the parser as included ranges; the surrounding text is excluded.
  string text = "a <%= b() %> c <% d() %>";
  TSRange ranges[] = {
    range_for_substring(text, "b()"),
    range_for_substring(text, "d()"),
  };

  ts_parser_set_included_ranges(parser, ranges, 2);
  ts_parser_set_language(parser, load_real_language("javascript"));
  tree = ts_parser_parse_string(parser, nullptr, text.c_str(), text.size());

  // Each included range is expected to yield its own expression statement.
  assert_root_node("(program "
    "(expression_statement (call_expression (identifier) (arguments))) "
    "(expression_statement (call_expression (identifier) (arguments))))");

  TSNode root = ts_tree_root_node(tree);
  TSNode first_statement = ts_node_child(root, 0);
  TSNode second_statement = ts_node_child(root, 1);

  // Each statement's end point is compared with the extent of the source
  // text up to the end of the corresponding call expression.
  AssertThat(ts_node_end_point(first_statement), Equals(extent_for_string("a <%= b()")));
  AssertThat(ts_node_end_point(second_statement), Equals(extent_for_string("a <%= b() %> c <% d()")));
});
921944
});
922945
});
923946

0 commit comments

Comments
 (0)