Skip to content

Commit 26b89da

Browse files
committed
feat(lib): add ts_parser_parse_with_options
Currently, this allows users to pass in a callback that should be invoked to check whether or not to halt parsing
1 parent aaba7cd commit 26b89da

File tree

3 files changed

+87
-9
lines changed

3 files changed

+87
-9
lines changed

lib/binding_rust/bindings.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,19 @@ pub struct TSInput {
7272
>,
7373
pub encoding: TSInputEncoding,
7474
}
75+
/// State handed to the progress callback configured in [`TSParseOptions`].
///
/// The C implementation passes a pointer to its own copy of this struct to
/// the callback each time the callback is consulted.
#[repr(C)]
76+
#[derive(Debug, Copy, Clone)]
77+
pub struct TSParseState {
78+
/// Opaque user pointer; the C side copies it from `TSParseOptions::payload`
/// when `ts_parser_parse_with_options` is called.
pub payload: *mut ::core::ffi::c_void,
79+
/// Parser position recorded before the callback is consulted
/// (presumably a byte offset into the input, as the name says).
pub current_byte_offset: u32,
80+
}
81+
/// Options accepted by `ts_parser_parse_with_options`.
#[repr(C)]
82+
#[derive(Debug, Copy, Clone)]
83+
pub struct TSParseOptions {
84+
/// Opaque user pointer made available to the callback through
/// `TSParseState::payload`.
pub payload: *mut ::core::ffi::c_void,
85+
/// Optional callback checked by the parser after a fixed number of parse
/// operations; returning `true` cancels the parse. `None` disables the check.
pub progress_callback:
86+
::core::option::Option<unsafe extern "C" fn(state: *mut TSParseState) -> bool>,
87+
}
7588
pub const TSLogTypeParse: TSLogType = 0;
7689
pub const TSLogTypeLex: TSLogType = 1;
7790
pub type TSLogType = ::core::ffi::c_uint;
@@ -178,13 +191,22 @@ extern "C" {
178191
pub fn ts_parser_included_ranges(self_: *const TSParser, count: *mut u32) -> *const TSRange;
179192
}
180193
extern "C" {
181-
#[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. [`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are three possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to a timeout that was set by an earlier call to\n the [`ts_parser_set_timeout_micros`] function. You can resume parsing from\n where the parser left out by calling [`ts_parser_parse`] again with the\n same arguments. Or you can start parsing from scratch by first calling\n [`ts_parser_reset`].\n 3. 
Parsing was cancelled using a cancellation flag that was set by an\n earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing\n from where the parser left out by calling [`ts_parser_parse`] again with\n the same arguments.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"]
194+
#[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. [`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are four possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to a timeout that was set by an earlier call to\n the [`ts_parser_set_timeout_micros`] function. You can resume parsing from\n where the parser left out by calling [`ts_parser_parse`] again with the\n same arguments. Or you can start parsing from scratch by first calling\n [`ts_parser_reset`].\n 3. 
Parsing was cancelled using a cancellation flag that was set by an\n earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing\n from where the parser left out by calling [`ts_parser_parse`] again with\n the same arguments.\n 4. Parsing was cancelled due to the progress callback returning true. This callback\n is passed in [`ts_parser_parse_with_options`] inside the [`TSParseOptions`] struct.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"]
182195
pub fn ts_parser_parse(
183196
self_: *mut TSParser,
184197
old_tree: *const TSTree,
185198
input: TSInput,
186199
) -> *mut TSTree;
187200
}
201+
extern "C" {
202+
#[doc = " Use the parser to parse some source code and create a syntax tree, with some options.\n\n See [`ts_parser_parse`] for more details.\n\n `parse_options` is passed by value, exactly as in the C declaration. If its\n `progress_callback` is set, the parser consults it periodically and halts\n parsing when it returns `true`."]
203+
pub fn ts_parser_parse_with_options(
204+
self_: *mut TSParser,
205+
old_tree: *const TSTree,
206+
input: TSInput,
207+
// By value, not `*const`: the C prototype is `TSParseOptions parse_options`,
// so a pointer here would break the calling convention.
parse_options: TSParseOptions,
208+
) -> *mut TSTree;
209+
}
188210
extern "C" {
189211
#[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the [`ts_parser_parse`] function\n above. The second two parameters indicate the location of the buffer and its\n length in bytes."]
190212
pub fn ts_parser_parse_string(

lib/include/tree_sitter/api.h

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,16 @@ typedef struct TSInput {
7979
TSInputEncoding encoding;
8080
} TSInput;
8181

82+
/**
 * State passed to the progress callback of [`TSParseOptions`] while parsing.
 */
typedef struct TSParseState {
83+
/* Opaque user pointer, copied from TSParseOptions.payload. */
void *payload;
84+
/* Parser position recorded before the callback is consulted. */
uint32_t current_byte_offset;
85+
} TSParseState;
86+
87+
/**
 * Options for [`ts_parser_parse_with_options`].
 */
typedef struct TSParseOptions {
88+
/* Opaque user pointer exposed to the callback via TSParseState.payload. */
void *payload;
89+
/* Optional; consulted periodically during parsing. Returning true cancels the parse. */
bool (*progress_callback)(TSParseState *state);
90+
} TSParseOptions;
91+
8292
typedef enum TSLogType {
8393
TSLogTypeParse,
8494
TSLogTypeLex,
@@ -247,7 +257,7 @@ const TSRange *ts_parser_included_ranges(
247257
* `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.
248258
*
249259
* This function returns a syntax tree on success, and `NULL` on failure. There
250-
* are three possible reasons for failure:
260+
* are four possible reasons for failure:
251261
* 1. The parser does not have a language assigned. Check for this using the
252262
[`ts_parser_language`] function.
253263
* 2. Parsing was cancelled due to a timeout that was set by an earlier call to
@@ -259,6 +269,8 @@ const TSRange *ts_parser_included_ranges(
259269
* earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing
260270
* from where the parser left out by calling [`ts_parser_parse`] again with
261271
* the same arguments.
272+
* 4. Parsing was cancelled due to the progress callback returning true. This callback
273+
* is passed in [`ts_parser_parse_with_options`] inside the [`TSParseOptions`] struct.
262274
*
263275
* [`read`]: TSInput::read
264276
* [`payload`]: TSInput::payload
@@ -271,6 +283,18 @@ TSTree *ts_parser_parse(
271283
TSInput input
272284
);
273285

286+
/**
287+
* Use the parser to parse some source code and create a syntax tree, with some options.
288+
*
289+
* See [`ts_parser_parse`] for more details.
 *
 * `parse_options` is taken by value. When its `progress_callback` is non-NULL,
 * the parser calls it periodically with a [`TSParseState`] whose `payload` is
 * `parse_options.payload`; if the callback returns true, parsing is cancelled
 * and this function returns `NULL`.
290+
*/
291+
TSTree* ts_parser_parse_with_options(
292+
TSParser *self,
293+
const TSTree *old_tree,
294+
TSInput input,
295+
TSParseOptions parse_options
296+
);
297+
274298
/**
275299
* Use the parser to parse some source code stored in one contiguous buffer.
276300
* The first two parameters are the same as in the [`ts_parser_parse`] function
@@ -310,6 +334,8 @@ TSTree *ts_parser_parse_string_encoding(
310334
void ts_parser_reset(TSParser *self);
311335

312336
/**
337+
* @deprecated Use [`ts_parser_parse_with_options`] and pass in a progress callback instead; this function will be removed in 0.26.
338+
*
313339
* Set the maximum duration in microseconds that parsing should be allowed to
314340
* take before halting.
315341
*
@@ -319,11 +345,15 @@ void ts_parser_reset(TSParser *self);
319345
void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros);
320346

321347
/**
348+
* @deprecated Use [`ts_parser_parse_with_options`] and pass in a progress callback instead; this function will be removed in 0.26.
349+
*
322350
* Get the duration in microseconds that parsing is allowed to take.
323351
*/
324352
uint64_t ts_parser_timeout_micros(const TSParser *self);
325353

326354
/**
355+
* @deprecated Use [`ts_parser_parse_with_options`] and pass in a progress callback instead; this function will be removed in 0.26.
356+
*
327357
* Set the parser's current cancellation flag pointer.
328358
*
329359
* If a non-null pointer is assigned, then the parser will periodically read
@@ -333,6 +363,8 @@ uint64_t ts_parser_timeout_micros(const TSParser *self);
333363
void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);
334364

335365
/**
366+
* @deprecated Use [`ts_parser_parse_with_options`] and pass in a progress callback instead; this function will be removed in 0.26.
367+
*
336368
* Get the parser's current cancellation flag pointer.
337369
*/
338370
const size_t *ts_parser_cancellation_flag(const TSParser *self);

lib/src/parser.c

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ struct TSParser {
111111
const volatile size_t *cancellation_flag;
112112
Subtree old_tree;
113113
TSRangeArray included_range_differences;
114+
TSParseOptions parse_options;
115+
TSParseState parse_state;
114116
unsigned included_range_difference_index;
115117
bool has_scanner_error;
116118
};
@@ -1562,20 +1564,26 @@ static bool ts_parser__advance(
15621564
}
15631565
}
15641566

1565-
// If a cancellation flag or a timeout was provided, then check every
1567+
// If a cancellation flag, timeout, or progress callback was provided, then check every
15661568
// time a fixed number of parse actions has been processed.
15671569
if (++self->operation_count == OP_COUNT_PER_PARSER_TIMEOUT_CHECK) {
15681570
self->operation_count = 0;
15691571
}
1572+
if (self->parse_options.progress_callback) {
1573+
self->parse_state.current_byte_offset = position;
1574+
}
15701575
if (
15711576
self->operation_count == 0 &&
1572-
((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1573-
(!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
1577+
(
1578+
(self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1579+
(!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)) ||
1580+
(self->parse_options.progress_callback && self->parse_options.progress_callback(&self->parse_state))
1581+
)
15741582
) {
1575-
if (lookahead.ptr) {
1576-
ts_subtree_release(&self->tree_pool, lookahead);
1577-
}
1578-
return false;
1583+
if (lookahead.ptr) {
1584+
ts_subtree_release(&self->tree_pool, lookahead);
1585+
}
1586+
return false;
15791587
}
15801588

15811589
// Process each parse action for the current lookahead token in
@@ -2118,6 +2126,22 @@ TSTree *ts_parser_parse(
21182126
return result;
21192127
}
21202128

2129+
// Parse like `ts_parser_parse`, but with caller-supplied options installed on
// the parser for the duration of this one call.
TSTree *ts_parser_parse_with_options(
2130+
TSParser *self,
2131+
const TSTree *old_tree,
2132+
TSInput input,
2133+
TSParseOptions parse_options
2134+
) {
2135+
// Install the options so the parse loop can see the progress callback.
self->parse_options = parse_options;
2136+
// Fresh callback state: only `payload` is set; the compound literal
// zero-initializes `current_byte_offset`.
self->parse_state = (TSParseState) {
2137+
.payload = parse_options.payload,
2138+
};
2139+
TSTree *result = ts_parser_parse(self, old_tree, input);
2140+
// Clear both fields again, whether or not the parse produced a tree;
// presumably so a later plain `ts_parser_parse` does not call a stale callback.
self->parse_options = (TSParseOptions) {0};
2141+
self->parse_state = (TSParseState) {0};
2142+
return result;
2143+
}
2144+
21212145
TSTree *ts_parser_parse_string(
21222146
TSParser *self,
21232147
const TSTree *old_tree,

0 commit comments

Comments
 (0)