Skip to content

Commit 25f64e1

Browse files
committed
Place tighter limits on the work done during query analysis
1 parent 8de8c64 commit 25f64e1

File tree

1 file changed

+43
-28
lines changed

1 file changed

+43
-28
lines changed

lib/src/query.c

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
// #define DEBUG_EXECUTE_QUERY
1212

1313
#define MAX_STEP_CAPTURE_COUNT 3
14-
#define MAX_STATE_PREDECESSOR_COUNT 100
15-
#define MAX_ANALYSIS_STATE_DEPTH 8
1614
#define MAX_NEGATED_FIELD_COUNT 8
15+
#define MAX_STATE_PREDECESSOR_COUNT 256
16+
#define MAX_ANALYSIS_STATE_DEPTH 8
17+
#define MAX_ANALYSIS_ITERATION_COUNT 256
1718

1819
/*
1920
* Stream - A sequence of unicode characters derived from a UTF8 string.
@@ -571,7 +572,7 @@ static inline StatePredecessorMap state_predecessor_map_new(
571572
) {
572573
return (StatePredecessorMap) {
573574
.contents = ts_calloc(
574-
language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
575+
(size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
575576
sizeof(TSStateId)
576577
),
577578
};
@@ -586,7 +587,7 @@ static inline void state_predecessor_map_add(
586587
TSStateId state,
587588
TSStateId predecessor
588589
) {
589-
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
590+
size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
590591
TSStateId *count = &self->contents[index];
591592
if (
592593
*count == 0 ||
@@ -602,7 +603,7 @@ static inline const TSStateId *state_predecessor_map_get(
602603
TSStateId state,
603604
unsigned *count
604605
) {
605-
unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1);
606+
size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
606607
*count = self->contents[index];
607608
return &self->contents[index + 1];
608609
}
@@ -635,6 +636,8 @@ static inline int analysis_state__compare_position(
635636
if (self->stack[i].child_index > other->stack[i].child_index) return 1;
636637
}
637638
if (self->depth < other->depth) return 1;
639+
if (self->step_index < other->step_index) return -1;
640+
if (self->step_index > other->step_index) return 1;
638641
return 0;
639642
}
640643

@@ -652,8 +655,6 @@ static inline int analysis_state__compare(
652655
if (self->stack[i].field_id < other->stack[i].field_id) return -1;
653656
if (self->stack[i].field_id > other->stack[i].field_id) return 1;
654657
}
655-
if (self->step_index < other->step_index) return -1;
656-
if (self->step_index > other->step_index) return 1;
657658
return 0;
658659
}
659660

@@ -1023,13 +1024,18 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
10231024
// Walk the subgraph for this non-terminal, tracking all of the possible
10241025
// sequences of progress within the pattern.
10251026
bool can_finish_pattern = false;
1026-
bool did_exceed_max_depth = false;
1027+
bool did_abort_analysis = false;
10271028
unsigned recursion_depth_limit = 0;
10281029
unsigned prev_final_step_count = 0;
10291030
array_clear(&final_step_indices);
1030-
for (;;) {
1031+
for (unsigned iteration = 0;; iteration++) {
1032+
if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
1033+
did_abort_analysis = true;
1034+
break;
1035+
}
1036+
10311037
#ifdef DEBUG_ANALYZE_QUERY
1032-
printf("Final step indices:");
1038+
printf("Iteration: %u. Final step indices:", iteration);
10331039
for (unsigned j = 0; j < final_step_indices.size; j++) {
10341040
printf(" %4u", final_step_indices.contents[j]);
10351041
}
@@ -1085,9 +1091,15 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
10851091
if (next_states.size > 0) {
10861092
int comparison = analysis_state__compare_position(state, array_back(&next_states));
10871093
if (comparison == 0) {
1094+
#ifdef DEBUG_ANALYZE_QUERY
1095+
printf("Skip iteration for state %u\n", j);
1096+
#endif
10881097
array_insert_sorted_with(&next_states, analysis_state__compare, *state);
10891098
continue;
10901099
} else if (comparison > 0) {
1100+
#ifdef DEBUG_ANALYZE_QUERY
1101+
printf("Terminate iteration at state %u\n", j);
1102+
#endif
10911103
while (j < states.size) {
10921104
array_push(&next_states, states.contents[j]);
10931105
j++;
@@ -1203,7 +1215,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
12031215
printf("Exceeded depth limit for state %u\n", j);
12041216
#endif
12051217

1206-
did_exceed_max_depth = true;
1218+
did_abort_analysis = true;
12071219
continue;
12081220
}
12091221

@@ -1295,22 +1307,9 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
12951307
next_states = _states;
12961308
}
12971309

1298-
// Mark as indefinite any step where a match terminated.
1299-
// Later, this property will be propagated to all of the step's predecessors.
1300-
for (unsigned j = 0; j < final_step_indices.size; j++) {
1301-
uint32_t final_step_index = final_step_indices.contents[j];
1302-
QueryStep *step = &self->steps.contents[final_step_index];
1303-
if (
1304-
step->depth != PATTERN_DONE_MARKER &&
1305-
step->depth > parent_depth &&
1306-
!step->is_dead_end
1307-
) {
1308-
step->parent_pattern_guaranteed = false;
1309-
step->root_pattern_guaranteed = false;
1310-
}
1311-
}
1312-
1313-
if (did_exceed_max_depth) {
1310+
// If this pattern could not be fully analyzed, then every step should
1311+
// be considered fallible.
1312+
if (did_abort_analysis) {
13141313
for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
13151314
QueryStep *step = &self->steps.contents[j];
13161315
if (
@@ -1322,11 +1321,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
13221321
step->root_pattern_guaranteed = false;
13231322
}
13241323
}
1324+
continue;
13251325
}
13261326

13271327
// If this pattern cannot match, store the pattern index so that it can be
13281328
// returned to the caller.
1329-
if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) {
1329+
if (!can_finish_pattern) {
13301330
assert(final_step_indices.size > 0);
13311331
uint16_t impossible_step_index = *array_back(&final_step_indices);
13321332
uint32_t i, exists;
@@ -1336,6 +1336,21 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
13361336
all_patterns_are_valid = false;
13371337
break;
13381338
}
1339+
1340+
// Mark as fallible any step where a match terminated.
1341+
// Later, this property will be propagated to all of the step's predecessors.
1342+
for (unsigned j = 0; j < final_step_indices.size; j++) {
1343+
uint32_t final_step_index = final_step_indices.contents[j];
1344+
QueryStep *step = &self->steps.contents[final_step_index];
1345+
if (
1346+
step->depth != PATTERN_DONE_MARKER &&
1347+
step->depth > parent_depth &&
1348+
!step->is_dead_end
1349+
) {
1350+
step->parent_pattern_guaranteed = false;
1351+
step->root_pattern_guaranteed = false;
1352+
}
1353+
}
13391354
}
13401355

13411356
// Mark as indefinite any step with captures that are used in predicates.

0 commit comments

Comments
 (0)