@@ -214,6 +214,7 @@ struct TSQuery {
214214 Array (TSQueryPredicateStep ) predicate_steps ;
215215 Array (QueryPattern ) patterns ;
216216 Array (StepOffset ) step_offsets ;
217+ Array (char ) string_buffer ;
217218 const TSLanguage * language ;
218219 uint16_t wildcard_root_pattern_count ;
219220 TSSymbol * symbol_map ;
@@ -439,67 +440,6 @@ static uint16_t symbol_table_insert_name(
439440 return self -> slices .size - 1 ;
440441}
441442
442- static uint16_t symbol_table_insert_name_with_escapes (
443- SymbolTable * self ,
444- const char * escaped_name ,
445- uint32_t escaped_length
446- ) {
447- Slice slice = {
448- .offset = self -> characters .size ,
449- .length = 0 ,
450- };
451- array_grow_by (& self -> characters , escaped_length + 1 );
452-
453- // Copy the contents of the literal into the characters buffer, processing escape
454- // sequences like \n and \". This needs to be done before checking if the literal
455- // is already present, in order to do the string comparison.
456- bool is_escaped = false;
457- for (unsigned i = 0 ; i < escaped_length ; i ++ ) {
458- const char * src = & escaped_name [i ];
459- char * dest = & self -> characters .contents [slice .offset + slice .length ];
460- if (is_escaped ) {
461- switch (* src ) {
462- case 'n' :
463- * dest = '\n' ;
464- break ;
465- case 'r' :
466- * dest = '\r' ;
467- break ;
468- case 't' :
469- * dest = '\t' ;
470- break ;
471- case '0' :
472- * dest = '\0' ;
473- break ;
474- default :
475- * dest = * src ;
476- break ;
477- }
478- is_escaped = false;
479- slice .length ++ ;
480- } else {
481- if (* src == '\\' ) {
482- is_escaped = true;
483- } else {
484- * dest = * src ;
485- slice .length ++ ;
486- }
487- }
488- }
489-
490- // If the string is already present, remove the redundant content from the characters
491- // buffer and return the existing id.
492- int id = symbol_table_id_for_name (self , & self -> characters .contents [slice .offset ], slice .length );
493- if (id >= 0 ) {
494- self -> characters .size -= (escaped_length + 1 );
495- return id ;
496- }
497-
498- self -> characters .contents [slice .offset + slice .length ] = 0 ;
499- array_push (& self -> slices , slice );
500- return self -> slices .size - 1 ;
501- }
502-
503443/************
504444 * QueryStep
505445 ************/
@@ -1393,6 +1333,59 @@ static void ts_query__finalize_steps(TSQuery *self) {
13931333 }
13941334}
13951335
1336+ static TSQueryError ts_query__parse_string_literal (
1337+ TSQuery * self ,
1338+ Stream * stream
1339+ ) {
1340+ const char * string_start = stream -> input ;
1341+ if (stream -> next != '"' ) return TSQueryErrorSyntax ;
1342+ stream_advance (stream );
1343+ const char * prev_position = stream -> input ;
1344+
1345+ bool is_escaped = false;
1346+ array_clear (& self -> string_buffer );
1347+ for (;;) {
1348+ if (is_escaped ) {
1349+ is_escaped = false;
1350+ switch (stream -> next ) {
1351+ case 'n' :
1352+ array_push (& self -> string_buffer , '\n' );
1353+ break ;
1354+ case 'r' :
1355+ array_push (& self -> string_buffer , '\r' );
1356+ break ;
1357+ case 't' :
1358+ array_push (& self -> string_buffer , '\t' );
1359+ break ;
1360+ case '0' :
1361+ array_push (& self -> string_buffer , '\0' );
1362+ break ;
1363+ default :
1364+ array_extend (& self -> string_buffer , stream -> next_size , stream -> input );
1365+ break ;
1366+ }
1367+ prev_position = stream -> input + stream -> next_size ;
1368+ } else {
1369+ if (stream -> next == '\\' ) {
1370+ array_extend (& self -> string_buffer , (stream -> input - prev_position ), prev_position );
1371+ prev_position = stream -> input + 1 ;
1372+ is_escaped = true;
1373+ } else if (stream -> next == '"' ) {
1374+ array_extend (& self -> string_buffer , (stream -> input - prev_position ), prev_position );
1375+ stream_advance (stream );
1376+ return TSQueryErrorNone ;
1377+ } else if (stream -> next == '\n' ) {
1378+ stream_reset (stream , string_start );
1379+ return TSQueryErrorSyntax ;
1380+ }
1381+ }
1382+ if (!stream_advance (stream )) {
1383+ stream_reset (stream , string_start );
1384+ return TSQueryErrorSyntax ;
1385+ }
1386+ }
1387+ }
1388+
13961389// Parse a single predicate associated with a pattern, adding it to the
13971390// query's internal `predicate_steps` array. Predicates are arbitrary
13981391// S-expressions associated with a pattern which are meant to be handled at
@@ -1458,44 +1451,17 @@ static TSQueryError ts_query__parse_predicate(
14581451
14591452 // Parse a string literal
14601453 else if (stream -> next == '"' ) {
1461- stream_advance (stream );
1462-
1463- // Parse the string content
1464- bool is_escaped = false;
1465- const char * string_content = stream -> input ;
1466- for (;;) {
1467- if (is_escaped ) {
1468- is_escaped = false;
1469- } else {
1470- if (stream -> next == '\\' ) {
1471- is_escaped = true;
1472- } else if (stream -> next == '"' ) {
1473- break ;
1474- } else if (stream -> next == '\n' ) {
1475- stream_reset (stream , string_content - 1 );
1476- return TSQueryErrorSyntax ;
1477- }
1478- }
1479- if (!stream_advance (stream )) {
1480- stream_reset (stream , string_content - 1 );
1481- return TSQueryErrorSyntax ;
1482- }
1483- }
1484- uint32_t length = stream -> input - string_content ;
1485-
1486- // Add a step for the node
1487- uint16_t id = symbol_table_insert_name_with_escapes (
1454+ TSQueryError e = ts_query__parse_string_literal (self , stream );
1455+ if (e ) return e ;
1456+ uint16_t id = symbol_table_insert_name (
14881457 & self -> predicate_values ,
1489- string_content ,
1490- length
1458+ self -> string_buffer . contents ,
1459+ self -> string_buffer . size
14911460 );
14921461 array_push (& self -> predicate_steps , ((TSQueryPredicateStep ) {
14931462 .type = TSQueryPredicateStepTypeString ,
14941463 .value_id = id ,
14951464 }));
1496-
1497- if (stream -> next != '"' ) return TSQueryErrorSyntax ;
1498- stream_advance (stream );
14991465 }
15001466
15011467 // Parse a bare symbol
@@ -1761,33 +1727,22 @@ static TSQueryError ts_query__parse_pattern(
17611727
17621728 // Parse a double-quoted anonymous leaf node expression
17631729 else if (stream -> next == '"' ) {
1764- stream_advance (stream );
1765-
1766- // Parse the string content
1767- const char * string_content = stream -> input ;
1768- while (stream -> next != '"' ) {
1769- if (!stream_advance (stream )) {
1770- stream_reset (stream , string_content - 1 );
1771- return TSQueryErrorSyntax ;
1772- }
1773- }
1774- uint32_t length = stream -> input - string_content ;
1730+ const char * string_start = stream -> input ;
1731+ TSQueryError e = ts_query__parse_string_literal (self , stream );
1732+ if (e ) return e ;
17751733
17761734 // Add a step for the node
17771735 TSSymbol symbol = ts_language_symbol_for_name (
17781736 self -> language ,
1779- string_content ,
1780- length ,
1737+ self -> string_buffer . contents ,
1738+ self -> string_buffer . size ,
17811739 false
17821740 );
17831741 if (!symbol ) {
1784- stream_reset (stream , string_content );
1742+ stream_reset (stream , string_start + 1 );
17851743 return TSQueryErrorNodeType ;
17861744 }
17871745 array_push (& self -> steps , query_step__new (symbol , depth , is_immediate ));
1788-
1789- if (stream -> next != '"' ) return TSQueryErrorSyntax ;
1790- stream_advance (stream );
17911746 }
17921747
17931748 // Parse a field-prefixed pattern
@@ -1977,6 +1932,7 @@ TSQuery *ts_query_new(
19771932 .predicate_steps = array_new (),
19781933 .patterns = array_new (),
19791934 .step_offsets = array_new (),
1935+ .string_buffer = array_new (),
19801936 .symbol_map = symbol_map ,
19811937 .wildcard_root_pattern_count = 0 ,
19821938 .language = language ,
@@ -2056,6 +2012,7 @@ TSQuery *ts_query_new(
20562012 }
20572013
20582014 ts_query__finalize_steps (self );
2015+ array_delete (& self -> string_buffer );
20592016 return self ;
20602017}
20612018
@@ -2066,6 +2023,7 @@ void ts_query_delete(TSQuery *self) {
20662023 array_delete (& self -> predicate_steps );
20672024 array_delete (& self -> patterns );
20682025 array_delete (& self -> step_offsets );
2026+ array_delete (& self -> string_buffer );
20692027 symbol_table_delete (& self -> captures );
20702028 symbol_table_delete (& self -> predicate_values );
20712029 ts_free (self -> symbol_map );
0 commit comments