33namespace Keepsuit \Liquid \Parse ;
44
55use Keepsuit \Liquid \Exceptions \SyntaxException ;
6+ use Keepsuit \Liquid \TagBlock ;
67use RuntimeException ;
78
89class Lexer
@@ -36,6 +37,11 @@ class Lexer
3637
3738 protected int $ position ;
3839
40+ /**
41+ * @var string[]
42+ */
43+ protected array $ rawBodyTags ;
44+
3945 public function __construct (
4046 protected ParseContext $ parseContext ,
4147 ) {}
@@ -53,6 +59,14 @@ public function tokenize(string $source): TokenStream
5359 $ this ->state = LexerState::Data;
5460 $ this ->tokens = [];
5561
62+ $ this ->rawBodyTags = array_keys (array_filter ($ this ->parseContext ->environment ->tagRegistry ->all (), function ($ tag ) {
63+ if (! is_subclass_of ($ tag , TagBlock::class)) {
64+ return false ;
65+ }
66+
67+ return $ tag ::hasRawBody ();
68+ }));
69+
5670 $ this ->parseContext ->lineNumber = 1 ;
5771
5872 preg_match_all (LexerOptions::tokenStartRegex (), $ this ->source , $ matches , PREG_OFFSET_CAPTURE );
@@ -159,18 +173,41 @@ protected function lexVariable(): void
159173 */
protected function lexBlock(): void
    {
        // First Identifier token observed while lexing this block; it is the
        // candidate tag name checked against $this->rawBodyTags at the end.
        $tagToken = null;
        $blockEnd = [];

        // Keep lexing expression tokens until the block end delimiter matches at the cursor.
        while (true) {
            $atBlockEnd = preg_match(LexerOptions::blockEndRegex(), $this->source, $blockEnd, offset: $this->cursor) === 1;
            if ($atBlockEnd) {
                break;
            }

            $this->lexExpression();

            $latest = $this->tokens[array_key_last($this->tokens)];
            if ($latest->type === TokenType::Identifier && $tagToken === null) {
                $tagToken = $latest;
            }
        }

        // Step over the block end delimiter itself.
        $delimiter = $blockEnd[0];
        $this->moveCursor($delimiter);

        // A delimiter that starts with the whitespace-trim marker also swallows
        // the whitespace that immediately follows the block.
        if (trim($delimiter)[0] === LexerOptions::WhitespaceTrim->value) {
            preg_match('/\s+/A', $this->source, $whitespace, offset: $this->cursor);
            $this->moveCursor($whitespace[0] ?? '');
        }

        // When the most recent token is still the BlockStart, drop it instead
        // of emitting a BlockEnd; otherwise close the block with a BlockEnd token.
        $latest = $this->tokens[array_key_last($this->tokens)];
        if ($latest->type !== TokenType::BlockStart) {
            $this->pushToken(TokenType::BlockEnd);
        } else {
            array_pop($this->tokens);
        }

        $this->popState();

        if ($tagToken === null) {
            return;
        }

        // Raw body tags have their body lexed as raw data instead of liquid blocks.
        if (in_array($tagToken->data, $this->rawBodyTags, true)) {
            $this->laxRawBodyTag($tagToken->data);
        }
    }
176213
@@ -227,6 +264,19 @@ protected function ensureStreamNotEnded(): void
227264 }
228265 }
229266
/**
     * Emit the source between the cursor and the next match of the tag's
     * raw-body regex as a single RawData token, advancing the cursor past it.
     *
     * @param  string  $tag  Tag name passed to LexerOptions::blockRawBodyTagDataRegex().
     *
     * @throws SyntaxException When the raw-body regex does not match from the cursor onwards.
     */
    private function laxRawBodyTag(string $tag): void
    {
        $found = preg_match(
            LexerOptions::blockRawBodyTagDataRegex($tag),
            $this->source,
            $closing,
            flags: PREG_OFFSET_CAPTURE,
            offset: $this->cursor,
        );

        if ($found !== 1) {
            throw SyntaxException::tagBlockNeverClosed($tag);
        }

        // With PREG_OFFSET_CAPTURE each match entry is [text, byte offset].
        [, $closingOffset] = $closing[0];
        $bodyLength = $closingOffset - $this->cursor;
        $rawBody = substr($this->source, $this->cursor, $bodyLength);

        // The cursor is advanced before the token is pushed.
        $this->moveCursor($rawBody);
        $this->pushToken(TokenType::RawData, $rawBody);
    }
279+
230280 protected function lexRawData (): void
231281 {
232282 if (preg_match (LexerOptions::blockRawDataRegex (), $ this ->source , $ matches , flags: PREG_OFFSET_CAPTURE , offset: $ this ->cursor ) !== 1 ) {
@@ -265,24 +315,7 @@ protected function lexInlineComment(): void
265315
266316 $ text = substr ($ this ->source , $ this ->cursor , $ matches [0 ][1 ] - $ this ->cursor );
267317
268- $ this ->moveCursor ($ text .$ matches [0 ][0 ]);
269-
270- if ($ matches [1 ][0 ] === "\n" ) {
271- return ;
272- }
273-
274- $ lastToken = $ this ->tokens [count ($ this ->tokens ) - 1 ] ?? null ;
275-
276- if ($ lastToken ?->type === TokenType::BlockStart) {
277- array_pop ($ this ->tokens );
278- } else {
279- $ this ->pushToken (TokenType::BlockEnd);
280- }
281-
282- if ($ matches [1 ][0 ] === LexerOptions::WhitespaceTrim->value ) {
283- preg_match ('/\s+/A ' , $ this ->source , $ matches2 , offset: $ this ->cursor );
284- $ this ->moveCursor ($ matches2 [0 ] ?? '' );
285- }
318+ $ this ->moveCursor ($ text );
286319 }
287320
288321 protected function pushToken (TokenType $ type , string $ value = '' ): void
0 commit comments