Skip to content

Commit 07c46fd

Browse files
authored
Merge pull request #44 from keepsuit/shopify-5.8
Shopify liquid 5.8 compatibility
2 parents dd238f3 + 1ea8f87 commit 07c46fd

File tree

12 files changed

+432
-90
lines changed

12 files changed

+432
-90
lines changed

src/Extensions/StandardExtension.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@ public function getTags(): array
1717
Tags\ContinueTag::class,
1818
Tags\CycleTag::class,
1919
Tags\DecrementTag::class,
20+
Tags\DocTag::class,
2021
Tags\EchoTag::class,
2122
Tags\ForTag::class,
2223
Tags\IfChanged::class,
2324
Tags\IfTag::class,
2425
Tags\IncrementTag::class,
2526
Tags\LiquidTag::class,
27+
Tags\RawTag::class,
2628
Tags\RenderTag::class,
2729
Tags\TableRowTag::class,
2830
Tags\UnlessTag::class,

src/Parse/Lexer.php

Lines changed: 92 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Keepsuit\Liquid\Parse;
44

55
use Keepsuit\Liquid\Exceptions\SyntaxException;
6+
use Keepsuit\Liquid\TagBlock;
67
use RuntimeException;
78

89
class Lexer
@@ -30,12 +31,17 @@ class Lexer
3031
protected array $tokens;
3132

3233
/**
33-
* @var array<int, array<int, array{0:string,1:int}>>
34+
* @var array<int, array{0:string,1:int}>
3435
*/
3536
protected array $positions;
3637

3738
protected int $position;
3839

40+
/**
41+
* @var string[]
42+
*/
43+
protected array $rawBodyTags;
44+
3945
public function __construct(
4046
protected ParseContext $parseContext,
4147
) {}
@@ -53,10 +59,17 @@ public function tokenize(string $source): TokenStream
5359
$this->state = LexerState::Data;
5460
$this->tokens = [];
5561

62+
$this->rawBodyTags = array_keys(array_filter($this->parseContext->environment->tagRegistry->all(), function ($tag) {
63+
if (! is_subclass_of($tag, TagBlock::class)) {
64+
return false;
65+
}
66+
67+
return $tag::hasRawBody();
68+
}));
69+
5670
$this->parseContext->lineNumber = 1;
5771

58-
preg_match_all(LexerOptions::tokenStartRegex(), $this->source, $matches, PREG_OFFSET_CAPTURE);
59-
$this->positions = $matches;
72+
$this->positions = $this->extractTokenStarts($this->source);
6073
$this->position = -1;
6174

6275
while ($this->cursor < $this->end) {
@@ -79,42 +92,36 @@ public function tokenize(string $source): TokenStream
7992
protected function lexData(): void
8093
{
8194
// if no matches are left, return the rest of the template as a simple text token
82-
if ($this->position == count($this->positions[0]) - 1) {
95+
if ($this->position == count($this->positions) - 1) {
8396
$this->pushToken(TokenType::TextData, substr($this->source, $this->cursor));
8497
$this->cursor = $this->end;
8598

8699
return;
87100
}
88101

89102
// Find the first token after the current cursor
90-
$position = $this->positions[0][++$this->position];
103+
$position = $this->positions[++$this->position];
91104
while ($position[1] < $this->cursor) {
92-
if ($this->position == count($this->positions[0]) - 1) {
105+
if ($this->position == count($this->positions) - 1) {
93106
return;
94107
}
95-
$position = $this->positions[0][++$this->position];
108+
$position = $this->positions[++$this->position];
96109
}
97110

98111
// push the template text before the token first
99112
$text = $textBeforeToken = substr($this->source, $this->cursor, $position[1] - $this->cursor);
100113

101114
// trim?
102-
if ($this->positions[2][$this->position][0] === LexerOptions::WhitespaceTrim->value) {
115+
if (($this->positions[$this->position][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) {
103116
$textBeforeToken = rtrim($textBeforeToken);
104117
}
105118

106119
$this->pushToken(TokenType::TextData, $textBeforeToken);
107120
$this->moveCursor($text.$position[0]);
108121

109-
switch ($this->positions[1][$this->position][0]) {
122+
switch ($this->positions[$this->position][0]) {
110123
case LexerOptions::TagBlockStart->value:
111-
// {% raw %}
112-
if (preg_match(LexerOptions::blockRawStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
113-
$this->moveCursor($matches[0]);
114-
$this->lexRawData();
115-
break;
116-
}
117-
124+
case LexerOptions::TagBlockStart->value.LexerOptions::WhitespaceTrim->value:
118125
// {% comment %}
119126
if (preg_match(LexerOptions::blockCommentStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
120127
$this->moveCursor($matches[0]);
@@ -127,6 +134,7 @@ protected function lexData(): void
127134
$this->currentVarBlockLine = $this->lineNumber;
128135
break;
129136
case LexerOptions::TagVariableStart->value:
137+
case LexerOptions::TagVariableStart->value.LexerOptions::WhitespaceTrim->value:
130138
$this->pushToken(TokenType::VariableStart);
131139
$this->pushState(LexerState::Variable);
132140
$this->currentVarBlockLine = $this->lineNumber;
@@ -145,9 +153,8 @@ protected function lexVariable(): void
145153
$this->popState();
146154

147155
// trim?
148-
if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) {
149-
preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
150-
$this->moveCursor($matches[0] ?? '');
156+
if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
157+
$this->trimWhitespaces();
151158
}
152159
} else {
153160
$this->lexExpression();
@@ -159,18 +166,40 @@ protected function lexVariable(): void
159166
*/
160167
protected function lexBlock(): void
161168
{
162-
if (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
163-
$this->pushToken(TokenType::BlockEnd);
164-
$this->moveCursor($matches[0]);
165-
$this->popState();
169+
$tag = null;
166170

167-
// trim?
168-
if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) {
169-
preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
170-
$this->moveCursor($matches[0] ?? '');
171+
// Parse the full expression inside {% ... %}
172+
while (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) !== 1) {
173+
$this->lexExpression();
174+
175+
$lastToken = $this->tokens[array_key_last($this->tokens)];
176+
177+
if ($tag === null && $lastToken->type === TokenType::Identifier) {
178+
$tag = $lastToken;
171179
}
180+
}
181+
182+
// Move the cursor to the end of the block
183+
$this->moveCursor($matches[0]);
184+
185+
// trim?
186+
if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
187+
$this->trimWhitespaces();
188+
}
189+
190+
// If the last token is a block start, we remove the node
191+
$lastToken = $this->tokens[array_key_last($this->tokens)];
192+
if ($lastToken->type === TokenType::BlockStart) {
193+
array_pop($this->tokens);
172194
} else {
173-
$this->lexExpression();
195+
$this->pushToken(TokenType::BlockEnd);
196+
}
197+
198+
$this->popState();
199+
200+
// If the tag is a raw body tag, we need to lex the body as raw data instead of liquid blocks
201+
if ($tag !== null && in_array($tag->data, $this->rawBodyTags, true)) {
202+
$this->laxRawBodyTag($tag->data);
174203
}
175204
}
176205

@@ -227,23 +256,27 @@ protected function ensureStreamNotEnded(): void
227256
}
228257
}
229258

230-
protected function lexRawData(): void
259+
protected function laxRawBodyTag(string $tag): void
231260
{
232-
if (preg_match(LexerOptions::blockRawDataRegex(), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) {
233-
throw SyntaxException::tagBlockNeverClosed('raw');
261+
if (preg_match(LexerOptions::blockRawBodyTagDataRegex($tag), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) {
262+
throw SyntaxException::tagBlockNeverClosed($tag);
234263
}
235264

236-
$text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);
265+
$rawBody = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);
237266

238-
$this->moveCursor($text.$matches[0][0]);
267+
$this->moveCursor($rawBody);
239268

240-
// trim?
241-
if (isset($matches[2][0])) {
242-
preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor);
243-
$this->moveCursor($matches2[0] ?? '');
269+
// inner trim?
270+
if (($matches[1][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) {
271+
$rawBody = rtrim($rawBody);
244272
}
245273

246-
$this->pushToken(TokenType::RawData, $text);
274+
$this->pushToken(TokenType::RawData, $rawBody);
275+
276+
// trim?
277+
if ($matches[2][0][0] === LexerOptions::WhitespaceTrim->value) {
278+
$this->trimWhitespaces();
279+
}
247280
}
248281

249282
protected function lexComment(): void
@@ -265,24 +298,7 @@ protected function lexInlineComment(): void
265298

266299
$text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);
267300

268-
$this->moveCursor($text.$matches[0][0]);
269-
270-
if ($matches[1][0] === "\n") {
271-
return;
272-
}
273-
274-
$lastToken = $this->tokens[count($this->tokens) - 1] ?? null;
275-
276-
if ($lastToken?->type === TokenType::BlockStart) {
277-
array_pop($this->tokens);
278-
} else {
279-
$this->pushToken(TokenType::BlockEnd);
280-
}
281-
282-
if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
283-
preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor);
284-
$this->moveCursor($matches2[0] ?? '');
285-
}
301+
$this->moveCursor($text);
286302
}
287303

288304
protected function pushToken(TokenType $type, string $value = ''): void
@@ -322,4 +338,24 @@ protected function popState(): void
322338

323339
$this->state = $state;
324340
}
341+
342+
/**
 * Consume the run of whitespace that begins exactly at the current cursor.
 *
 * The pattern is anchored at the cursor offset, so whitespace further ahead
 * is never matched. When there is none at the cursor, an empty string is
 * handed to moveCursor().
 */
protected function trimWhitespaces(): void
{
    $whitespace = [];
    preg_match('/\s+/A', $this->source, $whitespace, offset: $this->cursor);

    $skipped = $whitespace[0] ?? '';
    $this->moveCursor($skipped);
}
347+
348+
/**
 * Collect every block-start and variable-start delimiter found in the source.
 *
 * The two delimiter kinds are matched by separate patterns, merged into one
 * list, and sorted by the offset reported by PREG_OFFSET_CAPTURE.
 *
 * @return array<int,array{0:string,1:int}> [matched delimiter, offset] pairs in ascending offset order
 */
protected function extractTokenStarts(string $source): array
{
    $blockMatches = [];
    $variableMatches = [];

    preg_match_all(LexerOptions::blockStartRegex(), $source, $blockMatches, PREG_OFFSET_CAPTURE);
    preg_match_all(LexerOptions::variableStartRegex(), $source, $variableMatches, PREG_OFFSET_CAPTURE);

    // Index 0 holds the full-pattern matches, each as a [text, offset] pair.
    $starts = array_merge($blockMatches[0], $variableMatches[0]);

    usort(
        $starts,
        function (array $left, array $right) {
            return $left[1] <=> $right[1];
        }
    );

    return $starts;
}
325361
}

src/Parse/LexerOptions.php

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,13 @@ enum LexerOptions: string
1717

1818
case WhitespaceTrim = '-';
1919

20-
public static function tokenStartRegex(): string
20+
public static function blockStartRegex(): string
2121
{
2222
static $regex;
2323

2424
if ($regex === null) {
2525
$regex = sprintf(
26-
'{(%s|%s)(%s)?}sx',
27-
preg_quote(LexerOptions::TagVariableStart->value),
26+
'{(%s%s?)}sx',
2827
preg_quote(LexerOptions::TagBlockStart->value),
2928
preg_quote(LexerOptions::WhitespaceTrim->value)
3029
);
@@ -33,15 +32,15 @@ public static function tokenStartRegex(): string
3332
return $regex;
3433
}
3534

36-
public static function commentBlockRegex(): string
35+
public static function variableStartRegex(): string
3736
{
3837
static $regex;
3938

4039
if ($regex === null) {
4140
$regex = sprintf(
42-
"{\s*comment\s*(?:%s|%s')}Asx",
43-
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
44-
preg_quote(LexerOptions::TagBlockEnd->value),
41+
'{(%s%s?)}sx',
42+
preg_quote(LexerOptions::TagVariableStart->value),
43+
preg_quote(LexerOptions::WhitespaceTrim->value)
4544
);
4645
}
4746

@@ -54,7 +53,7 @@ public static function variableEndRegex(): string
5453

5554
if ($regex === null) {
5655
$regex = sprintf(
57-
'{\s*(?:%s|%s)}Ax',
56+
'{\s*(%s|%s)}Ax',
5857
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagVariableEnd->value),
5958
preg_quote(LexerOptions::TagVariableEnd->value),
6059
);
@@ -69,22 +68,7 @@ public static function blockEndRegex(): string
6968

7069
if ($regex === null) {
7170
$regex = sprintf(
72-
'{\s*(?:%s|%s)}Ax',
73-
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
74-
preg_quote(LexerOptions::TagBlockEnd->value),
75-
);
76-
}
77-
78-
return $regex;
79-
}
80-
81-
public static function blockRawStartRegex(): string
82-
{
83-
static $regex;
84-
85-
if ($regex === null) {
86-
$regex = sprintf(
87-
'{\s*raw\s*(?:%s|%s)}Ax',
71+
'{\s*(%s|%s)}Ax',
8872
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
8973
preg_quote(LexerOptions::TagBlockEnd->value),
9074
);
@@ -93,21 +77,22 @@ public static function blockRawStartRegex(): string
9377
return $regex;
9478
}
9579

96-
public static function blockRawDataRegex(): string
80+
public static function blockRawBodyTagDataRegex(string $tag): string
9781
{
98-
static $regex;
82+
static $regex = [];
9983

100-
if ($regex === null) {
101-
$regex = sprintf(
102-
'{%s(%s)?\s*endraw\s*(%s)?%s}sx',
84+
if (($regex[$tag] ?? null) === null) {
85+
$regex[$tag] = sprintf(
86+
'{(%s%s?)\s*end%s\s*(%s?%s)}sx',
10387
preg_quote(LexerOptions::TagBlockStart->value),
10488
LexerOptions::WhitespaceTrim->value,
89+
preg_quote($tag),
10590
LexerOptions::WhitespaceTrim->value,
10691
preg_quote(LexerOptions::TagBlockEnd->value),
10792
);
10893
}
10994

110-
return $regex;
95+
return $regex[$tag];
11196
}
11297

11398
public static function blockCommentStartRegex(): string

src/TagBlock.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,9 @@ public function parseTreeVisitorChildren(): array
2020
{
2121
return [];
2222
}
23+
24+
/**
 * Whether the body of this block tag must be kept as raw data.
 *
 * Lexer::tokenize() calls this on every registered TagBlock subclass and
 * lexes the body of those returning true as raw data instead of Liquid
 * blocks. This base implementation returns false.
 */
public static function hasRawBody(): bool
{
    return false;
}
2328
}

0 commit comments

Comments
 (0)