Skip to content

Commit 05b65d5

Browse files
Fix trailing ) from interfering with extraction in Clojure keywords (#18345)
## Summary In a form like, ```clojure (if condition :bg-white :bg-black) ``` `:bg-black` will fail to extract, while `:bg-white` is extracted as expected. This PR fixes this case, implements more comprehensive candidate filtering, and supersedes a previous PR. Having recently submitted a PR for handling another special case with Clojure keywords (the presence of `:` inside of keywords), I thought it best to invert the previous strategy: Instead of handling special cases one by one, consume keywords according to the Clojure reader spec. Consume nothing else, other than strings. Because of this, this PR is a tad more invasive rather than additive, for which I apologize. The strategy is this: - Strings begin with a `"` and end with an unescaped `"`. Consume everything between these delimiters (existing case). - Keywords begin with `:`, and end with whitespace, or one of a small set of specific reserved characters. Everything else is a valid character in a keyword. Consume everything between these delimiters, and apply the class splitting previously contained in the outer loop. My previous special case handling of `:` inside of keywords in #18338 is now redundant (and is removed), as this is a more general solution. - Discard _everything else_. I'm hoping that a strategy that is based on Clojure's definition of strings and keywords will pre-empt any further issues with edge cases. Closes #18344. ## Test plan - Added failing tests. - `cargo test` -> failure - Added fix - `cargo test` -> success --------- Co-authored-by: Jordan Pittman <[email protected]>
1 parent 7946db0 commit 05b65d5

File tree

2 files changed

+112
-32
lines changed

2 files changed

+112
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Fixed
1111

1212
- Don't consider the global important state in `@apply` ([#18404](https://github.com/tailwindlabs/tailwindcss/pull/18404))
13+
- Fix trailing `)` from interfering with extraction in Clojure keywords ([#18345](https://github.com/tailwindlabs/tailwindcss/pull/18345))
1314

1415
## [4.1.11] - 2025-06-26
1516

crates/oxide/src/extractor/pre_processors/clojure.rs

Lines changed: 111 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,23 @@ use bstr::ByteSlice;
55
#[derive(Debug, Default)]
66
pub struct Clojure;
77

8+
/// Returns `true` when `byte` may appear inside a Clojure/ClojureScript
/// keyword that is being scanned for utility class names.
///
/// This is meant to be a rough estimate of a valid ClojureScript keyword,
/// which can be approximated by the following regex:
/// /::?[a-zA-Z0-9!#$%&*+./:<=>?_|-]+/
///
/// However, keywords are intended to be detected as utilities. Since the set
/// of valid characters in a utility (outside of arbitrary values) is smaller,
/// and neither `[]` nor `()` are allowed in keywords, the accepted set is
/// reduced to ASCII alphanumerics plus `!`, `#`, `%`, `*`, `+`, `-`, `.`,
/// `/`, `:` and `_`.
///
/// `#` has to be accepted: hiccup-style keywords may carry an element id
/// (e.g. `:div#hello.bg-white.pr-1.5`). Treating `#` as a terminator would
/// end the keyword early, and the classes following the id would then be
/// discarded together with the rest of the non-keyword input.
#[inline]
fn is_keyword_character(byte: u8) -> bool {
    matches!(
        byte,
        b'!' | b'#' | b'%' | b'*' | b'+' | b'-' | b'.' | b'/' | b':' | b'_'
    ) || byte.is_ascii_alphanumeric()
}
24+
825
impl PreProcessor for Clojure {
926
fn process(&self, content: &[u8]) -> Vec<u8> {
1027
let content = content
@@ -18,6 +35,7 @@ impl PreProcessor for Clojure {
1835
match cursor.curr {
1936
// Consume strings as-is
2037
b'"' => {
38+
result[cursor.pos] = b' ';
2139
cursor.advance();
2240

2341
while cursor.pos < len {
@@ -26,52 +44,82 @@ impl PreProcessor for Clojure {
2644
b'\\' => cursor.advance_twice(),
2745

2846
// End of the string
29-
b'"' => break,
47+
b'"' => {
48+
result[cursor.pos] = b' ';
49+
break;
50+
}
3051

3152
// Everything else is valid
3253
_ => cursor.advance(),
3354
};
3455
}
3556
}
3657

37-
// Consume comments as-is until the end of the line.
58+
// Discard line comments until the end of the line.
3859
// Comments start with `;;`
3960
b';' if matches!(cursor.next, b';') => {
4061
while cursor.pos < len && cursor.curr != b'\n' {
62+
result[cursor.pos] = b' ';
4163
cursor.advance();
4264
}
4365
}
4466

45-
// A `.` surrounded by digits is a decimal number, so we don't want to replace it.
46-
//
47-
// E.g.:
48-
// ```
49-
// gap-1.5
50-
// ^
51-
// ``
52-
b'.' if cursor.prev.is_ascii_digit() && cursor.next.is_ascii_digit() => {
67+
// Consume keyword until a terminating character is reached.
68+
b':' => {
69+
result[cursor.pos] = b' ';
70+
cursor.advance();
5371

54-
// Keep the `.` as-is
55-
}
72+
while cursor.pos < len {
73+
match cursor.curr {
74+
// A `.` surrounded by digits is a decimal number, so we don't want to replace it.
75+
//
76+
// E.g.:
77+
// ```
78+
// gap-1.5
79+
// ^
80+
// ```
81+
b'.' if cursor.prev.is_ascii_digit()
82+
&& cursor.next.is_ascii_digit() =>
83+
{
84+
// Keep the `.` as-is
85+
}
86+
// A `.` not surrounded by digits denotes the start of a new class name in a
87+
// dot-delimited keyword.
88+
//
89+
// E.g.:
90+
// ```
91+
// flex.gap-1.5
92+
// ^
93+
// ```
94+
b'.' => {
95+
result[cursor.pos] = b' ';
96+
}
97+
// End of keyword.
98+
_ if !is_keyword_character(cursor.curr) => {
99+
result[cursor.pos] = b' ';
100+
break;
101+
}
56102

57-
// A `:` surrounded by letters denotes a variant. Keep as is.
58-
//
59-
// E.g.:
60-
// ```
61-
// lg:pr-6"
62-
// ^
63-
// ``
64-
b':' if cursor.prev.is_ascii_alphanumeric() && cursor.next.is_ascii_alphanumeric() => {
103+
// Consume everything else.
104+
_ => {}
105+
};
65106

66-
// Keep the `:` as-is
107+
cursor.advance();
108+
}
67109
}
68110

69-
b':' | b'.' => {
111+
// Aggressively discard everything else, reducing false positives and preventing
112+
// characters surrounding keywords from producing false negatives.
113+
// E.g.:
114+
// ```
115+
// (when condition :bg-white)
116+
// ^
117+
// ```
118+
// A ')' is never a valid part of a keyword, but will nonetheless prevent 'bg-white'
119+
// from being extracted if not discarded.
120+
_ => {
70121
result[cursor.pos] = b' ';
71122
}
72-
73-
// Consume everything else
74-
_ => {}
75123
};
76124

77125
cursor.advance();
@@ -92,19 +140,23 @@ mod tests {
92140
(":div.flex-1.flex-2", " div flex-1 flex-2"),
93141
(
94142
":.flex-3.flex-4 ;defaults to div",
95-
" flex-3 flex-4 ;defaults to div",
143+
" flex-3 flex-4 ",
96144
),
97-
("{:class :flex-5.flex-6", "{ flex-5 flex-6"),
98-
(r#"{:class "flex-7 flex-8"}"#, r#"{ "flex-7 flex-8"}"#),
145+
("{:class :flex-5.flex-6", " flex-5 flex-6"),
146+
(r#"{:class "flex-7 flex-8"}"#, r#" flex-7 flex-8 "#),
99147
(
100148
r#"{:class ["flex-9" :flex-10]}"#,
101-
r#"{ ["flex-9" flex-10]}"#,
149+
r#" flex-9 flex-10 "#,
102150
),
103151
(
104152
r#"(dom/div {:class "flex-11 flex-12"})"#,
105-
r#"(dom/div { "flex-11 flex-12"})"#,
153+
r#" flex-11 flex-12 "#,
154+
),
155+
("(dom/div :.flex-13.flex-14", " flex-13 flex-14"),
156+
(
157+
r#"[:div#hello.bg-white.pr-1.5 {:class ["grid grid-cols-[auto,1fr] grid-rows-2"]}]"#,
158+
r#" div#hello bg-white pr-1.5 grid grid-cols-[auto,1fr] grid-rows-2 "#,
106159
),
107-
("(dom/div :.flex-13.flex-14", "(dom/div flex-13 flex-14"),
108160
] {
109161
Clojure::test(input, expected);
110162
}
@@ -198,8 +250,35 @@ mod tests {
198250
($ :div {:class [:flex :first:lg:pr-6 :first:2xl:pl-6 :group-hover/2:2xs:pt-6]} …)
199251
200252
:.hover:bg-white
253+
254+
[:div#hello.bg-white.pr-1.5]
255+
"#;
256+
257+
Clojure::test_extract_contains(
258+
input,
259+
vec![
260+
"flex",
261+
"first:lg:pr-6",
262+
"first:2xl:pl-6",
263+
"group-hover/2:2xs:pt-6",
264+
"hover:bg-white",
265+
"bg-white",
266+
"pr-1.5",
267+
],
268+
);
269+
}
270+
271+
// https://github.com/tailwindlabs/tailwindcss/issues/18344
272+
#[test]
273+
fn test_noninterference_of_parens_on_keywords() {
274+
let input = r#"
275+
(get props :y-padding :py-5)
276+
($ :div {:class [:flex.pr-1.5 (if condition :bg-white :bg-black)]})
201277
"#;
202278

203-
Clojure::test_extract_contains(input, vec!["flex", "first:lg:pr-6", "first:2xl:pl-6", "group-hover/2:2xs:pt-6", "hover:bg-white"]);
279+
Clojure::test_extract_contains(
280+
input,
281+
vec!["py-5", "flex", "pr-1.5", "bg-white", "bg-black"],
282+
);
204283
}
205284
}

0 commit comments

Comments
 (0)