Skip to content

Commit 3950b60

Browse files
committed
TRegex: performance improvements for bounded quantifier tracking.
1 parent aad7de1 commit 3950b60

File tree

173 files changed

+5775
-3909
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

173 files changed

+5775
-3909
lines changed

regex/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
This changelog summarizes major changes between TRegex versions relevant to language implementors integrating TRegex into their language. This document will focus on API changes relevant to integrators of TRegex.
44

5+
## Version 25.0.0
6+
7+
* Added support for bounded quantifiers in DFA matchers in boolean match mode. Nested bounded quantifiers and some interplay with lookarounds remain unsupported.
8+
* Added support for Truffle Source Options. Options embedded into the regex source string are now deprecated, but flags remain as they are, i.e. the expected source format changes from `options/regex/flags` to `/regex/flags`. See `RegexOptions` for details.
9+
* Added a new option `ForceLinearExecution`, which causes a bailout on all regexes that TRegex cannot execute in linear time.
10+
511
## Version 24.2.0
612

713
* Implemented the [Regular Expression Pattern Modifiers](https://github.com/tc39/proposal-regexp-modifiers) proposal for ECMAScript regular expressions.

regex/mx.regex/mx_regex.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
2+
# Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
33
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
#
55
# The Universal Permissive License (UPL), Version 1.0

regex/mx.regex/suite.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
2+
# Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
33
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
#
55
# The Universal Permissive License (UPL), Version 1.0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright (c) 2025, 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
package com.oracle.truffle.regex.test.dummylang;
42+
43+
import java.util.ArrayList;
44+
import java.util.Iterator;
45+
46+
import org.graalvm.options.OptionDescriptor;
47+
import org.graalvm.options.OptionDescriptors;
48+
import org.graalvm.polyglot.SandboxPolicy;
49+
50+
import com.oracle.truffle.api.TruffleOptionDescriptors;
51+
import com.oracle.truffle.regex.RegexOptions;
52+
53+
/**
54+
* To be removed once TRegex is a non-internal language (GR-65841).
55+
*/
56+
final class TRegexTestDummyLangOptionDescriptors implements TruffleOptionDescriptors {
57+
58+
private final OptionDescriptors tregexOptionDescriptors = RegexOptions.getDescriptors();
59+
private final TRegexTestDummyLanguageOptionsOptionDescriptors ownOptions = new TRegexTestDummyLanguageOptionsOptionDescriptors();
60+
61+
@Override
62+
public OptionDescriptor get(String optionName) {
63+
OptionDescriptor o = tregexOptionDescriptors.get(optionName.replace("regexDummyLang", "regex"));
64+
if (o != null) {
65+
return copyDescriptor(optionName, o);
66+
}
67+
return ownOptions.get(optionName);
68+
}
69+
70+
@Override
71+
public SandboxPolicy getSandboxPolicy(String optionName) {
72+
assert get(optionName) != null : "Unknown option " + optionName;
73+
return SandboxPolicy.TRUSTED;
74+
}
75+
76+
@Override
77+
public Iterator<OptionDescriptor> iterator() {
78+
ArrayList<OptionDescriptor> tregexOptions = new java.util.ArrayList<>();
79+
for (OptionDescriptor o : tregexOptionDescriptors) {
80+
tregexOptions.add(copyDescriptor(o.getName().replace("regex", "regexDummyLang"), o));
81+
}
82+
for (OptionDescriptor ownOption : ownOptions) {
83+
tregexOptions.add(ownOption);
84+
}
85+
return tregexOptions.iterator();
86+
}
87+
88+
private static OptionDescriptor copyDescriptor(String optionName, OptionDescriptor o) {
89+
return OptionDescriptor.newBuilder(o.getKey(), optionName).deprecated(o.isDeprecated()).help(o.getHelp()).usageSyntax(o.getUsageSyntax()).category(o.getCategory()).stability(
90+
o.getStability()).build();
91+
}
92+
}

regex/src/com.oracle.truffle.regex.test.dummylang/src/com/oracle/truffle/regex/test/dummylang/TRegexTestDummyLanguage.java

Lines changed: 63 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -40,6 +40,11 @@
4040
*/
4141
package com.oracle.truffle.regex.test.dummylang;
4242

43+
import org.graalvm.options.OptionDescriptor;
44+
import org.graalvm.options.OptionDescriptors;
45+
import org.graalvm.options.OptionKey;
46+
import org.graalvm.options.OptionValues;
47+
4348
import com.oracle.truffle.api.CallTarget;
4449
import com.oracle.truffle.api.CompilerDirectives;
4550
import com.oracle.truffle.api.TruffleLanguage;
@@ -67,59 +72,80 @@ public class TRegexTestDummyLanguage extends TruffleLanguage<TRegexTestDummyLang
6772
public static final String NAME = "REGEXDUMMYLANG";
6873
public static final String ID = "regexDummyLang";
6974
public static final String MIME_TYPE = "application/tregexdummy";
70-
public static final String BENCH_PREFIX = "__BENCH__";
71-
public static final String BENCH_CG_PREFIX = "__BENCH_CG__";
7275

7376
@Override
7477
protected CallTarget parse(ParsingRequest parsingRequest) {
75-
String src = parsingRequest.getSource().getCharacters().toString();
76-
if (src.startsWith(BENCH_PREFIX)) {
77-
final Object regex = DummyLanguageContext.get(null).getEnv().parseInternal(
78-
Source.newBuilder(RegexLanguage.ID, "BooleanMatch=true," + src.substring(BENCH_PREFIX.length()), parsingRequest.getSource().getName()).internal(true).build()).call();
78+
OptionValues options = parsingRequest.getOptionValues();
79+
try {
80+
final CallTarget regex = parseRegex(parsingRequest);
81+
switch (options.get(TRegexTestDummyLanguageOptions.Mode)) {
82+
case Test -> {
83+
return regex;
84+
}
85+
case Bench -> {
86+
return new RootNode(this) {
7987

80-
return new RootNode(this) {
88+
private final Object compiledRegex = regex.call();
89+
private final String name = parsingRequest.getSource().getName();
8190

82-
private final Object compiledRegex = regex;
83-
private final String name = parsingRequest.getSource().getName();
91+
@Child RegexBenchNode benchNode = TRegexTestDummyLanguageFactory.RegexBenchNodeGen.create();
8492

85-
@Child RegexBenchNode benchNode = TRegexTestDummyLanguageFactory.RegexBenchNodeGen.create();
93+
@Override
94+
public Object execute(VirtualFrame frame) {
95+
Object[] args = frame.getArguments();
96+
return benchNode.execute(this, compiledRegex, args[0], (int) args[1]);
97+
}
8698

87-
@Override
88-
public Object execute(VirtualFrame frame) {
89-
Object[] args = frame.getArguments();
90-
return benchNode.execute(this, compiledRegex, args[0], (int) args[1]);
99+
@Override
100+
public String toString() {
101+
return name + ' ' + ((RegexObject) compiledRegex).getLabel();
102+
}
103+
}.getCallTarget();
91104
}
105+
case BenchCG -> {
106+
return new RootNode(this) {
92107

93-
@Override
94-
public String toString() {
95-
return name + ' ' + ((RegexObject) compiledRegex).getLabel();
96-
}
97-
}.getCallTarget();
98-
}
99-
if (src.startsWith(BENCH_CG_PREFIX)) {
100-
final Object regex = DummyLanguageContext.get(null).getEnv().parseInternal(
101-
Source.newBuilder(RegexLanguage.ID, src.substring(BENCH_CG_PREFIX.length()), parsingRequest.getSource().getName()).internal(true).build()).call();
102-
return new RootNode(this) {
108+
private final Object compiledRegex = regex.call();
103109

104-
private final Object compiledRegex = regex;
110+
@Child RegexBenchCGNode benchNode = TRegexTestDummyLanguageFactory.RegexBenchCGNodeGen.create();
105111

106-
@Child RegexBenchCGNode benchNode = TRegexTestDummyLanguageFactory.RegexBenchCGNodeGen.create();
107-
108-
@Override
109-
public Object execute(VirtualFrame frame) {
110-
Object[] args = frame.getArguments();
111-
return benchNode.execute(this, compiledRegex, args[0], (int) args[1]);
112+
@Override
113+
public Object execute(VirtualFrame frame) {
114+
Object[] args = frame.getArguments();
115+
return benchNode.execute(this, compiledRegex, args[0], (int) args[1]);
116+
}
117+
}.getCallTarget();
112118
}
113-
}.getCallTarget();
114-
}
115-
try {
116-
return DummyLanguageContext.get(null).getEnv().parseInternal(
117-
Source.newBuilder(RegexLanguage.ID, src, parsingRequest.getSource().getName()).internal(true).build());
119+
default -> throw CompilerDirectives.shouldNotReachHere();
120+
}
118121
} catch (RegexSyntaxException e) {
119122
throw e.withErrorCodeInMessage();
120123
}
121124
}
122125

126+
private static CallTarget parseRegex(ParsingRequest parsingRequest) {
127+
OptionValues optionValues = parsingRequest.getOptionValues();
128+
Source.LiteralBuilder builder = Source.newBuilder(RegexLanguage.ID, parsingRequest.getSource().getCharacters(), parsingRequest.getSource().getName()).internal(true);
129+
for (OptionDescriptor optionDescriptor : optionValues.getDescriptors()) {
130+
if (optionDescriptor.getKey() == TRegexTestDummyLanguageOptions.Mode) {
131+
continue;
132+
}
133+
OptionKey<?> key = optionDescriptor.getKey();
134+
if (optionValues.hasBeenSet(key)) {
135+
builder.option("regex" + optionDescriptor.getName().substring("regexDummyLang".length()), String.valueOf(optionValues.get(key)));
136+
}
137+
}
138+
if (optionValues.get(TRegexTestDummyLanguageOptions.Mode) == TRegexTestDummyLanguageOptions.ExecutionMode.Bench) {
139+
builder.option("regex.BooleanMatch", "true");
140+
}
141+
return DummyLanguageContext.get(null).getEnv().parseInternal(builder.build());
142+
}
143+
144+
@Override
145+
protected OptionDescriptors getSourceOptionDescriptors() {
146+
return new TRegexTestDummyLangOptionDescriptors();
147+
}
148+
123149
@GenerateInline
124150
abstract static class RegexBenchNode extends Node {
125151

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
package com.oracle.truffle.regex.test.dummylang;
42+
43+
import com.oracle.truffle.api.Option;
44+
import org.graalvm.options.OptionCategory;
45+
import org.graalvm.options.OptionKey;
46+
import org.graalvm.options.OptionStability;
47+
48+
@Option.Group(TRegexTestDummyLanguage.ID)
49+
public final class TRegexTestDummyLanguageOptions {
50+
51+
public enum ExecutionMode {
52+
Test,
53+
Bench,
54+
BenchCG
55+
}
56+
57+
@Option(category = OptionCategory.INTERNAL, stability = OptionStability.EXPERIMENTAL, help = "") //
58+
public static final OptionKey<ExecutionMode> Mode = new OptionKey<>(ExecutionMode.Test);
59+
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/jmh/BenchmarkBase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0

0 commit comments

Comments
 (0)