Skip to content

Commit a2780ed

Browse files
perf: optimize parsing in Connection API (#3800)
* Add a simple benchmark for statement parser Also fixes the pom to run the annotation processor so that mvn -Pbenchmark actually is able to run benchmarks. * Optimize AbstractStatementParser.statementStartsWith I found this was taking ~25% of the CPU of pgadapter when running the TPCC benchmark loader, which seems to generate very large DMLs. Previously, it would call split() over the whole string with a limit. Now, it uses Guava's lazy splitter so that it doesn't have to copy the remainder of the string following the second match. For whatever reason, it seems like the previous implementation was doing something much more expensive than just copying the tail. For 100kb long query text, this new implementation is 1600x faster. For short queries it's only a few times faster. Before: Benchmark Mode Cnt Score Error Units StatementParserBenchmark.isQueryTest thrpt 5 1461962.835 ± 340237.573 ops/s StatementParserBenchmark.longQueryTest thrpt 5 2873.150 ± 490.611 ops/s After: Benchmark Mode Cnt Score Error Units StatementParserBenchmark.isQueryTest thrpt 5 4765215.378 ± 132661.232 ops/s StatementParserBenchmark.longQueryTest thrpt 5 4671884.683 ± 486566.506 ops/s * perf: further micro optimizations to parser * fix: remove supportsExplain The `supportsExplain()` method did not actually do anything useful and returned the wrong result. The reason that it was not useful is that: 1. Parsers that do support the EXPLAIN keyword handle these as client-side statements. This means that they never go into the isQuery() method. 2. Parsers that do not support the EXPLAIN keyword cannot do anything with it anyways. * build: register clirr difference --------- Co-authored-by: Knut Olav Løite <[email protected]>
1 parent e97b92e commit a2780ed

File tree

7 files changed

+149
-49
lines changed

7 files changed

+149
-49
lines changed

google-cloud-spanner/clirr-ignored-differences.xml

+18
Original file line numberDiff line numberDiff line change
@@ -927,5 +927,23 @@
927927
<className>com/google/cloud/spanner/connection/ConnectionOptions</className>
928928
<field>VALID_PROPERTIES</field>
929929
</difference>
930+
931+
<!-- Remove supportsExplain() from the parser -->
932+
<difference>
933+
<differenceType>7002</differenceType>
934+
<className>com/google/cloud/spanner/connection/AbstractStatementParser</className>
935+
<method>boolean supportsExplain()</method>
936+
</difference>
937+
<difference>
938+
<differenceType>7002</differenceType>
939+
<className>com/google/cloud/spanner/connection/PostgreSQLStatementParser</className>
940+
<method>boolean supportsExplain()</method>
941+
</difference>
942+
<difference>
943+
<differenceType>7002</differenceType>
944+
<className>com/google/cloud/spanner/connection/SpannerStatementParser</className>
945+
<method>boolean supportsExplain()</method>
946+
</difference>
947+
930948

931949
</differences>

google-cloud-spanner/pom.xml

+13
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,19 @@
517517
</execution>
518518
</executions>
519519
</plugin>
520+
<plugin>
521+
<groupId>org.apache.maven.plugins</groupId>
522+
<artifactId>maven-compiler-plugin</artifactId>
523+
<configuration>
524+
<annotationProcessorPaths>
525+
<path>
526+
<groupId>org.openjdk.jmh</groupId>
527+
<artifactId>jmh-generator-annprocess</artifactId>
528+
<version>1.37</version>
529+
</path>
530+
</annotationProcessorPaths>
531+
</configuration>
532+
</plugin>
520533
</plugins>
521534
</build>
522535
</profile>

google-cloud-spanner/src/main/java/com/google/cloud/spanner/Statement.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import com.google.cloud.spanner.ReadContext.QueryAnalyzeMode;
2323
import com.google.common.base.Preconditions;
24+
import com.google.common.collect.ImmutableMap;
2425
import com.google.spanner.v1.ExecuteSqlRequest.QueryOptions;
2526
import java.io.Serializable;
2627
import java.util.Collections;
@@ -140,7 +141,7 @@ Builder handle(Value value) {
140141

141142
/** Creates a {@code Statement} with the given SQL text {@code sql}. */
142143
public static Statement of(String sql) {
143-
return newBuilder(sql).build();
144+
return new Statement(sql, ImmutableMap.of(), /*queryOptions=*/ null);
144145
}
145146

146147
/** Creates a new statement builder with the SQL text {@code sql}. */

google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java

+27-23
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.google.cloud.spanner.connection.UnitOfWork.CallType;
3232
import com.google.common.annotations.VisibleForTesting;
3333
import com.google.common.base.Preconditions;
34+
import com.google.common.base.Splitter;
3435
import com.google.common.cache.Cache;
3536
import com.google.common.cache.CacheBuilder;
3637
import com.google.common.cache.CacheStats;
@@ -41,6 +42,7 @@
4142
import java.util.Collection;
4243
import java.util.Collections;
4344
import java.util.HashMap;
45+
import java.util.Iterator;
4446
import java.util.Map;
4547
import java.util.Objects;
4648
import java.util.Set;
@@ -511,7 +513,7 @@ ParsedStatement parse(Statement statement, QueryOptions defaultQueryOptions) {
511513
return parsedStatement.copy(statement, defaultQueryOptions);
512514
}
513515

514-
private ParsedStatement internalParse(Statement statement, QueryOptions defaultQueryOptions) {
516+
ParsedStatement internalParse(Statement statement, QueryOptions defaultQueryOptions) {
515517
StatementHintParser statementHintParser =
516518
new StatementHintParser(getDialect(), statement.getSql());
517519
ReadQueryUpdateTransactionOption[] optionsFromHints = EMPTY_OPTIONS;
@@ -521,16 +523,21 @@ private ParsedStatement internalParse(Statement statement, QueryOptions defaultQ
521523
statement.toBuilder().replace(statementHintParser.getSqlWithoutClientSideHints()).build();
522524
optionsFromHints = convertHintsToOptions(statementHintParser.getClientSideStatementHints());
523525
}
526+
// TODO: Qualify statements without removing comments first.
524527
String sql = removeCommentsAndTrim(statement.getSql());
525528
ClientSideStatementImpl client = parseClientSideStatement(sql);
526529
if (client != null) {
527530
return ParsedStatement.clientSideStatement(client, statement, sql);
528-
} else if (isQuery(sql)) {
529-
return ParsedStatement.query(statement, sql, defaultQueryOptions, optionsFromHints);
530-
} else if (isUpdateStatement(sql)) {
531-
return ParsedStatement.update(statement, sql, checkReturningClause(sql), optionsFromHints);
532-
} else if (isDdlStatement(sql)) {
533-
return ParsedStatement.ddl(statement, sql);
531+
} else {
532+
String sqlWithoutHints =
533+
!sql.isEmpty() && sql.charAt(0) == '@' ? removeStatementHint(sql) : sql;
534+
if (isQuery(sqlWithoutHints)) {
535+
return ParsedStatement.query(statement, sql, defaultQueryOptions, optionsFromHints);
536+
} else if (isUpdateStatement(sqlWithoutHints)) {
537+
return ParsedStatement.update(statement, sql, checkReturningClause(sql), optionsFromHints);
538+
} else if (isDdlStatement(sqlWithoutHints)) {
539+
return ParsedStatement.ddl(statement, sql);
540+
}
534541
}
535542
return ParsedStatement.unknown(statement, sql);
536543
}
@@ -610,20 +617,16 @@ public boolean isUpdateStatement(String sql) {
610617
return statementStartsWith(sql, dmlStatements);
611618
}
612619

613-
protected abstract boolean supportsExplain();
614-
615620
private boolean statementStartsWith(String sql, Iterable<String> checkStatements) {
616621
Preconditions.checkNotNull(sql);
617-
String[] tokens = sql.split("\\s+", 2);
618-
int checkIndex = 0;
619-
if (supportsExplain() && tokens[0].equalsIgnoreCase("EXPLAIN")) {
620-
checkIndex = 1;
621-
}
622-
if (tokens.length > checkIndex) {
623-
for (String check : checkStatements) {
624-
if (tokens[checkIndex].equalsIgnoreCase(check)) {
625-
return true;
626-
}
622+
Iterator<String> tokens = Splitter.onPattern("\\s+").split(sql).iterator();
623+
if (!tokens.hasNext()) {
624+
return false;
625+
}
626+
String token = tokens.next();
627+
for (String check : checkStatements) {
628+
if (token.equalsIgnoreCase(check)) {
629+
return true;
627630
}
628631
}
629632
return false;
@@ -929,7 +932,8 @@ int skipQuoted(
929932
appendIfNotNull(result, startQuote);
930933
appendIfNotNull(result, startQuote);
931934
}
932-
while (currentIndex < sql.length()) {
935+
int length = sql.length();
936+
while (currentIndex < length) {
933937
char currentChar = sql.charAt(currentIndex);
934938
if (currentChar == startQuote) {
935939
if (supportsDollarQuotedStrings() && currentChar == DOLLAR) {
@@ -940,7 +944,7 @@ int skipQuoted(
940944
return currentIndex + tag.length() + 2;
941945
}
942946
} else if (supportsEscapeQuoteWithQuote()
943-
&& sql.length() > currentIndex + 1
947+
&& length > currentIndex + 1
944948
&& sql.charAt(currentIndex + 1) == startQuote) {
945949
// This is an escaped quote (e.g. 'foo''bar')
946950
appendIfNotNull(result, currentChar);
@@ -949,7 +953,7 @@ int skipQuoted(
949953
continue;
950954
} else if (isTripleQuoted) {
951955
// Check if this is the end of the triple-quoted string.
952-
if (sql.length() > currentIndex + 2
956+
if (length > currentIndex + 2
953957
&& sql.charAt(currentIndex + 1) == startQuote
954958
&& sql.charAt(currentIndex + 2) == startQuote) {
955959
appendIfNotNull(result, currentChar);
@@ -963,7 +967,7 @@ int skipQuoted(
963967
}
964968
} else if (supportsBackslashEscape()
965969
&& currentChar == BACKSLASH
966-
&& sql.length() > currentIndex + 1
970+
&& length > currentIndex + 1
967971
&& sql.charAt(currentIndex + 1) == startQuote) {
968972
// This is an escaped quote (e.g. 'foo\'bar').
969973
// Note that in raw strings, the \ officially does not start an escape sequence, but the

google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java

+8-16
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,6 @@ Dialect getDialect() {
4646
return Dialect.POSTGRESQL;
4747
}
4848

49-
/**
50-
* Indicates whether the parser supports the {@code EXPLAIN} clause. The PostgreSQL parser does
51-
* not support it.
52-
*/
53-
@Override
54-
protected boolean supportsExplain() {
55-
return false;
56-
}
57-
5849
@Override
5950
boolean supportsNestedComments() {
6051
return true;
@@ -125,7 +116,8 @@ String removeCommentsAndTrimInternal(String sql) {
125116
int multiLineCommentStartIdx = -1;
126117
StringBuilder res = new StringBuilder(sql.length());
127118
int index = 0;
128-
while (index < sql.length()) {
119+
int length = sql.length();
120+
while (index < length) {
129121
char c = sql.charAt(index);
130122
if (isInSingleLineComment) {
131123
if (c == '\n') {
@@ -134,34 +126,34 @@ String removeCommentsAndTrimInternal(String sql) {
134126
res.append(c);
135127
}
136128
} else if (multiLineCommentLevel > 0) {
137-
if (sql.length() > index + 1 && c == ASTERISK && sql.charAt(index + 1) == SLASH) {
129+
if (length > index + 1 && c == ASTERISK && sql.charAt(index + 1) == SLASH) {
138130
multiLineCommentLevel--;
139131
if (multiLineCommentLevel == 0) {
140-
if (!whitespaceBeforeOrAfterMultiLineComment && (sql.length() > index + 2)) {
132+
if (!whitespaceBeforeOrAfterMultiLineComment && (length > index + 2)) {
141133
whitespaceBeforeOrAfterMultiLineComment =
142134
Character.isWhitespace(sql.charAt(index + 2));
143135
}
144136
// If the multiline comment does not have any whitespace before or after it, and it is
145137
// neither at the start nor at the end of SQL string, append an extra space.
146138
if (!whitespaceBeforeOrAfterMultiLineComment
147139
&& (multiLineCommentStartIdx != 0)
148-
&& (index != sql.length() - 2)) {
140+
&& (index != length - 2)) {
149141
res.append(' ');
150142
}
151143
}
152144
index++;
153-
} else if (sql.length() > index + 1 && c == SLASH && sql.charAt(index + 1) == ASTERISK) {
145+
} else if (length > index + 1 && c == SLASH && sql.charAt(index + 1) == ASTERISK) {
154146
multiLineCommentLevel++;
155147
index++;
156148
}
157149
} else {
158150
// Check for -- which indicates the start of a single-line comment.
159-
if (sql.length() > index + 1 && c == HYPHEN && sql.charAt(index + 1) == HYPHEN) {
151+
if (length > index + 1 && c == HYPHEN && sql.charAt(index + 1) == HYPHEN) {
160152
// This is a single line comment.
161153
isInSingleLineComment = true;
162154
index += 2;
163155
continue;
164-
} else if (sql.length() > index + 1 && c == SLASH && sql.charAt(index + 1) == ASTERISK) {
156+
} else if (length > index + 1 && c == SLASH && sql.charAt(index + 1) == ASTERISK) {
165157
multiLineCommentLevel++;
166158
if (index >= 1) {
167159
whitespaceBeforeOrAfterMultiLineComment = Character.isWhitespace(sql.charAt(index - 1));

google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java

-9
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,6 @@ Dialect getDialect() {
4646
return Dialect.GOOGLE_STANDARD_SQL;
4747
}
4848

49-
/**
50-
* Indicates whether the parser supports the {@code EXPLAIN} clause. The Spanner parser does
51-
* support it.
52-
*/
53-
@Override
54-
protected boolean supportsExplain() {
55-
return true;
56-
}
57-
5849
@Override
5950
boolean supportsNestedComments() {
6051
return false;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Copyright 2025 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner.connection;
18+
19+
import com.google.cloud.spanner.Dialect;
20+
import com.google.cloud.spanner.Statement;
21+
import com.google.cloud.spanner.connection.AbstractStatementParser.ParsedStatement;
22+
import com.google.spanner.v1.ExecuteSqlRequest.QueryOptions;
23+
import org.openjdk.jmh.annotations.Benchmark;
24+
import org.openjdk.jmh.annotations.Fork;
25+
import org.openjdk.jmh.annotations.Measurement;
26+
import org.openjdk.jmh.annotations.Warmup;
27+
28+
@Fork(value = 1, warmups = 0)
29+
@Warmup(iterations = 1, time = 5)
30+
@Measurement(iterations = 5, time = 5)
31+
public class StatementParserBenchmark {
32+
private static final Dialect dialect = Dialect.POSTGRESQL;
33+
private static final AbstractStatementParser PARSER =
34+
AbstractStatementParser.getInstance(dialect);
35+
36+
private static final String LONG_QUERY_TEXT =
37+
generateLongStatement("SELECT * FROM foo WHERE 1", 100 * 1024); // 100kb
38+
39+
private static final String LONG_DML_TEXT =
40+
generateLongStatement("update foo set bar=1 WHERE 1", 100 * 1024); // 100kb
41+
42+
/** Generates a long SQL-looking string. */
43+
private static String generateLongStatement(String prefix, int length) {
44+
StringBuilder sb = new StringBuilder(length + 50);
45+
sb.append(prefix);
46+
while (sb.length() < length) {
47+
sb.append(" OR abcdefghijklmnopqrstuvwxyz='abcdefghijklmnopqrstuvwxyz'");
48+
}
49+
return sb.toString();
50+
}
51+
52+
@Benchmark
53+
public ParsedStatement isQueryTest() {
54+
return PARSER.internalParse(
55+
Statement.of("CREATE TABLE FOO (ID INT64, NAME STRING(100)) PRIMARY KEY (ID)"),
56+
QueryOptions.getDefaultInstance());
57+
}
58+
59+
@Benchmark
60+
public ParsedStatement longQueryTest() {
61+
return PARSER.internalParse(Statement.of(LONG_QUERY_TEXT), QueryOptions.getDefaultInstance());
62+
}
63+
64+
@Benchmark
65+
public ParsedStatement longDmlTest() {
66+
return PARSER.internalParse(Statement.of(LONG_DML_TEXT), QueryOptions.getDefaultInstance());
67+
}
68+
69+
public static void main(String[] args) throws Exception {
70+
for (int i = 0; i < 100000; i++) {
71+
if (PARSER.internalParse(Statement.of(LONG_QUERY_TEXT), QueryOptions.getDefaultInstance())
72+
== null) {
73+
throw new AssertionError();
74+
}
75+
if (PARSER.internalParse(Statement.of(LONG_DML_TEXT), QueryOptions.getDefaultInstance())
76+
== null) {
77+
throw new AssertionError();
78+
}
79+
}
80+
}
81+
}

0 commit comments

Comments
 (0)