Skip to content

Commit 7bf61f2

Browse files
committed
GH-3281: Update SPARQL/ARQ to check RDF Strings
1 parent 9f8d94b commit 7bf61f2

File tree

7 files changed

+24
-16
lines changed

7 files changed

+24
-16
lines changed

jena-arq/Grammar/arq.jj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ String VersionSpecifier() : { Token t ; String version ; }
9393
| t = <STRING_LITERAL2> { version = stripQuotes(t.image) ; }
9494
)
9595
{
96-
checkString(version, t.beginLine, t.beginColumn) ;
9796
version = unescapeStr(version, t.beginLine, t.beginColumn) ;
97+
checkRDFString(version, t.beginLine, t.beginColumn) ;
9898
return version;
9999
}
100100
}
@@ -1933,7 +1933,7 @@ String String() : { Token t ; String lex ; }
19331933
| t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
19341934
)
19351935
{ lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
1936-
checkString(lex, t.beginLine, t.beginColumn) ;
1936+
checkRDFString(lex, t.beginLine, t.beginColumn) ;
19371937
return lex ;
19381938
}
19391939
}

jena-arq/Grammar/main.jj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,8 @@ String VersionSpecifier() : { Token t ; String version ; }
188188
| t = <STRING_LITERAL2> { version = stripQuotes(t.image) ; }
189189
)
190190
{
191-
checkString(version, t.beginLine, t.beginColumn) ;
192191
version = unescapeStr(version, t.beginLine, t.beginColumn) ;
192+
checkRDFString(version, t.beginLine, t.beginColumn) ;
193193
return version;
194194
}
195195
}
@@ -2631,7 +2631,7 @@ String String() : { Token t ; String lex ; }
26312631
| t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
26322632
)
26332633
{ lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
2634-
checkString(lex, t.beginLine, t.beginColumn) ;
2634+
checkRDFString(lex, t.beginLine, t.beginColumn) ;
26352635
return lex ;
26362636
}
26372637
}

jena-arq/Grammar/sparql_12.jj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ String VersionSpecifier() : { Token t ; String version ; }
8383
| t = <STRING_LITERAL2> { version = stripQuotes(t.image) ; }
8484
)
8585
{
86-
checkString(version, t.beginLine, t.beginColumn) ;
8786
version = unescapeStr(version, t.beginLine, t.beginColumn) ;
87+
checkRDFString(version, t.beginLine, t.beginColumn) ;
8888
return version;
8989
}
9090
}
@@ -1646,7 +1646,7 @@ String String() : { Token t ; String lex ; }
16461646
| t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
16471647
)
16481648
{ lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
1649-
checkString(lex, t.beginLine, t.beginColumn) ;
1649+
checkRDFString(lex, t.beginLine, t.beginColumn) ;
16501650
return lex ;
16511651
}
16521652
}

jena-arq/src/main/java/org/apache/jena/sparql/lang/QueryParserBase.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,21 @@ protected Node stripSign(Node node) {
212212
return NodeFactory.createLiteral(lex, lang, dt);
213213
}
214214

215-
// Because of Java (Java strings have surrogate pairs) we only detect singleton surrogates.
216-
protected void checkString(String string, int line, int column) {
215+
/**
216+
* Apply any checks for "RDF String" to a string that has already had escape processing applied.
217+
* An RDF String is a sequence of codepoints in the range U+0000 to U+10FFFF, excluding surrogates.
218+
* Because this is Java, where strings may contain surrogate pairs, we check that there are no unpaired surrogates.
219+
* A surrogate pair is high-low.
220+
*/
221+
protected static void checkRDFString(String string, int line, int column) {
217222
// Checks for unpaired (bare) surrogates; correctly paired surrogates are allowed.
218223
for ( int i = 0; i < string.length(); i++ ) {
219224
// Not "codePointAt" which does surrogate processing.
220225
char ch = string.charAt(i);
221226

227+
if ( ! Character.isValidCodePoint(ch) )
228+
throw new QueryParseException(String.format("Illegal code point in \\U sequence value: 0x%08X", ch), line, column);
229+
222230
// Check surrogate pairs are in pairs. Pairs are high-low.
223231
if ( Character.isLowSurrogate(ch) )
224232
throw new QueryParseException("Bad surrogate pair (low surrogate without high surrogate)", line, column);
@@ -369,7 +377,7 @@ protected String resolveQuotedIRI(String iriStr, int line, int column) {
369377
iriStr = stripQuotes(iriStr);
370378
iriStr = unescapeUnicode(iriStr, line, column);
371379
// Check for Unicode surrogates
372-
checkString(iriStr, line, column);
380+
checkRDFString(iriStr, line, column);
373381
return resolveIRI(iriStr, line, column);
374382
}
375383

jena-arq/src/main/java/org/apache/jena/sparql/lang/arq/javacc/ARQParser.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ final public void Prologue() throws ParseException {
149149
jj_consume_token(-1);
150150
throw new ParseException();
151151
}
152-
checkString(version, t.beginLine, t.beginColumn) ;
153-
version = unescapeStr(version, t.beginLine, t.beginColumn) ;
152+
version = unescapeStr(version, t.beginLine, t.beginColumn) ;
153+
checkRDFString(version, t.beginLine, t.beginColumn) ;
154154
{if ("" != null) return version;}
155155
throw new Error("Missing return statement in function");
156156
}
@@ -7520,7 +7520,7 @@ final public Node BooleanLiteral() throws ParseException {
75207520
throw new ParseException();
75217521
}
75227522
lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
7523-
checkString(lex, t.beginLine, t.beginColumn) ;
7523+
checkRDFString(lex, t.beginLine, t.beginColumn) ;
75247524
{if ("" != null) return lex ;}
75257525
throw new Error("Missing return statement in function");
75267526
}

jena-arq/src/main/java/org/apache/jena/sparql/lang/sparql_12/javacc/SPARQLParser12.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@ final public void Prologue() throws ParseException {
128128
jj_consume_token(-1);
129129
throw new ParseException();
130130
}
131-
checkString(version, t.beginLine, t.beginColumn) ;
132-
version = unescapeStr(version, t.beginLine, t.beginColumn) ;
131+
version = unescapeStr(version, t.beginLine, t.beginColumn) ;
132+
checkRDFString(version, t.beginLine, t.beginColumn) ;
133133
{if ("" != null) return version;}
134134
throw new Error("Missing return statement in function");
135135
}
@@ -5889,7 +5889,7 @@ final public Node BooleanLiteral() throws ParseException {
58895889
throw new ParseException();
58905890
}
58915891
lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
5892-
checkString(lex, t.beginLine, t.beginColumn) ;
5892+
checkRDFString(lex, t.beginLine, t.beginColumn) ;
58935893
{if ("" != null) return lex ;}
58945894
throw new Error("Missing return statement in function");
58955895
}

jena-arq/src/test/java/org/apache/jena/sparql/syntax/TestQueryParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ public void syntax_unicode_escaped_surrogate_strings() {
7070

7171
@Test
7272
public void syntax_unicode_surrogate_pair_by_unicode_escape() {
73-
// Allow - because Java strings may have surrogate pairs so we allow then in unicode escapes if paired.
73+
// Allow - because Java strings may have surrogate pairs so we allow them in unicode escapes if paired.
7474
testParse("SELECT * { ?s ?p '\\uD801\\uDC37'}");
7575

7676
// QueryParseException ex = assertThrows(QueryParseException.class, ()->testParse("SELECT * { ?s ?p '\\uD801\\uDC37'}"));

0 commit comments

Comments
 (0)