* jsonpath_gram.y
* Grammar definitions for jsonpath datatype
*
+ * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
+ *
* Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
int jsonpath_yyparse(JsonPathParseResult **result);
void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
-static JsonPathParseItem *makeItemType(int type);
+static JsonPathParseItem *makeItemType(JsonPathItemType type);
static JsonPathParseItem *makeItemString(JsonPathString *s);
static JsonPathParseItem *makeItemVariable(JsonPathString *s);
static JsonPathParseItem *makeItemKey(JsonPathString *s);
static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
static JsonPathParseItem *makeItemBool(bool val);
-static JsonPathParseItem *makeItemBinary(int type, JsonPathParseItem *la,
+static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
+ JsonPathParseItem *la,
JsonPathParseItem *ra);
-static JsonPathParseItem *makeItemUnary(int type, JsonPathParseItem *a);
+static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
+ JsonPathParseItem *a);
static JsonPathParseItem *makeItemList(List *list);
static JsonPathParseItem *makeIndexArray(List *list);
static JsonPathParseItem *makeAny(int first, int last);
%union {
JsonPathString str;
- List *elems; /* list of JsonPathParseItem */
- List *indexs; /* list of integers */
- JsonPathParseItem *value;
+ List *elems; /* list of JsonPathParseItem */
+ List *indexs; /* list of integers */
+ JsonPathParseItem *value;
JsonPathParseResult *result;
JsonPathItemType optype;
bool boolean;
;
delimited_predicate:
- '(' predicate ')' { $$ = $2; }
+ '(' predicate ')' { $$ = $2; }
| EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); }
;
| predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); }
| predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); }
| NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); }
- | '(' predicate ')' IS_P UNKNOWN_P { $$ = makeItemUnary(jpiIsUnknown, $2); }
+ | '(' predicate ')' IS_P UNKNOWN_P
+ { $$ = makeItemUnary(jpiIsUnknown, $2); }
| expr STARTS_P WITH_P starts_with_initial
- { $$ = makeItemBinary(jpiStartsWith, $1, $4); }
+ { $$ = makeItemBinary(jpiStartsWith, $1, $4); }
| expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
{ $$ = makeItemLikeRegex($1, &$3, &$5); }
any_path:
ANY_P { $$ = makeAny(0, -1); }
| ANY_P '{' any_level '}' { $$ = makeAny($3, $3); }
- | ANY_P '{' any_level TO_P any_level '}' { $$ = makeAny($3, $5); }
+ | ANY_P '{' any_level TO_P any_level '}'
+ { $$ = makeAny($3, $5); }
;
accessor_op:
;
%%
-static JsonPathParseItem*
-makeItemType(int type)
+/*
+ * The helper functions below allocate and fill JsonPathParseItem's of various
+ * types.
+ */
+
+static JsonPathParseItem *
+makeItemType(JsonPathItemType type)
{
- JsonPathParseItem* v = palloc(sizeof(*v));
+ JsonPathParseItem *v = palloc(sizeof(*v));
CHECK_FOR_INTERRUPTS();
return v;
}
-static JsonPathParseItem*
+static JsonPathParseItem *
makeItemString(JsonPathString *s)
{
- JsonPathParseItem *v;
+ JsonPathParseItem *v;
if (s == NULL)
{
static JsonPathParseItem *
makeItemVariable(JsonPathString *s)
{
- JsonPathParseItem *v;
+ JsonPathParseItem *v;
v = makeItemType(jpiVariable);
v->value.string.val = s->val;
static JsonPathParseItem *
makeItemKey(JsonPathString *s)
{
- JsonPathParseItem *v;
+ JsonPathParseItem *v;
v = makeItemString(s);
v->type = jpiKey;
static JsonPathParseItem *
makeItemNumeric(JsonPathString *s)
{
- JsonPathParseItem *v;
+ JsonPathParseItem *v;
v = makeItemType(jpiNumeric);
v->value.numeric =
static JsonPathParseItem *
makeItemBool(bool val)
{
- JsonPathParseItem *v = makeItemType(jpiBool);
+ JsonPathParseItem *v = makeItemType(jpiBool);
v->value.boolean = val;
}
static JsonPathParseItem *
-makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra)
+makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
{
JsonPathParseItem *v = makeItemType(type);
}
static JsonPathParseItem *
-makeItemUnary(int type, JsonPathParseItem* a)
+makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
{
JsonPathParseItem *v;
static JsonPathParseItem *
makeItemList(List *list)
{
- JsonPathParseItem *head, *end;
- ListCell *cell = list_head(list);
+ JsonPathParseItem *head,
+ *end;
+ ListCell *cell = list_head(list);
head = end = (JsonPathParseItem *) lfirst(cell);
static JsonPathParseItem *
makeIndexArray(List *list)
{
- JsonPathParseItem *v = makeItemType(jpiIndexArray);
- ListCell *cell;
+ JsonPathParseItem *v = makeItemType(jpiIndexArray);
+ ListCell *cell;
int i = 0;
Assert(list_length(list) > 0);
foreach(cell, list)
{
- JsonPathParseItem *jpi = lfirst(cell);
+ JsonPathParseItem *jpi = lfirst(cell);
Assert(jpi->type == jpiSubscript);
static JsonPathParseItem *
makeAny(int first, int last)
{
- JsonPathParseItem *v = makeItemType(jpiAny);
+ JsonPathParseItem *v = makeItemType(jpiAny);
v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
JsonPathString *flags)
{
- JsonPathParseItem *v = makeItemType(jpiLikeRegex);
- int i;
- int cflags = REG_ADVANCED;
+ JsonPathParseItem *v = makeItemType(jpiLikeRegex);
+ int i;
+ int cflags = REG_ADVANCED;
v->value.like_regex.expr = expr;
v->value.like_regex.pattern = pattern->val;
return v;
}
+/*
+ * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
+ * unavoidable because jsonpath_gram does not create a .h file to export its
+ * token symbols. If these files ever grow large enough to be worth compiling
+ * separately, that could be fixed; but for now it seems like useless
+ * complication.
+ */
+
#include "jsonpath_scan.c"
* jsonpath_scan.l
* Lexical parser for jsonpath datatype
*
+ * Splits jsonpath string into tokens represented as JsonPathString structs.
+ * Decodes unicode and hex escaped strings.
+ *
* Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
static JsonPathString scanstring;
-/* No reason to constrain amount of data slurped */
-/* #define YY_READ_BUF_SIZE 16777216 */
-
/* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;
static void addstring(bool init, char *s, int l);
static void addchar(bool init, char s);
-static int checkSpecialVal(void); /* examine scanstring for the special
- * value */
-
+static enum yytokentype checkKeyword(void);
static void parseUnicode(char *s, int l);
static void parseHexChars(char *s, int l);
%option noyyrealloc
%option noyyfree
-%x xQUOTED
-%x xNONQUOTED
-%x xVARQUOTED
-%x xSINGLEQUOTED
-%x xCOMMENT
+/*
+ * We use exclusive states for quoted, single-quoted and non-quoted strings,
+ * quoted variable names and C-style comments.
+ * Exclusive states:
+ * <xq> - quoted strings
+ * <xnq> - non-quoted strings
+ * <xvq> - quoted variable names
+ * <xsq> - single-quoted strings
+ * <xc> - C-style comment
+ */
+
+%x xq
+%x xnq
+%x xvq
+%x xsq
+%x xc
special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
hex_char \\x{hex_dig}{2}
-
%%
-<INITIAL>\&\& { return AND_P; }
-
-<INITIAL>\|\| { return OR_P; }
-
-<INITIAL>\! { return NOT_P; }
-
-<INITIAL>\*\* { return ANY_P; }
-
-<INITIAL>\< { return LESS_P; }
-
-<INITIAL>\<\= { return LESSEQUAL_P; }
-
-<INITIAL>\=\= { return EQUAL_P; }
-
-<INITIAL>\<\> { return NOTEQUAL_P; }
+<xnq>{any}+ {
+ addstring(false, yytext, yyleng);
+ }
-<INITIAL>\!\= { return NOTEQUAL_P; }
+<xnq>{blank}+ {
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return checkKeyword();
+ }
-<INITIAL>\>\= { return GREATEREQUAL_P; }
-<INITIAL>\> { return GREATER_P; }
+<xnq>\/\* {
+ yylval->str = scanstring;
+ BEGIN xc;
+ }
-<INITIAL>\${any}+ {
- addstring(true, yytext + 1, yyleng - 1);
- addchar(false, '\0');
+<xnq>({special}|\"|\') {
yylval->str = scanstring;
- return VARIABLE_P;
+ yyless(0);
+ BEGIN INITIAL;
+ return checkKeyword();
}
-<INITIAL>\$\" {
- addchar(true, '\0');
- BEGIN xVARQUOTED;
+<xnq><<EOF>> {
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return checkKeyword();
}
-<INITIAL>{special} { return *yytext; }
+<xnq,xq,xvq,xsq>\\[\"\'\\] { addchar(false, yytext[1]); }
-<INITIAL>{blank}+ { /* ignore */ }
+<xnq,xq,xvq,xsq>\\b { addchar(false, '\b'); }
-<INITIAL>\/\* {
- addchar(true, '\0');
- BEGIN xCOMMENT;
- }
+<xnq,xq,xvq,xsq>\\f { addchar(false, '\f'); }
-<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ {
- addstring(true, yytext, yyleng);
- addchar(false, '\0');
- yylval->str = scanstring;
- return NUMERIC_P;
- }
+<xnq,xq,xvq,xsq>\\n { addchar(false, '\n'); }
-<INITIAL>\.[0-9]+[eE][+-]?[0-9]+ /* float */ {
- addstring(true, yytext, yyleng);
- addchar(false, '\0');
- yylval->str = scanstring;
- return NUMERIC_P;
- }
+<xnq,xq,xvq,xsq>\\r { addchar(false, '\r'); }
-<INITIAL>([0-9]+)?\.[0-9]+ {
- addstring(true, yytext, yyleng);
- addchar(false, '\0');
- yylval->str = scanstring;
- return NUMERIC_P;
- }
+<xnq,xq,xvq,xsq>\\t { addchar(false, '\t'); }
-<INITIAL>[0-9]+ {
- addstring(true, yytext, yyleng);
- addchar(false, '\0');
- yylval->str = scanstring;
- return INT_P;
- }
+<xnq,xq,xvq,xsq>\\v { addchar(false, '\v'); }
-<INITIAL>{any}+ {
- addstring(true, yytext, yyleng);
- BEGIN xNONQUOTED;
- }
+<xnq,xq,xvq,xsq>{unicode}+ { parseUnicode(yytext, yyleng); }
-<INITIAL>\" {
- addchar(true, '\0');
- BEGIN xQUOTED;
- }
+<xnq,xq,xvq,xsq>{hex_char}+ { parseHexChars(yytext, yyleng); }
-<INITIAL>\' {
- addchar(true, '\0');
- BEGIN xSINGLEQUOTED;
- }
+<xnq,xq,xvq,xsq>\\x { yyerror(NULL, "Hex character sequence is invalid"); }
-<INITIAL>\\ {
- yyless(0);
- addchar(true, '\0');
- BEGIN xNONQUOTED;
- }
+<xnq,xq,xvq,xsq>\\u { yyerror(NULL, "Unicode sequence is invalid"); }
-<xNONQUOTED>{any}+ {
- addstring(false, yytext, yyleng);
- }
+<xnq,xq,xvq,xsq>\\. { yyerror(NULL, "Escape sequence is invalid"); }
-<xNONQUOTED>{blank}+ {
- yylval->str = scanstring;
- BEGIN INITIAL;
- return checkSpecialVal();
- }
+<xnq,xq,xvq,xsq>\\ { yyerror(NULL, "Unexpected end after backslash"); }
+<xq,xvq,xsq><<EOF>> { yyerror(NULL, "Unexpected end of quoted string"); }
-<xNONQUOTED>\/\* {
+<xq>\" {
yylval->str = scanstring;
- BEGIN xCOMMENT;
+ BEGIN INITIAL;
+ return STRING_P;
}
-<xNONQUOTED>({special}|\"|\') {
+<xvq>\" {
yylval->str = scanstring;
- yyless(0);
BEGIN INITIAL;
- return checkSpecialVal();
+ return VARIABLE_P;
}
-<xNONQUOTED><<EOF>> {
+<xsq>\' {
yylval->str = scanstring;
BEGIN INITIAL;
- return checkSpecialVal();
+ return STRING_P;
}
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\] { addchar(false, yytext[1]); }
+<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); }
+
+<xsq>[^\\\']+ { addstring(false, yytext, yyleng); }
+
+<xc>\*\/ { BEGIN INITIAL; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b { addchar(false, '\b'); }
+<xc>[^\*]+ { }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f { addchar(false, '\f'); }
+<xc>\* { }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n { addchar(false, '\n'); }
+<xc><<EOF>> { yyerror(NULL, "Unexpected end of comment"); }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r { addchar(false, '\r'); }
+\&\& { return AND_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t { addchar(false, '\t'); }
+\|\| { return OR_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v { addchar(false, '\v'); }
+\! { return NOT_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+ { parseUnicode(yytext, yyleng); }
+\*\* { return ANY_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+ { parseHexChars(yytext, yyleng); }
+\< { return LESS_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x { yyerror(NULL, "Hex character sequence is invalid"); }
+\<\= { return LESSEQUAL_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u { yyerror(NULL, "Unicode sequence is invalid"); }
+\=\= { return EQUAL_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\. { yyerror(NULL, "Escape sequence is invalid"); }
+\<\> { return NOTEQUAL_P; }
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\ { yyerror(NULL, "Unexpected end after backslash"); }
+\!\= { return NOTEQUAL_P; }
-<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>> { yyerror(NULL, "Unexpected end of quoted string"); }
+\>\= { return GREATEREQUAL_P; }
-<xQUOTED>\" {
+\> { return GREATER_P; }
+
+\${any}+ {
+ addstring(true, yytext + 1, yyleng - 1);
+ addchar(false, '\0');
yylval->str = scanstring;
- BEGIN INITIAL;
- return STRING_P;
+ return VARIABLE_P;
+ }
+
+\$\" {
+ addchar(true, '\0');
+ BEGIN xvq;
}
-<xVARQUOTED>\" {
+{special} { return *yytext; }
+
+{blank}+ { /* ignore */ }
+
+\/\* {
+ addchar(true, '\0');
+ BEGIN xc;
+ }
+
+[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
yylval->str = scanstring;
- BEGIN INITIAL;
- return VARIABLE_P;
+ return NUMERIC_P;
}
-<xSINGLEQUOTED>\' {
+\.[0-9]+[eE][+-]?[0-9]+ { /* float */
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
yylval->str = scanstring;
- BEGIN INITIAL;
- return STRING_P;
+ return NUMERIC_P;
}
-<xQUOTED,xVARQUOTED>[^\\\"]+ { addstring(false, yytext, yyleng); }
+([0-9]+)?\.[0-9]+ {
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return NUMERIC_P;
+ }
-<xSINGLEQUOTED>[^\\\']+ { addstring(false, yytext, yyleng); }
+[0-9]+ {
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return INT_P;
+ }
-<INITIAL><<EOF>> { yyterminate(); }
+{any}+ {
+ addstring(true, yytext, yyleng);
+ BEGIN xnq;
+ }
-<xCOMMENT>\*\/ { BEGIN INITIAL; }
+\" {
+ addchar(true, '\0');
+ BEGIN xq;
+ }
-<xCOMMENT>[^\*]+ { }
+\' {
+ addchar(true, '\0');
+ BEGIN xsq;
+ }
-<xCOMMENT>\* { }
+\\ {
+ yyless(0);
+ addchar(true, '\0');
+ BEGIN xnq;
+ }
-<xCOMMENT><<EOF>> { yyerror(NULL, "Unexpected end of comment"); }
+<<EOF>> { yyterminate(); }
%%
* Array of key words should be sorted by length and then
* alphabetical order
*/
-
static const JsonPathKeyword keywords[] = {
{ 2, false, IS_P, "is"},
{ 2, false, TO_P, "to"},
{ 10,false, LIKE_REGEX_P, "like_regex"},
};
-static int
-checkSpecialVal()
+/* Check if current scanstring value is a keyword */
+static enum yytokentype
+checkKeyword()
{
int res = IDENT_P;
int diff;
if (scanstring.len > keywords[lengthof(keywords) - 1].len)
return res;
- while(StopLow < StopHigh)
+ while (StopLow < StopHigh)
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
pfree(scanbuf);
}
+/*
+ * Resize scanstring so that it can append string of given length.
+ * Reinitialize if required.
+ */
static void
-addstring(bool init, char *s, int l)
+resizeString(bool init, int appendLen)
{
if (init)
{
- scanstring.total = 32;
- scanstring.val = palloc(scanstring.total);
+ scanstring.total = Max(32, appendLen);
+ scanstring.val = (char *) palloc(scanstring.total);
scanstring.len = 0;
}
-
- if (s && l)
+ else
{
- while(scanstring.len + l + 1 >= scanstring.total)
+ if (scanstring.len + appendLen >= scanstring.total)
{
- scanstring.total *= 2;
+ while (scanstring.len + appendLen >= scanstring.total)
+ scanstring.total *= 2;
scanstring.val = repalloc(scanstring.val, scanstring.total);
}
-
- memcpy(scanstring.val + scanstring.len, s, l);
- scanstring.len += l;
}
}
+/* Add set of bytes at "s" of length "l" to scanstring */
static void
-addchar(bool init, char s)
+addstring(bool init, char *s, int l)
{
- if (init)
- {
- scanstring.total = 32;
- scanstring.val = palloc(scanstring.total);
- scanstring.len = 0;
- }
- else if(scanstring.len + 1 >= scanstring.total)
- {
- scanstring.total *= 2;
- scanstring.val = repalloc(scanstring.val, scanstring.total);
- }
+ resizeString(init, l + 1);
+ memcpy(scanstring.val + scanstring.len, s, l);
+ scanstring.len += l;
+}
- scanstring.val[ scanstring.len ] = s;
- if (s != '\0')
+/* Add single byte "c" to scanstring */
+static void
+addchar(bool init, char c)
+{
+ resizeString(init, 1);
+ scanstring.val[scanstring.len] = c;
+ if (c != '\0')
scanstring.len++;
}
+/* Interface to jsonpath parser */
JsonPathParseResult *
parsejsonpath(const char *str, int len)
{
jsonpath_scanner_init(str, len);
- if (jsonpath_yyparse((void*)&parseresult) != 0)
+ if (jsonpath_yyparse((void *) &parseresult) != 0)
jsonpath_yyerror(NULL, "bugus input");
jsonpath_scanner_finish();
return parseresult;
}
+/* Turn hex character into integer */
static int
hexval(char c)
{
return 0; /* not reached */
}
+/* Add given unicode character to scanstring */
static void
addUnicodeChar(int ch)
{
}
}
+/* Add unicode character and process its hi surrogate */
static void
addUnicode(int ch, int *hi_surrogate)
{
}
}
+/* Parse sequence of hex-encoded characters */
static void
parseHexChars(char *s, int l)
{
for (i = 0; i < l / 4; i++)
{
- int ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]);
+ int ch = (hexval(s[i * 4 + 2]) << 4) |
+ hexval(s[i * 4 + 3]);
addUnicodeChar(ch);
}