/* validate it */
lex = makeJsonLexContext(result, false);
- pg_parse_json_or_ereport(lex, &nullSemAction);
+ if (!pg_parse_json_or_errsave(lex, &nullSemAction, fcinfo->context))
+ PG_RETURN_NULL();
- /* Internal representation is the same as text, for now */
+ /* Internal representation is the same as text */
PG_RETURN_TEXT_P(result);
}
/* Lex exactly one token from the input and check its type. */
result = json_lex(lex);
if (result != JSON_SUCCESS)
- json_ereport_error(result, lex);
+ json_errsave_error(result, lex, NULL);
tok = lex->token_type;
switch (tok)
{
{
JsonbParseState *parseState;
JsonbValue *res;
+ Node *escontext;
} JsonbInState;
/* unlike with json categories, we need to treat json and jsonb differently */
Oid val_output_func;
} JsonbAggState;
-static inline Datum jsonb_from_cstring(char *json, int len);
-static size_t checkStringLen(size_t len);
+static inline Datum jsonb_from_cstring(char *json, int len, Node *escontext);
+static bool checkStringLen(size_t len, Node *escontext);
static JsonParseErrorType jsonb_in_object_start(void *pstate);
static JsonParseErrorType jsonb_in_object_end(void *pstate);
static JsonParseErrorType jsonb_in_array_start(void *pstate);
{
char *json = PG_GETARG_CSTRING(0);
- return jsonb_from_cstring(json, strlen(json));
+ return jsonb_from_cstring(json, strlen(json), fcinfo->context);
}
/*
else
elog(ERROR, "unsupported jsonb version number %d", version);
- return jsonb_from_cstring(str, nbytes);
+ return jsonb_from_cstring(str, nbytes, NULL);
}
/*
* Turns json string into a jsonb Datum.
*
* Uses the json parser (with hooks) to construct a jsonb.
+ *
+ * If escontext points to an ErrorSaveContext, errors are reported there
+ * instead of being thrown.
*/
static inline Datum
-jsonb_from_cstring(char *json, int len)
+jsonb_from_cstring(char *json, int len, Node *escontext)
{
JsonLexContext *lex;
JsonbInState state;
memset(&sem, 0, sizeof(sem));
lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true);
+ state.escontext = escontext;
sem.semstate = (void *) &state;
sem.object_start = jsonb_in_object_start;
sem.scalar = jsonb_in_scalar;
sem.object_field_start = jsonb_in_object_field_start;
- pg_parse_json_or_ereport(lex, &sem);
+ if (!pg_parse_json_or_errsave(lex, &sem, escontext))
+ return (Datum) 0;
/* after parsing, the item member has the composed jsonb structure */
PG_RETURN_POINTER(JsonbValueToJsonb(state.res));
}
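/*
 * Illustrative sketch, not part of the patch: with the new escontext
 * argument, an internal caller could probe a string for jsonb validity
 * without any error being thrown, by passing a local ErrorSaveContext.
 * The helper name jsonb_is_valid_cstring() is hypothetical, and the
 * sketch assumes nodes/miscnodes.h is included.
 */
static bool
jsonb_is_valid_cstring(char *json)
{
	ErrorSaveContext escontext = {T_ErrorSaveContext};

	(void) jsonb_from_cstring(json, strlen(json), (Node *) &escontext);

	return !SOFT_ERROR_OCCURRED(&escontext);
}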
-static size_t
-checkStringLen(size_t len)
+static bool
+checkStringLen(size_t len, Node *escontext)
{
if (len > JENTRY_OFFLENMASK)
- ereport(ERROR,
+ ereturn(escontext, false,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string too long to represent as jsonb string"),
errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.",
JENTRY_OFFLENMASK)));
- return len;
+ return true;
}
static JsonParseErrorType
Assert(fname != NULL);
v.type = jbvString;
- v.val.string.len = checkStringLen(strlen(fname));
+ v.val.string.len = strlen(fname);
+ if (!checkStringLen(v.val.string.len, _state->escontext))
+ return JSON_SEM_ACTION_FAILED;
v.val.string.val = fname;
_state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v);
case JSON_TOKEN_STRING:
Assert(token != NULL);
v.type = jbvString;
- v.val.string.len = checkStringLen(strlen(token));
+ v.val.string.len = strlen(token);
+ if (!checkStringLen(v.val.string.len, _state->escontext))
+ return JSON_SEM_ACTION_FAILED;
v.val.string.val = token;
break;
case JSON_TOKEN_NUMBER:
*/
Assert(token != NULL);
v.type = jbvNumeric;
- numd = DirectFunctionCall3(numeric_in,
- CStringGetDatum(token),
- ObjectIdGetDatum(InvalidOid),
- Int32GetDatum(-1));
+ if (!DirectInputFunctionCallSafe(numeric_in, token,
+ InvalidOid, -1,
+ _state->escontext,
+ &numd))
+ return JSON_SEM_ACTION_FAILED;
v.val.numeric = DatumGetNumeric(numd);
break;
case JSON_TOKEN_TRUE:
*
* If key_scalar is true, the value is stored as a key, so insist
* it's of an acceptable type, and force it to be a jbvString.
+ *
+ * Note: currently, we assume that result->escontext is NULL and errors
+ * will be thrown.
*/
static void
datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
default:
outputstr = OidOutputFunctionCall(outfuncoid, val);
jb.type = jbvString;
- jb.val.string.len = checkStringLen(strlen(outputstr));
+ jb.val.string.len = strlen(outputstr);
+ (void) checkStringLen(jb.val.string.len, NULL);
jb.val.string.val = outputstr;
break;
}
* shallow clone is sufficient as we aren't going to change any of the
* values, just add the final array end marker.
*/
+ memset(&result, 0, sizeof(JsonbInState));
result.parseState = clone_parse_state(arg->res->parseState);
* going to change any of the values, just add the final object end
* marker.
*/
+ memset(&result, 0, sizeof(JsonbInState));
result.parseState = clone_parse_state(arg->res->parseState);
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/miscnodes.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
static JsonParseErrorType transform_string_values_array_element_start(void *state, bool isnull);
static JsonParseErrorType transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+
/*
- * pg_parse_json_or_ereport
+ * pg_parse_json_or_errsave
*
* This function is like pg_parse_json, except that it does not return a
* JsonParseErrorType. Instead, in case of any failure, this function will
+ * save error data into *escontext if that's an ErrorSaveContext, otherwise
* ereport(ERROR).
+ *
+ * Returns a boolean indicating success or failure (failure will only be
+ * returned when escontext is an ErrorSaveContext).
*/
-void
-pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem)
+bool
+pg_parse_json_or_errsave(JsonLexContext *lex, JsonSemAction *sem,
+ Node *escontext)
{
JsonParseErrorType result;
result = pg_parse_json(lex, sem);
if (result != JSON_SUCCESS)
- json_ereport_error(result, lex);
+ {
+ json_errsave_error(result, lex, escontext);
+ return false;
+ }
+ return true;
}
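/*
 * Illustrative sketch, not part of the patch: the two calling styles the
 * new signature supports.  Passing NULL preserves the old throw-on-error
 * behavior (this is what the pg_parse_json_or_ereport() wrapper macro
 * does); passing an ErrorSaveContext turns a parse failure into a "false"
 * return instead.  validate_json_softly() is a hypothetical helper and
 * assumes nodes/miscnodes.h is included.
 */
static bool
validate_json_softly(JsonLexContext *lex, JsonSemAction *sem)
{
	ErrorSaveContext escontext = {T_ErrorSaveContext};

	return pg_parse_json_or_errsave(lex, sem, (Node *) &escontext);
}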
/*
* Report a JSON error.
*/
void
-json_ereport_error(JsonParseErrorType error, JsonLexContext *lex)
+json_errsave_error(JsonParseErrorType error, JsonLexContext *lex,
+ Node *escontext)
{
if (error == JSON_UNICODE_HIGH_ESCAPE ||
+ error == JSON_UNICODE_UNTRANSLATABLE ||
error == JSON_UNICODE_CODE_POINT_ZERO)
- ereport(ERROR,
+ errsave(escontext,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
errdetail_internal("%s", json_errdetail(error, lex)),
report_json_context(lex)));
+ else if (error == JSON_SEM_ACTION_FAILED)
+ {
+ /* semantic action function had better have reported something */
+ if (!SOFT_ERROR_OCCURRED(escontext))
+ elog(ERROR, "JSON semantic action function did not provide error information");
+ }
else
- ereport(ERROR,
+ errsave(escontext,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail_internal("%s", json_errdetail(error, lex)),
error = json_count_array_elements(_state->lex, &nelements);
if (error != JSON_SUCCESS)
- json_ereport_error(error, _state->lex);
+ json_errsave_error(error, _state->lex, NULL);
if (-_state->path_indexes[lex_level] <= nelements)
_state->path_indexes[lex_level] += nelements;
return true;
}
+/*
+ * Call a directly-named datatype input function, with non-exception
+ * handling of "soft" errors.
+ *
+ * This is like InputFunctionCallSafe, except that it is given a direct
+ * pointer to the C function to call. We assume that that function is
+ * strict. Also, the function cannot be one that needs to
+ * look at FmgrInfo, since there won't be any.
+ */
+bool
+DirectInputFunctionCallSafe(PGFunction func, char *str,
+ Oid typioparam, int32 typmod,
+ fmNodePtr escontext,
+ Datum *result)
+{
+ LOCAL_FCINFO(fcinfo, 3);
+
+ if (str == NULL)
+ {
+ *result = (Datum) 0; /* just return null result */
+ return true;
+ }
+
+ InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, escontext, NULL);
+
+ fcinfo->args[0].value = CStringGetDatum(str);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
+ fcinfo->args[1].isnull = false;
+ fcinfo->args[2].value = Int32GetDatum(typmod);
+ fcinfo->args[2].isnull = false;
+
+ *result = (*func) (fcinfo);
+
+ /* Result value is garbage, and could be null, if an error was reported */
+ if (SOFT_ERROR_OCCURRED(escontext))
+ return false;
+
+ /* Otherwise, shouldn't get null result */
+ if (fcinfo->isnull)
+ elog(ERROR, "input function %p returned NULL", (void *) func);
+
+ return true;
+}
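/*
 * Illustrative sketch, not part of the patch: using
 * DirectInputFunctionCallSafe() to parse a numeric literal without
 * risking a thrown error, mirroring the jsonb_in_scalar() change earlier
 * in this patch.  parse_numeric_softly() is a hypothetical helper and
 * assumes nodes/miscnodes.h and the usual fmgr declarations are in scope.
 */
static bool
parse_numeric_softly(char *str, Numeric *num)
{
	ErrorSaveContext escontext = {T_ErrorSaveContext};
	Datum		numd;

	if (!DirectInputFunctionCallSafe(numeric_in, str,
									 InvalidOid, -1,
									 (Node *) &escontext,
									 &numd))
		return false;			/* e.g. "1e1000000" overflows numeric */

	*num = DatumGetNumeric(numd);
	return true;
}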
+
/*
* Call a previously-looked-up datatype output function.
*
BoolGetDatum(false));
}
+/*
+ * Convert a single Unicode code point into a string in the server encoding.
+ *
+ * Same as pg_unicode_to_server(), except that we don't throw errors,
+ * but simply return false on conversion failure.
+ */
+bool
+pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
+{
+ unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
+ int c_as_utf8_len;
+ int converted_len;
+ int server_encoding;
+
+ /* Fail if invalid Unicode code point */
+ if (!is_valid_unicode_codepoint(c))
+ return false;
+
+ /* Otherwise, if it's in ASCII range, conversion is trivial */
+ if (c <= 0x7F)
+ {
+ s[0] = (unsigned char) c;
+ s[1] = '\0';
+ return true;
+ }
+
+ /* If the server encoding is UTF-8, we just need to reformat the code */
+ server_encoding = GetDatabaseEncoding();
+ if (server_encoding == PG_UTF8)
+ {
+ unicode_to_utf8(c, s);
+ s[pg_utf_mblen(s)] = '\0';
+ return true;
+ }
+
+ /* For all other cases, we must have a conversion function available */
+ if (Utf8ToServerConvProc == NULL)
+ return false;
+
+ /* Construct UTF-8 source string */
+ unicode_to_utf8(c, c_as_utf8);
+ c_as_utf8_len = pg_utf_mblen(c_as_utf8);
+ c_as_utf8[c_as_utf8_len] = '\0';
+
+ /* Convert, but without throwing error if we can't */
+ converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
+ Int32GetDatum(PG_UTF8),
+ Int32GetDatum(server_encoding),
+ CStringGetDatum((char *) c_as_utf8),
+ CStringGetDatum((char *) s),
+ Int32GetDatum(c_as_utf8_len),
+ BoolGetDatum(true)));
+
+ /* Conversion was successful iff it consumed the whole input */
+ return (converted_len == c_as_utf8_len);
+}
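/*
 * Illustrative sketch, not part of the patch: a caller that prefers the
 * no-error variant and supplies its own fallback when the code point has
 * no representation in the server encoding.  As with pg_unicode_to_server(),
 * the output buffer must hold MAX_UNICODE_EQUIVALENT_STRING + 1 bytes.
 * append_codepoint_or_placeholder() is a hypothetical helper and assumes
 * lib/stringinfo.h is included.
 */
static void
append_codepoint_or_placeholder(StringInfo buf, pg_wchar c)
{
	unsigned char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];

	if (pg_unicode_to_server_noerror(c, cbuf))
		appendStringInfoString(buf, (char *) cbuf);
	else
		appendStringInfo(buf, "U+%04X", (unsigned int) c);	/* placeholder */
}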
+
/* convert a multibyte string to a wchar */
int
/*
* Add the represented character to lex->strval. In the
- * backend, we can let pg_unicode_to_server() handle any
- * required character set conversion; in frontend, we can
- * only deal with trivial conversions.
- *
- * Note: pg_unicode_to_server() will throw an error for a
- * conversion failure, rather than returning a failure
- * indication. That seems OK.
+ * backend, we can let pg_unicode_to_server_noerror()
+ * handle any required character set conversion; in
+ * frontend, we can only deal with trivial conversions.
*/
#ifndef FRONTEND
{
char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
- pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
+ return JSON_UNICODE_UNTRANSLATABLE;
appendStringInfoString(lex->strval, cbuf);
}
#else
case JSON_UNICODE_HIGH_ESCAPE:
/* note: this case is only reachable in frontend not backend */
return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
+ case JSON_UNICODE_UNTRANSLATABLE:
+ /* note: this case is only reachable in backend not frontend */
+ return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
+ GetDatabaseEncodingName());
case JSON_UNICODE_HIGH_SURROGATE:
return _("Unicode high surrogate must not follow a high surrogate.");
case JSON_UNICODE_LOW_SURROGATE:
JSON_UNICODE_CODE_POINT_ZERO,
JSON_UNICODE_ESCAPE_FORMAT,
JSON_UNICODE_HIGH_ESCAPE,
+ JSON_UNICODE_UNTRANSLATABLE,
JSON_UNICODE_HIGH_SURROGATE,
JSON_UNICODE_LOW_SURROGATE,
JSON_SEM_ACTION_FAILED /* error should already be reported */
Oid typioparam, int32 typmod,
fmNodePtr escontext,
Datum *result);
+extern bool DirectInputFunctionCallSafe(PGFunction func, char *str,
+ Oid typioparam, int32 typmod,
+ fmNodePtr escontext,
+ Datum *result);
extern Datum OidInputFunctionCall(Oid functionId, char *str,
Oid typioparam, int32 typmod);
extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val);
extern char *pg_server_to_any(const char *s, int len, int encoding);
extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
+extern bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s);
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
/* build a JsonLexContext from a text datum */
extern JsonLexContext *makeJsonLexContext(text *json, bool need_escapes);
-/* try to parse json, and ereport(ERROR) on failure */
-extern void pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem);
+/* try to parse json, and errsave(escontext) on failure */
+extern bool pg_parse_json_or_errsave(JsonLexContext *lex, JsonSemAction *sem,
+ struct Node *escontext);
-/* report an error during json lexing or parsing */
-extern void json_ereport_error(JsonParseErrorType error, JsonLexContext *lex);
+#define pg_parse_json_or_ereport(lex, sem) \
+ (void) pg_parse_json_or_errsave(lex, sem, NULL)
+
+/* save an error during json lexing or parsing */
+extern void json_errsave_error(JsonParseErrorType error, JsonLexContext *lex,
+ struct Node *escontext);
extern uint32 parse_jsonb_index_flags(Jsonb *jb);
extern void iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
DETAIL: Expected JSON value, but found "}".
CONTEXT: JSON data, line 4: ...yveryveryveryveryveryveryveryverylongfieldname":}
-- ERROR missing value for last field
+-- test non-error-throwing input
+select pg_input_is_valid('{"a":true}', 'json');
+ pg_input_is_valid
+-------------------
+ t
+(1 row)
+
+select pg_input_is_valid('{"a":true', 'json');
+ pg_input_is_valid
+-------------------
+ f
+(1 row)
+
+select pg_input_error_message('{"a":true', 'json');
+ pg_input_error_message
+------------------------------------
+ invalid input syntax for type json
+(1 row)
+
--constructors
-- array_to_json
SELECT array_to_json(array(select 1 as a));
null \u0000 escape
(1 row)
+-- soft error for input-time failure
+select pg_input_error_message('{ "a": "\ud83d\ude04\ud83d\udc36" }', 'jsonb');
+ pg_input_error_message
+------------------------
+
+(1 row)
+
-- handling of unicode surrogate pairs
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
-ERROR: conversion between UTF8 and SQL_ASCII is not supported
+ERROR: unsupported Unicode escape sequence
+DETAIL: Unicode escape value could not be translated to the server's encoding SQL_ASCII.
+CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate.
(1 row)
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR: conversion between UTF8 and SQL_ASCII is not supported
+ERROR: unsupported Unicode escape sequence
+DETAIL: Unicode escape value could not be translated to the server's encoding SQL_ASCII.
+CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
-ERROR: conversion between UTF8 and SQL_ASCII is not supported
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
^
+DETAIL: Unicode escape value could not be translated to the server's encoding SQL_ASCII.
+CONTEXT: JSON data, line 1: ...
-- handling of unicode surrogate pairs
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
-ERROR: conversion between UTF8 and SQL_ASCII is not supported
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3...
^
+DETAIL: Unicode escape value could not be translated to the server's encoding SQL_ASCII.
+CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
CONTEXT: JSON data, line 1: { "a":...
-- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
-ERROR: conversion between UTF8 and SQL_ASCII is not supported
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr...
^
+DETAIL: Unicode escape value could not be translated to the server's encoding SQL_ASCII.
+CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
correct_everywhere
-----------------------------
(1 row)
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR: conversion between UTF8 and SQL_ASCII is not supported
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'...
^
+DETAIL: Unicode escape value could not be translated to the server's encoding SQL_ASCII.
+CONTEXT: JSON data, line 1: { "a":...
SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
null \u0000 escape
(1 row)
+-- soft error for input-time failure
+select pg_input_error_message('{ "a": "\ud83d\ude04\ud83d\udc36" }', 'jsonb');
+ pg_input_error_message
+-------------------------------------
+ unsupported Unicode escape sequence
+(1 row)
+
DETAIL: Expected JSON value, but found "}".
CONTEXT: JSON data, line 4: ...yveryveryveryveryveryveryveryverylongfieldname":}
-- ERROR missing value for last field
+-- test non-error-throwing input
+select pg_input_is_valid('{"a":true}', 'jsonb');
+ pg_input_is_valid
+-------------------
+ t
+(1 row)
+
+select pg_input_is_valid('{"a":true', 'jsonb');
+ pg_input_is_valid
+-------------------
+ f
+(1 row)
+
+select pg_input_error_message('{"a":true', 'jsonb');
+ pg_input_error_message
+------------------------------------
+ invalid input syntax for type json
+(1 row)
+
+select pg_input_error_message('{"a":1e1000000}', 'jsonb');
+ pg_input_error_message
+--------------------------------
+ value overflows numeric format
+(1 row)
+
-- make sure jsonb is passed through json generators without being escaped
SELECT array_to_json(ARRAY [jsonb '{"a":1}', jsonb '{"b":[2,3]}']);
array_to_json
"averyveryveryveryveryveryveryveryveryverylongfieldname":}'::json;
-- ERROR missing value for last field
+-- test non-error-throwing input
+select pg_input_is_valid('{"a":true}', 'json');
+select pg_input_is_valid('{"a":true', 'json');
+select pg_input_error_message('{"a":true', 'json');
+
--constructors
-- array_to_json
SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+
+-- soft error for input-time failure
+
+select pg_input_error_message('{ "a": "\ud83d\ude04\ud83d\udc36" }', 'jsonb');
"averyveryveryveryveryveryveryveryveryverylongfieldname":}'::jsonb;
-- ERROR missing value for last field
+-- test non-error-throwing input
+select pg_input_is_valid('{"a":true}', 'jsonb');
+select pg_input_is_valid('{"a":true', 'jsonb');
+select pg_input_error_message('{"a":true', 'jsonb');
+select pg_input_error_message('{"a":1e1000000}', 'jsonb');
+
-- make sure jsonb is passed through json generators without being escaped
SELECT array_to_json(ARRAY [jsonb '{"a":1}', jsonb '{"b":[2,3]}']);