integer, -- destination encoding ID
cstring, -- source string (null terminated C string)
internal, -- destination (fill with a null terminated C string)
- integer -- source string length
-) RETURNS void;
-</programlisting></para>
+ integer, -- source string length
+ boolean -- if true, don't throw an error if conversion fails
+) RETURNS integer;
+</programlisting>
+ The return value is the number of source bytes that were successfully
+ converted. If the last argument is false, the function must throw an
+ error on invalid input, and the return value is always equal to the
+ source string length.
+ </para>
</listitem>
</varlistentry>
</variablelist>
const char *from_encoding_name = stmt->for_encoding_name;
const char *to_encoding_name = stmt->to_encoding_name;
List *func_name = stmt->func_name;
- static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID};
+ static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID};
char result[1];
+ Datum funcresult;
/* Convert list of names to a name and namespace */
namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name,
funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid),
funcargs, false);
- /* Check it returns VOID, else it's probably the wrong function */
- if (get_func_rettype(funcoid) != VOIDOID)
+ /* Check it returns int4, else it's probably the wrong function */
+ if (get_func_rettype(funcoid) != INT4OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("encoding conversion function %s must return type %s",
- NameListToString(func_name), "void")));
+ NameListToString(func_name), "integer")));
/* Check we have EXECUTE rights for the function */
aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE);
* string; the conversion function should throw an error if it can't
* perform the requested conversion.
*/
- OidFunctionCall5(funcoid,
- Int32GetDatum(from_encoding),
- Int32GetDatum(to_encoding),
- CStringGetDatum(""),
- CStringGetDatum(result),
- Int32GetDatum(0));
+ funcresult = OidFunctionCall6(funcoid,
+ Int32GetDatum(from_encoding),
+ Int32GetDatum(to_encoding),
+ CStringGetDatum(""),
+ CStringGetDatum(result),
+ Int32GetDatum(0),
+ BoolGetDatum(false));
+
+ /*
+ * The function should return 0 for empty input. Might as well check that,
+ * too.
+ */
+ if (DatumGetInt32(funcresult) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("encoding conversion function %s returned incorrect result for empty input",
+ NameListToString(func_name))));
/*
* All seem ok, go ahead (possible failure would be a duplicate conversion
* Conversion on non-win32 platforms is not implemented yet. It requires
* non-throw version of pg_do_encoding_conversion(), that converts
* unconvertable characters to '?' without errors.
+ *
+ * XXX: We have a no-throw version now. It doesn't convert to '?' though.
*/
#endif
* tab holds conversion entries for the source charset
* starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the target charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
local2local(const unsigned char *l,
unsigned char *p,
int len,
int src_encoding,
int dest_encoding,
- const unsigned char *tab)
+ const unsigned char *tab,
+ bool noError)
{
+ const unsigned char *start = l;
unsigned char c1,
c2;
{
c1 = *l;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(src_encoding, (const char *) l, len);
+ }
if (!IS_HIGHBIT_SET(c1))
*p++ = c1;
else
if (c2)
*p++ = c2;
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(src_encoding, dest_encoding,
(const char *) l, len);
+ }
}
l++;
len--;
}
*p = '\0';
+
+ return l - start;
}
/*
* p is the output area (must be large enough!)
* lc is the mule character set id for the local encoding
* encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
latin2mic(const unsigned char *l, unsigned char *p, int len,
- int lc, int encoding)
+ int lc, int encoding, bool noError)
{
+ const unsigned char *start = l;
int c1;
while (len > 0)
{
c1 = *l;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(encoding, (const char *) l, len);
+ }
if (IS_HIGHBIT_SET(c1))
*p++ = lc;
*p++ = c1;
len--;
}
*p = '\0';
+
+ return l - start;
}
/*
* p is the output area (must be large enough!)
* lc is the mule character set id for the local encoding
* encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
mic2latin(const unsigned char *mic, unsigned char *p, int len,
- int lc, int encoding)
+ int lc, int encoding, bool noError)
{
+ const unsigned char *start = mic;
int c1;
while (len > 0)
{
c1 = *mic;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+ }
if (!IS_HIGHBIT_SET(c1))
{
/* easy for ASCII */
int l = pg_mule_mblen(mic);
if (len < l)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
len);
+ }
if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, encoding,
(const char *) mic, len);
+ }
*p++ = mic[1];
mic += 2;
len -= 2;
}
}
*p = '\0';
+
+ return mic - start;
}
* tab holds conversion entries for the local charset
* starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the mule encoding, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
latin2mic_with_table(const unsigned char *l,
unsigned char *p,
int len,
int lc,
int encoding,
- const unsigned char *tab)
+ const unsigned char *tab,
+ bool noError)
{
+ const unsigned char *start = l;
unsigned char c1,
c2;
{
c1 = *l;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(encoding, (const char *) l, len);
+ }
if (!IS_HIGHBIT_SET(c1))
*p++ = c1;
else
*p++ = c2;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(encoding, PG_MULE_INTERNAL,
(const char *) l, len);
+ }
}
l++;
len--;
}
*p = '\0';
+
+ return l - start;
}
/*
* tab holds conversion entries for the mule internal code's second byte,
* starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the local charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
mic2latin_with_table(const unsigned char *mic,
unsigned char *p,
int len,
int lc,
int encoding,
- const unsigned char *tab)
+ const unsigned char *tab,
+ bool noError)
{
+ const unsigned char *start = mic;
unsigned char c1,
c2;
{
c1 = *mic;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+ }
if (!IS_HIGHBIT_SET(c1))
{
/* easy for ASCII */
int l = pg_mule_mblen(mic);
if (len < l)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
len);
+ }
if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
(c2 = tab[mic[1] - HIGHBIT]) == 0)
{
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, encoding,
(const char *) mic, len);
break; /* keep compiler quiet */
}
}
*p = '\0';
+
+ return mic - start;
}
/*
* is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_mb_radix_tree *map,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding)
+ int encoding, bool noError)
{
uint32 iutf;
int l;
const pg_utf_to_local_combined *cp;
+ const unsigned char *start = utf;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
l = pg_utf_mblen(utf);
if (len < l)
+ {
+ /* need more data to decide if this is a combined char */
+ utf -= l_save;
break;
+ }
if (!pg_utf8_islegal(utf, l))
+ {
+ if (!noError)
+ report_invalid_encoding(PG_UTF8, (const char *) utf, len);
+ utf -= l_save;
break;
+ }
/* We assume ASCII character cannot be in combined map */
if (l > 1)
}
/* failed to translate this character */
+ utf -= l;
+ if (noError)
+ break;
report_untranslatable_char(PG_UTF8, encoding,
- (const char *) (utf - l), len);
+ (const char *) utf, len);
}
/* if we broke out of loop early, must be invalid input */
- if (len > 0)
+ if (len > 0 && !noError)
report_invalid_encoding(PG_UTF8, (const char *) utf, len);
*iso = '\0';
+
+ return utf - start;
}
/*
* (if provided) is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_mb_radix_tree *map,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding)
+ int encoding,
+ bool noError)
{
uint32 iiso;
int l;
const pg_local_to_utf_combined *cp;
+ const unsigned char *start = iso;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
}
/* failed to translate this character */
+ iso -= l;
+ if (noError)
+ break;
report_untranslatable_char(encoding, PG_UTF8,
- (const char *) (iso - l), len);
+ (const char *) iso, len);
}
/* if we broke out of loop early, must be invalid input */
- if (len > 0)
+ if (len > 0 && !noError)
report_invalid_encoding(encoding, (const char *) iso, len);
*utf = '\0';
+
+ return iso - start;
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R);
+ converted = latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R);
- mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R);
+ converted = mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi);
+ converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5);
- mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso);
+ converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi);
+ converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251);
- mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251);
+ converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi);
+ converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866);
- mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866);
+ converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN1251);
- local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251);
+ converted = local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_KOI8R);
- local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi);
+ converted = local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN866);
- local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866);
+ converted = local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R);
- local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi);
+ converted = local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251);
- local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251);
+ converted = local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_WIN866);
- local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866);
+ converted = local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_KOI8R);
- local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi);
+ converted = local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5);
- local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso);
+ converted = local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN1251);
- local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251);
+ converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_ISO_8859_5);
- local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso);
+ converted = local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN866);
- local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866);
+ converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5);
- local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso);
+ converted = local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
-static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
-static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
+static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError);
/* ----------
* conv_proc(
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
- euc_jis_20042shift_jis_2004(src, dest, len);
+ converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
- shift_jis_20042euc_jis_2004(src, dest, len);
+ converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_JIS_2004 -> SHIFT_JIS_2004
*/
-static void
-euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1,
ku,
ten;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
{
*p++ = (ku + 0x19b) >> 1;
}
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
}
if (ku % 2)
else if (ten >= 64 && ten <= 94)
*p++ = ten + 0x40;
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
}
else
*p++ = ten + 0x9e;
else if (ku >= 63 && ku <= 94)
*p++ = (ku + 0x181) >> 1;
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
if (ku % 2)
{
else if (ten >= 64 && ten <= 94)
*p++ = ten + 0x40;
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
}
else
*p++ = ten + 0x9e;
}
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
euc += l;
len -= l;
}
*p = '\0';
+
+ return euc - start;
}
/*
* SHIFT_JIS_2004 ---> EUC_JIS_2004
*/
-static void
-shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
+static int
+shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = sjis;
int c1;
int ku,
ten,
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
*p++ = c1;
sjis++;
len--;
l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
if (l < 0 || l > len)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
{
ku = (c1 << 1) - 0x100;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
ku -= kubun;
}
else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
ku = (c1 << 1) - 0x180;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
-
(const char *) sjis, len);
+ }
ku -= kubun;
}
else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
plane = 2;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
switch (c1)
{
case 0xf0:
plane = 2;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
if (c1 == 0xf4 && kubun == 1)
ku = 15;
else
ku = (c1 << 1) - 0x19a - kubun;
}
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
if (plane == 2)
*p++ = SS3;
len -= l;
}
*p = '\0';
+
+ return sjis - start;
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void euc_cn2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_cn(const unsigned char *mic, unsigned char *p, int len);
+static int euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_cn_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL);
- euc_cn2mic(src, dest, len);
+ converted = euc_cn2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN);
- mic2euc_cn(src, dest, len);
+ converted = mic2euc_cn(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_CN ---> MIC
*/
-static void
-euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
while (len > 0)
if (IS_HIGHBIT_SET(c1))
{
if (len < 2 || !IS_HIGHBIT_SET(euc[1]))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
+ }
*p++ = LC_GB2312_80;
*p++ = c1;
*p++ = euc[1];
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_CN
*/
-static void
-mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
while (len > 0)
if (IS_HIGHBIT_SET(c1))
{
if (c1 != LC_GB2312_80)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
(const char *) mic, len);
+ }
if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2]))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
mic++;
*p++ = *mic++;
*p++ = *mic++;
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
}
}
*p = '\0';
+
+ return mic - start;
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void sjis2mic(const unsigned char *sjis, unsigned char *p, int len);
-static void mic2sjis(const unsigned char *mic, unsigned char *p, int len);
-static void euc_jp2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_jp(const unsigned char *mic, unsigned char *p, int len);
-static void euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len);
-static void sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len);
+static int sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError);
+static int mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_jp_to_sjis(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS);
- euc_jp2sjis(src, dest, len);
+ converted = euc_jp2sjis(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP);
- sjis2euc_jp(src, dest, len);
+ converted = sjis2euc_jp(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);
- euc_jp2mic(src, dest, len);
+ converted = euc_jp2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);
- mic2euc_jp(src, dest, len);
+ converted = mic2euc_jp(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);
- sjis2mic(src, dest, len);
+ converted = sjis2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);
- mic2sjis(src, dest, len);
+ converted = mic2sjis(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* SJIS ---> MIC
*/
-static void
-sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
+static int
+sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = sjis;
int c1,
c2,
i,
* JIS X0208, X0212, user defined extended characters
*/
if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+ }
c2 = sjis[1];
k = (c1 << 8) + c2;
if (k >= 0xed40 && k < 0xf040)
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+ }
*p++ = c1;
sjis++;
len--;
}
}
*p = '\0';
+
+ return sjis - start;
}
/*
* MIC ---> SJIS
*/
-static void
-mic2sjis(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1,
c2,
k,
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_JISX0201K)
*p++ = mic[1];
else if (c1 == LC_JISX0208)
}
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
/*
* EUC_JP ---> MIC
*/
-static void
-euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
int l;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
if (c1 == SS2)
{ /* 1 byte kana? */
*p++ = LC_JISX0201K;
len -= l;
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_JP
*/
-static void
-mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
int l;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_JISX0201K)
{
*p++ = SS2;
*p++ = mic[2];
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
/*
* EUC_JP -> SJIS
*/
-static void
-euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1,
c2,
k;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
if (c1 == SS2)
{
/* hankaku kana? */
len -= l;
}
*p = '\0';
+
+ return euc - start;
}
/*
* SJIS ---> EUC_JP
*/
-static void
-sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
+static int
+sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = sjis;
int c1,
c2,
i,
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS,
(const char *) sjis, len);
+ }
*p++ = c1;
sjis++;
len--;
}
l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS,
(const char *) sjis, len);
+ }
if (c1 >= 0xa1 && c1 <= 0xdf)
{
/* JIS X0201 (1 byte kana) */
len -= l;
}
*p = '\0';
+
+ return sjis - start;
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void euc_kr2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_kr(const unsigned char *mic, unsigned char *p, int len);
+static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_kr_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL);
- euc_kr2mic(src, dest, len);
+ converted = euc_kr2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR);
- mic2euc_kr(src, dest, len);
+ converted = mic2euc_kr(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_KR ---> MIC
*/
-static void
-euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
int l;
{
l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
if (l != 2)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len);
+ }
*p++ = LC_KS5601;
*p++ = c1;
*p++ = euc[1];
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_KR
*/
-static void
-mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
int l;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_KS5601)
{
*p++ = mic[1];
*p++ = mic[2];
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void euc_tw2big5(const unsigned char *euc, unsigned char *p, int len);
-static void big52euc_tw(const unsigned char *euc, unsigned char *p, int len);
-static void big52mic(const unsigned char *big5, unsigned char *p, int len);
-static void mic2big5(const unsigned char *mic, unsigned char *p, int len);
-static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len);
+static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int big52euc_tw(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
+static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_tw_to_big5(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
- euc_tw2big5(src, dest, len);
+ converted = euc_tw2big5(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
- big52euc_tw(src, dest, len);
+ converted = big52euc_tw(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
- euc_tw2mic(src, dest, len);
+ converted = euc_tw2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
- mic2euc_tw(src, dest, len);
+ converted = mic2euc_tw(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
- big52mic(src, dest, len);
+ converted = big52mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
- mic2big5(src, dest, len);
+ converted = mic2big5(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_TW ---> Big5
*/
-static void
-euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
unsigned char c1;
unsigned short big5buf,
cnsBuf;
/* Verify and decode the next EUC_TW input character */
l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
if (c1 == SS2)
{
c1 = euc[1]; /* plane No. */
/* Write it out in Big5 */
big5buf = CNStoBIG5(cnsBuf, lc);
if (big5buf == 0)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_EUC_TW, PG_BIG5,
(const char *) euc, len);
+ }
*p++ = (big5buf >> 8) & 0x00ff;
*p++ = big5buf & 0x00ff;
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* Big5 ---> EUC_TW
*/
-static void
-big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
+static int
+big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = big5;
unsigned short c1;
unsigned short big5buf,
cnsBuf;
{
l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_BIG5,
(const char *) big5, len);
+ }
big5buf = (c1 << 8) | big5[1];
cnsBuf = BIG5toCNS(big5buf, &lc);
*p++ = cnsBuf & 0x00ff;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_BIG5, PG_EUC_TW,
(const char *) big5, len);
+ }
big5 += l;
len -= l;
}
}
*p = '\0';
+
+ return big5 - start;
}
/*
* EUC_TW ---> MIC
*/
-static void
-euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
int l;
{
l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
if (c1 == SS2)
{
c1 = euc[1]; /* plane No. */
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_TW
*/
-static void
-mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
int l;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_CNS11643_1)
{
*p++ = mic[1];
*p++ = mic[3];
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
/*
* Big5 ---> MIC
*/
-static void
-big52mic(const unsigned char *big5, unsigned char *p, int len)
+static int
+big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = big5;
unsigned short c1;
unsigned short big5buf,
cnsBuf;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_BIG5,
(const char *) big5, len);
+ }
*p++ = c1;
big5++;
len--;
}
l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_BIG5,
(const char *) big5, len);
+ }
big5buf = (c1 << 8) | big5[1];
cnsBuf = BIG5toCNS(big5buf, &lc);
if (lc != 0)
*p++ = cnsBuf & 0x00ff;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
(const char *) big5, len);
+ }
big5 += l;
len -= l;
}
*p = '\0';
+
+ return big5 - start;
}
/*
* MIC ---> Big5
*/
-static void
-mic2big5(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
unsigned short c1;
unsigned short big5buf,
cnsBuf;
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
{
if (c1 == LCPRV2_B)
}
big5buf = CNStoBIG5(cnsBuf, c1);
if (big5buf == 0)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
(const char *) mic, len);
+ }
*p++ = (big5buf >> 8) & 0x00ff;
*p++ = big5buf & 0x00ff;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2);
+ converted = latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2);
- mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2);
+ converted = mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
- win1250_2_iso88592);
+ converted = latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
+ win1250_2_iso88592, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250);
- mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
- iso88592_2_win1250);
+ converted = mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
+ iso88592_2_win1250, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250);
- local2local(src, dest, len, PG_LATIN2, PG_WIN1250, iso88592_2_win1250);
+ converted = local2local(src, dest, len, PG_LATIN2, PG_WIN1250,
+ iso88592_2_win1250, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2);
- local2local(src, dest, len, PG_WIN1250, PG_LATIN2, win1250_2_iso88592);
+ converted = local2local(src, dest, len, PG_WIN1250, PG_LATIN2,
+ win1250_2_iso88592, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1);
+ converted = latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1);
- mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1);
+ converted = mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3);
+ converted = latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3);
- mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3);
+ converted = mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4);
+ converted = latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4);
- mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4);
+ converted = mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
- LocalToUtf(src, len, dest,
- &big5_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_BIG5);
+ converted = LocalToUtf(src, len, dest,
+ &big5_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_BIG5,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
- UtfToLocal(src, len, dest,
- &big5_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_BIG5);
+ converted = UtfToLocal(src, len, dest,
+ &big5_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_BIG5,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
- UtfToLocal(src, len, dest,
- &koi8r_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8R);
+ converted = UtfToLocal(src, len, dest,
+ &koi8r_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8R,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
- LocalToUtf(src, len, dest,
- &koi8r_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8R);
+ converted = LocalToUtf(src, len, dest,
+ &koi8r_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8R,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
- UtfToLocal(src, len, dest,
- &koi8u_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8U);
+ converted = UtfToLocal(src, len, dest,
+ &koi8u_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8U,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
- LocalToUtf(src, len, dest,
- &koi8u_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8U);
+ converted = LocalToUtf(src, len, dest,
+ &koi8u_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8U,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_jis_2004_to_unicode_tree,
- LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
- NULL,
- PG_EUC_JIS_2004);
+ converted = LocalToUtf(src, len, dest,
+ &euc_jis_2004_to_unicode_tree,
+ LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
+ NULL,
+ PG_EUC_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
- UtfToLocal(src, len, dest,
- &euc_jis_2004_from_unicode_tree,
- ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
- NULL,
- PG_EUC_JIS_2004);
+ converted = UtfToLocal(src, len, dest,
+ &euc_jis_2004_from_unicode_tree,
+ ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
+ NULL,
+ PG_EUC_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_cn_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_CN);
+ converted = LocalToUtf(src, len, dest,
+ &euc_cn_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_CN,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
- UtfToLocal(src, len, dest,
- &euc_cn_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_CN);
+ converted = UtfToLocal(src, len, dest,
+ &euc_cn_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_CN,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_jp_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_JP);
+ converted = LocalToUtf(src, len, dest,
+ &euc_jp_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_JP,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
- UtfToLocal(src, len, dest,
- &euc_jp_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_JP);
+ converted = UtfToLocal(src, len, dest,
+ &euc_jp_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_JP,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_kr_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_KR);
+ converted = LocalToUtf(src, len, dest,
+ &euc_kr_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_KR,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
- UtfToLocal(src, len, dest,
- &euc_kr_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_KR);
+ converted = UtfToLocal(src, len, dest,
+ &euc_kr_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_KR,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_tw_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_TW);
+ converted = LocalToUtf(src, len, dest,
+ &euc_tw_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_TW,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
- UtfToLocal(src, len, dest,
- &euc_tw_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_TW);
+ converted = UtfToLocal(src, len, dest,
+ &euc_tw_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_TW,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
- LocalToUtf(src, len, dest,
- &gb18030_to_unicode_tree,
- NULL, 0,
- conv_18030_to_utf8,
- PG_GB18030);
+ converted = LocalToUtf(src, len, dest,
+ &gb18030_to_unicode_tree,
+ NULL, 0,
+ conv_18030_to_utf8,
+ PG_GB18030,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
- UtfToLocal(src, len, dest,
- &gb18030_from_unicode_tree,
- NULL, 0,
- conv_utf8_to_18030,
- PG_GB18030);
+ converted = UtfToLocal(src, len, dest,
+ &gb18030_from_unicode_tree,
+ NULL, 0,
+ conv_utf8_to_18030,
+ PG_GB18030,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8);
- LocalToUtf(src, len, dest,
- &gbk_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_GBK);
+ converted = LocalToUtf(src, len, dest,
+ &gbk_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_GBK,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK);
- UtfToLocal(src, len, dest,
- &gbk_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_GBK);
+ converted = UtfToLocal(src, len, dest,
+ &gbk_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_GBK,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
{
if (encoding == maps[i].encoding)
{
- LocalToUtf(src, len, dest,
- maps[i].map1,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = LocalToUtf(src, len, dest,
+ maps[i].map1,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
{
if (encoding == maps[i].encoding)
{
- UtfToLocal(src, len, dest,
- maps[i].map2,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = UtfToLocal(src, len, dest,
+ maps[i].map2,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ unsigned char *start = src;
unsigned short c;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
{
c = *src;
if (c == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_LATIN1, (const char *) src, len);
+ }
if (!IS_HIGHBIT_SET(c))
*dest++ = c;
else
}
*dest = '\0';
- PG_RETURN_VOID();
+ PG_RETURN_INT32(src - start);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ unsigned char *start = src;
unsigned short c,
c1;
{
c = *src;
if (c == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_UTF8, (const char *) src, len);
+ }
/* fast path for ASCII-subset characters */
if (!IS_HIGHBIT_SET(c))
{
int l = pg_utf_mblen(src);
if (l > len || !pg_utf8_islegal(src, l))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_UTF8, (const char *) src, len);
+ }
if (l != 2)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len);
+ }
c1 = src[1] & 0x3f;
c = ((c & 0x1f) << 6) | c1;
if (c >= 0x80 && c <= 0xff)
len -= 2;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len);
+ }
}
}
*dest = '\0';
- PG_RETURN_VOID();
+ PG_RETURN_INT32(src - start);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8);
- LocalToUtf(src, len, dest,
- &johab_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_JOHAB);
+ converted = LocalToUtf(src, len, dest,
+ &johab_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_JOHAB,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);
- UtfToLocal(src, len, dest,
- &johab_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_JOHAB);
+ converted = UtfToLocal(src, len, dest,
+ &johab_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_JOHAB,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);
- LocalToUtf(src, len, dest,
- &sjis_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_SJIS);
+ converted = LocalToUtf(src, len, dest,
+ &sjis_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_SJIS,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS);
- UtfToLocal(src, len, dest,
- &sjis_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_SJIS);
+ converted = UtfToLocal(src, len, dest,
+ &sjis_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_SJIS,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);
- LocalToUtf(src, len, dest,
- &shift_jis_2004_to_unicode_tree,
- LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
- NULL,
- PG_SHIFT_JIS_2004);
+ converted = LocalToUtf(src, len, dest,
+ &shift_jis_2004_to_unicode_tree,
+ LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
+ NULL,
+ PG_SHIFT_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004);
- UtfToLocal(src, len, dest,
- &shift_jis_2004_from_unicode_tree,
- ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
- NULL,
- PG_SHIFT_JIS_2004);
+ converted = UtfToLocal(src, len, dest,
+ &shift_jis_2004_from_unicode_tree,
+ ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
+ NULL,
+ PG_SHIFT_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8);
- LocalToUtf(src, len, dest,
- &uhc_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_UHC);
+ converted = LocalToUtf(src, len, dest,
+ &uhc_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_UHC,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC);
- UtfToLocal(src, len, dest,
- &uhc_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_UHC);
+ converted = UtfToLocal(src, len, dest,
+ &uhc_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_UHC,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
{
if (encoding == maps[i].encoding)
{
- LocalToUtf(src, len, dest,
- maps[i].map1,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = LocalToUtf(src, len, dest,
+ maps[i].map1,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
errmsg("unexpected encoding ID %d for WIN character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
Datum
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
{
if (encoding == maps[i].encoding)
{
- UtfToLocal(src, len, dest,
- maps[i].map2,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = UtfToLocal(src, len, dest,
+ maps[i].map2,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
errmsg("unexpected encoding ID %d for WIN character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
MemoryContextAllocHuge(CurrentMemoryContext,
(Size) len * MAX_CONVERSION_GROWTH + 1);
- OidFunctionCall5(proc,
- Int32GetDatum(src_encoding),
- Int32GetDatum(dest_encoding),
- CStringGetDatum(src),
- CStringGetDatum(result),
- Int32GetDatum(len));
+ (void) OidFunctionCall6(proc,
+ Int32GetDatum(src_encoding),
+ Int32GetDatum(dest_encoding),
+ CStringGetDatum(src),
+ CStringGetDatum(result),
+ Int32GetDatum(len),
+ BoolGetDatum(false));
/*
* If the result is large, it's worth repalloc'ing to release any extra
return result;
}
+/*
+ * Convert src string to another encoding.
+ *
+ * This function has a different API than the other conversion functions.
+ * The caller should've looked up the conversion function using
+ * FindDefaultConversionProc(). Unlike the other functions, the converted
+ * result is not palloc'd. It is written to the caller-supplied buffer
+ * instead.
+ *
+ * src_encoding - encoding to convert from
+ * dest_encoding - encoding to convert to
+ * src, srclen - input buffer and its length in bytes
+ * dest, destlen - destination buffer and its size in bytes
+ *
+ * The output is null-terminated.
+ *
+ * If destlen < srclen * MAX_CONVERSION_LENGTH + 1, the converted output
+ * wouldn't necessarily fit in the output buffer, and the function will not
+ * convert the whole input.
+ *
+ * TODO: The conversion function interface is not great. Firstly, it
+ * would be nice to pass through the destination buffer size to the
+ * conversion function, so that if you pass a shorter destination buffer, it
+ * could still continue to fill up the whole buffer. Currently, we have to
+ * assume worst case expansion and stop the conversion short, even if there
+ * is in fact space left in the destination buffer. Secondly, it would be
+ * nice to return the number of bytes written to the caller, to avoid a call
+ * to strlen().
+ */
+int
+pg_do_encoding_conversion_buf(Oid proc,
+ int src_encoding,
+ int dest_encoding,
+ unsigned char *src, int srclen,
+ unsigned char *dest, int destlen,
+ bool noError)
+{
+ Datum result;
+
+ /*
+ * If the destination buffer is not large enough to hold the result in the
+ * worst case, limit the input size passed to the conversion function.
+ */
+ if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
+ srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH);
+
+ result = OidFunctionCall6(proc,
+ Int32GetDatum(src_encoding),
+ Int32GetDatum(dest_encoding),
+ CStringGetDatum(src),
+ CStringGetDatum(dest),
+ Int32GetDatum(srclen),
+ BoolGetDatum(noError));
+ return DatumGetInt32(result);
+}
+
/*
* Convert string to encoding encoding_name. The source
* encoding is the DB encoding.
MemoryContextAllocHuge(CurrentMemoryContext,
(Size) len * MAX_CONVERSION_GROWTH + 1);
- FunctionCall5(flinfo,
+ FunctionCall6(flinfo,
Int32GetDatum(src_encoding),
Int32GetDatum(dest_encoding),
CStringGetDatum(src),
CStringGetDatum(result),
- Int32GetDatum(len));
+ Int32GetDatum(len),
+ BoolGetDatum(false));
/*
* Release extra space if there might be a lot --- see comments in
c_as_utf8[c_as_utf8_len] = '\0';
/* Convert, or throw error if we can't */
- FunctionCall5(Utf8ToServerConvProc,
+ FunctionCall6(Utf8ToServerConvProc,
Int32GetDatum(PG_UTF8),
Int32GetDatum(server_encoding),
CStringGetDatum(c_as_utf8),
CStringGetDatum(s),
- Int32GetDatum(c_as_utf8_len));
+ Int32GetDatum(c_as_utf8_len),
+ BoolGetDatum(false));
}
static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(ClusterInfo *new_cluster);
+static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
static char *get_canonical_locale_name(int category, const char *locale);
check_for_reg_data_type_usage(&old_cluster);
check_for_isn_and_int8_passing_mismatch(&old_cluster);
+ /*
+ * PG 14 changed the function signature of encoding conversion functions.
+ * Conversions from older versions cannot be upgraded automatically
+ * because the user-defined functions used by the encoding conversions
+ * need to be changed to match the new signature.
+ */
+ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1300)
+ check_for_user_defined_encoding_conversions(&old_cluster);
+
/*
* Pre-PG 14 allowed user defined postfix operators, which are not
* supported anymore. Verify there are none, iff applicable.
check_ok();
}
+/*
+ * Verify that no user-defined encoding conversions exist.
+ */
+static void
+check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
+{
+ int dbnum;
+ FILE *script = NULL;
+ bool found = false;
+ char output_path[MAXPGPATH];
+
+ prep_status("Checking for user-defined encoding conversions");
+
+ snprintf(output_path, sizeof(output_path),
+ "encoding_conversions.txt");
+
+ /* Find any user defined encoding conversions */
+ for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+ {
+ PGresult *res;
+ bool db_used = false;
+ int ntups;
+ int rowno;
+ int i_conoid,
+ i_conname,
+ i_nspname;
+ DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
+ PGconn *conn = connectToServer(cluster, active_db->db_name);
+
+ /*
+ * The query below hardcodes FirstNormalObjectId as 16384 rather than
+ * interpolating that C #define into the query because, if that
+ * #define is ever changed, the cutoff we want to use is the value
+ * used by pre-version 14 servers, not that of some future version.
+ */
+ res = executeQueryOrDie(conn,
+ "SELECT c.oid as conoid, c.conname, n.nspname "
+ "FROM pg_catalog.pg_conversion c, "
+ " pg_catalog.pg_namespace n "
+ "WHERE c.connamespace = n.oid AND "
+ " c.oid >= 16384");
+ ntups = PQntuples(res);
+ i_conoid = PQfnumber(res, "conoid");
+ i_conname = PQfnumber(res, "conname");
+ i_nspname = PQfnumber(res, "nspname");
+ for (rowno = 0; rowno < ntups; rowno++)
+ {
+ found = true;
+ if (script == NULL &&
+ (script = fopen_priv(output_path, "w")) == NULL)
+ pg_fatal("could not open file \"%s\": %s\n",
+ output_path, strerror(errno));
+ if (!db_used)
+ {
+ fprintf(script, "In database: %s\n", active_db->db_name);
+ db_used = true;
+ }
+ fprintf(script, " (oid=%s) %s.%s\n",
+ PQgetvalue(res, rowno, i_conoid),
+ PQgetvalue(res, rowno, i_nspname),
+ PQgetvalue(res, rowno, i_conname));
+ }
+
+ PQclear(res);
+
+ PQfinish(conn);
+ }
+
+ if (script)
+ fclose(script);
+
+ if (found)
+ {
+ pg_log(PG_REPORT, "fatal\n");
+ pg_fatal("Your installation contains user-defined encoding conversions.\n"
+ "The conversion function parameters changed in PostgreSQL version 14\n"
+ "so this cluster cannot currently be upgraded. You can remove the\n"
+ "encoding conversions in the old cluster and restart the upgrade.\n"
+ "A list of user-defined encoding conversions is in the file:\n"
+ " %s\n\n", output_path);
+ }
+ else
+ check_ok();
+}
+
/*
* get_canonical_locale_name
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202103291
+#define CATALOG_VERSION_NO 202104011
#endif
# conversion functions
{ oid => '4302',
descr => 'internal conversion function for KOI8R to MULE_INTERNAL',
- proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_mic',
+ proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4303',
descr => 'internal conversion function for MULE_INTERNAL to KOI8R',
- proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_koi8r',
+ proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_koi8r',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4304',
descr => 'internal conversion function for ISO-8859-5 to MULE_INTERNAL',
- proname => 'iso_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_mic',
+ proname => 'iso_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4305',
descr => 'internal conversion function for MULE_INTERNAL to ISO-8859-5',
- proname => 'mic_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_iso',
+ proname => 'mic_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4306',
descr => 'internal conversion function for WIN1251 to MULE_INTERNAL',
- proname => 'win1251_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1251_to_mic',
+ proname => 'win1251_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1251_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4307',
descr => 'internal conversion function for MULE_INTERNAL to WIN1251',
- proname => 'mic_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win1251',
+ proname => 'mic_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win1251',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4308',
descr => 'internal conversion function for WIN866 to MULE_INTERNAL',
- proname => 'win866_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_mic',
+ proname => 'win866_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4309',
descr => 'internal conversion function for MULE_INTERNAL to WIN866',
- proname => 'mic_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win866',
+ proname => 'mic_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win866',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4310', descr => 'internal conversion function for KOI8R to WIN1251',
- proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'koi8r_to_win1251', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4311', descr => 'internal conversion function for WIN1251 to KOI8R',
- proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4312', descr => 'internal conversion function for KOI8R to WIN866',
- proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_win866',
+ proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_win866',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4313', descr => 'internal conversion function for WIN866 to KOI8R',
- proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_koi8r',
+ proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_koi8r',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4314',
descr => 'internal conversion function for WIN866 to WIN1251',
- proname => 'win866_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win866_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win866_to_win1251', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4315',
descr => 'internal conversion function for WIN1251 to WIN866',
- proname => 'win1251_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win1251_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_win866', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4316',
descr => 'internal conversion function for ISO-8859-5 to KOI8R',
- proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_koi8r',
+ proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_koi8r',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4317',
descr => 'internal conversion function for KOI8R to ISO-8859-5',
- proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_iso',
+ proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4318',
descr => 'internal conversion function for ISO-8859-5 to WIN1251',
- proname => 'iso_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_win1251',
+ proname => 'iso_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_win1251',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4319',
descr => 'internal conversion function for WIN1251 to ISO-8859-5',
- proname => 'win1251_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1251_to_iso',
+ proname => 'win1251_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1251_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4320',
descr => 'internal conversion function for ISO-8859-5 to WIN866',
- proname => 'iso_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_win866',
+ proname => 'iso_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_win866',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4321',
descr => 'internal conversion function for WIN866 to ISO-8859-5',
- proname => 'win866_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_iso',
+ proname => 'win866_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4322',
descr => 'internal conversion function for EUC_CN to MULE_INTERNAL',
- proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_cn_to_mic',
+ proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_cn_to_mic',
probin => '$libdir/euc_cn_and_mic' },
{ oid => '4323',
descr => 'internal conversion function for MULE_INTERNAL to EUC_CN',
- proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_cn',
+ proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_cn',
probin => '$libdir/euc_cn_and_mic' },
{ oid => '4324', descr => 'internal conversion function for EUC_JP to SJIS',
- proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_sjis',
+ proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_sjis',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4325', descr => 'internal conversion function for SJIS to EUC_JP',
- proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_euc_jp',
+ proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_euc_jp',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4326',
descr => 'internal conversion function for EUC_JP to MULE_INTERNAL',
- proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_mic',
+ proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_mic',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4327',
descr => 'internal conversion function for SJIS to MULE_INTERNAL',
- proname => 'sjis_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_mic',
+ proname => 'sjis_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_mic',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4328',
descr => 'internal conversion function for MULE_INTERNAL to EUC_JP',
- proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_jp',
+ proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_jp',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4329',
descr => 'internal conversion function for MULE_INTERNAL to SJIS',
- proname => 'mic_to_sjis', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_sjis',
+ proname => 'mic_to_sjis', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_sjis',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4330',
descr => 'internal conversion function for EUC_KR to MULE_INTERNAL',
- proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_kr_to_mic',
+ proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_kr_to_mic',
probin => '$libdir/euc_kr_and_mic' },
{ oid => '4331',
descr => 'internal conversion function for MULE_INTERNAL to EUC_KR',
- proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_kr',
+ proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_kr',
probin => '$libdir/euc_kr_and_mic' },
{ oid => '4332', descr => 'internal conversion function for EUC_TW to BIG5',
- proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_big5',
+ proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_big5',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4333', descr => 'internal conversion function for BIG5 to EUC_TW',
- proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_euc_tw',
+ proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_euc_tw',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4334',
descr => 'internal conversion function for EUC_TW to MULE_INTERNAL',
- proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_mic',
+ proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_mic',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4335',
descr => 'internal conversion function for BIG5 to MULE_INTERNAL',
- proname => 'big5_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_mic',
+ proname => 'big5_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_mic',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4336',
descr => 'internal conversion function for MULE_INTERNAL to EUC_TW',
- proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_tw',
+ proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_tw',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4337',
descr => 'internal conversion function for MULE_INTERNAL to BIG5',
- proname => 'mic_to_big5', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_big5',
+ proname => 'mic_to_big5', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_big5',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4338',
descr => 'internal conversion function for LATIN2 to MULE_INTERNAL',
- proname => 'latin2_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin2_to_mic',
+ proname => 'latin2_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin2_to_mic',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4339',
descr => 'internal conversion function for MULE_INTERNAL to LATIN2',
- proname => 'mic_to_latin2', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin2',
+ proname => 'mic_to_latin2', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin2',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4340',
descr => 'internal conversion function for WIN1250 to MULE_INTERNAL',
- proname => 'win1250_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1250_to_mic',
+ proname => 'win1250_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1250_to_mic',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4341',
descr => 'internal conversion function for MULE_INTERNAL to WIN1250',
- proname => 'mic_to_win1250', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win1250',
+ proname => 'mic_to_win1250', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win1250',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4342',
descr => 'internal conversion function for LATIN2 to WIN1250',
- proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'latin2_to_win1250', probin => '$libdir/latin2_and_win1250' },
{ oid => '4343',
descr => 'internal conversion function for WIN1250 to LATIN2',
- proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1250_to_latin2', probin => '$libdir/latin2_and_win1250' },
{ oid => '4344',
descr => 'internal conversion function for LATIN1 to MULE_INTERNAL',
- proname => 'latin1_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin1_to_mic',
+ proname => 'latin1_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin1_to_mic',
probin => '$libdir/latin_and_mic' },
{ oid => '4345',
descr => 'internal conversion function for MULE_INTERNAL to LATIN1',
- proname => 'mic_to_latin1', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin1',
+ proname => 'mic_to_latin1', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin1',
probin => '$libdir/latin_and_mic' },
{ oid => '4346',
descr => 'internal conversion function for LATIN3 to MULE_INTERNAL',
- proname => 'latin3_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin3_to_mic',
+ proname => 'latin3_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin3_to_mic',
probin => '$libdir/latin_and_mic' },
{ oid => '4347',
descr => 'internal conversion function for MULE_INTERNAL to LATIN3',
- proname => 'mic_to_latin3', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin3',
+ proname => 'mic_to_latin3', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin3',
probin => '$libdir/latin_and_mic' },
{ oid => '4348',
descr => 'internal conversion function for LATIN4 to MULE_INTERNAL',
- proname => 'latin4_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin4_to_mic',
+ proname => 'latin4_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin4_to_mic',
probin => '$libdir/latin_and_mic' },
{ oid => '4349',
descr => 'internal conversion function for MULE_INTERNAL to LATIN4',
- proname => 'mic_to_latin4', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin4',
+ proname => 'mic_to_latin4', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin4',
probin => '$libdir/latin_and_mic' },
{ oid => '4352', descr => 'internal conversion function for BIG5 to UTF8',
- proname => 'big5_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_utf8',
+ proname => 'big5_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_utf8',
probin => '$libdir/utf8_and_big5' },
{ oid => '4353', descr => 'internal conversion function for UTF8 to BIG5',
- proname => 'utf8_to_big5', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_big5',
+ proname => 'utf8_to_big5', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_big5',
probin => '$libdir/utf8_and_big5' },
{ oid => '4354', descr => 'internal conversion function for UTF8 to KOI8R',
- proname => 'utf8_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_koi8r',
+ proname => 'utf8_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_koi8r',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4355', descr => 'internal conversion function for KOI8R to UTF8',
- proname => 'koi8r_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_utf8',
+ proname => 'koi8r_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_utf8',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4356', descr => 'internal conversion function for UTF8 to KOI8U',
- proname => 'utf8_to_koi8u', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_koi8u',
+ proname => 'utf8_to_koi8u', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_koi8u',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4357', descr => 'internal conversion function for KOI8U to UTF8',
- proname => 'koi8u_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8u_to_utf8',
+ proname => 'koi8u_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8u_to_utf8',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4358', descr => 'internal conversion function for UTF8 to WIN',
- proname => 'utf8_to_win', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_win',
+ proname => 'utf8_to_win', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_win',
probin => '$libdir/utf8_and_win' },
{ oid => '4359', descr => 'internal conversion function for WIN to UTF8',
- proname => 'win_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win_to_utf8',
+ proname => 'win_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win_to_utf8',
probin => '$libdir/utf8_and_win' },
{ oid => '4360', descr => 'internal conversion function for EUC_CN to UTF8',
- proname => 'euc_cn_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_cn_to_utf8',
+ proname => 'euc_cn_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_cn_to_utf8',
probin => '$libdir/utf8_and_euc_cn' },
{ oid => '4361', descr => 'internal conversion function for UTF8 to EUC_CN',
- proname => 'utf8_to_euc_cn', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_cn',
+ proname => 'utf8_to_euc_cn', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_cn',
probin => '$libdir/utf8_and_euc_cn' },
{ oid => '4362', descr => 'internal conversion function for EUC_JP to UTF8',
- proname => 'euc_jp_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_utf8',
+ proname => 'euc_jp_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_utf8',
probin => '$libdir/utf8_and_euc_jp' },
{ oid => '4363', descr => 'internal conversion function for UTF8 to EUC_JP',
- proname => 'utf8_to_euc_jp', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_jp',
+ proname => 'utf8_to_euc_jp', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_jp',
probin => '$libdir/utf8_and_euc_jp' },
{ oid => '4364', descr => 'internal conversion function for EUC_KR to UTF8',
- proname => 'euc_kr_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_kr_to_utf8',
+ proname => 'euc_kr_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_kr_to_utf8',
probin => '$libdir/utf8_and_euc_kr' },
{ oid => '4365', descr => 'internal conversion function for UTF8 to EUC_KR',
- proname => 'utf8_to_euc_kr', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_kr',
+ proname => 'utf8_to_euc_kr', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_kr',
probin => '$libdir/utf8_and_euc_kr' },
{ oid => '4366', descr => 'internal conversion function for EUC_TW to UTF8',
- proname => 'euc_tw_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_utf8',
+ proname => 'euc_tw_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_utf8',
probin => '$libdir/utf8_and_euc_tw' },
{ oid => '4367', descr => 'internal conversion function for UTF8 to EUC_TW',
- proname => 'utf8_to_euc_tw', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_tw',
+ proname => 'utf8_to_euc_tw', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_tw',
probin => '$libdir/utf8_and_euc_tw' },
{ oid => '4368', descr => 'internal conversion function for GB18030 to UTF8',
- proname => 'gb18030_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'gb18030_to_utf8',
+ proname => 'gb18030_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'gb18030_to_utf8',
probin => '$libdir/utf8_and_gb18030' },
{ oid => '4369', descr => 'internal conversion function for UTF8 to GB18030',
- proname => 'utf8_to_gb18030', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_gb18030',
+ proname => 'utf8_to_gb18030', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_gb18030',
probin => '$libdir/utf8_and_gb18030' },
{ oid => '4370', descr => 'internal conversion function for GBK to UTF8',
- proname => 'gbk_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'gbk_to_utf8',
+ proname => 'gbk_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'gbk_to_utf8',
probin => '$libdir/utf8_and_gbk' },
{ oid => '4371', descr => 'internal conversion function for UTF8 to GBK',
- proname => 'utf8_to_gbk', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_gbk',
+ proname => 'utf8_to_gbk', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_gbk',
probin => '$libdir/utf8_and_gbk' },
{ oid => '4372',
descr => 'internal conversion function for UTF8 to ISO-8859 2-16',
- proname => 'utf8_to_iso8859', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_iso8859',
+ proname => 'utf8_to_iso8859', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_iso8859',
probin => '$libdir/utf8_and_iso8859' },
{ oid => '4373',
descr => 'internal conversion function for ISO-8859 2-16 to UTF8',
- proname => 'iso8859_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso8859_to_utf8',
+ proname => 'iso8859_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso8859_to_utf8',
probin => '$libdir/utf8_and_iso8859' },
{ oid => '4374', descr => 'internal conversion function for LATIN1 to UTF8',
- proname => 'iso8859_1_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'iso8859_1_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'iso8859_1_to_utf8', probin => '$libdir/utf8_and_iso8859_1' },
{ oid => '4375', descr => 'internal conversion function for UTF8 to LATIN1',
- proname => 'utf8_to_iso8859_1', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'utf8_to_iso8859_1', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'utf8_to_iso8859_1', probin => '$libdir/utf8_and_iso8859_1' },
{ oid => '4376', descr => 'internal conversion function for JOHAB to UTF8',
- proname => 'johab_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'johab_to_utf8',
+ proname => 'johab_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'johab_to_utf8',
probin => '$libdir/utf8_and_johab' },
{ oid => '4377', descr => 'internal conversion function for UTF8 to JOHAB',
- proname => 'utf8_to_johab', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_johab',
+ proname => 'utf8_to_johab', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_johab',
probin => '$libdir/utf8_and_johab' },
{ oid => '4378', descr => 'internal conversion function for SJIS to UTF8',
- proname => 'sjis_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_utf8',
+ proname => 'sjis_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_utf8',
probin => '$libdir/utf8_and_sjis' },
{ oid => '4379', descr => 'internal conversion function for UTF8 to SJIS',
- proname => 'utf8_to_sjis', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_sjis',
+ proname => 'utf8_to_sjis', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_sjis',
probin => '$libdir/utf8_and_sjis' },
{ oid => '4380', descr => 'internal conversion function for UHC to UTF8',
- proname => 'uhc_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'uhc_to_utf8',
+ proname => 'uhc_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'uhc_to_utf8',
probin => '$libdir/utf8_and_uhc' },
{ oid => '4381', descr => 'internal conversion function for UTF8 to UHC',
- proname => 'utf8_to_uhc', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_uhc',
+ proname => 'utf8_to_uhc', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_uhc',
probin => '$libdir/utf8_and_uhc' },
{ oid => '4382',
descr => 'internal conversion function for EUC_JIS_2004 to UTF8',
- proname => 'euc_jis_2004_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'euc_jis_2004_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_jis_2004_to_utf8', probin => '$libdir/utf8_and_euc2004' },
{ oid => '4383',
descr => 'internal conversion function for UTF8 to EUC_JIS_2004',
- proname => 'utf8_to_euc_jis_2004', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'utf8_to_euc_jis_2004', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'utf8_to_euc_jis_2004', probin => '$libdir/utf8_and_euc2004' },
{ oid => '4384',
descr => 'internal conversion function for SHIFT_JIS_2004 to UTF8',
- proname => 'shift_jis_2004_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'shift_jis_2004_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'shift_jis_2004_to_utf8', probin => '$libdir/utf8_and_sjis2004' },
{ oid => '4385',
descr => 'internal conversion function for UTF8 to SHIFT_JIS_2004',
- proname => 'utf8_to_shift_jis_2004', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'utf8_to_shift_jis_2004', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'utf8_to_shift_jis_2004', probin => '$libdir/utf8_and_sjis2004' },
{ oid => '4386',
descr => 'internal conversion function for EUC_JIS_2004 to SHIFT_JIS_2004',
proname => 'euc_jis_2004_to_shift_jis_2004', prolang => 'c',
- prorettype => 'void', proargtypes => 'int4 int4 cstring internal int4',
+ prorettype => 'int4', proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_jis_2004_to_shift_jis_2004',
probin => '$libdir/euc2004_sjis2004' },
{ oid => '4387',
descr => 'internal conversion function for SHIFT_JIS_2004 to EUC_JIS_2004',
proname => 'shift_jis_2004_to_euc_jis_2004', prolang => 'c',
- prorettype => 'void', proargtypes => 'int4 int4 cstring internal int4',
+ prorettype => 'int4', proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'shift_jis_2004_to_euc_jis_2004',
probin => '$libdir/euc2004_sjis2004' },
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding,
int dest_encoding);
+extern int pg_do_encoding_conversion_buf(Oid proc,
+ int src_encoding,
+ int dest_encoding,
+ unsigned char *src, int srclen,
+ unsigned char *dst, int dstlen,
+ bool noError);
extern char *pg_client_to_server(const char *s, int len);
extern char *pg_server_to_client(const char *s, int len);
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
-extern void UtfToLocal(const unsigned char *utf, int len,
+extern int UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_mb_radix_tree *map,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding);
-extern void LocalToUtf(const unsigned char *iso, int len,
+ int encoding, bool noError);
+extern int LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_mb_radix_tree *map,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding);
+ int encoding, bool noError);
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
extern void report_untranslatable_char(int src_encoding, int dest_encoding,
const char *mbstr, int len) pg_attribute_noreturn();
-extern void local2local(const unsigned char *l, unsigned char *p, int len,
- int src_encoding, int dest_encoding, const unsigned char *tab);
-extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
- int lc, int encoding);
-extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,
- int lc, int encoding);
-extern void latin2mic_with_table(const unsigned char *l, unsigned char *p,
+extern int local2local(const unsigned char *l, unsigned char *p, int len,
+ int src_encoding, int dest_encoding,
+ const unsigned char *tab, bool noError);
+extern int latin2mic(const unsigned char *l, unsigned char *p, int len,
+ int lc, int encoding, bool noError);
+extern int mic2latin(const unsigned char *mic, unsigned char *p, int len,
+ int lc, int encoding, bool noError);
+extern int latin2mic_with_table(const unsigned char *l, unsigned char *p,
int len, int lc, int encoding,
- const unsigned char *tab);
-extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
+ const unsigned char *tab, bool noError);
+extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
int len, int lc, int encoding,
- const unsigned char *tab);
+ const unsigned char *tab, bool noError);
#ifdef WIN32
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
--
RESET SESSION AUTHORIZATION;
DROP USER regress_conversion_user;
+--
+-- Test built-in conversion functions.
+--
+-- Helper function to test a conversion. Uses the test_enc_conversion function
+-- that was created in the create_function_1 test.
+create or replace function test_conv(
+ input IN bytea,
+ src_encoding IN text,
+ dst_encoding IN text,
+ result OUT bytea,
+ errorat OUT bytea,
+ error OUT text)
+language plpgsql as
+$$
+declare
+ validlen int;
+begin
+ -- First try to perform the conversion with noError = false. If that errors out,
+ -- capture the error message, and try again with noError = true. The second call
+ -- should succeed and return the position of the error, return that too.
+ begin
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
+ errorat = NULL;
+ error := NULL;
+ exception when others then
+ error := sqlerrm;
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
+ errorat = substr(input, validlen + 1);
+ end;
+ return;
+end;
+$$;
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xc3a4c3b6', 'valid, extra latin chars'),
+ ('\xd184d0bed0be', 'valid, cyrillic'),
+ ('\x666f6fe8b1a1', 'valid, kanji/Chinese'),
+ ('\xe382abe3829a', 'valid, two chars that combine to one in EUC_JIS_2004'),
+ ('\xe382ab', 'only first half of combined char in EUC_JIS_2004'),
+ ('\xe382abe382', 'incomplete combination when converted EUC_JIS_2004'),
+ ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+ ('\x666f6fefa8aa', 'valid, needs mapping function to convert to GB18030'),
+ ('\x66e8b1ff6f6f', 'invalid byte sequence'),
+ ('\x66006f', 'invalid, NUL byte'),
+ ('\x666f6fe8b100', 'invalid, NUL byte'),
+ ('\x666f6fe8b1', 'incomplete character at end');
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+ description | result | errorat | error
+------------------------------------------------------+----------------------+--------------+-----------------------------------------------------------
+ valid, pure ASCII | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | |
+ valid, cyrillic | \xd184d0bed0be | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | |
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | |
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | |
+ incomplete combination when converted EUC_JIS_2004 | \xe382ab | \xe382 | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | |
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | |
+ invalid byte sequence | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------------+----------------------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xa9daa9ec | |
+ valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fbedd | |
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5f7 | |
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | |
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | |
+ valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | |
+ valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | |
+ valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \x | \xc3a4c3b6 | character with byte sequence 0xc3 0xa4 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, cyrillic | \xd184d0bed0be | \xc6cfcf | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------------------------+--------------+-----------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \x81308a3181308b32 | |
+ valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fcff3 | |
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5ab8139a732 | |
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | |
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \xa5ab | \xe382 | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x8334e5398238c4338330b335 | |
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f84309c38 | |
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fbedd', 'valid'),
+ ('\xa5f7', 'valid, translates to two UTF-8 chars '),
+ ('\xbeddbe', 'incomplete char '),
+ ('\x666f6f00bedd', 'invalid, NUL byte'),
+ ('\x666f6fbe00dd', 'invalid, NUL byte'),
+ ('\x666f6fbedd00', 'invalid, NUL byte'),
+ ('\xbe04', 'invalid byte sequence');
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+--------------+----------+--------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fbedd | \x666f6fbedd | |
+ valid, translates to two UTF-8 chars | \xa5f7 | \xa5f7 | |
+ incomplete char | \xbeddbe | \xbedd | \xbe | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte | \x666f6f00bedd | \x666f6f | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6fbe00dd | \x666f6f | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte | \x666f6fbedd00 | \x666f6fbedd | \x00 | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence | \xbe04 | \x | \xbe04 | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+----------------+----------+--------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fbedd | \x666f6fe8b1a1 | |
+ valid, translates to two UTF-8 chars | \xa5f7 | \xe382abe3829a | |
+ incomplete char | \xbeddbe | \xe8b1a1 | \xbe | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte | \x666f6f00bedd | \x666f6f | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6fbe00dd | \x666f6f | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte | \x666f6fbedd00 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence | \xbe04 | \x | \xbe04 | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6f8fdb', 'valid'),
+ ('\x666f6f81c0', 'valid, no translation to UTF-8'),
+ ('\x666f6f82f5', 'valid, translates to two UTF-8 chars '),
+ ('\x666f6f8fdb8f', 'incomplete char '),
+ ('\x666f6f820a', 'incomplete char, followed by newline '),
+ ('\x666f6f008fdb', 'invalid, NUL byte'),
+ ('\x666f6f8f00db', 'invalid, NUL byte'),
+ ('\x666f6f8fdb00', 'invalid, NUL byte');
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+--------------+----------+----------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6f8fdb | \x666f6f8fdb | |
+ valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6f81c0 | |
+ valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6f82f5 | |
+ incomplete char | \x666f6f8fdb8f | \x666f6f8fdb | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte | \x666f6f8fdb00 | \x666f6f8fdb | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+----------------------+----------+----------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6f8fdb | \x666f6fe8b1a1 | |
+ valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fe28a84 | |
+ valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fe3818be3829a | |
+ incomplete char | \x666f6f8fdb8f | \x666f6fe8b1a1 | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte | \x666f6f8fdb00 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+--------------+----------+----------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6f8fdb | \x666f6fbedd | |
+ valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fa2c2 | |
+ valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fa4f7 | |
+ incomplete char | \x666f6f8fdb8f | \x666f6fbedd | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte | \x666f6f8fdb00 | \x666f6fbedd | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fcff3', 'valid'),
+ ('\x666f6f8431a530', 'valid, no translation to UTF-8'),
+ ('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
+ ('\x666f6f84309c', 'incomplete char '),
+ ('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+ ('\x666f6f84309c3800', 'invalid, NUL byte'),
+ ('\x666f6f84309c0038', 'invalid, NUL byte');
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------+--------------------+------------------+--------------+-------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fcff3 | \x666f6fcff3 | |
+ valid, no translation to UTF-8 | \x666f6f8431a530 | \x666f6f8431a530 | |
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6f84309c38 | |
+ incomplete char | \x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte | \x666f6f84309c3800 | \x666f6f84309c38 | \x00 | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------+--------------------+----------------+--------------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fcff3 | \x666f6fe8b1a1 | |
+ valid, no translation to UTF-8 | \x666f6f8431a530 | \x666f6f | \x8431a530 | character with byte sequence 0x84 0x31 0xa5 0x30 in encoding "GB18030" has no equivalent in encoding "UTF8"
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6fefa8aa | |
+ incomplete char | \x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte | \x666f6f84309c3800 | \x666f6fefa8aa | \x00 | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xe4dede', 'valid'),
+ ('\x00', 'invalid, NUL byte'),
+ ('\xe400dede', 'invalid, NUL byte'),
+ ('\xe4dede00', 'invalid, NUL byte');
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \xe4dede | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xe4 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xe4dede | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \xd184d0bed0be | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xd184 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xd184d0bed0be | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \xc6cfcf | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xc6cfcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \x8bc68bcf8bcf | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \x8bc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \x8bc68bcf8bcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fb648', 'valid'),
+ ('\x666f6fa27f', 'valid, no translation to UTF-8'),
+ ('\x666f6fb60048', 'invalid, NUL byte'),
+ ('\x666f6fb64800', 'invalid, NUL byte');
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+ description | inbytes | result | errorat | error
+--------------------------------+----------------+--------------+----------+------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fb648 | \x666f6fb648 | |
+ valid, no translation to UTF-8 | \x666f6fa27f | \x666f6fa27f | |
+ invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte | \x666f6fb64800 | \x666f6fb648 | \x00 | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+ description | inbytes | result | errorat | error
+--------------------------------+----------------+----------------+----------+------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fb648 | \x666f6fe8b1a1 | |
+ valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f | \xa27f | character with byte sequence 0xa2 0x7f in encoding "BIG5" has no equivalent in encoding "UTF8"
+ invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte | \x666f6fb64800 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+ description | inbytes | result | errorat | error
+--------------------------------+----------------+----------------+----------+------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fb648 | \x666f6f95e2af | |
+ valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f95a3c1 | |
+ invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte | \x666f6fb64800 | \x666f6f95e2af | \x00 | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+--
+-- MULE_INTERNAL
+--
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
+ ('\x8bc68bcf8b', 'invalid,incomplete char'),
+ ('\x92bedd', 'valid (in SHIFT_JIS)'),
+ ('\x92be', 'invalid, incomplete char)'),
+ ('\x666f6f95a3c1', 'valid (in Big5)'),
+ ('\x666f6f95a3', 'invalid, incomplete char'),
+ ('\x9200bedd', 'invalid, NUL byte'),
+ ('\x92bedd00', 'invalid, NUL byte'),
+ ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
+-- Test MULE_INTERNAL verification
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------------+------------------+--------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x8bc68bcf8bcf | |
+ invalid,incomplete char | \x8bc68bcf8b | \x8bc68bcf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS) | \x92bedd | \x92bedd | |
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f95a3c1 | |
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \x92bedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+---------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \xc6cfcf | |
+ invalid,incomplete char | \x8bc68bcf8b | \xc6cf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \xe4dede | |
+ invalid,incomplete char | \x8bc68bcf8b | \xe4de | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ valid (in SHIFT_JIS) | \x92bedd | \x8fdb | |
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \x8fdb | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+--------------+------------------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6fa2a1 | |
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+----------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ valid (in SHIFT_JIS) | \x92bedd | \xbedd | |
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \xbedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
SELECT p.oid, p.proname, c.oid, c.conname
FROM pg_proc p, pg_conversion c
WHERE p.oid = c.conproc AND
- (p.prorettype != 'void'::regtype OR p.proretset OR
- p.pronargs != 5 OR
+ (p.prorettype != 'int4'::regtype OR p.proretset OR
+ p.pronargs != 6 OR
p.proargtypes[0] != 'int4'::regtype OR
p.proargtypes[1] != 'int4'::regtype OR
p.proargtypes[2] != 'cstring'::regtype OR
p.proargtypes[3] != 'internal'::regtype OR
- p.proargtypes[4] != 'int4'::regtype);
+ p.proargtypes[4] != 'int4'::regtype OR
+ p.proargtypes[5] != 'bool'::regtype);
oid | proname | oid | conname
-----+---------+-----+---------
(0 rows)
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
LANGUAGE C;
+CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
+ LANGUAGE C;
+
-- Things that shouldn't work:
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
RETURNS void
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
LANGUAGE C;
+CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
+ LANGUAGE C;
-- Things that shouldn't work:
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
AS 'SELECT ''not an integer'';';
#include "access/htup_details.h"
#include "access/transam.h"
#include "access/xact.h"
+#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "commands/sequence.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/spi.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/supportnodes.h"
#include "optimizer/optimizer.h"
{
PG_RETURN_NULL();
}
+
+/*
+ * Call an encoding conversion or verification function.
+ *
+ * Arguments:
+ * string bytea -- string to convert
+ * src_enc name -- source encoding
+ * dest_enc name -- destination encoding
+ * noError bool -- if set, don't ereport() on invalid or untranslatable
+ * input
+ *
+ * Result is a tuple with two attributes:
+ * int4 -- number of input bytes successfully converted
+ * bytea -- converted string
+ */
+PG_FUNCTION_INFO_V1(test_enc_conversion);
+Datum
+test_enc_conversion(PG_FUNCTION_ARGS)
+{
+ bytea *string = PG_GETARG_BYTEA_PP(0);
+ char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
+ int src_encoding = pg_char_to_encoding(src_encoding_name);
+ char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
+ int dest_encoding = pg_char_to_encoding(dest_encoding_name);
+ bool noError = PG_GETARG_BOOL(3);
+ TupleDesc tupdesc;
+ char *src;
+ char *dst;
+ bytea *retval;
+ Size srclen;
+ Size dstsize;
+ Oid proc;
+ int convertedbytes;
+ int dstlen;
+ Datum values[2];
+ bool nulls[2];
+ HeapTuple tuple;
+
+ if (src_encoding < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid source encoding name \"%s\"",
+ src_encoding_name)));
+ if (dest_encoding < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid destination encoding name \"%s\"",
+ dest_encoding_name)));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+ tupdesc = BlessTupleDesc(tupdesc);
+
+ srclen = VARSIZE_ANY_EXHDR(string);
+ src = VARDATA_ANY(string);
+
+ if (src_encoding == dest_encoding)
+ {
+ /* just check that the source string is valid */
+ int oklen;
+
+ oklen = pg_encoding_verifymbstr(src_encoding, src, srclen);
+
+ if (oklen == srclen)
+ {
+ convertedbytes = oklen;
+ retval = string;
+ }
+ else if (!noError)
+ {
+ report_invalid_encoding(src_encoding, src + oklen, srclen - oklen);
+ }
+ else
+ {
+ /*
+ * build bytea data type structure.
+ */
+ Assert(oklen < srclen);
+ convertedbytes = oklen;
+ retval = (bytea *) palloc(oklen + VARHDRSZ);
+ SET_VARSIZE(retval, oklen + VARHDRSZ);
+ memcpy(VARDATA(retval), src, oklen);
+ }
+ }
+ else
+ {
+ proc = FindDefaultConversionProc(src_encoding, dest_encoding);
+ if (!OidIsValid(proc))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
+ pg_encoding_to_char(src_encoding),
+ pg_encoding_to_char(dest_encoding))));
+
+ if (srclen >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory"),
+ errdetail("String of %d bytes is too long for encoding conversion.",
+ (int) srclen)));
+
+ dstsize = (Size) srclen * MAX_CONVERSION_GROWTH + 1;
+ dst = MemoryContextAlloc(CurrentMemoryContext, dstsize);
+
+ /* perform conversion */
+ convertedbytes = pg_do_encoding_conversion_buf(proc,
+ src_encoding,
+ dest_encoding,
+ (unsigned char *) src, srclen,
+ (unsigned char *) dst, dstsize,
+ noError);
+ dstlen = strlen(dst);
+
+ /*
+ * build bytea data type structure.
+ */
+ retval = (bytea *) palloc(dstlen + VARHDRSZ);
+ SET_VARSIZE(retval, dstlen + VARHDRSZ);
+ memcpy(VARDATA(retval), dst, dstlen);
+
+ pfree(dst);
+ }
+
+ MemSet(nulls, 0, sizeof(nulls));
+ values[0] = Int32GetDatum(convertedbytes);
+ values[1] = PointerGetDatum(retval);
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
--
RESET SESSION AUTHORIZATION;
DROP USER regress_conversion_user;
+
+--
+-- Test built-in conversion functions.
+--
+
+-- Helper function to test a conversion. Uses the test_enc_conversion function
+-- that was created in the create_function_1 test.
+create or replace function test_conv(
+ input IN bytea,
+ src_encoding IN text,
+ dst_encoding IN text,
+
+ result OUT bytea,
+ errorat OUT bytea,
+ error OUT text)
+language plpgsql as
+$$
+declare
+ validlen int;
+begin
+ -- First try to perform the conversion with noError = false. If that errors out,
+ -- capture the error message, and try again with noError = true. The second call
+ -- should succeed and return the position of the error, return that too.
+ begin
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
+ errorat = NULL;
+ error := NULL;
+ exception when others then
+ error := sqlerrm;
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
+ errorat = substr(input, validlen + 1);
+ end;
+ return;
+end;
+$$;
+
+
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xc3a4c3b6', 'valid, extra latin chars'),
+ ('\xd184d0bed0be', 'valid, cyrillic'),
+ ('\x666f6fe8b1a1', 'valid, kanji/Chinese'),
+ ('\xe382abe3829a', 'valid, two chars that combine to one in EUC_JIS_2004'),
+ ('\xe382ab', 'only first half of combined char in EUC_JIS_2004'),
+ ('\xe382abe382', 'incomplete combination when converted EUC_JIS_2004'),
+ ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+ ('\x666f6fefa8aa', 'valid, needs mapping function to convert to GB18030'),
+ ('\x66e8b1ff6f6f', 'invalid byte sequence'),
+ ('\x66006f', 'invalid, NUL byte'),
+ ('\x666f6fe8b100', 'invalid, NUL byte'),
+ ('\x666f6fe8b1', 'incomplete character at end');
+
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fbedd', 'valid'),
+ ('\xa5f7', 'valid, translates to two UTF-8 chars '),
+ ('\xbeddbe', 'incomplete char '),
+ ('\x666f6f00bedd', 'invalid, NUL byte'),
+ ('\x666f6fbe00dd', 'invalid, NUL byte'),
+ ('\x666f6fbedd00', 'invalid, NUL byte'),
+ ('\xbe04', 'invalid byte sequence');
+
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6f8fdb', 'valid'),
+ ('\x666f6f81c0', 'valid, no translation to UTF-8'),
+ ('\x666f6f82f5', 'valid, translates to two UTF-8 chars '),
+ ('\x666f6f8fdb8f', 'incomplete char '),
+ ('\x666f6f820a', 'incomplete char, followed by newline '),
+ ('\x666f6f008fdb', 'invalid, NUL byte'),
+ ('\x666f6f8f00db', 'invalid, NUL byte'),
+ ('\x666f6f8fdb00', 'invalid, NUL byte');
+
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fcff3', 'valid'),
+ ('\x666f6f8431a530', 'valid, no translation to UTF-8'),
+ ('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
+ ('\x666f6f84309c', 'incomplete char '),
+ ('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+ ('\x666f6f84309c3800', 'invalid, NUL byte'),
+ ('\x666f6f84309c0038', 'invalid, NUL byte');
+
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xe4dede', 'valid'),
+ ('\x00', 'invalid, NUL byte'),
+ ('\xe400dede', 'invalid, NUL byte'),
+ ('\xe4dede00', 'invalid, NUL byte');
+
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fb648', 'valid'),
+ ('\x666f6fa27f', 'valid, no translation to UTF-8'),
+ ('\x666f6fb60048', 'invalid, NUL byte'),
+ ('\x666f6fb64800', 'invalid, NUL byte');
+
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+
+--
+-- MULE_INTERNAL
+--
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
+ ('\x8bc68bcf8b', 'invalid,incomplete char'),
+ ('\x92bedd', 'valid (in SHIFT_JIS)'),
+ ('\x92be', 'invalid, incomplete char)'),
+ ('\x666f6f95a3c1', 'valid (in Big5)'),
+ ('\x666f6f95a3', 'invalid, incomplete char'),
+ ('\x9200bedd', 'invalid, NUL byte'),
+ ('\x92bedd00', 'invalid, NUL byte'),
+ ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
+
+-- Test MULE_INTERNAL verification
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
SELECT p.oid, p.proname, c.oid, c.conname
FROM pg_proc p, pg_conversion c
WHERE p.oid = c.conproc AND
- (p.prorettype != 'void'::regtype OR p.proretset OR
- p.pronargs != 5 OR
+ (p.prorettype != 'int4'::regtype OR p.proretset OR
+ p.pronargs != 6 OR
p.proargtypes[0] != 'int4'::regtype OR
p.proargtypes[1] != 'int4'::regtype OR
p.proargtypes[2] != 'cstring'::regtype OR
p.proargtypes[3] != 'internal'::regtype OR
- p.proargtypes[4] != 'int4'::regtype);
+ p.proargtypes[4] != 'int4'::regtype OR
+ p.proargtypes[5] != 'bool'::regtype);
-- Check for conprocs that don't perform the specific conversion that
-- pg_conversion alleges they do, by trying to invoke each conversion