Add 'noError' argument to encoding conversion functions.

author Heikki Linnakangas <[email protected]>

Thu, 1 Apr 2021 08:45:22 +0000 (11:45 +0300)

committer Heikki Linnakangas <[email protected]>

Thu, 1 Apr 2021 08:45:22 +0000 (11:45 +0300)
author Heikki Linnakangas <[email protected]>
Thu, 1 Apr 2021 08:45:22 +0000 (11:45 +0300)
committer Heikki Linnakangas <[email protected]>
Thu, 1 Apr 2021 08:45:22 +0000 (11:45 +0300)
diff --git a/doc/src/sgml/ref/create_conversion.sgml b/doc/src/sgml/ref/create_conversion.sgml

index e7700fecfc53f13e546ad0869469364bb876eca3..75d7b0094558f4dfefbf2dae848a1ab1db1ae1c6 100644 (file)
--- a/doc/src/sgml/ref/create_conversion.sgml
+++ b/doc/src/sgml/ref/create_conversion.sgml
@@ -117,9 +117,15 @@ conv_proc(
      integer,  -- destination encoding ID
      cstring,  -- source string (null terminated C string)
      internal, -- destination (fill with a null terminated C string)
-    integer   -- source string length
-) RETURNS void;
-</programlisting></para>
+    integer,  -- source string length
+    boolean   -- if true, don't throw an error if conversion fails
+) RETURNS integer;
+</programlisting>
+       The return value is the number of source bytes that were successfully
+       converted. If the last argument is false, the function must throw an
+       error on invalid input, and the return value is always equal to the
+       source string length.
+      </para>
       </listitem>
      </varlistentry>
     </variablelist>
diff --git a/src/backend/commands/conversioncmds.c b/src/backend/commands/conversioncmds.c

index f7ff321de71a06cac04fb4a0c56a3679c8e14703..5fed97a2f990e2ccb45b26054d705ac4068d4442 100644 (file)
--- a/src/backend/commands/conversioncmds.c
+++ b/src/backend/commands/conversioncmds.c
@@ -45,8 +45,9 @@ CreateConversionCommand(CreateConversionStmt *stmt)
     const char *from_encoding_name = stmt->for_encoding_name;
     const char *to_encoding_name = stmt->to_encoding_name;
     List       *func_name = stmt->func_name;
-   static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID};
+   static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID};
     char        result[1];
+   Datum       funcresult;
  
     /* Convert list of names to a name and namespace */
     namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name,
@@ -92,12 +93,12 @@ CreateConversionCommand(CreateConversionStmt *stmt)
     funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid),
                              funcargs, false);
  
-   /* Check it returns VOID, else it's probably the wrong function */
-   if (get_func_rettype(funcoid) != VOIDOID)
+   /* Check it returns int4, else it's probably the wrong function */
+   if (get_func_rettype(funcoid) != INT4OID)
         ereport(ERROR,
                 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                  errmsg("encoding conversion function %s must return type %s",
-                       NameListToString(func_name), "void")));
+                       NameListToString(func_name), "integer")));
  
     /* Check we have EXECUTE rights for the function */
     aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE);
@@ -111,12 +112,23 @@ CreateConversionCommand(CreateConversionStmt *stmt)
      * string; the conversion function should throw an error if it can't
      * perform the requested conversion.
      */
-   OidFunctionCall5(funcoid,
-                    Int32GetDatum(from_encoding),
-                    Int32GetDatum(to_encoding),
-                    CStringGetDatum(""),
-                    CStringGetDatum(result),
-                    Int32GetDatum(0));
+   funcresult = OidFunctionCall6(funcoid,
+                                 Int32GetDatum(from_encoding),
+                                 Int32GetDatum(to_encoding),
+                                 CStringGetDatum(""),
+                                 CStringGetDatum(result),
+                                 Int32GetDatum(0),
+                                 BoolGetDatum(false));
+
+   /*
+    * The function should return 0 for empty input. Might as well check that,
+    * too.
+    */
+   if (DatumGetInt32(funcresult) != 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                errmsg("encoding conversion function %s returned incorrect result for empty input",
+                       NameListToString(func_name))));
  
     /*
      * All seem ok, go ahead (possible failure would be a duplicate conversion
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c

index 9e4ea1b345a8b25079ec971a224e7596d50c6edc..423df2f3006d5d711f0d1e4d9d75d7debfa7f176 100644 (file)
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -2271,6 +2271,8 @@ write_console(const char *line, int len)
      * Conversion on non-win32 platforms is not implemented yet. It requires
      * non-throw version of pg_do_encoding_conversion(), that converts
      * unconvertable characters to '?' without errors.
+    *
+    * XXX: We have a no-throw version now. It doesn't convert to '?' though.
      */
  #endif
  
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c

index a07b54bd3b85cd24245f750e537a6c745c4b7ef8..33e9c9a9e3c3d17098b85ef90ba5e1a9bd9e4247 100644 (file)
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -25,15 +25,20 @@
   * tab holds conversion entries for the source charset
   * starting from 128 (0x80). each entry in the table holds the corresponding
   * code point for the target charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len'.
   */
-void
+int
  local2local(const unsigned char *l,
             unsigned char *p,
             int len,
             int src_encoding,
             int dest_encoding,
-           const unsigned char *tab)
+           const unsigned char *tab,
+           bool noError)
  {
+   const unsigned char *start = l;
     unsigned char c1,
                 c2;
  
@@ -41,7 +46,11 @@ local2local(const unsigned char *l,
     {
         c1 = *l;
         if (c1 == 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(src_encoding, (const char *) l, len);
+       }
         if (!IS_HIGHBIT_SET(c1))
             *p++ = c1;
         else
@@ -50,13 +59,19 @@ local2local(const unsigned char *l,
             if (c2)
                 *p++ = c2;
             else
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(src_encoding, dest_encoding,
                                            (const char *) l, len);
+           }
         }
         l++;
         len--;
     }
     *p = '\0';
+
+   return l - start;
  }
  
  /*
@@ -66,18 +81,26 @@ local2local(const unsigned char *l,
   * p is the output area (must be large enough!)
   * lc is the mule character set id for the local encoding
   * encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len'.
   */
-void
+int
  latin2mic(const unsigned char *l, unsigned char *p, int len,
-         int lc, int encoding)
+         int lc, int encoding, bool noError)
  {
+   const unsigned char *start = l;
     int         c1;
  
     while (len > 0)
     {
         c1 = *l;
         if (c1 == 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(encoding, (const char *) l, len);
+       }
         if (IS_HIGHBIT_SET(c1))
             *p++ = lc;
         *p++ = c1;
@@ -85,6 +108,8 @@ latin2mic(const unsigned char *l, unsigned char *p, int len,
         len--;
     }
     *p = '\0';
+
+   return l - start;
  }
  
  /*
@@ -94,18 +119,26 @@ latin2mic(const unsigned char *l, unsigned char *p, int len,
   * p is the output area (must be large enough!)
   * lc is the mule character set id for the local encoding
   * encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len'.
   */
-void
+int
  mic2latin(const unsigned char *mic, unsigned char *p, int len,
-         int lc, int encoding)
+         int lc, int encoding, bool noError)
  {
+   const unsigned char *start = mic;
     int         c1;
  
     while (len > 0)
     {
         c1 = *mic;
         if (c1 == 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+       }
         if (!IS_HIGHBIT_SET(c1))
         {
             /* easy for ASCII */
@@ -118,17 +151,27 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len,
             int         l = pg_mule_mblen(mic);
  
             if (len < l)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
                                         len);
+           }
             if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                            (const char *) mic, len);
+           }
             *p++ = mic[1];
             mic += 2;
             len -= 2;
         }
     }
     *p = '\0';
+
+   return mic - start;
  }
  
  
@@ -143,15 +186,20 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len,
   * tab holds conversion entries for the local charset
   * starting from 128 (0x80). each entry in the table holds the corresponding
   * code point for the mule encoding, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len'.
   */
-void
+int
  latin2mic_with_table(const unsigned char *l,
                      unsigned char *p,
                      int len,
                      int lc,
                      int encoding,
-                    const unsigned char *tab)
+                    const unsigned char *tab,
+                    bool noError)
  {
+   const unsigned char *start = l;
     unsigned char c1,
                 c2;
  
@@ -159,7 +207,11 @@ latin2mic_with_table(const unsigned char *l,
     {
         c1 = *l;
         if (c1 == 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(encoding, (const char *) l, len);
+       }
         if (!IS_HIGHBIT_SET(c1))
             *p++ = c1;
         else
@@ -171,13 +223,19 @@ latin2mic_with_table(const unsigned char *l,
                 *p++ = c2;
             }
             else
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(encoding, PG_MULE_INTERNAL,
                                            (const char *) l, len);
+           }
         }
         l++;
         len--;
     }
     *p = '\0';
+
+   return l - start;
  }
  
  /*
@@ -191,15 +249,20 @@ latin2mic_with_table(const unsigned char *l,
   * tab holds conversion entries for the mule internal code's second byte,
   * starting from 128 (0x80). each entry in the table holds the corresponding
   * code point for the local charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len'.
   */
-void
+int
  mic2latin_with_table(const unsigned char *mic,
                      unsigned char *p,
                      int len,
                      int lc,
                      int encoding,
-                    const unsigned char *tab)
+                    const unsigned char *tab,
+                    bool noError)
  {
+   const unsigned char *start = mic;
     unsigned char c1,
                 c2;
  
@@ -207,7 +270,11 @@ mic2latin_with_table(const unsigned char *mic,
     {
         c1 = *mic;
         if (c1 == 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+       }
         if (!IS_HIGHBIT_SET(c1))
         {
             /* easy for ASCII */
@@ -220,11 +287,17 @@ mic2latin_with_table(const unsigned char *mic,
             int         l = pg_mule_mblen(mic);
  
             if (len < l)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
                                         len);
+           }
             if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
                 (c2 = tab[mic[1] - HIGHBIT]) == 0)
             {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                            (const char *) mic, len);
                 break;          /* keep compiler quiet */
@@ -235,6 +308,8 @@ mic2latin_with_table(const unsigned char *mic,
         }
     }
     *p = '\0';
+
+   return mic - start;
  }
  
  /*
@@ -424,18 +499,22 @@ pg_mb_radix_conv(const pg_mb_radix_tree *rt,
   * is applied.  An error is raised if no match is found.
   *
   * See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len'.
   */
-void
+int
  UtfToLocal(const unsigned char *utf, int len,
            unsigned char *iso,
            const pg_mb_radix_tree *map,
            const pg_utf_to_local_combined *cmap, int cmapsize,
            utf_local_conversion_func conv_func,
-          int encoding)
+          int encoding, bool noError)
  {
     uint32      iutf;
     int         l;
     const pg_utf_to_local_combined *cp;
+   const unsigned char *start = utf;
  
     if (!PG_VALID_ENCODING(encoding))
         ereport(ERROR,
@@ -505,10 +584,19 @@ UtfToLocal(const unsigned char *utf, int len,
  
             l = pg_utf_mblen(utf);
             if (len < l)
+           {
+               /* need more data to decide if this is a combined char */
+               utf -= l_save;
                 break;
+           }
  
             if (!pg_utf8_islegal(utf, l))
+           {
+               if (!noError)
+                   report_invalid_encoding(PG_UTF8, (const char *) utf, len);
+               utf -= l_save;
                 break;
+           }
  
             /* We assume ASCII character cannot be in combined map */
             if (l > 1)
@@ -584,15 +672,20 @@ UtfToLocal(const unsigned char *utf, int len,
         }
  
         /* failed to translate this character */
+       utf -= l;
+       if (noError)
+           break;
         report_untranslatable_char(PG_UTF8, encoding,
-                                  (const char *) (utf - l), len);
+                                  (const char *) utf, len);
     }
  
     /* if we broke out of loop early, must be invalid input */
-   if (len > 0)
+   if (len > 0 && !noError)
         report_invalid_encoding(PG_UTF8, (const char *) utf, len);
  
     *iso = '\0';
+
+   return utf - start;
  }
  
  /*
@@ -616,18 +709,23 @@ UtfToLocal(const unsigned char *utf, int len,
   * (if provided) is applied.  An error is raised if no match is found.
   *
   * See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len'.
   */
-void
+int
  LocalToUtf(const unsigned char *iso, int len,
            unsigned char *utf,
            const pg_mb_radix_tree *map,
            const pg_local_to_utf_combined *cmap, int cmapsize,
            utf_local_conversion_func conv_func,
-          int encoding)
+          int encoding,
+          bool noError)
  {
     uint32      iiso;
     int         l;
     const pg_local_to_utf_combined *cp;
+   const unsigned char *start = iso;
  
     if (!PG_VALID_ENCODING(encoding))
         ereport(ERROR,
@@ -723,13 +821,18 @@ LocalToUtf(const unsigned char *iso, int len,
         }
  
         /* failed to translate this character */
+       iso -= l;
+       if (noError)
+           break;
         report_untranslatable_char(encoding, PG_UTF8,
-                                  (const char *) (iso - l), len);
+                                  (const char *) iso, len);
     }
  
     /* if we broke out of loop early, must be invalid input */
-   if (len > 0)
+   if (len > 0 && !noError)
         report_invalid_encoding(encoding, (const char *) iso, len);
  
     *utf = '\0';
+
+   return iso - start;
  }
diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c

index 4c5b02654de39734288332fc14f5d4d4d4cfea5f..368c2deb5e4b92f68c97c26fdb64419aa5b0975d 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
@@ -44,8 +44,11 @@ PG_FUNCTION_INFO_V1(win866_to_iso);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  
@@ -306,12 +309,14 @@ koi8r_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL);
  
-   latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R);
+   converted = latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -320,12 +325,14 @@ mic_to_koi8r(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R);
  
-   mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R);
+   converted = mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -334,12 +341,14 @@ iso_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL);
  
-   latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi);
+   converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -348,12 +357,14 @@ mic_to_iso(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5);
  
-   mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso);
+   converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -362,12 +373,14 @@ win1251_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL);
  
-   latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi);
+   converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -376,12 +389,14 @@ mic_to_win1251(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251);
  
-   mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251);
+   converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -390,12 +405,14 @@ win866_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL);
  
-   latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi);
+   converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -404,12 +421,14 @@ mic_to_win866(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866);
  
-   mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866);
+   converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -418,12 +437,14 @@ koi8r_to_win1251(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN1251);
  
-   local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251);
+   converted = local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -432,12 +453,14 @@ win1251_to_koi8r(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_KOI8R);
  
-   local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi);
+   converted = local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -446,12 +469,14 @@ koi8r_to_win866(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN866);
  
-   local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866);
+   converted = local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -460,12 +485,14 @@ win866_to_koi8r(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R);
  
-   local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi);
+   converted = local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -474,12 +501,14 @@ win866_to_win1251(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251);
  
-   local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251);
+   converted = local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -488,12 +517,14 @@ win1251_to_win866(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_WIN866);
  
-   local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866);
+   converted = local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -502,12 +533,14 @@ iso_to_koi8r(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_KOI8R);
  
-   local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi);
+   converted = local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -516,12 +549,14 @@ koi8r_to_iso(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5);
  
-   local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso);
+   converted = local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -530,12 +565,14 @@ iso_to_win1251(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN1251);
  
-   local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251);
+   converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -544,12 +581,14 @@ win1251_to_iso(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_ISO_8859_5);
  
-   local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso);
+   converted = local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -558,12 +597,14 @@ iso_to_win866(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN866);
  
-   local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866);
+   converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -572,10 +613,12 @@ win866_to_iso(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5);
  
-   local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso);
+   converted = local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c

index 4d7fb116cfdbf1542e7bea358dfbb947218fa494..a3fd35bd406360c1eb067cba3c1ef27ca366f8f8 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
+++ b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
@@ -19,8 +19,8 @@ PG_MODULE_MAGIC;
  PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
  PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
  
-static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
-static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
+static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError);
  
  /* ----------
   * conv_proc(
@@ -28,8 +28,11 @@ static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  
@@ -39,12 +42,14 @@ euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
  
-   euc_jis_20042shift_jis_2004(src, dest, len);
+   converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -53,20 +58,23 @@ shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
  
-   shift_jis_20042euc_jis_2004(src, dest, len);
+   converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  /*
   * EUC_JIS_2004 -> SHIFT_JIS_2004
   */
-static void
-euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = euc;
     int         c1,
                 ku,
                 ten;
@@ -79,8 +87,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_JIS_2004,
                                         (const char *) euc, len);
+           }
             *p++ = c1;
             euc++;
             len--;
@@ -90,8 +102,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
         l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
  
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_EUC_JIS_2004,
                                     (const char *) euc, len);
+       }
  
         if (c1 == SS2 && l == 2)    /* JIS X 0201 kana? */
         {
@@ -121,8 +137,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
                         *p++ = (ku + 0x19b) >> 1;
                     }
                     else
+                   {
+                       if (noError)
+                           break;
                         report_invalid_encoding(PG_EUC_JIS_2004,
                                                 (const char *) euc, len);
+                   }
             }
  
             if (ku % 2)
@@ -132,8 +152,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
                 else if (ten >= 64 && ten <= 94)
                     *p++ = ten + 0x40;
                 else
+               {
+                   if (noError)
+                       break;
                     report_invalid_encoding(PG_EUC_JIS_2004,
                                             (const char *) euc, len);
+               }
             }
             else
                 *p++ = ten + 0x9e;
@@ -149,8 +173,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
             else if (ku >= 63 && ku <= 94)
                 *p++ = (ku + 0x181) >> 1;
             else
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_JIS_2004,
                                         (const char *) euc, len);
+           }
  
             if (ku % 2)
             {
@@ -159,20 +187,30 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
                 else if (ten >= 64 && ten <= 94)
                     *p++ = ten + 0x40;
                 else
+               {
+                   if (noError)
+                       break;
                     report_invalid_encoding(PG_EUC_JIS_2004,
                                             (const char *) euc, len);
+               }
             }
             else
                 *p++ = ten + 0x9e;
         }
         else
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_EUC_JIS_2004,
                                     (const char *) euc, len);
+       }
  
         euc += l;
         len -= l;
     }
     *p = '\0';
+
+   return euc - start;
  }
  
  /*
@@ -212,9 +250,10 @@ get_ten(int b, int *ku)
   * SHIFT_JIS_2004 ---> EUC_JIS_2004
   */
  
-static void
-shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
+static int
+shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = sjis;
     int         c1;
     int         ku,
                 ten,
@@ -230,8 +269,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_SHIFT_JIS_2004,
                                         (const char *) sjis, len);
+           }
             *p++ = c1;
             sjis++;
             len--;
@@ -241,8 +284,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
         l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
  
         if (l < 0 || l > len)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_SHIFT_JIS_2004,
                                     (const char *) sjis, len);
+       }
  
         if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
         {
@@ -266,8 +313,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
                 ku = (c1 << 1) - 0x100;
                 ten = get_ten(c2, &kubun);
                 if (ten < 0)
+               {
+                   if (noError)
+                       break;
                     report_invalid_encoding(PG_SHIFT_JIS_2004,
                                             (const char *) sjis, len);
+               }
                 ku -= kubun;
             }
             else if (c1 >= 0xe0 && c1 <= 0xef)  /* plane 1 62ku-94ku */
@@ -275,9 +326,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
                 ku = (c1 << 1) - 0x180;
                 ten = get_ten(c2, &kubun);
                 if (ten < 0)
+               {
+                   if (noError)
+                       break;
                     report_invalid_encoding(PG_SHIFT_JIS_2004,
-
                                             (const char *) sjis, len);
+               }
                 ku -= kubun;
             }
             else if (c1 >= 0xf0 && c1 <= 0xf3)  /* plane 2
@@ -286,8 +340,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
                 plane = 2;
                 ten = get_ten(c2, &kubun);
                 if (ten < 0)
+               {
+                   if (noError)
+                       break;
                     report_invalid_encoding(PG_SHIFT_JIS_2004,
                                             (const char *) sjis, len);
+               }
                 switch (c1)
                 {
                     case 0xf0:
@@ -309,16 +367,24 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
                 plane = 2;
                 ten = get_ten(c2, &kubun);
                 if (ten < 0)
+               {
+                   if (noError)
+                       break;
                     report_invalid_encoding(PG_SHIFT_JIS_2004,
                                             (const char *) sjis, len);
+               }
                 if (c1 == 0xf4 && kubun == 1)
                     ku = 15;
                 else
                     ku = (c1 << 1) - 0x19a - kubun;
             }
             else
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_SHIFT_JIS_2004,
                                         (const char *) sjis, len);
+           }
  
             if (plane == 2)
                 *p++ = SS3;
@@ -330,4 +396,6 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
         len -= l;
     }
     *p = '\0';
+
+   return sjis - start;
  }
diff --git a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c

index e9bb896935f3737cbe71634617ef3ca1a0d325a8..09b3c2e75bfefa95972a2260f954a1fab8db27e9 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_cn);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  
-static void euc_cn2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_cn(const unsigned char *mic, unsigned char *p, int len);
+static int euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError);
  
  Datum
  euc_cn_to_mic(PG_FUNCTION_ARGS)
@@ -40,12 +43,14 @@ euc_cn_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL);
  
-   euc_cn2mic(src, dest, len);
+   converted = euc_cn2mic(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -54,20 +59,23 @@ mic_to_euc_cn(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN);
  
-   mic2euc_cn(src, dest, len);
+   converted = mic2euc_cn(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  /*
   * EUC_CN ---> MIC
   */
-static void
-euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = euc;
     int         c1;
  
     while (len > 0)
@@ -76,7 +84,11 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
         if (IS_HIGHBIT_SET(c1))
         {
             if (len < 2 || !IS_HIGHBIT_SET(euc[1]))
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
+           }
             *p++ = LC_GB2312_80;
             *p++ = c1;
             *p++ = euc[1];
@@ -86,21 +98,28 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
         else
         {                       /* should be ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
+           }
             *p++ = c1;
             euc++;
             len--;
         }
     }
     *p = '\0';
+
+   return euc - start;
  }
  
  /*
   * MIC ---> EUC_CN
   */
-static void
-mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = mic;
     int         c1;
  
     while (len > 0)
@@ -109,11 +128,19 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
         if (IS_HIGHBIT_SET(c1))
         {
             if (c1 != LC_GB2312_80)
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
                                            (const char *) mic, len);
+           }
             if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2]))
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL,
                                         (const char *) mic, len);
+           }
             mic++;
             *p++ = *mic++;
             *p++ = *mic++;
@@ -122,12 +149,18 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
         else
         {                       /* should be ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL,
                                         (const char *) mic, len);
+           }
             *p++ = c1;
             mic++;
             len--;
         }
     }
     *p = '\0';
+
+   return mic - start;
  }
diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

index 5059f917a982829b26159e4091edce17fc02d51a..2e68708893dcb30402930ad2440a5125dc614fd8 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
+++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
@@ -42,17 +42,20 @@ PG_FUNCTION_INFO_V1(mic_to_sjis);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  
-static void sjis2mic(const unsigned char *sjis, unsigned char *p, int len);
-static void mic2sjis(const unsigned char *mic, unsigned char *p, int len);
-static void euc_jp2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_jp(const unsigned char *mic, unsigned char *p, int len);
-static void euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len);
-static void sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len);
+static int sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError);
+static int mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
  
  Datum
  euc_jp_to_sjis(PG_FUNCTION_ARGS)
@@ -60,12 +63,14 @@ euc_jp_to_sjis(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS);
  
-   euc_jp2sjis(src, dest, len);
+   converted = euc_jp2sjis(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -74,12 +79,14 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP);
  
-   sjis2euc_jp(src, dest, len);
+   converted = sjis2euc_jp(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -88,12 +95,14 @@ euc_jp_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);
  
-   euc_jp2mic(src, dest, len);
+   converted = euc_jp2mic(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -102,12 +111,14 @@ mic_to_euc_jp(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);
  
-   mic2euc_jp(src, dest, len);
+   converted = mic2euc_jp(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -116,12 +127,14 @@ sjis_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);
  
-   sjis2mic(src, dest, len);
+   converted = sjis2mic(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -130,20 +143,23 @@ mic_to_sjis(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);
  
-   mic2sjis(src, dest, len);
+   converted = mic2sjis(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  /*
   * SJIS ---> MIC
   */
-static void
-sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
+static int
+sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = sjis;
     int         c1,
                 c2,
                 i,
@@ -167,7 +183,11 @@ sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
              * JIS X0208, X0212, user defined extended characters
              */
             if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+           }
             c2 = sjis[1];
             k = (c1 << 8) + c2;
             if (k >= 0xed40 && k < 0xf040)
@@ -257,21 +277,28 @@ sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
         else
         {                       /* should be ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+           }
             *p++ = c1;
             sjis++;
             len--;
         }
     }
     *p = '\0';
+
+   return sjis - start;
  }
  
  /*
   * MIC ---> SJIS
   */
-static void
-mic2sjis(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = mic;
     int         c1,
                 c2,
                 k,
@@ -284,8 +311,12 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL,
                                         (const char *) mic, len);
+           }
             *p++ = c1;
             mic++;
             len--;
@@ -293,8 +324,12 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_MULE_INTERNAL,
                                     (const char *) mic, len);
+       }
         if (c1 == LC_JISX0201K)
             *p++ = mic[1];
         else if (c1 == LC_JISX0208)
@@ -350,20 +385,27 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
             }
         }
         else
+       {
+           if (noError)
+               break;
             report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
                                        (const char *) mic, len);
+       }
         mic += l;
         len -= l;
     }
     *p = '\0';
+
+   return mic - start;
  }
  
  /*
   * EUC_JP ---> MIC
   */
-static void
-euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = euc;
     int         c1;
     int         l;
  
@@ -374,8 +416,12 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_JP,
                                         (const char *) euc, len);
+           }
             *p++ = c1;
             euc++;
             len--;
@@ -383,8 +429,12 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_EUC_JP,
                                     (const char *) euc, len);
+       }
         if (c1 == SS2)
         {                       /* 1 byte kana? */
             *p++ = LC_JISX0201K;
@@ -406,14 +456,17 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
         len -= l;
     }
     *p = '\0';
+
+   return euc - start;
  }
  
  /*
   * MIC ---> EUC_JP
   */
-static void
-mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = mic;
     int         c1;
     int         l;
  
@@ -424,8 +477,12 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL,
                                         (const char *) mic, len);
+           }
             *p++ = c1;
             mic++;
             len--;
@@ -433,8 +490,12 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_MULE_INTERNAL,
                                     (const char *) mic, len);
+       }
         if (c1 == LC_JISX0201K)
         {
             *p++ = SS2;
@@ -452,20 +513,27 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
             *p++ = mic[2];
         }
         else
+       {
+           if (noError)
+               break;
             report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
                                        (const char *) mic, len);
+       }
         mic += l;
         len -= l;
     }
     *p = '\0';
+
+   return mic - start;
  }
  
  /*
   * EUC_JP -> SJIS
   */
-static void
-euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = euc;
     int         c1,
                 c2,
                 k;
@@ -478,8 +546,12 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_JP,
                                         (const char *) euc, len);
+           }
             *p++ = c1;
             euc++;
             len--;
@@ -487,8 +559,12 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_EUC_JP,
                                     (const char *) euc, len);
+       }
         if (c1 == SS2)
         {
             /* hankaku kana? */
@@ -551,14 +627,17 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
         len -= l;
     }
     *p = '\0';
+
+   return euc - start;
  }
  
  /*
   * SJIS ---> EUC_JP
   */
-static void
-sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
+static int
+sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = sjis;
     int         c1,
                 c2,
                 i,
@@ -573,8 +652,12 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_SJIS,
                                         (const char *) sjis, len);
+           }
             *p++ = c1;
             sjis++;
             len--;
@@ -582,8 +665,12 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_SJIS,
                                     (const char *) sjis, len);
+       }
         if (c1 >= 0xa1 && c1 <= 0xdf)
         {
             /* JIS X0201 (1 byte kana) */
@@ -680,4 +767,6 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
         len -= l;
     }
     *p = '\0';
+
+   return sjis - start;
  }
diff --git a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c

index ac823d6c270183269714c28f719f56d63efdf8df..3b85f0c1861aee81b2775a7e3f8c4e326e6b798c 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_kr);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  
-static void euc_kr2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_kr(const unsigned char *mic, unsigned char *p, int len);
+static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError);
  
  Datum
  euc_kr_to_mic(PG_FUNCTION_ARGS)
@@ -40,12 +43,14 @@ euc_kr_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL);
  
-   euc_kr2mic(src, dest, len);
+   converted = euc_kr2mic(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -54,20 +59,23 @@ mic_to_euc_kr(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR);
  
-   mic2euc_kr(src, dest, len);
+   converted = mic2euc_kr(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  /*
   * EUC_KR ---> MIC
   */
-static void
-euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = euc;
     int         c1;
     int         l;
  
@@ -78,8 +86,12 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
         {
             l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
             if (l != 2)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_KR,
                                         (const char *) euc, len);
+           }
             *p++ = LC_KS5601;
             *p++ = c1;
             *p++ = euc[1];
@@ -89,22 +101,29 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
         else
         {                       /* should be ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_KR,
                                         (const char *) euc, len);
+           }
             *p++ = c1;
             euc++;
             len--;
         }
     }
     *p = '\0';
+
+   return euc - start;
  }
  
  /*
   * MIC ---> EUC_KR
   */
-static void
-mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = mic;
     int         c1;
     int         l;
  
@@ -115,8 +134,12 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL,
                                         (const char *) mic, len);
+           }
             *p++ = c1;
             mic++;
             len--;
@@ -124,18 +147,28 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_MULE_INTERNAL,
                                     (const char *) mic, len);
+       }
         if (c1 == LC_KS5601)
         {
             *p++ = mic[1];
             *p++ = mic[2];
         }
         else
+       {
+           if (noError)
+               break;
             report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
                                        (const char *) mic, len);
+       }
         mic += l;
         len -= l;
     }
     *p = '\0';
+
+   return mic - start;
  }
diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c

index 66c242d7f3688d2f2935db2bf72caf3f6a706b9c..4bf8acda99fe909c67737507cdf69a4a9d89183b 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
+++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
@@ -32,17 +32,20 @@ PG_FUNCTION_INFO_V1(mic_to_big5);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  
-static void euc_tw2big5(const unsigned char *euc, unsigned char *p, int len);
-static void big52euc_tw(const unsigned char *euc, unsigned char *p, int len);
-static void big52mic(const unsigned char *big5, unsigned char *p, int len);
-static void mic2big5(const unsigned char *mic, unsigned char *p, int len);
-static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len);
+static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int big52euc_tw(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
+static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
  
  Datum
  euc_tw_to_big5(PG_FUNCTION_ARGS)
@@ -50,12 +53,14 @@ euc_tw_to_big5(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
  
-   euc_tw2big5(src, dest, len);
+   converted = euc_tw2big5(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -64,12 +69,14 @@ big5_to_euc_tw(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
  
-   big52euc_tw(src, dest, len);
+   converted = big52euc_tw(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -78,12 +85,14 @@ euc_tw_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
  
-   euc_tw2mic(src, dest, len);
+   converted = euc_tw2mic(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -92,12 +101,14 @@ mic_to_euc_tw(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
  
-   mic2euc_tw(src, dest, len);
+   converted = mic2euc_tw(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -106,12 +117,14 @@ big5_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
  
-   big52mic(src, dest, len);
+   converted = big52mic(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -120,21 +133,24 @@ mic_to_big5(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
  
-   mic2big5(src, dest, len);
+   converted = mic2big5(src, dest, len, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  
  /*
   * EUC_TW ---> Big5
   */
-static void
-euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = euc;
     unsigned char c1;
     unsigned short big5buf,
                 cnsBuf;
@@ -149,8 +165,12 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
             /* Verify and decode the next EUC_TW input character */
             l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
             if (l < 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_TW,
                                         (const char *) euc, len);
+           }
             if (c1 == SS2)
             {
                 c1 = euc[1];    /* plane No. */
@@ -171,8 +191,12 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
             /* Write it out in Big5 */
             big5buf = CNStoBIG5(cnsBuf, lc);
             if (big5buf == 0)
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_EUC_TW, PG_BIG5,
                                            (const char *) euc, len);
+           }
             *p++ = (big5buf >> 8) & 0x00ff;
             *p++ = big5buf & 0x00ff;
  
@@ -182,22 +206,29 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
         else
         {                       /* should be ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_TW,
                                         (const char *) euc, len);
+           }
             *p++ = c1;
             euc++;
             len--;
         }
     }
     *p = '\0';
+
+   return euc - start;
  }
  
  /*
   * Big5 ---> EUC_TW
   */
-static void
-big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
+static int
+big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = big5;
     unsigned short c1;
     unsigned short big5buf,
                 cnsBuf;
@@ -212,8 +243,12 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
         {
             l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
             if (l < 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_BIG5,
                                         (const char *) big5, len);
+           }
             big5buf = (c1 << 8) | big5[1];
             cnsBuf = BIG5toCNS(big5buf, &lc);
  
@@ -237,8 +272,12 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
                 *p++ = cnsBuf & 0x00ff;
             }
             else
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_BIG5, PG_EUC_TW,
                                            (const char *) big5, len);
+           }
  
             big5 += l;
             len -= l;
@@ -256,14 +295,17 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
         }
     }
     *p = '\0';
+
+   return big5 - start;
  }
  
  /*
   * EUC_TW ---> MIC
   */
-static void
-euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = euc;
     int         c1;
     int         l;
  
@@ -274,8 +316,12 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
         {
             l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
             if (l < 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_TW,
                                         (const char *) euc, len);
+           }
             if (c1 == SS2)
             {
                 c1 = euc[1];    /* plane No. */
@@ -304,22 +350,29 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
         else
         {                       /* should be ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_EUC_TW,
                                         (const char *) euc, len);
+           }
             *p++ = c1;
             euc++;
             len--;
         }
     }
     *p = '\0';
+
+   return euc - start;
  }
  
  /*
   * MIC ---> EUC_TW
   */
-static void
-mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = mic;
     int         c1;
     int         l;
  
@@ -330,8 +383,12 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL,
                                         (const char *) mic, len);
+           }
             *p++ = c1;
             mic++;
             len--;
@@ -339,8 +396,12 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_MULE_INTERNAL,
                                     (const char *) mic, len);
+       }
         if (c1 == LC_CNS11643_1)
         {
             *p++ = mic[1];
@@ -362,20 +423,27 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
             *p++ = mic[3];
         }
         else
+       {
+           if (noError)
+               break;
             report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
                                        (const char *) mic, len);
+       }
         mic += l;
         len -= l;
     }
     *p = '\0';
+
+   return mic - start;
  }
  
  /*
   * Big5 ---> MIC
   */
-static void
-big52mic(const unsigned char *big5, unsigned char *p, int len)
+static int
+big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = big5;
     unsigned short c1;
     unsigned short big5buf,
                 cnsBuf;
@@ -389,8 +457,12 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_BIG5,
                                         (const char *) big5, len);
+           }
             *p++ = c1;
             big5++;
             len--;
@@ -398,8 +470,12 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_BIG5,
                                     (const char *) big5, len);
+       }
         big5buf = (c1 << 8) | big5[1];
         cnsBuf = BIG5toCNS(big5buf, &lc);
         if (lc != 0)
@@ -412,20 +488,27 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
             *p++ = cnsBuf & 0x00ff;
         }
         else
+       {
+           if (noError)
+               break;
             report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
                                        (const char *) big5, len);
+       }
         big5 += l;
         len -= l;
     }
     *p = '\0';
+
+   return big5 - start;
  }
  
  /*
   * MIC ---> Big5
   */
-static void
-mic2big5(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
  {
+   const unsigned char *start = mic;
     unsigned short c1;
     unsigned short big5buf,
                 cnsBuf;
@@ -438,8 +521,12 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
         {
             /* ASCII */
             if (c1 == 0)
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_MULE_INTERNAL,
                                         (const char *) mic, len);
+           }
             *p++ = c1;
             mic++;
             len--;
@@ -447,8 +534,12 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
         }
         l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
         if (l < 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_MULE_INTERNAL,
                                     (const char *) mic, len);
+       }
         if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
         {
             if (c1 == LCPRV2_B)
@@ -462,16 +553,26 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
             }
             big5buf = CNStoBIG5(cnsBuf, c1);
             if (big5buf == 0)
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
                                            (const char *) mic, len);
+           }
             *p++ = (big5buf >> 8) & 0x00ff;
             *p++ = big5buf & 0x00ff;
         }
         else
+       {
+           if (noError)
+               break;
             report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
                                        (const char *) mic, len);
+       }
         mic += l;
         len -= l;
     }
     *p = '\0';
+
+   return mic - start;
  }
diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c

index 2e28e6780a58cc2e68bb187593f9b2d7b243d805..8610fcb69aa8258381d77b6ee3d6065bfaba5e4a 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
+++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(win1250_to_latin2);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  
@@ -82,12 +85,14 @@ latin2_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL);
  
-   latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2);
+   converted = latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -96,12 +101,14 @@ mic_to_latin2(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2);
  
-   mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2);
+   converted = mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -110,13 +117,15 @@ win1250_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL);
  
-   latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
-                        win1250_2_iso88592);
+   converted = latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
+                                    win1250_2_iso88592, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -125,13 +134,15 @@ mic_to_win1250(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250);
  
-   mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
-                        iso88592_2_win1250);
+   converted = mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
+                                    iso88592_2_win1250, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -140,12 +151,15 @@ latin2_to_win1250(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250);
  
-   local2local(src, dest, len, PG_LATIN2, PG_WIN1250, iso88592_2_win1250);
+   converted = local2local(src, dest, len, PG_LATIN2, PG_WIN1250,
+                           iso88592_2_win1250, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -154,10 +168,13 @@ win1250_to_latin2(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2);
  
-   local2local(src, dest, len, PG_WIN1250, PG_LATIN2, win1250_2_iso88592);
+   converted = local2local(src, dest, len, PG_WIN1250, PG_LATIN2,
+                           win1250_2_iso88592, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c

index bc651410f21dd0f6f2f37240956341d5ce96d7d4..bff27d1c29590cf6b1bf9e82807ffc8d457d2abd 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(mic_to_latin4);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  
@@ -42,12 +45,14 @@ latin1_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL);
  
-   latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1);
+   converted = latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,12 +61,14 @@ mic_to_latin1(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1);
  
-   mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1);
+   converted = mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -70,12 +77,14 @@ latin3_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL);
  
-   latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3);
+   converted = latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -84,12 +93,14 @@ mic_to_latin3(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3);
  
-   mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3);
+   converted = mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -98,12 +109,14 @@ latin4_to_mic(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL);
  
-   latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4);
+   converted = latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -112,10 +125,12 @@ mic_to_latin4(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4);
  
-   mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4);
+   converted = mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c

index d6067cdc24e96af88b2474ab4e006ed3be741de8..3838b15cab91c1a6ced2210f283cf31bd4125f9f 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_big5);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ big5_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &big5_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_BIG5);
+   converted = LocalToUtf(src, len, dest,
+                          &big5_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_BIG5,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_big5(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
  
-   UtfToLocal(src, len, dest,
-              &big5_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_BIG5);
+   converted = UtfToLocal(src, len, dest,
+                          &big5_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_BIG5,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c

index ed90e8e682e5d45a69d48e0b62f90fcfc57a1ceb..75719fe5f1b2ea4510c3d3dd76b9cfd020f078f3 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
@@ -33,8 +33,11 @@ PG_FUNCTION_INFO_V1(koi8u_to_utf8);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  
@@ -44,16 +47,19 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
  
-   UtfToLocal(src, len, dest,
-              &koi8r_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_KOI8R);
+   converted = UtfToLocal(src, len, dest,
+                          &koi8r_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8R,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -62,16 +68,19 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &koi8r_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_KOI8R);
+   converted = LocalToUtf(src, len, dest,
+                          &koi8r_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8R,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -80,16 +89,19 @@ utf8_to_koi8u(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
  
-   UtfToLocal(src, len, dest,
-              &koi8u_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_KOI8U);
+   converted = UtfToLocal(src, len, dest,
+                          &koi8u_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8U,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -98,14 +110,17 @@ koi8u_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &koi8u_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_KOI8U);
+   converted = LocalToUtf(src, len, dest,
+                          &koi8u_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_KOI8U,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c

index d699affce47f3a741fbb946964264961f6c24aca..5391001951ac400958834cd8dec7ce091c3eeab8 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &euc_jis_2004_to_unicode_tree,
-              LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
-              NULL,
-              PG_EUC_JIS_2004);
+   converted = LocalToUtf(src, len, dest,
+                          &euc_jis_2004_to_unicode_tree,
+                          LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
+                          NULL,
+                          PG_EUC_JIS_2004,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
  
-   UtfToLocal(src, len, dest,
-              &euc_jis_2004_from_unicode_tree,
-              ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
-              NULL,
-              PG_EUC_JIS_2004);
+   converted = UtfToLocal(src, len, dest,
+                          &euc_jis_2004_from_unicode_tree,
+                          ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
+                          NULL,
+                          PG_EUC_JIS_2004,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c

index d7c0ba6a58b4d2c7f825816f6066962dfbc16013..c87d1bf2398e58cb61548a83c222c1d6dd7869cf 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_cn);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &euc_cn_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_CN);
+   converted = LocalToUtf(src, len, dest,
+                          &euc_cn_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_CN,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
  
-   UtfToLocal(src, len, dest,
-              &euc_cn_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_CN);
+   converted = UtfToLocal(src, len, dest,
+                          &euc_cn_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_CN,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c

index 13a3a23e77b8d6aa93cfa54440a650f7b6eb5149..6a55134db2110cf84f3ab90b448399c141bc779d 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jp);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &euc_jp_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_JP);
+   converted = LocalToUtf(src, len, dest,
+                          &euc_jp_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_JP,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
  
-   UtfToLocal(src, len, dest,
-              &euc_jp_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_JP);
+   converted = UtfToLocal(src, len, dest,
+                          &euc_jp_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_JP,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c

index 1bbb8aaef7b8d2adf8b8763ae6f9d5e3b7aa1492..fe1924e2fec911600dc4b664d6efabf2b3e24b2d 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_kr);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &euc_kr_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_KR);
+   converted = LocalToUtf(src, len, dest,
+                          &euc_kr_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_KR,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
  
-   UtfToLocal(src, len, dest,
-              &euc_kr_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_KR);
+   converted = UtfToLocal(src, len, dest,
+                          &euc_kr_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_KR,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c

index 9830045dccd6a3707c7ca7e80d31800539624cc4..68215659b577c3334fe177f2cd1795c7c9df57f6 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_tw);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &euc_tw_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_TW);
+   converted = LocalToUtf(src, len, dest,
+                          &euc_tw_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_TW,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
  
-   UtfToLocal(src, len, dest,
-              &euc_tw_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_EUC_TW);
+   converted = UtfToLocal(src, len, dest,
+                          &euc_tw_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_EUC_TW,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c

index f86ecf274241ddc1e203dc6db277cc141becac91..e1a59c39a4dbb15c3aa915965874d96fdebbae67 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
@@ -183,8 +183,11 @@ conv_utf8_to_18030(uint32 code)
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -193,16 +196,19 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &gb18030_to_unicode_tree,
-              NULL, 0,
-              conv_18030_to_utf8,
-              PG_GB18030);
+   converted = LocalToUtf(src, len, dest,
+                          &gb18030_to_unicode_tree,
+                          NULL, 0,
+                          conv_18030_to_utf8,
+                          PG_GB18030,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -211,14 +217,17 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
  
-   UtfToLocal(src, len, dest,
-              &gb18030_from_unicode_tree,
-              NULL, 0,
-              conv_utf8_to_18030,
-              PG_GB18030);
+   converted = UtfToLocal(src, len, dest,
+                          &gb18030_from_unicode_tree,
+                          NULL, 0,
+                          conv_utf8_to_18030,
+                          PG_GB18030,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c

index 2ab8b16c8a819b1e55ae751e197cb06275d315aa..881386d5347744f90791435bcd30a07de78bb24b 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_gbk);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &gbk_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_GBK);
+   converted = LocalToUtf(src, len, dest,
+                          &gbk_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_GBK,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK);
  
-   UtfToLocal(src, len, dest,
-              &gbk_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_GBK);
+   converted = UtfToLocal(src, len, dest,
+                          &gbk_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_GBK,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c

index 3e49f67ea2f291c1ab89c0b6c00132cd584c477a..d93a521badf2cd23a89895f2ac92017e9dd221ea 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
@@ -52,8 +52,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  
@@ -100,6 +103,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
     int         i;
  
     CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
@@ -108,12 +112,15 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
     {
         if (encoding == maps[i].encoding)
         {
-           LocalToUtf(src, len, dest,
-                      maps[i].map1,
-                      NULL, 0,
-                      NULL,
-                      encoding);
-           PG_RETURN_VOID();
+           int         converted;
+
+           converted = LocalToUtf(src, len, dest,
+                                  maps[i].map1,
+                                  NULL, 0,
+                                  NULL,
+                                  encoding,
+                                  noError);
+           PG_RETURN_INT32(converted);
         }
     }
  
@@ -122,7 +129,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
              errmsg("unexpected encoding ID %d for ISO 8859 character sets",
                     encoding)));
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(0);
  }
  
  Datum
@@ -132,6 +139,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
     int         i;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
@@ -140,12 +148,15 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
     {
         if (encoding == maps[i].encoding)
         {
-           UtfToLocal(src, len, dest,
-                      maps[i].map2,
-                      NULL, 0,
-                      NULL,
-                      encoding);
-           PG_RETURN_VOID();
+           int         converted;
+
+           converted = UtfToLocal(src, len, dest,
+                                  maps[i].map2,
+                                  NULL, 0,
+                                  NULL,
+                                  encoding,
+                                  noError);
+           PG_RETURN_INT32(converted);
         }
     }
  
@@ -154,5 +165,5 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
              errmsg("unexpected encoding ID %d for ISO 8859 character sets",
                     encoding)));
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(0);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c

index 67e713cca11c3f5e54e7a6497c17ccdf967ffb25..d0dc4cca3788a83f69304a4e2c5b05e3986c7377 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
@@ -26,8 +26,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  
@@ -37,6 +40,8 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   unsigned char *start = src;
     unsigned short c;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
@@ -45,7 +50,11 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
     {
         c = *src;
         if (c == 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_LATIN1, (const char *) src, len);
+       }
         if (!IS_HIGHBIT_SET(c))
             *dest++ = c;
         else
@@ -58,7 +67,7 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
     }
     *dest = '\0';
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(src - start);
  }
  
  Datum
@@ -67,6 +76,8 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   unsigned char *start = src;
     unsigned short c,
                 c1;
  
@@ -76,7 +87,11 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
     {
         c = *src;
         if (c == 0)
+       {
+           if (noError)
+               break;
             report_invalid_encoding(PG_UTF8, (const char *) src, len);
+       }
         /* fast path for ASCII-subset characters */
         if (!IS_HIGHBIT_SET(c))
         {
@@ -89,10 +104,18 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
             int         l = pg_utf_mblen(src);
  
             if (l > len || !pg_utf8_islegal(src, l))
+           {
+               if (noError)
+                   break;
                 report_invalid_encoding(PG_UTF8, (const char *) src, len);
+           }
             if (l != 2)
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
                                            (const char *) src, len);
+           }
             c1 = src[1] & 0x3f;
             c = ((c & 0x1f) << 6) | c1;
             if (c >= 0x80 && c <= 0xff)
@@ -102,11 +125,15 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
                 len -= 2;
             }
             else
+           {
+               if (noError)
+                   break;
                 report_untranslatable_char(PG_UTF8, PG_LATIN1,
                                            (const char *) src, len);
+           }
         }
     }
     *dest = '\0';
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(src - start);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c

index 578f5df4e7f720bfa83a69e8545d00560707583d..317daa2d5eed2bb8bb0a20f1d0123a6abfcc5cc9 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_johab);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ johab_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &johab_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_JOHAB);
+   converted = LocalToUtf(src, len, dest,
+                          &johab_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_JOHAB,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_johab(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);
  
-   UtfToLocal(src, len, dest,
-              &johab_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_JOHAB);
+   converted = UtfToLocal(src, len, dest,
+                          &johab_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_JOHAB,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c

index dd9fc2975ad237e82ab99eeed0de776d1e203330..4c9348aba59fbf5dd6593dd4169ba30aecbbfba0 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_sjis);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &sjis_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_SJIS);
+   converted = LocalToUtf(src, len, dest,
+                          &sjis_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_SJIS,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS);
  
-   UtfToLocal(src, len, dest,
-              &sjis_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_SJIS);
+   converted = UtfToLocal(src, len, dest,
+                          &sjis_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_SJIS,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c

index 4bcc886d674e38c90dceceff361c434ec24c8e40..1fffdc5930c2d5f2b4274f83e83ecc204735d392 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of source bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &shift_jis_2004_to_unicode_tree,
-              LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
-              NULL,
-              PG_SHIFT_JIS_2004);
+   converted = LocalToUtf(src, len, dest,
+                          &shift_jis_2004_to_unicode_tree,
+                          LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
+                          NULL,
+                          PG_SHIFT_JIS_2004,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004);
  
-   UtfToLocal(src, len, dest,
-              &shift_jis_2004_from_unicode_tree,
-              ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
-              NULL,
-              PG_SHIFT_JIS_2004);
+   converted = UtfToLocal(src, len, dest,
+                          &shift_jis_2004_from_unicode_tree,
+                          ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
+                          NULL,
+                          PG_SHIFT_JIS_2004,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c

index c8e512994a1037d23bdab7d0c7b7ce9fc1319708..d9471dad097c7dae83427ab43422f4e0a73c6c38 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_uhc);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  Datum
@@ -38,16 +41,19 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8);
  
-   LocalToUtf(src, len, dest,
-              &uhc_to_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_UHC);
+   converted = LocalToUtf(src, len, dest,
+                          &uhc_to_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_UHC,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
  
  Datum
@@ -56,14 +62,17 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
+   int         converted;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC);
  
-   UtfToLocal(src, len, dest,
-              &uhc_from_unicode_tree,
-              NULL, 0,
-              NULL,
-              PG_UHC);
+   converted = UtfToLocal(src, len, dest,
+                          &uhc_from_unicode_tree,
+                          NULL, 0,
+                          NULL,
+                          PG_UHC,
+                          noError);
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(converted);
  }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c

index 0c9493dee564e254be7c10908f514c1919fb6497..110ba5677d03a22cfcdf15cc12092c838898ecdd 100644 (file)
--- a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
@@ -48,8 +48,11 @@ PG_FUNCTION_INFO_V1(utf8_to_win);
   *     INTEGER,    -- destination encoding id
   *     CSTRING,    -- source string (null terminated C string)
   *     CSTRING,    -- destination string (null terminated C string)
- *     INTEGER     -- source string length
- * ) returns VOID;
+ *     INTEGER,    -- source string length
+ *     BOOL        -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
   * ----------
   */
  
@@ -81,6 +84,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
     int         i;
  
     CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
@@ -89,12 +93,15 @@ win_to_utf8(PG_FUNCTION_ARGS)
     {
         if (encoding == maps[i].encoding)
         {
-           LocalToUtf(src, len, dest,
-                      maps[i].map1,
-                      NULL, 0,
-                      NULL,
-                      encoding);
-           PG_RETURN_VOID();
+           int         converted;
+
+           converted = LocalToUtf(src, len, dest,
+                                  maps[i].map1,
+                                  NULL, 0,
+                                  NULL,
+                                  encoding,
+                                  noError);
+           PG_RETURN_INT32(converted);
         }
     }
  
@@ -103,7 +110,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
              errmsg("unexpected encoding ID %d for WIN character sets",
                     encoding)));
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(0);
  }
  
  Datum
@@ -113,6 +120,7 @@ utf8_to_win(PG_FUNCTION_ARGS)
     unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
     unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
     int         len = PG_GETARG_INT32(4);
+   bool        noError = PG_GETARG_BOOL(5);
     int         i;
  
     CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
@@ -121,12 +129,15 @@ utf8_to_win(PG_FUNCTION_ARGS)
     {
         if (encoding == maps[i].encoding)
         {
-           UtfToLocal(src, len, dest,
-                      maps[i].map2,
-                      NULL, 0,
-                      NULL,
-                      encoding);
-           PG_RETURN_VOID();
+           int         converted;
+
+           converted = UtfToLocal(src, len, dest,
+                                  maps[i].map2,
+                                  NULL, 0,
+                                  NULL,
+                                  encoding,
+                                  noError);
+           PG_RETURN_INT32(converted);
         }
     }
  
@@ -135,5 +146,5 @@ utf8_to_win(PG_FUNCTION_ARGS)
              errmsg("unexpected encoding ID %d for WIN character sets",
                     encoding)));
  
-   PG_RETURN_VOID();
+   PG_RETURN_INT32(0);
  }
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c

index 2578573b0ab13bceb23509f09059b0e5054e58f5..a13c398f4acb23fd1f0ca5b6990ac7f211541d85 100644 (file)
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -406,12 +406,13 @@ pg_do_encoding_conversion(unsigned char *src, int len,
         MemoryContextAllocHuge(CurrentMemoryContext,
                                (Size) len * MAX_CONVERSION_GROWTH + 1);
  
-   OidFunctionCall5(proc,
-                    Int32GetDatum(src_encoding),
-                    Int32GetDatum(dest_encoding),
-                    CStringGetDatum(src),
-                    CStringGetDatum(result),
-                    Int32GetDatum(len));
+   (void) OidFunctionCall6(proc,
+                           Int32GetDatum(src_encoding),
+                           Int32GetDatum(dest_encoding),
+                           CStringGetDatum(src),
+                           CStringGetDatum(result),
+                           Int32GetDatum(len),
+                           BoolGetDatum(false));
  
     /*
      * If the result is large, it's worth repalloc'ing to release any extra
@@ -435,6 +436,62 @@ pg_do_encoding_conversion(unsigned char *src, int len,
     return result;
  }
  
+/*
+ * Convert src string to another encoding.
+ *
+ * This function has a different API than the other conversion functions.
+ * The caller should've looked up the conversion function using
+ * FindDefaultConversionProc().  Unlike the other functions, the result is
+ * not palloc'd; it is written to the caller-supplied buffer instead.
+ *
+ * proc           - OID of the conversion function to call
+ * src_encoding   - encoding to convert from
+ * dest_encoding  - encoding to convert to
+ * src, srclen    - input buffer and its length in bytes
+ * dest, destlen  - destination buffer and its size in bytes
+ * noError        - if true, don't throw an error if conversion fails
+ *
+ * Returns the conversion function's result, i.e. the number of source bytes
+ * it converted.  The output is null-terminated.  If destlen < srclen *
+ * MAX_CONVERSION_GROWTH + 1, the converted output wouldn't necessarily fit
+ * in the output buffer, and the function will not convert the whole input.
+ *
+ * TODO: The conversion function interface is not great.  Firstly, it
+ * would be nice to pass through the destination buffer size to the
+ * conversion function, so that if you pass a shorter destination buffer, it
+ * could still continue to fill up the whole buffer.  Currently, we have to
+ * assume worst case expansion and stop the conversion short, even if there
+ * is in fact space left in the destination buffer.  Secondly, it would be
+ * nice to return the number of bytes written to the caller, to avoid a call
+ * to strlen().
+ */
+int
+pg_do_encoding_conversion_buf(Oid proc,
+                             int src_encoding,
+                             int dest_encoding,
+                             unsigned char *src, int srclen,
+                             unsigned char *dest, int destlen,
+                             bool noError)
+{
+   Datum       result;
+
+   /*
+    * If the destination buffer is not large enough to hold the result in the
+    * worst case, limit the input size passed to the conversion function.
+    */
+   if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
+       srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH);
+
+   result = OidFunctionCall6(proc,
+                             Int32GetDatum(src_encoding),
+                             Int32GetDatum(dest_encoding),
+                             CStringGetDatum(src),
+                             CStringGetDatum(dest),
+                             Int32GetDatum(srclen),
+                             BoolGetDatum(noError));
+   return DatumGetInt32(result);
+}
+
  /*
   * Convert string to encoding encoding_name. The source
   * encoding is the DB encoding.
@@ -762,12 +819,13 @@ perform_default_encoding_conversion(const char *src, int len,
         MemoryContextAllocHuge(CurrentMemoryContext,
                                (Size) len * MAX_CONVERSION_GROWTH + 1);
  
-   FunctionCall5(flinfo,
+   FunctionCall6(flinfo,
                   Int32GetDatum(src_encoding),
                   Int32GetDatum(dest_encoding),
                   CStringGetDatum(src),
                   CStringGetDatum(result),
-                 Int32GetDatum(len));
+                 Int32GetDatum(len),
+                 BoolGetDatum(false));
  
     /*
      * Release extra space if there might be a lot --- see comments in
@@ -849,12 +907,13 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
     c_as_utf8[c_as_utf8_len] = '\0';
  
     /* Convert, or throw error if we can't */
-   FunctionCall5(Utf8ToServerConvProc,
+   FunctionCall6(Utf8ToServerConvProc,
                   Int32GetDatum(PG_UTF8),
                   Int32GetDatum(server_encoding),
                   CStringGetDatum(c_as_utf8),
                   CStringGetDatum(s),
-                 Int32GetDatum(c_as_utf8_len));
+                 Int32GetDatum(c_as_utf8_len),
+                 BoolGetDatum(false));
  }
  
  
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c

index 43fc297eb69d589c4c47c27c58a08b55e1920283..d77183b8d124c25fdd7e9db94eb06be3d6438863 100644 (file)
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -28,6 +28,7 @@ static void check_for_reg_data_type_usage(ClusterInfo *cluster);
  static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
  static void check_for_pg_role_prefix(ClusterInfo *cluster);
  static void check_for_new_tablespace_dir(ClusterInfo *new_cluster);
+static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
  static char *get_canonical_locale_name(int category, const char *locale);
  
  
@@ -102,6 +103,15 @@ check_and_dump_old_cluster(bool live_check)
     check_for_reg_data_type_usage(&old_cluster);
     check_for_isn_and_int8_passing_mismatch(&old_cluster);
  
+   /*
+    * PG 14 changed the function signature of encoding conversion functions.
+    * Conversions from older versions cannot be upgraded automatically
+    * because the user-defined functions used by the encoding conversions
+    * need to be changed to match the new signature.
+    */
+   if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1300)
+       check_for_user_defined_encoding_conversions(&old_cluster);
+
     /*
      * Pre-PG 14 allowed user defined postfix operators, which are not
      * supported anymore.  Verify there are none, iff applicable.
@@ -1268,6 +1278,91 @@ check_for_pg_role_prefix(ClusterInfo *cluster)
     check_ok();
  }
  
+/*
+ * Verify that no user-defined encoding conversions exist; if any do, list
+ * them in encoding_conversions.txt and abort with pg_fatal().
+ */
+static void
+check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
+{
+   int         dbnum;
+   FILE       *script = NULL;
+   bool        found = false;
+   char        output_path[MAXPGPATH];
+
+   prep_status("Checking for user-defined encoding conversions");
+
+   snprintf(output_path, sizeof(output_path),
+            "encoding_conversions.txt");
+
+   /* Find any user-defined encoding conversions, one database at a time */
+   for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+   {
+       PGresult   *res;
+       bool        db_used = false;
+       int         ntups;
+       int         rowno;
+       int         i_conoid,
+                   i_conname,
+                   i_nspname;
+       DbInfo     *active_db = &cluster->dbarr.dbs[dbnum];
+       PGconn     *conn = connectToServer(cluster, active_db->db_name);
+
+       /*
+        * 16384 is FirstNormalObjectId, hardcoded rather than interpolated
+        * from the C #define: if that #define is ever changed, the cutoff we
+        * want is the one used by pre-version 14 servers, not a future one.
+        */
+       res = executeQueryOrDie(conn,
+                               "SELECT c.oid as conoid, c.conname, n.nspname "
+                               "FROM pg_catalog.pg_conversion c, "
+                               "     pg_catalog.pg_namespace n "
+                               "WHERE c.connamespace = n.oid AND "
+                               "      c.oid >= 16384");
+       ntups = PQntuples(res);
+       i_conoid = PQfnumber(res, "conoid");
+       i_conname = PQfnumber(res, "conname");
+       i_nspname = PQfnumber(res, "nspname");
+       for (rowno = 0; rowno < ntups; rowno++)
+       {
+           found = true;
+           if (script == NULL &&
+               (script = fopen_priv(output_path, "w")) == NULL)
+               pg_fatal("could not open file \"%s\": %s\n",
+                        output_path, strerror(errno));
+           if (!db_used)
+           {
+               fprintf(script, "In database: %s\n", active_db->db_name);
+               db_used = true;
+           }
+           fprintf(script, "  (oid=%s) %s.%s\n",
+                   PQgetvalue(res, rowno, i_conoid),
+                   PQgetvalue(res, rowno, i_nspname),
+                   PQgetvalue(res, rowno, i_conname));
+       }
+
+       PQclear(res);
+
+       PQfinish(conn);
+   }
+
+   if (script)
+       fclose(script);
+
+   if (found)
+   {
+       pg_log(PG_REPORT, "fatal\n");
+       pg_fatal("Your installation contains user-defined encoding conversions.\n"
+                "The conversion function parameters changed in PostgreSQL version 14\n"
+                "so this cluster cannot currently be upgraded.  You can remove the\n"
+                "encoding conversions in the old cluster and restart the upgrade.\n"
+                "A list of user-defined encoding conversions is in the file:\n"
+                "    %s\n\n", output_path);
+   }
+   else
+       check_ok();
+}
+
  
  /*
   * get_canonical_locale_name
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 489f5be427fc4b3c5d6132339ea7b2a2990a49bd..6a61c8f64f064b9f2c6dace516ac0bab5b9e4d53 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202103291
+#define CATALOG_VERSION_NO 202104011
  
  #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat

index bfb89e0575dfeaa79e6a3fc5571b3c48fd484a74..69ffd0c3f4dc313670f315dbc02269f94f855fb8 100644 (file)
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10914,388 +10914,388 @@
  # conversion functions
  { oid => '4302',
    descr => 'internal conversion function for KOI8R to MULE_INTERNAL',
-  proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_mic',
+  proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_mic',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4303',
    descr => 'internal conversion function for MULE_INTERNAL to KOI8R',
-  proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_koi8r',
+  proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_koi8r',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4304',
    descr => 'internal conversion function for ISO-8859-5 to MULE_INTERNAL',
-  proname => 'iso_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_mic',
+  proname => 'iso_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_mic',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4305',
    descr => 'internal conversion function for MULE_INTERNAL to ISO-8859-5',
-  proname => 'mic_to_iso', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_iso',
+  proname => 'mic_to_iso', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_iso',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4306',
    descr => 'internal conversion function for WIN1251 to MULE_INTERNAL',
-  proname => 'win1251_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1251_to_mic',
+  proname => 'win1251_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1251_to_mic',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4307',
    descr => 'internal conversion function for MULE_INTERNAL to WIN1251',
-  proname => 'mic_to_win1251', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win1251',
+  proname => 'mic_to_win1251', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win1251',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4308',
    descr => 'internal conversion function for WIN866 to MULE_INTERNAL',
-  proname => 'win866_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_mic',
+  proname => 'win866_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_mic',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4309',
    descr => 'internal conversion function for MULE_INTERNAL to WIN866',
-  proname => 'mic_to_win866', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win866',
+  proname => 'mic_to_win866', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win866',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4310', descr => 'internal conversion function for KOI8R to WIN1251',
-  proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'koi8r_to_win1251', probin => '$libdir/cyrillic_and_mic' },
  { oid => '4311', descr => 'internal conversion function for WIN1251 to KOI8R',
-  proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'win1251_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
  { oid => '4312', descr => 'internal conversion function for KOI8R to WIN866',
-  proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_win866',
+  proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_win866',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4313', descr => 'internal conversion function for WIN866 to KOI8R',
-  proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_koi8r',
+  proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_koi8r',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4314',
    descr => 'internal conversion function for WIN866 to WIN1251',
-  proname => 'win866_to_win1251', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'win866_to_win1251', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'win866_to_win1251', probin => '$libdir/cyrillic_and_mic' },
  { oid => '4315',
    descr => 'internal conversion function for WIN1251 to WIN866',
-  proname => 'win1251_to_win866', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'win1251_to_win866', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'win1251_to_win866', probin => '$libdir/cyrillic_and_mic' },
  { oid => '4316',
    descr => 'internal conversion function for ISO-8859-5 to KOI8R',
-  proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_koi8r',
+  proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_koi8r',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4317',
    descr => 'internal conversion function for KOI8R to ISO-8859-5',
-  proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_iso',
+  proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_iso',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4318',
    descr => 'internal conversion function for ISO-8859-5 to WIN1251',
-  proname => 'iso_to_win1251', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_win1251',
+  proname => 'iso_to_win1251', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_win1251',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4319',
    descr => 'internal conversion function for WIN1251 to ISO-8859-5',
-  proname => 'win1251_to_iso', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1251_to_iso',
+  proname => 'win1251_to_iso', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1251_to_iso',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4320',
    descr => 'internal conversion function for ISO-8859-5 to WIN866',
-  proname => 'iso_to_win866', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_win866',
+  proname => 'iso_to_win866', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_win866',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4321',
    descr => 'internal conversion function for WIN866 to ISO-8859-5',
-  proname => 'win866_to_iso', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_iso',
+  proname => 'win866_to_iso', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_iso',
    probin => '$libdir/cyrillic_and_mic' },
  { oid => '4322',
    descr => 'internal conversion function for EUC_CN to MULE_INTERNAL',
-  proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_cn_to_mic',
+  proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_cn_to_mic',
    probin => '$libdir/euc_cn_and_mic' },
  { oid => '4323',
    descr => 'internal conversion function for MULE_INTERNAL to EUC_CN',
-  proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_cn',
+  proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_cn',
    probin => '$libdir/euc_cn_and_mic' },
  { oid => '4324', descr => 'internal conversion function for EUC_JP to SJIS',
-  proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_sjis',
+  proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_sjis',
    probin => '$libdir/euc_jp_and_sjis' },
  { oid => '4325', descr => 'internal conversion function for SJIS to EUC_JP',
-  proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_euc_jp',
+  proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_euc_jp',
    probin => '$libdir/euc_jp_and_sjis' },
  { oid => '4326',
    descr => 'internal conversion function for EUC_JP to MULE_INTERNAL',
-  proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_mic',
+  proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_mic',
    probin => '$libdir/euc_jp_and_sjis' },
  { oid => '4327',
    descr => 'internal conversion function for SJIS to MULE_INTERNAL',
-  proname => 'sjis_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_mic',
+  proname => 'sjis_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_mic',
    probin => '$libdir/euc_jp_and_sjis' },
  { oid => '4328',
    descr => 'internal conversion function for MULE_INTERNAL to EUC_JP',
-  proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_jp',
+  proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_jp',
    probin => '$libdir/euc_jp_and_sjis' },
  { oid => '4329',
    descr => 'internal conversion function for MULE_INTERNAL to SJIS',
-  proname => 'mic_to_sjis', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_sjis',
+  proname => 'mic_to_sjis', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_sjis',
    probin => '$libdir/euc_jp_and_sjis' },
  { oid => '4330',
    descr => 'internal conversion function for EUC_KR to MULE_INTERNAL',
-  proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_kr_to_mic',
+  proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_kr_to_mic',
    probin => '$libdir/euc_kr_and_mic' },
  { oid => '4331',
    descr => 'internal conversion function for MULE_INTERNAL to EUC_KR',
-  proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_kr',
+  proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_kr',
    probin => '$libdir/euc_kr_and_mic' },
  { oid => '4332', descr => 'internal conversion function for EUC_TW to BIG5',
-  proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_big5',
+  proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_big5',
    probin => '$libdir/euc_tw_and_big5' },
  { oid => '4333', descr => 'internal conversion function for BIG5 to EUC_TW',
-  proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_euc_tw',
+  proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_euc_tw',
    probin => '$libdir/euc_tw_and_big5' },
  { oid => '4334',
    descr => 'internal conversion function for EUC_TW to MULE_INTERNAL',
-  proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_mic',
+  proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_mic',
    probin => '$libdir/euc_tw_and_big5' },
  { oid => '4335',
    descr => 'internal conversion function for BIG5 to MULE_INTERNAL',
-  proname => 'big5_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_mic',
+  proname => 'big5_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_mic',
    probin => '$libdir/euc_tw_and_big5' },
  { oid => '4336',
    descr => 'internal conversion function for MULE_INTERNAL to EUC_TW',
-  proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_tw',
+  proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_tw',
    probin => '$libdir/euc_tw_and_big5' },
  { oid => '4337',
    descr => 'internal conversion function for MULE_INTERNAL to BIG5',
-  proname => 'mic_to_big5', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_big5',
+  proname => 'mic_to_big5', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_big5',
    probin => '$libdir/euc_tw_and_big5' },
  { oid => '4338',
    descr => 'internal conversion function for LATIN2 to MULE_INTERNAL',
-  proname => 'latin2_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin2_to_mic',
+  proname => 'latin2_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin2_to_mic',
    probin => '$libdir/latin2_and_win1250' },
  { oid => '4339',
    descr => 'internal conversion function for MULE_INTERNAL to LATIN2',
-  proname => 'mic_to_latin2', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin2',
+  proname => 'mic_to_latin2', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin2',
    probin => '$libdir/latin2_and_win1250' },
  { oid => '4340',
    descr => 'internal conversion function for WIN1250 to MULE_INTERNAL',
-  proname => 'win1250_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1250_to_mic',
+  proname => 'win1250_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1250_to_mic',
    probin => '$libdir/latin2_and_win1250' },
  { oid => '4341',
    descr => 'internal conversion function for MULE_INTERNAL to WIN1250',
-  proname => 'mic_to_win1250', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win1250',
+  proname => 'mic_to_win1250', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win1250',
    probin => '$libdir/latin2_and_win1250' },
  { oid => '4342',
    descr => 'internal conversion function for LATIN2 to WIN1250',
-  proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'latin2_to_win1250', probin => '$libdir/latin2_and_win1250' },
  { oid => '4343',
    descr => 'internal conversion function for WIN1250 to LATIN2',
-  proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'win1250_to_latin2', probin => '$libdir/latin2_and_win1250' },
  { oid => '4344',
    descr => 'internal conversion function for LATIN1 to MULE_INTERNAL',
-  proname => 'latin1_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin1_to_mic',
+  proname => 'latin1_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin1_to_mic',
    probin => '$libdir/latin_and_mic' },
  { oid => '4345',
    descr => 'internal conversion function for MULE_INTERNAL to LATIN1',
-  proname => 'mic_to_latin1', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin1',
+  proname => 'mic_to_latin1', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin1',
    probin => '$libdir/latin_and_mic' },
  { oid => '4346',
    descr => 'internal conversion function for LATIN3 to MULE_INTERNAL',
-  proname => 'latin3_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin3_to_mic',
+  proname => 'latin3_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin3_to_mic',
    probin => '$libdir/latin_and_mic' },
  { oid => '4347',
    descr => 'internal conversion function for MULE_INTERNAL to LATIN3',
-  proname => 'mic_to_latin3', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin3',
+  proname => 'mic_to_latin3', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin3',
    probin => '$libdir/latin_and_mic' },
  { oid => '4348',
    descr => 'internal conversion function for LATIN4 to MULE_INTERNAL',
-  proname => 'latin4_to_mic', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin4_to_mic',
+  proname => 'latin4_to_mic', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin4_to_mic',
    probin => '$libdir/latin_and_mic' },
  { oid => '4349',
    descr => 'internal conversion function for MULE_INTERNAL to LATIN4',
-  proname => 'mic_to_latin4', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin4',
+  proname => 'mic_to_latin4', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin4',
    probin => '$libdir/latin_and_mic' },
  { oid => '4352', descr => 'internal conversion function for BIG5 to UTF8',
-  proname => 'big5_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_utf8',
+  proname => 'big5_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_utf8',
    probin => '$libdir/utf8_and_big5' },
  { oid => '4353', descr => 'internal conversion function for UTF8 to BIG5',
-  proname => 'utf8_to_big5', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_big5',
+  proname => 'utf8_to_big5', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_big5',
    probin => '$libdir/utf8_and_big5' },
  { oid => '4354', descr => 'internal conversion function for UTF8 to KOI8R',
-  proname => 'utf8_to_koi8r', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_koi8r',
+  proname => 'utf8_to_koi8r', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_koi8r',
    probin => '$libdir/utf8_and_cyrillic' },
  { oid => '4355', descr => 'internal conversion function for KOI8R to UTF8',
-  proname => 'koi8r_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_utf8',
+  proname => 'koi8r_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_utf8',
    probin => '$libdir/utf8_and_cyrillic' },
  { oid => '4356', descr => 'internal conversion function for UTF8 to KOI8U',
-  proname => 'utf8_to_koi8u', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_koi8u',
+  proname => 'utf8_to_koi8u', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_koi8u',
    probin => '$libdir/utf8_and_cyrillic' },
  { oid => '4357', descr => 'internal conversion function for KOI8U to UTF8',
-  proname => 'koi8u_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8u_to_utf8',
+  proname => 'koi8u_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8u_to_utf8',
    probin => '$libdir/utf8_and_cyrillic' },
  { oid => '4358', descr => 'internal conversion function for UTF8 to WIN',
-  proname => 'utf8_to_win', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_win',
+  proname => 'utf8_to_win', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_win',
    probin => '$libdir/utf8_and_win' },
  { oid => '4359', descr => 'internal conversion function for WIN to UTF8',
-  proname => 'win_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win_to_utf8',
+  proname => 'win_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win_to_utf8',
    probin => '$libdir/utf8_and_win' },
  { oid => '4360', descr => 'internal conversion function for EUC_CN to UTF8',
-  proname => 'euc_cn_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_cn_to_utf8',
+  proname => 'euc_cn_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_cn_to_utf8',
    probin => '$libdir/utf8_and_euc_cn' },
  { oid => '4361', descr => 'internal conversion function for UTF8 to EUC_CN',
-  proname => 'utf8_to_euc_cn', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_cn',
+  proname => 'utf8_to_euc_cn', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_cn',
    probin => '$libdir/utf8_and_euc_cn' },
  { oid => '4362', descr => 'internal conversion function for EUC_JP to UTF8',
-  proname => 'euc_jp_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_utf8',
+  proname => 'euc_jp_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_utf8',
    probin => '$libdir/utf8_and_euc_jp' },
  { oid => '4363', descr => 'internal conversion function for UTF8 to EUC_JP',
-  proname => 'utf8_to_euc_jp', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_jp',
+  proname => 'utf8_to_euc_jp', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_jp',
    probin => '$libdir/utf8_and_euc_jp' },
  { oid => '4364', descr => 'internal conversion function for EUC_KR to UTF8',
-  proname => 'euc_kr_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_kr_to_utf8',
+  proname => 'euc_kr_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_kr_to_utf8',
    probin => '$libdir/utf8_and_euc_kr' },
  { oid => '4365', descr => 'internal conversion function for UTF8 to EUC_KR',
-  proname => 'utf8_to_euc_kr', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_kr',
+  proname => 'utf8_to_euc_kr', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_kr',
    probin => '$libdir/utf8_and_euc_kr' },
  { oid => '4366', descr => 'internal conversion function for EUC_TW to UTF8',
-  proname => 'euc_tw_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_utf8',
+  proname => 'euc_tw_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_utf8',
    probin => '$libdir/utf8_and_euc_tw' },
  { oid => '4367', descr => 'internal conversion function for UTF8 to EUC_TW',
-  proname => 'utf8_to_euc_tw', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_tw',
+  proname => 'utf8_to_euc_tw', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_tw',
    probin => '$libdir/utf8_and_euc_tw' },
  { oid => '4368', descr => 'internal conversion function for GB18030 to UTF8',
-  proname => 'gb18030_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'gb18030_to_utf8',
+  proname => 'gb18030_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'gb18030_to_utf8',
    probin => '$libdir/utf8_and_gb18030' },
  { oid => '4369', descr => 'internal conversion function for UTF8 to GB18030',
-  proname => 'utf8_to_gb18030', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_gb18030',
+  proname => 'utf8_to_gb18030', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_gb18030',
    probin => '$libdir/utf8_and_gb18030' },
  { oid => '4370', descr => 'internal conversion function for GBK to UTF8',
-  proname => 'gbk_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'gbk_to_utf8',
+  proname => 'gbk_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'gbk_to_utf8',
    probin => '$libdir/utf8_and_gbk' },
  { oid => '4371', descr => 'internal conversion function for UTF8 to GBK',
-  proname => 'utf8_to_gbk', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_gbk',
+  proname => 'utf8_to_gbk', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_gbk',
    probin => '$libdir/utf8_and_gbk' },
  { oid => '4372',
    descr => 'internal conversion function for UTF8 to ISO-8859 2-16',
-  proname => 'utf8_to_iso8859', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_iso8859',
+  proname => 'utf8_to_iso8859', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_iso8859',
    probin => '$libdir/utf8_and_iso8859' },
  { oid => '4373',
    descr => 'internal conversion function for ISO-8859 2-16 to UTF8',
-  proname => 'iso8859_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso8859_to_utf8',
+  proname => 'iso8859_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso8859_to_utf8',
    probin => '$libdir/utf8_and_iso8859' },
  { oid => '4374', descr => 'internal conversion function for LATIN1 to UTF8',
-  proname => 'iso8859_1_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'iso8859_1_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'iso8859_1_to_utf8', probin => '$libdir/utf8_and_iso8859_1' },
  { oid => '4375', descr => 'internal conversion function for UTF8 to LATIN1',
-  proname => 'utf8_to_iso8859_1', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'utf8_to_iso8859_1', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'utf8_to_iso8859_1', probin => '$libdir/utf8_and_iso8859_1' },
  { oid => '4376', descr => 'internal conversion function for JOHAB to UTF8',
-  proname => 'johab_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'johab_to_utf8',
+  proname => 'johab_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'johab_to_utf8',
    probin => '$libdir/utf8_and_johab' },
  { oid => '4377', descr => 'internal conversion function for UTF8 to JOHAB',
-  proname => 'utf8_to_johab', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_johab',
+  proname => 'utf8_to_johab', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_johab',
    probin => '$libdir/utf8_and_johab' },
  { oid => '4378', descr => 'internal conversion function for SJIS to UTF8',
-  proname => 'sjis_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_utf8',
+  proname => 'sjis_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_utf8',
    probin => '$libdir/utf8_and_sjis' },
  { oid => '4379', descr => 'internal conversion function for UTF8 to SJIS',
-  proname => 'utf8_to_sjis', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_sjis',
+  proname => 'utf8_to_sjis', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_sjis',
    probin => '$libdir/utf8_and_sjis' },
  { oid => '4380', descr => 'internal conversion function for UHC to UTF8',
-  proname => 'uhc_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'uhc_to_utf8',
+  proname => 'uhc_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'uhc_to_utf8',
    probin => '$libdir/utf8_and_uhc' },
  { oid => '4381', descr => 'internal conversion function for UTF8 to UHC',
-  proname => 'utf8_to_uhc', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_uhc',
+  proname => 'utf8_to_uhc', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_uhc',
    probin => '$libdir/utf8_and_uhc' },
  { oid => '4382',
    descr => 'internal conversion function for EUC_JIS_2004 to UTF8',
-  proname => 'euc_jis_2004_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'euc_jis_2004_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'euc_jis_2004_to_utf8', probin => '$libdir/utf8_and_euc2004' },
  { oid => '4383',
    descr => 'internal conversion function for UTF8 to EUC_JIS_2004',
-  proname => 'utf8_to_euc_jis_2004', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'utf8_to_euc_jis_2004', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'utf8_to_euc_jis_2004', probin => '$libdir/utf8_and_euc2004' },
  { oid => '4384',
    descr => 'internal conversion function for SHIFT_JIS_2004 to UTF8',
-  proname => 'shift_jis_2004_to_utf8', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'shift_jis_2004_to_utf8', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'shift_jis_2004_to_utf8', probin => '$libdir/utf8_and_sjis2004' },
  { oid => '4385',
    descr => 'internal conversion function for UTF8 to SHIFT_JIS_2004',
-  proname => 'utf8_to_shift_jis_2004', prolang => 'c', prorettype => 'void',
-  proargtypes => 'int4 int4 cstring internal int4',
+  proname => 'utf8_to_shift_jis_2004', prolang => 'c', prorettype => 'int4',
+  proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'utf8_to_shift_jis_2004', probin => '$libdir/utf8_and_sjis2004' },
  { oid => '4386',
    descr => 'internal conversion function for EUC_JIS_2004 to SHIFT_JIS_2004',
    proname => 'euc_jis_2004_to_shift_jis_2004', prolang => 'c',
-  prorettype => 'void', proargtypes => 'int4 int4 cstring internal int4',
+  prorettype => 'int4', proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'euc_jis_2004_to_shift_jis_2004',
    probin => '$libdir/euc2004_sjis2004' },
  { oid => '4387',
    descr => 'internal conversion function for SHIFT_JIS_2004 to EUC_JIS_2004',
    proname => 'shift_jis_2004_to_euc_jis_2004', prolang => 'c',
-  prorettype => 'void', proargtypes => 'int4 int4 cstring internal int4',
+  prorettype => 'int4', proargtypes => 'int4 int4 cstring internal int4 bool',
    prosrc => 'shift_jis_2004_to_euc_jis_2004',
    probin => '$libdir/euc2004_sjis2004' },
  
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h

index 64b22e4b0d461c57fd2014dd6a0fb4a64694b17f..a9aaff9e6dcfad72f28a8e0583083ece87dbc004 100644 (file)
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -616,6 +616,12 @@ extern int pg_bind_textdomain_codeset(const char *domainname);
  extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
                                                 int src_encoding,
                                                 int dest_encoding);
+extern int pg_do_encoding_conversion_buf(Oid proc,
+                                         int src_encoding,
+                                         int dest_encoding,
+                                         unsigned char *src, int srclen,
+                                         unsigned char *dst, int dstlen,
+                                         bool noError);
  
  extern char *pg_client_to_server(const char *s, int len);
  extern char *pg_server_to_client(const char *s, int len);
@@ -627,18 +633,18 @@ extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
  extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
  extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
  
-extern void UtfToLocal(const unsigned char *utf, int len,
+extern int UtfToLocal(const unsigned char *utf, int len,
                        unsigned char *iso,
                        const pg_mb_radix_tree *map,
                        const pg_utf_to_local_combined *cmap, int cmapsize,
                        utf_local_conversion_func conv_func,
-                      int encoding);
-extern void LocalToUtf(const unsigned char *iso, int len,
+                      int encoding, bool noError);
+extern int LocalToUtf(const unsigned char *iso, int len,
                        unsigned char *utf,
                        const pg_mb_radix_tree *map,
                        const pg_local_to_utf_combined *cmap, int cmapsize,
                        utf_local_conversion_func conv_func,
-                      int encoding);
+                      int encoding, bool noError);
  
  extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
  extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
@@ -656,18 +662,19 @@ extern void report_invalid_encoding(int encoding, const char *mbstr, int len) pg
  extern void report_untranslatable_char(int src_encoding, int dest_encoding,
                                        const char *mbstr, int len) pg_attribute_noreturn();
  
-extern void local2local(const unsigned char *l, unsigned char *p, int len,
-                       int src_encoding, int dest_encoding, const unsigned char *tab);
-extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
-                     int lc, int encoding);
-extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,
-                     int lc, int encoding);
-extern void latin2mic_with_table(const unsigned char *l, unsigned char *p,
+extern int local2local(const unsigned char *l, unsigned char *p, int len,
+                       int src_encoding, int dest_encoding,
+                       const unsigned char *tab, bool noError);
+extern int latin2mic(const unsigned char *l, unsigned char *p, int len,
+                     int lc, int encoding, bool noError);
+extern int mic2latin(const unsigned char *mic, unsigned char *p, int len,
+                     int lc, int encoding, bool noError);
+extern int latin2mic_with_table(const unsigned char *l, unsigned char *p,
                                  int len, int lc, int encoding,
-                                const unsigned char *tab);
-extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
+                                const unsigned char *tab, bool noError);
+extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
                                  int len, int lc, int encoding,
-                                const unsigned char *tab);
+                                const unsigned char *tab, bool noError);
  
  #ifdef WIN32
  extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out

index 62c106716852ca14b5b3d1dba17a7a59fad6ea31..e34ab20974dcedc715745230dacda53ed7af5448 100644 (file)
--- a/src/test/regress/expected/conversion.out
+++ b/src/test/regress/expected/conversion.out
@@ -37,3 +37,522 @@ DROP CONVERSION mydef;
  --
  RESET SESSION AUTHORIZATION;
  DROP USER regress_conversion_user;
+--
+-- Test built-in conversion functions.
+--
+-- Helper function to test a conversion. Uses the test_enc_conversion function
+-- that was created in the create_function_1 test.
+create or replace function test_conv(
+  input IN bytea,
+  src_encoding IN text,
+  dst_encoding IN text,
+  result OUT bytea,
+  errorat OUT bytea,
+  error OUT text)
+language plpgsql as
+$$
+declare
+  validlen int;
+begin
+  -- First try to perform the conversion with noError = false. If that errors out,
+  -- capture the error message, and try again with noError = true. The second call
+  -- should succeed and return the position of the error, return that too.
+  begin
+    select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
+    errorat = NULL;
+    error := NULL;
+  exception when others then
+    error := sqlerrm;
+    select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
+    errorat = substr(input, validlen + 1);
+  end;
+  return;
+end;
+$$;
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\xc3a4c3b6',   'valid, extra latin chars'),
+  ('\xd184d0bed0be',   'valid, cyrillic'),
+  ('\x666f6fe8b1a1',   'valid, kanji/Chinese'),
+  ('\xe382abe3829a',   'valid, two chars that combine to one in EUC_JIS_2004'),
+  ('\xe382ab',     'only first half of combined char in EUC_JIS_2004'),
+  ('\xe382abe382', 'incomplete combination when converted EUC_JIS_2004'),
+  ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+  ('\x666f6fefa8aa',   'valid, needs mapping function to convert to GB18030'),
+  ('\x66e8b1ff6f6f',   'invalid byte sequence'),
+  ('\x66006f',     'invalid, NUL byte'),
+  ('\x666f6fe8b100',   'invalid, NUL byte'),
+  ('\x666f6fe8b1', 'incomplete character at end');
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+                     description                      |        result        |   errorat    |                           error                           
+------------------------------------------------------+----------------------+--------------+-----------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             |              | 
+ valid, extra latin chars                             | \xc3a4c3b6           |              | 
+ valid, cyrillic                                      | \xd184d0bed0be       |              | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       |              | 
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       |              | 
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             |              | 
+ incomplete combination when converted EUC_JIS_2004   | \xe382ab             | \xe382       | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac |              | 
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       |              | 
+ invalid byte sequence                                | \x66                 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66                 | \x006f       | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6f             | \xe8b100     | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6f             | \xe8b1       | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+                     description                      |       inbytes        |     result     |       errorat        |                                                    error                                                    
+------------------------------------------------------+----------------------+----------------+----------------------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f       |                      | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xa9daa9ec     |                      | 
+ valid, cyrillic                                      | \xd184d0bed0be       | \xa7e6a7e0a7e0 |                      | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6fbedd   |                      | 
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \xa5f7         |                      | 
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \xa5ab         |                      | 
+ incomplete combination when converted EUC_JIS_2004   | \xe382abe382         | \x             | \xe382abe382         | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac | \x             | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       | \x666f6f       | \xefa8aa             | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ invalid byte sequence                                | \x66e8b1ff6f6f       | \x66           | \xe8b1ff6f6f         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             | \x66           | \x006f               | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       | \x666f6f       | \xe8b100             | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         | \x666f6f       | \xe8b1               | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  |       errorat        |                                                 error                                                 
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |                      | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xe4f6   |                      | 
+ valid, cyrillic                                      | \xd184d0bed0be       | \x       | \xd184d0bed0be       | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6f | \xe8b1a1             | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \x       | \xe382abe3829a       | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \x       | \xe382ab             | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ incomplete combination when converted EUC_JIS_2004   | \xe382abe382         | \x       | \xe382abe382         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac | \x       | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       | \x666f6f | \xefa8aa             | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ invalid byte sequence                                | \x66e8b1ff6f6f       | \x66     | \xe8b1ff6f6f         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             | \x66     | \x006f               | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       | \x666f6f | \xe8b100             | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         | \x666f6f | \xe8b1               | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  |       errorat        |                                                 error                                                 
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |                      | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xe4f6   |                      | 
+ valid, cyrillic                                      | \xd184d0bed0be       | \x       | \xd184d0bed0be       | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6f | \xe8b1a1             | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \x       | \xe382abe3829a       | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \x       | \xe382ab             | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ incomplete combination when converted EUC_JIS_2004   | \xe382abe382         | \x       | \xe382abe382         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac | \x       | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       | \x666f6f | \xefa8aa             | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ invalid byte sequence                                | \x66e8b1ff6f6f       | \x66     | \xe8b1ff6f6f         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             | \x66     | \x006f               | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       | \x666f6f | \xe8b100             | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         | \x666f6f | \xe8b1               | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  |       errorat        |                                                 error                                                 
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |                      | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xe4f6   |                      | 
+ valid, cyrillic                                      | \xd184d0bed0be       | \x       | \xd184d0bed0be       | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6f | \xe8b1a1             | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \x       | \xe382abe3829a       | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \x       | \xe382ab             | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ incomplete combination when converted EUC_JIS_2004   | \xe382abe382         | \x       | \xe382abe382         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac | \x       | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       | \x666f6f | \xefa8aa             | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ invalid byte sequence                                | \x66e8b1ff6f6f       | \x66     | \xe8b1ff6f6f         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             | \x66     | \x006f               | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       | \x666f6f | \xe8b100             | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         | \x666f6f | \xe8b1               | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  |       errorat        |                                                error                                                 
+------------------------------------------------------+----------------------+----------+----------------------+------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |                      | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \x       | \xc3a4c3b6           | character with byte sequence 0xc3 0xa4 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, cyrillic                                      | \xd184d0bed0be       | \xc6cfcf |                      | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6f | \xe8b1a1             | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \x       | \xe382abe3829a       | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \x       | \xe382ab             | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ incomplete combination when converted EUC_JIS_2004   | \xe382abe382         | \x       | \xe382abe382         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac | \x       | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       | \x666f6f | \xefa8aa             | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ invalid byte sequence                                | \x66e8b1ff6f6f       | \x66     | \xe8b1ff6f6f         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             | \x66     | \x006f               | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       | \x666f6f | \xe8b100             | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         | \x666f6f | \xe8b1               | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+                     description                      |       inbytes        |           result           |   errorat    |                           error                           
+------------------------------------------------------+----------------------+----------------------------+--------------+-----------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f                   |              | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \x81308a3181308b32         |              | 
+ valid, cyrillic                                      | \xd184d0bed0be       | \xa7e6a7e0a7e0             |              | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6fcff3               |              | 
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \xa5ab8139a732             |              | 
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \xa5ab                     |              | 
+ incomplete combination when converted EUC_JIS_2004   | \xe382abe382         | \xa5ab                     | \xe382       | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac | \x8334e5398238c4338330b335 |              | 
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       | \x666f6f84309c38           |              | 
+ invalid byte sequence                                | \x66e8b1ff6f6f       | \x66                       | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             | \x66                       | \x006f       | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       | \x666f6f                   | \xe8b100     | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         | \x666f6f                   | \xe8b1       | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6fbedd', 'valid'),
+  ('\xa5f7',       'valid, translates to two UTF-8 chars '),
+  ('\xbeddbe',     'incomplete char '),
+  ('\x666f6f00bedd',   'invalid, NUL byte'),
+  ('\x666f6fbe00dd',   'invalid, NUL byte'),
+  ('\x666f6fbedd00',   'invalid, NUL byte'),
+  ('\xbe04',       'invalid byte sequence');
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+              description              |    inbytes     |    result    | errorat  |                            error                             
+---------------------------------------+----------------+--------------+----------+--------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f     |          | 
+ valid                                 | \x666f6fbedd   | \x666f6fbedd |          | 
+ valid, translates to two UTF-8 chars  | \xa5f7         | \xa5f7       |          | 
+ incomplete char                       | \xbeddbe       | \xbedd       | \xbe     | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte                     | \x666f6f00bedd | \x666f6f     | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6fbe00dd | \x666f6f     | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte                     | \x666f6fbedd00 | \x666f6fbedd | \x00     | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence                 | \xbe04         | \x           | \xbe04   | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+              description              |    inbytes     |     result     | errorat  |                            error                             
+---------------------------------------+----------------+----------------+----------+--------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f       |          | 
+ valid                                 | \x666f6fbedd   | \x666f6fe8b1a1 |          | 
+ valid, translates to two UTF-8 chars  | \xa5f7         | \xe382abe3829a |          | 
+ incomplete char                       | \xbeddbe       | \xe8b1a1       | \xbe     | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte                     | \x666f6f00bedd | \x666f6f       | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6fbe00dd | \x666f6f       | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte                     | \x666f6fbedd00 | \x666f6fe8b1a1 | \x00     | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence                 | \xbe04         | \x             | \xbe04   | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6f8fdb', 'valid'),
+  ('\x666f6f81c0', 'valid, no translation to UTF-8'),
+  ('\x666f6f82f5', 'valid, translates to two UTF-8 chars '),
+  ('\x666f6f8fdb8f',   'incomplete char '),
+  ('\x666f6f820a', 'incomplete char, followed by newline '),
+  ('\x666f6f008fdb',   'invalid, NUL byte'),
+  ('\x666f6f8f00db',   'invalid, NUL byte'),
+  ('\x666f6f8fdb00',   'invalid, NUL byte');
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+              description              |    inbytes     |    result    | errorat  |                             error                              
+---------------------------------------+----------------+--------------+----------+----------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f     |          | 
+ valid                                 | \x666f6f8fdb   | \x666f6f8fdb |          | 
+ valid, no translation to UTF-8        | \x666f6f81c0   | \x666f6f81c0 |          | 
+ valid, translates to two UTF-8 chars  | \x666f6f82f5   | \x666f6f82f5 |          | 
+ incomplete char                       | \x666f6f8fdb8f | \x666f6f8fdb | \x8f     | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline  | \x666f6f820a   | \x666f6f     | \x820a   | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte                     | \x666f6f008fdb | \x666f6f     | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6f8f00db | \x666f6f     | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte                     | \x666f6f8fdb00 | \x666f6f8fdb | \x00     | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+              description              |    inbytes     |        result        | errorat  |                             error                              
+---------------------------------------+----------------+----------------------+----------+----------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f             |          | 
+ valid                                 | \x666f6f8fdb   | \x666f6fe8b1a1       |          | 
+ valid, no translation to UTF-8        | \x666f6f81c0   | \x666f6fe28a84       |          | 
+ valid, translates to two UTF-8 chars  | \x666f6f82f5   | \x666f6fe3818be3829a |          | 
+ incomplete char                       | \x666f6f8fdb8f | \x666f6fe8b1a1       | \x8f     | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline  | \x666f6f820a   | \x666f6f             | \x820a   | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte                     | \x666f6f008fdb | \x666f6f             | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6f8f00db | \x666f6f             | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte                     | \x666f6f8fdb00 | \x666f6fe8b1a1       | \x00     | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+              description              |    inbytes     |    result    | errorat  |                             error                              
+---------------------------------------+----------------+--------------+----------+----------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f     |          | 
+ valid                                 | \x666f6f8fdb   | \x666f6fbedd |          | 
+ valid, no translation to UTF-8        | \x666f6f81c0   | \x666f6fa2c2 |          | 
+ valid, translates to two UTF-8 chars  | \x666f6f82f5   | \x666f6fa4f7 |          | 
+ incomplete char                       | \x666f6f8fdb8f | \x666f6fbedd | \x8f     | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline  | \x666f6f820a   | \x666f6f     | \x820a   | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte                     | \x666f6f008fdb | \x666f6f     | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6f8f00db | \x666f6f     | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte                     | \x666f6f8fdb00 | \x666f6fbedd | \x00     | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6fcff3', 'valid'),
+  ('\x666f6f8431a530', 'valid, no translation to UTF-8'),
+  ('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
+  ('\x666f6f84309c',   'incomplete char '),
+  ('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+  ('\x666f6f84309c3800', 'invalid, NUL byte'),
+  ('\x666f6f84309c0038', 'invalid, NUL byte');
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+                  description                   |      inbytes       |      result      |   errorat    |                               error                               
+------------------------------------------------+--------------------+------------------+--------------+-------------------------------------------------------------------
+ valid, pure ASCII                              | \x666f6f           | \x666f6f         |              | 
+ valid                                          | \x666f6fcff3       | \x666f6fcff3     |              | 
+ valid, no translation to UTF-8                 | \x666f6f8431a530   | \x666f6f8431a530 |              | 
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38   | \x666f6f84309c38 |              | 
+ incomplete char                                | \x666f6f84309c     | \x666f6f         | \x84309c     | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline           | \x666f6f84309c0a   | \x666f6f         | \x84309c0a   | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte                              | \x666f6f84309c3800 | \x666f6f84309c38 | \x00         | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte                              | \x666f6f84309c0038 | \x666f6f         | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+                  description                   |      inbytes       |     result     |   errorat    |                                                    error                                                    
+------------------------------------------------+--------------------+----------------+--------------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                              | \x666f6f           | \x666f6f       |              | 
+ valid                                          | \x666f6fcff3       | \x666f6fe8b1a1 |              | 
+ valid, no translation to UTF-8                 | \x666f6f8431a530   | \x666f6f       | \x8431a530   | character with byte sequence 0x84 0x31 0xa5 0x30 in encoding "GB18030" has no equivalent in encoding "UTF8"
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38   | \x666f6fefa8aa |              | 
+ incomplete char                                | \x666f6f84309c     | \x666f6f       | \x84309c     | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline           | \x666f6f84309c0a   | \x666f6f       | \x84309c0a   | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte                              | \x666f6f84309c3800 | \x666f6fefa8aa | \x00         | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte                              | \x666f6f84309c0038 | \x666f6f       | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\xe4dede',     'valid'),
+  ('\x00',     'invalid, NUL byte'),
+  ('\xe400dede',   'invalid, NUL byte'),
+  ('\xe4dede00',   'invalid, NUL byte');
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+    description    |  inbytes   |  result  | errorat  |                         error                         
+-------------------+------------+----------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f |          | 
+ valid             | \xe4dede   | \xe4dede |          | 
+ invalid, NUL byte | \x00       | \x       | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xe4     | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xe4dede | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+    description    |  inbytes   |     result     | errorat  |                         error                         
+-------------------+------------+----------------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f       |          | 
+ valid             | \xe4dede   | \xd184d0bed0be |          | 
+ invalid, NUL byte | \x00       | \x             | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xd184         | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xd184d0bed0be | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+    description    |  inbytes   |  result  | errorat  |                         error                         
+-------------------+------------+----------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f |          | 
+ valid             | \xe4dede   | \xc6cfcf |          | 
+ invalid, NUL byte | \x00       | \x       | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xc6     | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xc6cfcf | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+    description    |  inbytes   |     result     | errorat  |                         error                         
+-------------------+------------+----------------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f       |          | 
+ valid             | \xe4dede   | \x8bc68bcf8bcf |          | 
+ invalid, NUL byte | \x00       | \x             | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \x8bc6         | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \x8bc68bcf8bcf | \x00     | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6fb648', 'valid'),
+  ('\x666f6fa27f', 'valid, no translation to UTF-8'),
+  ('\x666f6fb60048',   'invalid, NUL byte'),
+  ('\x666f6fb64800',   'invalid, NUL byte');
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+          description           |    inbytes     |    result    | errorat  |                        error                         
+--------------------------------+----------------+--------------+----------+------------------------------------------------------
+ valid, pure ASCII              | \x666f6f       | \x666f6f     |          | 
+ valid                          | \x666f6fb648   | \x666f6fb648 |          | 
+ valid, no translation to UTF-8 | \x666f6fa27f   | \x666f6fa27f |          | 
+ invalid, NUL byte              | \x666f6fb60048 | \x666f6f     | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte              | \x666f6fb64800 | \x666f6fb648 | \x00     | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+          description           |    inbytes     |     result     | errorat  |                                             error                                              
+--------------------------------+----------------+----------------+----------+------------------------------------------------------------------------------------------------
+ valid, pure ASCII              | \x666f6f       | \x666f6f       |          | 
+ valid                          | \x666f6fb648   | \x666f6fe8b1a1 |          | 
+ valid, no translation to UTF-8 | \x666f6fa27f   | \x666f6f       | \xa27f   | character with byte sequence 0xa2 0x7f in encoding "BIG5" has no equivalent in encoding "UTF8"
+ invalid, NUL byte              | \x666f6fb60048 | \x666f6f       | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte              | \x666f6fb64800 | \x666f6fe8b1a1 | \x00     | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+          description           |    inbytes     |     result     | errorat  |                        error                         
+--------------------------------+----------------+----------------+----------+------------------------------------------------------
+ valid, pure ASCII              | \x666f6f       | \x666f6f       |          | 
+ valid                          | \x666f6fb648   | \x666f6f95e2af |          | 
+ valid, no translation to UTF-8 | \x666f6fa27f   | \x666f6f95a3c1 |          | 
+ invalid, NUL byte              | \x666f6fb60048 | \x666f6f       | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte              | \x666f6fb64800 | \x666f6f95e2af | \x00     | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+--
+-- MULE_INTERNAL
+--
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x8bc68bcf8bcf',   'valid (in KOI8R)'),
+  ('\x8bc68bcf8b', 'invalid,incomplete char'),
+  ('\x92bedd',     'valid (in SHIFT_JIS)'),
+  ('\x92be',       'invalid, incomplete char)'),
+  ('\x666f6f95a3c1',   'valid (in Big5)'),
+  ('\x666f6f95a3', 'invalid, incomplete char'),
+  ('\x9200bedd',   'invalid, NUL byte'),
+  ('\x92bedd00',   'invalid, NUL byte'),
+  ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
+-- Test MULE_INTERNAL verification
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
+        description        |     inbytes      |     result     |     errorat      |                               error                                
+---------------------------+------------------+----------------+------------------+--------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f       |                  | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \x8bc68bcf8bcf |                  | 
+ invalid,incomplete char   | \x8bc68bcf8b     | \x8bc68bcf     | \x8b             | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         | \x92bedd       |                  | 
+ invalid, incomplete char) | \x92be           | \x             | \x92be           | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   | \x666f6f95a3c1 |                  | 
+ invalid, incomplete char  | \x666f6f95a3     | \x666f6f       | \x95a3           | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       | \x             | \x9200bedd       | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       | \x92bedd       | \x00             | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf | \x             | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+        description        |     inbytes      |  result  |     errorat      |                                                     error                                                     
+---------------------------+------------------+----------+------------------+---------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |                  | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \xc6cfcf |                  | 
+ invalid,incomplete char   | \x8bc68bcf8b     | \xc6cf   | \x8b             | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         | \x       | \x92bedd         | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char) | \x92be           | \x       | \x92be           | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   | \x666f6f | \x95a3c1         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char  | \x666f6f95a3     | \x666f6f | \x95a3           | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       | \x       | \x9200bedd       | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, NUL byte         | \x92bedd00       | \x       | \x92bedd00       | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, NUL byte         | \x8b00c68bcf8bcf | \x       | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+        description        |     inbytes      |  result  |     errorat      |                                                       error                                                        
+---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |                  | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \xe4dede |                  | 
+ invalid,incomplete char   | \x8bc68bcf8b     | \xe4de   | \x8b             | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         | \x       | \x92bedd         | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char) | \x92be           | \x       | \x92be           | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   | \x666f6f | \x95a3c1         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char  | \x666f6f95a3     | \x666f6f | \x95a3           | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       | \x       | \x9200bedd       | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, NUL byte         | \x92bedd00       | \x       | \x92bedd00       | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, NUL byte         | \x8b00c68bcf8bcf | \x       | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+        description        |     inbytes      |  result  |     errorat      |                                                    error                                                     
+---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |                  | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \x       | \x8bc68bcf8bcf   | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid,incomplete char   | \x8bc68bcf8b     | \x       | \x8bc68bcf8b     | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ valid (in SHIFT_JIS)      | \x92bedd         | \x8fdb   |                  | 
+ invalid, incomplete char) | \x92be           | \x       | \x92be           | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   | \x666f6f | \x95a3c1         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid, incomplete char  | \x666f6f95a3     | \x666f6f | \x95a3           | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       | \x       | \x9200bedd       | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       | \x8fdb   | \x00             | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf | \x       | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+        description        |     inbytes      |    result    |     errorat      |                                                    error                                                     
+---------------------------+------------------+--------------+------------------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f     |                  | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \x           | \x8bc68bcf8bcf   | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid,incomplete char   | \x8bc68bcf8b     | \x           | \x8bc68bcf8b     | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ valid (in SHIFT_JIS)      | \x92bedd         | \x           | \x92bedd         | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid, incomplete char) | \x92be           | \x           | \x92be           | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   | \x666f6fa2a1 |                  | 
+ invalid, incomplete char  | \x666f6f95a3     | \x666f6f     | \x95a3           | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       | \x           | \x9200bedd       | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       | \x           | \x92bedd00       | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid, NUL byte         | \x8b00c68bcf8bcf | \x           | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
+        description        |     inbytes      |  result  |     errorat      |                                                     error                                                      
+---------------------------+------------------+----------+------------------+----------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |                  | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \x       | \x8bc68bcf8bcf   | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid,incomplete char   | \x8bc68bcf8b     | \x       | \x8bc68bcf8b     | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ valid (in SHIFT_JIS)      | \x92bedd         | \xbedd   |                  | 
+ invalid, incomplete char) | \x92be           | \x       | \x92be           | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   | \x666f6f | \x95a3c1         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid, incomplete char  | \x666f6f95a3     | \x666f6f | \x95a3           | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       | \x       | \x9200bedd       | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       | \xbedd   | \x00             | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf | \x       | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index ef4b4444b9037bda526275e103300bf8c7d0fb3c..fa26bf761046a2e24d99758d2d26ef383a4e03ac 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -1052,13 +1052,14 @@ WHERE p1.conproc = 0 OR
  SELECT p.oid, p.proname, c.oid, c.conname
  FROM pg_proc p, pg_conversion c
  WHERE p.oid = c.conproc AND
-    (p.prorettype != 'void'::regtype OR p.proretset OR
-     p.pronargs != 5 OR
+    (p.prorettype != 'int4'::regtype OR p.proretset OR
+     p.pronargs != 6 OR
       p.proargtypes[0] != 'int4'::regtype OR
       p.proargtypes[1] != 'int4'::regtype OR
       p.proargtypes[2] != 'cstring'::regtype OR
       p.proargtypes[3] != 'internal'::regtype OR
-     p.proargtypes[4] != 'int4'::regtype);
+     p.proargtypes[4] != 'int4'::regtype OR
+     p.proargtypes[5] != 'bool'::regtype);
   oid | proname | oid | conname 
  -----+---------+-----+---------
  (0 rows)
diff --git a/src/test/regress/input/create_function_1.source b/src/test/regress/input/create_function_1.source

index 412e339fcf2dfbeb8270e671afa3dff0af256994..6ba37fe63b617d7cc2dc26ac2f54682d57b4bc60 100644 (file)
--- a/src/test/regress/input/create_function_1.source
+++ b/src/test/regress/input/create_function_1.source
@@ -78,6 +78,10 @@ CREATE FUNCTION test_opclass_options_func(internal)
      AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
      LANGUAGE C;
  
+CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
+    AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
+    LANGUAGE C;
+
  -- Things that shouldn't work:
  
  CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source

index 4d78fa1228964dc41e6f24cba908bd8f95bfeec7..cb38a039bf4636643dbe7fc8a2e38183a8349a41 100644 (file)
--- a/src/test/regress/output/create_function_1.source
+++ b/src/test/regress/output/create_function_1.source
@@ -68,6 +68,9 @@ CREATE FUNCTION test_opclass_options_func(internal)
      RETURNS void
      AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
      LANGUAGE C;
+CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
+    AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
+    LANGUAGE C;
  -- Things that shouldn't work:
  CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
      AS 'SELECT ''not an integer'';';
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c

index 32ab9ed6b537a1670d0dd22b74a8e2ec29f806af..1990cbb6a13baa19409ce81a075a2b85f2d36687 100644 (file)
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -23,12 +23,15 @@
  #include "access/htup_details.h"
  #include "access/transam.h"
  #include "access/xact.h"
+#include "catalog/namespace.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_type.h"
  #include "commands/sequence.h"
  #include "commands/trigger.h"
  #include "executor/executor.h"
  #include "executor/spi.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
  #include "miscadmin.h"
  #include "nodes/supportnodes.h"
  #include "optimizer/optimizer.h"
@@ -1060,3 +1063,134 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
  {
     PG_RETURN_NULL();
  }
+
+/*
+ * SQL-callable test harness for encoding conversion and verification.
+ *
+ * SQL arguments, in order:
+ *  bytea  -- input bytes
+ *  name   -- source encoding name
+ *  name   -- destination encoding name
+ *  bool   -- noError: if true, don't ereport() on invalid/untranslatable input
+ *
+ * If both names denote the same encoding, the input is only verified;
+ * otherwise the default conversion procedure for the pair is called.
+ *
+ * Returns a row of two columns:
+ *  int4   -- number of input bytes successfully processed
+ *  bytea  -- resulting (converted or verified) bytes
+ */
+PG_FUNCTION_INFO_V1(test_enc_conversion);
+Datum
+test_enc_conversion(PG_FUNCTION_ARGS)
+{
+   bytea      *input = PG_GETARG_BYTEA_PP(0);
+   char       *from_name = NameStr(*PG_GETARG_NAME(1));
+   int         from_enc = pg_char_to_encoding(from_name);
+   char       *to_name = NameStr(*PG_GETARG_NAME(2));
+   int         to_enc = pg_char_to_encoding(to_name);
+   bool        noError = PG_GETARG_BOOL(3);
+   TupleDesc   rowdesc;
+   char       *inptr;
+   Size        inlen;
+   bytea      *output;
+   int         nconverted;
+   Datum       values[2];
+   bool        nulls[2];
+   HeapTuple   tuple;
+
+   /* Both encoding names must have resolved to a known encoding */
+   if (from_enc < 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("invalid source encoding name \"%s\"",
+                       from_name)));
+   if (to_enc < 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("invalid destination encoding name \"%s\"",
+                       to_name)));
+
+   /* Fetch and bless the row type this function was declared to return */
+   if (get_call_result_type(fcinfo, NULL, &rowdesc) != TYPEFUNC_COMPOSITE)
+       elog(ERROR, "return type must be a row type");
+   rowdesc = BlessTupleDesc(rowdesc);
+
+   inptr = VARDATA_ANY(input);
+   inlen = VARSIZE_ANY_EXHDR(input);
+
+   if (from_enc != to_enc)
+   {
+       Oid         convproc;
+       char       *outbuf;
+       Size        outbufsize;
+       int         outlen;
+
+       convproc = FindDefaultConversionProc(from_enc, to_enc);
+       if (!OidIsValid(convproc))
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_FUNCTION),
+                    errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
+                           pg_encoding_to_char(from_enc),
+                           pg_encoding_to_char(to_enc))));
+
+       /* Refuse inputs whose worst-case output size would be too large */
+       if (inlen >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
+           ereport(ERROR,
+                   (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                    errmsg("out of memory"),
+                    errdetail("String of %d bytes is too long for encoding conversion.",
+                              (int) inlen)));
+
+       /* Worst-case growth, plus room for a terminating NUL */
+       outbufsize = (Size) inlen * MAX_CONVERSION_GROWTH + 1;
+       outbuf = MemoryContextAlloc(CurrentMemoryContext, outbufsize);
+
+       nconverted = pg_do_encoding_conversion_buf(convproc,
+                                                  from_enc,
+                                                  to_enc,
+                                                  (unsigned char *) inptr, inlen,
+                                                  (unsigned char *) outbuf, outbufsize,
+                                                  noError);
+
+       /* Copy the NUL-terminated result into a freshly built bytea */
+       outlen = strlen(outbuf);
+       output = (bytea *) palloc(outlen + VARHDRSZ);
+       SET_VARSIZE(output, outlen + VARHDRSZ);
+       memcpy(VARDATA(output), outbuf, outlen);
+
+       pfree(outbuf);
+   }
+   else
+   {
+       /* Same encoding on both sides: verify only */
+       int         validlen;
+
+       validlen = pg_encoding_verifymbstr(from_enc, inptr, inlen);
+
+       /* With noError off, invalid input is reported (no return expected) */
+       if (validlen != inlen && !noError)
+           report_invalid_encoding(from_enc, inptr + validlen, inlen - validlen);
+
+       nconverted = validlen;
+       if (validlen == inlen)
+           output = input;
+       else
+       {
+           /* Hand back only the valid prefix, as a new bytea */
+           Assert(validlen < inlen);
+           output = (bytea *) palloc(validlen + VARHDRSZ);
+           SET_VARSIZE(output, validlen + VARHDRSZ);
+           memcpy(VARDATA(output), inptr, validlen);
+       }
+   }
+
+   /* Form the two-column result row; neither column is NULL */
+   nulls[0] = false;
+   nulls[1] = false;
+   values[0] = Int32GetDatum(nconverted);
+   values[1] = PointerGetDatum(output);
+   tuple = heap_form_tuple(rowdesc, values, nulls);
+
+   PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql

index 02cf39f1ce95033baaa1bd963a478df09ec6802c..ea85f20ed8354e8d4dd1605cd7f334af4d209a89 100644 (file)
--- a/src/test/regress/sql/conversion.sql
+++ b/src/test/regress/sql/conversion.sql
@@ -34,3 +34,188 @@ DROP CONVERSION mydef;
  --
  RESET SESSION AUTHORIZATION;
  DROP USER regress_conversion_user;
+
+--
+-- Test built-in conversion functions.
+--
+
+-- Helper function to test one conversion. Uses the test_enc_conversion function
+-- that was created in the create_function_1 test; see the OUT parameters below.
+create or replace function test_conv(
+  input IN bytea,          -- bytes to convert
+  src_encoding IN text,    -- name of the encoding 'input' is converted from
+  dst_encoding IN text,    -- name of the encoding to convert to
+
+  result OUT bytea,        -- converted bytes returned by test_enc_conversion
+  errorat OUT bytea,       -- rest of 'input' after the converted prefix; NULL if no error
+  error OUT text)          -- message of the error raised by the first attempt; NULL if none
+language plpgsql as
+$$
+declare
+  validlen int;
+begin
+  -- First try the conversion with noError = false. If that raises an error,
+  -- capture its message and retry with noError = true. The retry is expected to
+  -- succeed and report how many input bytes were converted before the failure.
+  begin
+    select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
+    errorat = NULL;
+    error := NULL;
+  exception when others then
+    error := sqlerrm;
+    select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
+    errorat = substr(input, validlen + 1);  -- substr is 1-based: skip the converted prefix
+  end;
+  return;
+end;
+$$;
+
+
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\xc3a4c3b6',   'valid, extra latin chars'),
+  ('\xd184d0bed0be',   'valid, cyrillic'),
+  ('\x666f6fe8b1a1',   'valid, kanji/Chinese'),
+  ('\xe382abe3829a',   'valid, two chars that combine to one in EUC_JIS_2004'),
+  ('\xe382ab',     'only first half of combined char in EUC_JIS_2004'),
+  ('\xe382abe382', 'incomplete combination when converted EUC_JIS_2004'),
+  ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+  ('\x666f6fefa8aa',   'valid, needs mapping function to convert to GB18030'),
+  ('\x66e8b1ff6f6f',   'invalid byte sequence'),
+  ('\x66006f',     'invalid, NUL byte'),
+  ('\x666f6fe8b100',   'invalid, NUL byte'),
+  ('\x666f6fe8b1', 'incomplete character at end');
+
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6fbedd', 'valid'),
+  ('\xa5f7',       'valid, translates to two UTF-8 chars '),
+  ('\xbeddbe',     'incomplete char '),
+  ('\x666f6f00bedd',   'invalid, NUL byte'),
+  ('\x666f6fbe00dd',   'invalid, NUL byte'),
+  ('\x666f6fbedd00',   'invalid, NUL byte'),
+  ('\xbe04',       'invalid byte sequence');
+
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6f8fdb', 'valid'),
+  ('\x666f6f81c0', 'valid, no translation to UTF-8'),
+  ('\x666f6f82f5', 'valid, translates to two UTF-8 chars '),
+  ('\x666f6f8fdb8f',   'incomplete char '),
+  ('\x666f6f820a', 'incomplete char, followed by newline '),
+  ('\x666f6f008fdb',   'invalid, NUL byte'),
+  ('\x666f6f8f00db',   'invalid, NUL byte'),
+  ('\x666f6f8fdb00',   'invalid, NUL byte');
+
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6fcff3', 'valid'),
+  ('\x666f6f8431a530', 'valid, no translation to UTF-8'),
+  ('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
+  ('\x666f6f84309c',   'incomplete char '),
+  ('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+  ('\x666f6f84309c3800', 'invalid, NUL byte'),
+  ('\x666f6f84309c0038', 'invalid, NUL byte');
+
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\xe4dede',     'valid'),
+  ('\x00',     'invalid, NUL byte'),
+  ('\xe400dede',   'invalid, NUL byte'),
+  ('\xe4dede00',   'invalid, NUL byte');
+
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x666f6fb648', 'valid'),
+  ('\x666f6fa27f', 'valid, no translation to UTF-8'),
+  ('\x666f6fb60048',   'invalid, NUL byte'),
+  ('\x666f6fb64800',   'invalid, NUL byte');
+
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+
+--
+-- MULE_INTERNAL
+--
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs  values
+  ('\x666f6f',     'valid, pure ASCII'),
+  ('\x8bc68bcf8bcf',   'valid (in KOI8R)'),
+  ('\x8bc68bcf8b', 'invalid,incomplete char'),
+  ('\x92bedd',     'valid (in SHIFT_JIS)'),
+  ('\x92be',       'invalid, incomplete char)'),
+  ('\x666f6f95a3c1',   'valid (in Big5)'),
+  ('\x666f6f95a3', 'invalid, incomplete char'),
+  ('\x9200bedd',   'invalid, NUL byte'),
+  ('\x92bedd00',   'invalid, NUL byte'),
+  ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
+
+-- Test MULE_INTERNAL verification
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql

index bbd3834b6345b0111faa95808f3b129d5a548c0c..04691745981f9addae46b1ee1379523d9f09a961 100644 (file)
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -556,13 +556,14 @@ WHERE p1.conproc = 0 OR
  SELECT p.oid, p.proname, c.oid, c.conname
  FROM pg_proc p, pg_conversion c
  WHERE p.oid = c.conproc AND
-    (p.prorettype != 'void'::regtype OR p.proretset OR
-     p.pronargs != 5 OR
+    (p.prorettype != 'int4'::regtype OR p.proretset OR
+     p.pronargs != 6 OR
       p.proargtypes[0] != 'int4'::regtype OR
       p.proargtypes[1] != 'int4'::regtype OR
       p.proargtypes[2] != 'cstring'::regtype OR
       p.proargtypes[3] != 'internal'::regtype OR
-     p.proargtypes[4] != 'int4'::regtype);
+     p.proargtypes[4] != 'int4'::regtype OR
+     p.proargtypes[5] != 'bool'::regtype);
  
  -- Check for conprocs that don't perform the specific conversion that
  -- pg_conversion alleges they do, by trying to invoke each conversion
author	Heikki Linnakangas <[email protected]>
	Thu, 1 Apr 2021 08:45:22 +0000 (11:45 +0300)
committer	Heikki Linnakangas <[email protected]>
	Thu, 1 Apr 2021 08:45:22 +0000 (11:45 +0300)
doc/src/sgml/ref/create_conversion.sgml		patch \| blob \| blame \| history
src/backend/commands/conversioncmds.c		patch \| blob \| blame \| history
src/backend/utils/error/elog.c		patch \| blob \| blame \| history
src/backend/utils/mb/conv.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c		patch \| blob \| blame \| history
src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c		patch \| blob \| blame \| history
src/backend/utils/mb/mbutils.c		patch \| blob \| blame \| history
src/bin/pg_upgrade/check.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_proc.dat		patch \| blob \| blame \| history
src/include/mb/pg_wchar.h		patch \| blob \| blame \| history
src/test/regress/expected/conversion.out		patch \| blob \| blame \| history
src/test/regress/expected/opr_sanity.out		patch \| blob \| blame \| history
src/test/regress/input/create_function_1.source		patch \| blob \| blame \| history
src/test/regress/output/create_function_1.source		patch \| blob \| blame \| history
src/test/regress/regress.c		patch \| blob \| blame \| history
src/test/regress/sql/conversion.sql		patch \| blob \| blame \| history
src/test/regress/sql/opr_sanity.sql		patch \| blob \| blame \| history