Prefer pg_any_to_server/pg_server_to_any over pg_do_encoding_conversion.
author: Tom Lane <[email protected]>
author date: Sun, 23 Feb 2014 21:59:05 +0000 (16:59 -0500)
committer: Tom Lane <[email protected]>
commit date: Sun, 23 Feb 2014 21:59:05 +0000 (16:59 -0500)
A large majority of the callers of pg_do_encoding_conversion were
specifying the database encoding as either source or target of the
conversion, meaning that we can use the less general functions
pg_any_to_server/pg_server_to_any instead.

The main advantage of using the latter functions is that they can make use
of a cached conversion-function lookup in the common case that the other
encoding is the current client_encoding.  It's notationally cleaner too in
most cases, not least because of the historical artifact that the latter
functions use "char *" rather than "unsigned char *" in their APIs.

Note that pg_any_to_server will apply an encoding verification step in
some cases where pg_do_encoding_conversion would have just done nothing.
This seems to me to be a good idea at most of these call sites, though
it partially negates the performance benefit.

Per discussion of bug #9210.

12 files changed:
contrib/pg_stat_statements/pg_stat_statements.c
contrib/sslinfo/sslinfo.c
src/backend/commands/extension.c
src/backend/snowball/dict_snowball.c
src/backend/tsearch/ts_locale.c
src/backend/utils/adt/pg_locale.c
src/backend/utils/adt/xml.c
src/backend/utils/mb/mbutils.c
src/pl/plperl/plperl.c
src/pl/plperl/plperl_helpers.h
src/pl/plpython/plpy_util.c
src/pl/tcl/pltcl.c

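diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 97f171d68edfdf4c8526531f705243f5e4fbe868..0bd0d046c7adc3c72746c37b6dc16d41d40fd9d2 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c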
@@ -1458,11 +1458,9 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
                {
                    char       *enc;
 
-                   enc = (char *)
-                       pg_do_encoding_conversion((unsigned char *) qstr,
-                                                 entry->query_len,
-                                                 entry->encoding,
-                                                 GetDatabaseEncoding());
+                   enc = pg_any_to_server(qstr,
+                                          entry->query_len,
+                                          entry->encoding);
 
                    values[i++] = CStringGetTextDatum(enc);
 
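diff --git a/contrib/sslinfo/sslinfo.c b/contrib/sslinfo/sslinfo.c
index d724fe91ff5b4dbc4ff402bb93c374b1eec05144..90c6b57c699a280b63e0f6e43325834668580521 100644
--- a/contrib/sslinfo/sslinfo.c
+++ b/contrib/sslinfo/sslinfo.c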
@@ -158,10 +158,7 @@ ASN1_STRING_to_text(ASN1_STRING *str)
    nullterm = '\0';
    BIO_write(membuf, &nullterm, 1);
    size = BIO_get_mem_data(membuf, &sp);
-   dp = (char *) pg_do_encoding_conversion((unsigned char *) sp,
-                                           size - 1,
-                                           PG_UTF8,
-                                           GetDatabaseEncoding());
+   dp = pg_any_to_server(sp, size - 1, PG_UTF8);
    result = cstring_to_text(dp);
    if (dp != sp)
        pfree(dp);
@@ -323,10 +320,7 @@ X509_NAME_to_text(X509_NAME *name)
    nullterm = '\0';
    BIO_write(membuf, &nullterm, 1);
    size = BIO_get_mem_data(membuf, &sp);
-   dp = (char *) pg_do_encoding_conversion((unsigned char *) sp,
-                                           size - 1,
-                                           PG_UTF8,
-                                           GetDatabaseEncoding());
+   dp = pg_any_to_server(sp, size - 1, PG_UTF8);
    result = cstring_to_text(dp);
    if (dp != sp)
        pfree(dp);
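diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index ce5aed301b128357849b2e1e67643d6cf4fc0d83..06bd90b9aa9f3fe699e07bb09e3f90324fd8f949 100644
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c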
@@ -635,7 +635,6 @@ read_extension_script_file(const ExtensionControlFile *control,
                           const char *filename)
 {
    int         src_encoding;
-   int         dest_encoding = GetDatabaseEncoding();
    bytea      *content;
    char       *src_str;
    char       *dest_str;
@@ -645,7 +644,7 @@ read_extension_script_file(const ExtensionControlFile *control,
 
    /* use database encoding if not given */
    if (control->encoding < 0)
-       src_encoding = dest_encoding;
+       src_encoding = GetDatabaseEncoding();
    else
        src_encoding = control->encoding;
 
@@ -655,10 +654,7 @@ read_extension_script_file(const ExtensionControlFile *control,
    pg_verify_mbstr_len(src_encoding, src_str, len, false);
 
    /* convert the encoding to the database encoding */
-   dest_str = (char *) pg_do_encoding_conversion((unsigned char *) src_str,
-                                                 len,
-                                                 src_encoding,
-                                                 dest_encoding);
+   dest_str = pg_any_to_server(src_str, len, src_encoding);
 
    /* if no conversion happened, we have to arrange for null termination */
    if (dest_str == src_str)
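diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c
index a585c7206b2be181e07da615bd7313f42ff5bf53..37d2966359fc3e836d45f5360f88eff5a27315fe 100644
--- a/src/backend/snowball/dict_snowball.c
+++ b/src/backend/snowball/dict_snowball.c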
@@ -255,10 +255,7 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
        {
            char       *recoded;
 
-           recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
-                                                        strlen(txt),
-                                                      GetDatabaseEncoding(),
-                                                        PG_UTF8);
+           recoded = pg_server_to_any(txt, strlen(txt), PG_UTF8);
            if (recoded != txt)
            {
                pfree(txt);
@@ -284,10 +281,7 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
        {
            char       *recoded;
 
-           recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
-                                                        strlen(txt),
-                                                        PG_UTF8,
-                                                     GetDatabaseEncoding());
+           recoded = pg_any_to_server(txt, strlen(txt), PG_UTF8);
            if (recoded != txt)
            {
                pfree(txt);
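diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index d73687af9e0d662414c830c1c5be560581b05128..f9490c835ddbce8f5136ec818bce05c508373d63 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c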
@@ -209,10 +209,7 @@ t_readline(FILE *fp)
    (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
 
    /* And convert */
-   recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
-                                                len,
-                                                PG_UTF8,
-                                                GetDatabaseEncoding());
+   recoded = pg_any_to_server(buf, len, PG_UTF8);
    if (recoded == buf)
    {
        /*
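diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 0bf350adeab16541378b2ff725f412f2ecb2a53b..f34446329f580d9234f8098ac8627a6aa6b831de 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c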
@@ -418,9 +418,7 @@ db_encoding_strdup(int encoding, const char *str)
    char       *mstr;
 
    /* convert the string to the database encoding */
-   pstr = (char *) pg_do_encoding_conversion(
-                                         (unsigned char *) str, strlen(str),
-                                           encoding, GetDatabaseEncoding());
+   pstr = pg_any_to_server(str, strlen(str), encoding);
    mstr = strdup(pstr);
    if (pstr != str)
        pfree(pstr);
@@ -581,35 +579,32 @@ strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm
 {
    size_t      len;
    wchar_t     wbuf[MAX_L10N_DATA];
-   int         encoding;
-
-   encoding = GetDatabaseEncoding();
 
    len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
    if (len == 0)
-
+   {
        /*
         * strftime call failed - return 0 with the contents of dst
         * unspecified
         */
        return 0;
+   }
 
    len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
    if (len == 0)
-       elog(ERROR,
-       "could not convert string to UTF-8: error code %lu", GetLastError());
+       elog(ERROR, "could not convert string to UTF-8: error code %lu",
+            GetLastError());
 
    dst[len] = '\0';
-   if (encoding != PG_UTF8)
+   if (GetDatabaseEncoding() != PG_UTF8)
    {
-       char       *convstr =
-       (char *) pg_do_encoding_conversion((unsigned char *) dst,
-                                          len, PG_UTF8, encoding);
+       char       *convstr = pg_any_to_server(dst, len, PG_UTF8);
 
-       if (dst != convstr)
+       if (convstr != dst)
        {
            strlcpy(dst, convstr, dstlen);
            len = strlen(dst);
+           pfree(convstr);
        }
    }
 
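diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index d36751855b63089bd45b687f62374711057f46cf..765469c623e4f63ce8023689f60d5015a9db4535 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c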
@@ -345,10 +345,7 @@ xml_recv(PG_FUNCTION_ARGS)
    xmlFreeDoc(doc);
 
    /* Now that we know what we're dealing with, convert to server encoding */
-   newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
-                                               nbytes,
-                                               encoding,
-                                               GetDatabaseEncoding());
+   newstr = pg_any_to_server(str, nbytes, encoding);
 
    if (newstr != str)
    {
@@ -1793,10 +1790,8 @@ sqlchar_to_unicode(char *s)
    char       *utf8string;
    pg_wchar    ret[2];         /* need space for trailing zero */
 
-   utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
-                                                   pg_mblen(s),
-                                                   GetDatabaseEncoding(),
-                                                   PG_UTF8);
+   /* note we're not assuming s is null-terminated */
+   utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
 
    pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
                                  pg_encoding_mblen(PG_UTF8, utf8string));
@@ -1892,19 +1887,15 @@ map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
 static char *
 unicode_to_sqlchar(pg_wchar c)
 {
-   unsigned char utf8string[5];    /* need room for trailing zero */
+   char        utf8string[8];          /* need room for trailing zero */
    char       *result;
 
    memset(utf8string, 0, sizeof(utf8string));
-   unicode_to_utf8(c, utf8string);
-
-   result = (char *) pg_do_encoding_conversion(utf8string,
-                                               pg_encoding_mblen(PG_UTF8,
-                                                       (char *) utf8string),
-                                               PG_UTF8,
-                                               GetDatabaseEncoding());
-   /* if pg_do_encoding_conversion didn't strdup, we must */
-   if (result == (char *) utf8string)
+   unicode_to_utf8(c, (unsigned char *) utf8string);
+
+   result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
+   /* if pg_any_to_server didn't strdup, we must */
+   if (result == utf8string)
        result = pstrdup(result);
    return result;
 }
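diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 7f43cae69e2fa52fefc553a783a3c7841d3cacd3..15cf0d806b389a1d0b5fa1e2667d13ca03fecabe 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c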
@@ -1077,7 +1077,9 @@ pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
        char       *utf8;
 
        utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
-                                        len, GetMessageEncoding(), PG_UTF8);
+                                                 len,
+                                                 GetMessageEncoding(),
+                                                 PG_UTF8);
        if (utf8 != str)
            len = strlen(utf8);
 
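diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index ed6884e863a60b8e20c37471d0ce468035f0d0f2..f8ccaa59e8930080a05ebb8f279552aa4011cc08 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c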
@@ -3811,9 +3811,7 @@ hv_store_string(HV *hv, const char *key, SV *val)
    char       *hkey;
    SV        **ret;
 
-   hkey = (char *)
-       pg_do_encoding_conversion((unsigned char *) key, strlen(key),
-                                 GetDatabaseEncoding(), PG_UTF8);
+   hkey = pg_server_to_any(key, strlen(key), PG_UTF8);
 
    /*
     * This seems nowhere documented, but under Perl 5.8.0 and up, hv_store()
@@ -3841,9 +3839,7 @@ hv_fetch_string(HV *hv, const char *key)
    char       *hkey;
    SV        **ret;
 
-   hkey = (char *)
-       pg_do_encoding_conversion((unsigned char *) key, strlen(key),
-                                 GetDatabaseEncoding(), PG_UTF8);
+   hkey = pg_server_to_any(key, strlen(key), PG_UTF8);
 
    /* See notes in hv_store_string */
    hlen = -(int) strlen(hkey);
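diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h
index 3e8aa7c4a26c49bbdc8e4f2fc4c67010db3c4082..c1c7c297cc52a4e6857159ddb06b62ebe8739b4d 100644
--- a/src/pl/plperl/plperl_helpers.h
+++ b/src/pl/plperl/plperl_helpers.h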
@@ -9,24 +9,11 @@
 static inline char *
 utf_u2e(char *utf8_str, size_t len)
 {
-   int         enc = GetDatabaseEncoding();
    char       *ret;
 
-   /*
-    * When we are in a PG_UTF8 or SQL_ASCII database
-    * pg_do_encoding_conversion() will not do any conversion (which is good)
-    * or verification (not so much), so we need to run the verification step
-    * separately.
-    */
-   if (enc == PG_UTF8 || enc == PG_SQL_ASCII)
-   {
-       pg_verify_mbstr_len(enc, utf8_str, len, false);
-       ret = utf8_str;
-   }
-   else
-       ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str,
-                                                len, PG_UTF8, enc);
+   ret = pg_any_to_server(utf8_str, len, PG_UTF8);
 
+   /* ensure we have a copy even if no conversion happened */
    if (ret == utf8_str)
        ret = pstrdup(ret);
 
@@ -41,12 +28,14 @@ utf_u2e(char *utf8_str, size_t len)
 static inline char *
 utf_e2u(const char *str)
 {
-   char       *ret =
-   (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
-                                      GetDatabaseEncoding(), PG_UTF8);
+   char       *ret;
 
+   ret = pg_server_to_any(str, strlen(str), PG_UTF8);
+
+   /* ensure we have a copy even if no conversion happened */
    if (ret == str)
        ret = pstrdup(ret);
+
    return ret;
 }
 
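diff --git a/src/pl/plpython/plpy_util.c b/src/pl/plpython/plpy_util.c
index 95cbba5cdc5db683ba86c46a7a8d4da148952351..88670e66d0a7a29b7a001102f404d58f10555cd2 100644
--- a/src/pl/plpython/plpy_util.c
+++ b/src/pl/plpython/plpy_util.c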
@@ -90,11 +90,9 @@ PLyUnicode_Bytes(PyObject *unicode)
    {
        PG_TRY();
        {
-           encoded = (char *) pg_do_encoding_conversion(
-                                               (unsigned char *) utf8string,
-                                                        strlen(utf8string),
-                                                        PG_UTF8,
-                                                     GetDatabaseEncoding());
+           encoded = pg_any_to_server(utf8string,
+                                      strlen(utf8string),
+                                      PG_UTF8);
        }
        PG_CATCH();
        {
@@ -109,7 +107,7 @@ PLyUnicode_Bytes(PyObject *unicode)
    /* finally, build a bytes object in the server encoding */
    rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
 
-   /* if pg_do_encoding_conversion allocated memory, free it now */
+   /* if pg_any_to_server allocated memory, free it now */
    if (utf8string != encoded)
        pfree(encoded);
 
@@ -149,10 +147,7 @@ PLyUnicode_FromString(const char *s)
    char       *utf8string;
    PyObject   *o;
 
-   utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
-                                                   strlen(s),
-                                                   GetDatabaseEncoding(),
-                                                   PG_UTF8);
+   utf8string = pg_server_to_any(s, strlen(s), PG_UTF8);
 
    o = PyUnicode_FromString(utf8string);
 
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 053803898215e6740c39b6342363f9e8399807cd..b3bf65ec885e3cc25c8d911af02ecd4b1aa348ec 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
 static unsigned char *
 utf_u2e(unsigned char *src)
 {
-   return pg_do_encoding_conversion(src, strlen(src), PG_UTF8, GetDatabaseEncoding());
+   return (unsigned char *) pg_any_to_server((char *) src,
+                                             strlen(src),
+                                             PG_UTF8);
 }
 
 static unsigned char *
 utf_e2u(unsigned char *src)
 {
-   return pg_do_encoding_conversion(src, strlen(src), GetDatabaseEncoding(), PG_UTF8);
+   return (unsigned char *) pg_server_to_any((char *) src,
+                                             strlen(src),
+                                             PG_UTF8);
 }
 
 #define PLTCL_UTF