Don't allow creation of database with ICU locale with unsupported encoding
author: Peter Eisentraut <[email protected]>
Fri, 16 Sep 2022 07:37:54 +0000 (09:37 +0200)
committer: Peter Eisentraut <[email protected]>
Fri, 16 Sep 2022 07:41:33 +0000 (09:41 +0200)
Check in CREATE DATABASE and initdb that the selected encoding is
supported by ICU.  Before, they would pass but users would later get
an error from the server when they tried to use the database.

Also document that initdb sets the encoding to UTF8 by default if the
ICU locale provider is chosen.

Author: Marina Polyakova <[email protected]>
Reviewed-by: Kyotaro Horiguchi <[email protected]>
Discussion: https://www.postgresql.org/message-id/6dd6db0984d86a51b7255ba79f111971@postgrespro.ru

doc/src/sgml/ref/initdb.sgml
src/backend/commands/dbcommands.c
src/bin/initdb/initdb.c
src/bin/initdb/t/001_initdb.pl
src/bin/scripts/t/020_createdb.pl

index f01df2dde961c792b85a7e9f87a2e12cb31021b8..81588962980fa6c1958f103454618df2c2266ce8 100644 (file)
@@ -209,8 +209,9 @@ PostgreSQL documentation
        <para>
         Selects the encoding of the template databases. This will also
         be the default encoding of any database you create later,
-        unless you override it then.  The default is derived from the locale, or
-        <literal>SQL_ASCII</literal> if that does not work. The character sets supported by
+        unless you override it then.  The default is derived from the locale,
+        if the libc locale provider is used, or <literal>UTF8</literal> if the
+        ICU locale provider is used.  The character sets supported by
         the <productname>PostgreSQL</productname> server are described
         in <xref linkend="multibyte-charset-supported"/>.
        </para>
index 6ff48bb18f3639ae45d9528b32df51a4aebc60c0..f248ad42b77c8c0cf2089963d4357b120914ce20 100644 (file)
@@ -1034,6 +1034,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 
        if (dblocprovider == COLLPROVIDER_ICU)
        {
+               if (!(is_encoding_supported_by_icu(encoding)))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("encoding \"%s\" is not supported with ICU provider",
+                                                       pg_encoding_to_char(encoding))));
+
                /*
                 * This would happen if template0 uses the libc provider but the new
                 * database uses icu.
@@ -1042,10 +1048,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
                        ereport(ERROR,
                                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                         errmsg("ICU locale must be specified")));
-       }
 
-       if (dblocprovider == COLLPROVIDER_ICU)
                check_icu_locale(dbiculocale);
+       }
 
        /*
         * Check that the new encoding and locale settings match the source
index 6aeec8d426c52414b827686781c245291f27ed1f..28f22b25b2ebb26994e717bef8f5f5f621abe4f4 100644 (file)
@@ -2042,6 +2042,27 @@ check_locale_encoding(const char *locale, int user_enc)
        return true;
 }
 
+/*
+ * Check that the chosen encoding is supported by ICU.
+ *
+ * Returns true if is_encoding_supported_by_icu() accepts user_enc.
+ * Otherwise logs an "encoding mismatch" error, together with a detail
+ * message naming the rejected encoding and a hint on how to rerun the
+ * program, and returns false.  This function does not exit by itself;
+ * acting on a false result is left to the caller.
+ *
+ * This should match the similar check in the backend createdb() function.
+ */
+static bool
+check_icu_locale_encoding(int user_enc)
+{
+       if (!(is_encoding_supported_by_icu(user_enc)))
+       {
+               pg_log_error("encoding mismatch");
+               pg_log_error_detail("The encoding you selected (%s) is not supported with the ICU provider.",
+                                                       pg_encoding_to_char(user_enc));
+               pg_log_error_hint("Rerun %s and either do not specify an encoding explicitly, "
+                                                 "or choose a matching combination.",
+                                                 progname);
+               return false;
+       }
+       return true;
+}
+
 /*
  * set up the locale variables
  *
@@ -2310,7 +2331,11 @@ setup_locale_encoding(void)
        }
 
        if (!encoding && locale_provider == COLLPROVIDER_ICU)
+       {
                encodingid = PG_UTF8;
+               printf(_("The default database encoding has been set to \"%s\".\n"),
+                          pg_encoding_to_char(encodingid));
+       }
        else if (!encoding)
        {
                int                     ctype_enc;
@@ -2362,6 +2387,10 @@ setup_locale_encoding(void)
        if (!check_locale_encoding(lc_ctype, encodingid) ||
                !check_locale_encoding(lc_collate, encodingid))
                exit(1);                                /* check_locale_encoding printed the error */
+
+       if (locale_provider == COLLPROVIDER_ICU &&
+               !check_icu_locale_encoding(encodingid))
+               exit(1);
 }
 
 
index a37f6dd9b334b6ee22d9fdd4d51422795cb54a39..164fc11cbffc8c466f84d51c07106b602d022bc6 100644 (file)
@@ -118,6 +118,15 @@ if ($ENV{with_icu} eq 'yes')
                ],
                qr/FATAL:  could not open collator for locale/,
                'fails for invalid ICU locale');
+
+       command_fails_like(
+               [
+                       'initdb',                '--no-sync',
+                       '--locale-provider=icu', '--encoding=SQL_ASCII',
+                       '--icu-locale=en', "$tempdir/dataX"
+               ],
+               qr/error: encoding mismatch/,
+               'fails for encoding not supported by ICU');
 }
 else
 {
index e91c1d013d08d8bd1e3a92f2aba958c5c7713ca6..e95f200d0b93cbd2fc4beeb20f3417b10f328df4 100644 (file)
@@ -50,6 +50,15 @@ if ($ENV{with_icu} eq 'yes')
                ],
                'fails for invalid ICU locale');
 
+       $node->command_fails_like(
+               [
+                       'createdb',             '-T',
+                       'template0',            '--locale-provider=icu',
+                       '--encoding=SQL_ASCII', 'foobarX'
+               ],
+               qr/ERROR:  encoding "SQL_ASCII" is not supported with ICU provider/,
+               'fails for encoding not supported by ICU');
+
        # additional node, which uses the icu provider
        my $node2 = PostgreSQL::Test::Cluster->new('icu');
        $node2->init(extra => ['--locale-provider=icu', '--icu-locale=en']);