Canonicalize ICU locale names to language tags.

author Jeff Davis <[email protected]>

Tue, 4 Apr 2023 17:28:08 +0000 (10:28 -0700)

committer Jeff Davis <[email protected]>

Tue, 4 Apr 2023 17:38:58 +0000 (10:38 -0700)
author Jeff Davis <[email protected]>
Tue, 4 Apr 2023 17:28:08 +0000 (10:28 -0700)
committer Jeff Davis <[email protected]>
Tue, 4 Apr 2023 17:38:58 +0000 (10:38 -0700)
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml

index 12fabb73720946e4e70722066c43f0d4ca8f360d..6dd95b89664b77f6fa8008bab3857c31aefe7fd3 100644 (file)
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -893,7 +893,7 @@ CREATE COLLATION german (provider = libc, locale = 'de_DE');
          The first example selects the ICU locale using a <quote>language
          tag</quote> per BCP 47.  The second example uses the traditional
          ICU-specific locale syntax.  The first style is preferred going
-        forward, but it is not supported by older ICU versions.
+        forward, and is used internally to store locales.
         </para>
         <para>
          Note that you can name the collation objects in the SQL environment
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c

index 45de78352c7389e886ee8f72f56794e57db56e66..c91fe66d9b20b8d0ba4bbddc0a9baa1ea242d460 100644 (file)
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -165,6 +165,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
         else
             colliculocale = NULL;
  
+       /*
+        * When the ICU locale comes from an existing collation, do not
+        * canonicalize to a language tag.
+        */
+
         datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
         if (!isnull)
             collicurules = TextDatumGetCString(datum);
@@ -259,6 +264,25 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
                         (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                          errmsg("parameter \"locale\" must be specified")));
  
+           /*
+            * During binary upgrade, preserve the locale string. Otherwise,
+            * canonicalize to a language tag.
+            */
+           if (!IsBinaryUpgrade)
+           {
+               char *langtag = icu_language_tag(colliculocale,
+                                                icu_validation_level);
+
+               if (langtag && strcmp(colliculocale, langtag) != 0)
+               {
+                   ereport(NOTICE,
+                           (errmsg("using standard form \"%s\" for locale \"%s\"",
+                                   langtag, colliculocale)));
+
+                   colliculocale = langtag;
+               }
+           }
+
             icu_validate_locale(colliculocale);
         }
  
@@ -569,26 +593,6 @@ cmpaliases(const void *a, const void *b)
  
  
  #ifdef USE_ICU
-/*
- * Get the ICU language tag for a locale name.
- * The result is a palloc'd string.
- */
-static char *
-get_icu_language_tag(const char *localename)
-{
-   char        buf[ULOC_FULLNAME_CAPACITY];
-   UErrorCode  status;
-
-   status = U_ZERO_ERROR;
-   uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status);
-   if (U_FAILURE(status))
-       ereport(ERROR,
-               (errmsg("could not convert locale name \"%s\" to language tag: %s",
-                       localename, u_errorName(status))));
-
-   return pstrdup(buf);
-}
-
  /*
   * Get a comment (specifically, the display name) for an ICU locale.
   * The result is a palloc'd string, or NULL if we can't get a comment
@@ -950,7 +954,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
             else
                 name = uloc_getAvailable(i);
  
-           langtag = get_icu_language_tag(name);
+           langtag = icu_language_tag(name, ERROR);
  
             /*
              * Be paranoid about not allowing any non-ASCII strings into
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c

index 24bcc5adfe88d662314014d9d686e911012171f2..2e242eeff242bd13008f041ca685faa3aad4b967 100644 (file)
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1058,6 +1058,26 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                      errmsg("ICU locale must be specified")));
  
+       /*
+        * During binary upgrade, or when the locale came from the template
+        * database, preserve locale string. Otherwise, canonicalize to a
+        * language tag.
+        */
+       if (!IsBinaryUpgrade && dbiculocale != src_iculocale)
+       {
+           char *langtag = icu_language_tag(dbiculocale,
+                                            icu_validation_level);
+
+           if (langtag && strcmp(dbiculocale, langtag) != 0)
+           {
+               ereport(NOTICE,
+                       (errmsg("using standard form \"%s\" for locale \"%s\"",
+                               langtag, dbiculocale)));
+
+               dbiculocale = langtag;
+           }
+       }
+
         icu_validate_locale(dbiculocale);
     }
     else
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c

index 9497c20d1238d09a5b3c9b6d4031a2a98ee0748f..06e73aa012f42e056580d00989ba50f48b34af82 100644 (file)
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2826,6 +2826,91 @@ icu_set_collation_attributes(UCollator *collator, const char *loc,
  
  #endif
  
+/*
+ * Return the BCP47 language tag representation of the requested locale.
+ *
+ * This function should be called before passing the string to ucol_open(),
+ * because conversion to a language tag also performs "level 2
+ * canonicalization". In addition to producing a consistent format, level 2
+ * canonicalization is able to more accurately interpret different input
+ * locale string formats, such as POSIX and .NET IDs.
+ */
+char *
+icu_language_tag(const char *loc_str, int elevel)
+{
+#ifdef USE_ICU
+   UErrorCode   status;
+   char         lang[ULOC_LANG_CAPACITY];
+   char        *langtag;
+   size_t       buflen = 32;   /* arbitrary starting buffer size */
+   const bool   strict = true;
+
+   status = U_ZERO_ERROR;
+   uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+   if (U_FAILURE(status))
+   {
+       if (elevel > 0)
+           ereport(elevel,
+                   (errmsg("could not get language from locale \"%s\": %s",
+                           loc_str, u_errorName(status))));
+       return NULL;
+   }
+
+   /* C/POSIX locales aren't handled by uloc_toLanguageTag() */
+   if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+       return pstrdup("en-US-u-va-posix");
+
+   /*
+    * A BCP47 language tag doesn't have a clearly-defined upper limit
+    * (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
+    * uloc_toLanguageTag() doesn't always return the ultimate length on the
+    * first call, necessitating a loop.
+    */
+   langtag = palloc(buflen);
+   while (true)
+   {
+       int32_t     len;
+
+       status = U_ZERO_ERROR;
+       len = uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
+
+       /*
+        * If the result fits in the buffer exactly (len == buflen),
+        * uloc_toLanguageTag() will return success without nul-terminating
+        * the result. Check for either U_BUFFER_OVERFLOW_ERROR or len >=
+        * buflen and try again.
+        */
+       if ((status == U_BUFFER_OVERFLOW_ERROR ||
+            (U_SUCCESS(status) && len >= buflen)) &&
+           buflen < MaxAllocSize)
+       {
+           buflen = Min(buflen * 2, MaxAllocSize);
+           langtag = repalloc(langtag, buflen);
+           continue;
+       }
+
+       break;
+   }
+
+   if (U_FAILURE(status))
+   {
+       pfree(langtag);
+
+       if (elevel > 0)
+           ereport(elevel,
+                   (errmsg("could not convert locale name \"%s\" to language tag: %s",
+                           loc_str, u_errorName(status))));
+       return NULL;
+   }
+
+   return langtag;
+#else                          /* not USE_ICU */
+   ereport(ERROR,
+           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+            errmsg("ICU is not supported in this build")));
+#endif                         /* not USE_ICU */
+}
+
  /*
   * Perform best-effort check that the locale is a valid one.
   */
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c

index 208ddc9b302f7a4c973fa26ce15bf973df754dce..4814c1c4052cdd4a7dcf4e7d3682e99651e30743 100644 (file)
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2229,6 +2229,78 @@ check_icu_locale_encoding(int user_enc)
     return true;
  }
  
+/*
+ * Convert to canonical BCP47 language tag. Must be consistent with
+ * the backend's icu_language_tag() in pg_locale.c.
+ */
+static char *
+icu_language_tag(const char *loc_str)
+{
+#ifdef USE_ICU
+   UErrorCode   status;
+   char         lang[ULOC_LANG_CAPACITY];
+   char        *langtag;
+   size_t       buflen = 32;   /* arbitrary starting buffer size */
+   const bool   strict = true;
+
+   status = U_ZERO_ERROR;
+   uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+   if (U_FAILURE(status))
+   {
+       pg_fatal("could not get language from locale \"%s\": %s",
+                loc_str, u_errorName(status));
+       return NULL;
+   }
+
+   /* C/POSIX locales aren't handled by uloc_toLanguageTag() */
+   if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+       return pstrdup("en-US-u-va-posix");
+
+   /*
+    * A BCP47 language tag doesn't have a clearly-defined upper limit
+    * (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
+    * uloc_toLanguageTag() doesn't always return the ultimate length on the
+    * first call, necessitating a loop.
+    */
+   langtag = pg_malloc(buflen);
+   while (true)
+   {
+       int32_t     len;
+
+       status = U_ZERO_ERROR;
+       len = uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
+
+       /*
+        * If the result fits in the buffer exactly (len == buflen),
+        * uloc_toLanguageTag() will return success without nul-terminating
+        * the result. Check for either U_BUFFER_OVERFLOW_ERROR or len >=
+        * buflen and try again.
+        */
+       if (status == U_BUFFER_OVERFLOW_ERROR ||
+           (U_SUCCESS(status) && len >= buflen))
+       {
+           buflen = buflen * 2;
+           langtag = pg_realloc(langtag, buflen);
+           continue;
+       }
+
+       break;
+   }
+
+   if (U_FAILURE(status))
+   {
+       pg_free(langtag);
+
+       pg_fatal("could not convert locale name \"%s\" to language tag: %s",
+                loc_str, u_errorName(status));
+   }
+
+   return langtag;
+#else
+   pg_fatal("ICU is not supported in this build");
+#endif
+}
+
  /*
   * Perform best-effort check that the locale is a valid one. Should be
   * consistent with pg_locale.c, except that it doesn't need to open the
@@ -2376,6 +2448,8 @@ setlocales(void)
  
     if (locale_provider == COLLPROVIDER_ICU)
     {
+       char *langtag;
+
         /* acquire default locale from the environment, if not specified */
         if (icu_locale == NULL)
         {
@@ -2383,6 +2457,13 @@ setlocales(void)
             printf(_("Using default ICU locale \"%s\".\n"), icu_locale);
         }
  
+       /* canonicalize to a language tag */
+       langtag = icu_language_tag(icu_locale);
+       printf(_("Using language tag \"%s\" for ICU locale \"%s\".\n"),
+              langtag, icu_locale);
+       pg_free(icu_locale);
+       icu_locale = langtag;
+
         icu_validate_locale(icu_locale);
  
         /*
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl

index db7995fe28df4c7464a3e88f575a0683d20dd02e..17a444d80c5da2e501e11f4f4b03eaeb8b3d1710 100644 (file)
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -144,7 +144,7 @@ if ($ENV{with_icu} eq 'yes')
             '--locale-provider=icu',
             '--icu-locale=@colNumeric=lower', "$tempdir/dataX"
         ],
-       qr/could not open collator for locale "\@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR/,
+       qr/could not open collator for locale "und-u-kn-lower": U_ILLEGAL_ARGUMENT_ERROR/,
         'fails for invalid collation argument');
  }
  else
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl

index 42215f82f7aa4894e856638a9c8afd189b99cbd6..df26ba42d60b91801d4c34196758a7dde38e4e37 100644 (file)
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -1860,9 +1860,9 @@ my %tests = (
  
     'CREATE COLLATION icu_collation' => {
         create_order => 76,
-       create_sql   => "CREATE COLLATION icu_collation (PROVIDER = icu, LOCALE = 'C');",
+       create_sql   => "CREATE COLLATION icu_collation (PROVIDER = icu, LOCALE = 'en-US-u-va-posix');",
         regexp =>
-         qr/CREATE COLLATION public.icu_collation \(provider = icu, locale = 'C'(, version = '[^']*')?\);/m,
+         qr/CREATE COLLATION public.icu_collation \(provider = icu, locale = 'en-US-u-va-posix'(, version = '[^']*')?\);/m,
         icu => 1,
         like      => { %full_runs, section_pre_data => 1, },
     },
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h

index c2754279760b467d90d9074f6256c513c407c52a..8c095abc5241295983460440112e0f23217a05cd 100644 (file)
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -120,6 +120,7 @@ extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
                                  size_t srclen, pg_locale_t locale);
  
  extern void icu_validate_locale(const char *loc_str);
+extern char *icu_language_tag(const char *loc_str, int elevel);
  
  #ifdef USE_ICU
  extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out

index 5e480d45cdf71d3056a2914986478156a21283c8..b5a221b030052381aa76083767f33ba17d320c56 100644 (file)
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1019,6 +1019,7 @@ reset enable_seqscan;
  CREATE ROLE regress_test_role;
  CREATE SCHEMA test_schema;
  -- We need to do this this way to cope with varying names for encodings:
+SET client_min_messages TO WARNING;
  do $$
  BEGIN
    EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1033,12 +1034,17 @@ BEGIN
            quote_literal(current_setting('lc_collate')) || ');';
  END
  $$;
+RESET client_min_messages;
  CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
  ERROR:  parameter "locale" must be specified
  CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
  ERROR:  ICU locale "nonsense-nowhere" has unknown language "nonsense"
  HINT:  To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
+CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
+ERROR:  could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
  SET icu_validation_level = WARNING;
+CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
+WARNING:  could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
  CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
  WARNING:  ICU locale "nonsense-nowhere" has unknown language "nonsense"
  HINT:  To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
@@ -1169,14 +1175,18 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
  
  -- test ICU collation customization
  -- test the attributes handled by icu_set_collation_attributes()
+SET client_min_messages=WARNING;
  CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+RESET client_min_messages;
  SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
   ?column? | ?column? 
  ----------+----------
   t        | t
  (1 row)
  
+SET client_min_messages=WARNING;
  CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+RESET client_min_messages;
  SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
   ?column? | ?column? 
  ----------+----------
@@ -1184,7 +1194,9 @@ SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll
  (1 row)
  
  CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
+NOTICE:  using standard form "und-u-kf-lower" for locale "@colCaseFirst=lower"
  CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
+NOTICE:  using standard form "und-u-kf-upper" for locale "@colCaseFirst=upper"
  SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
   ?column? | ?column? 
  ----------+----------
@@ -1192,13 +1204,16 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol
  (1 row)
  
  CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
+NOTICE:  using standard form "und-u-ka-shifted" for locale "@colAlternate=shifted"
  SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
   ?column? | ?column? 
  ----------+----------
   t        | t
  (1 row)
  
+SET client_min_messages=WARNING;
  CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+RESET client_min_messages;
  SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
   ?column? | ?column? 
  ----------+----------
@@ -1206,10 +1221,12 @@ SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_n
  (1 row)
  
  CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
-ERROR:  could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR
+NOTICE:  using standard form "und-u-kn-lower" for locale "@colNumeric=lower"
+ERROR:  could not open collator for locale "und-u-kn-lower": U_ILLEGAL_ARGUMENT_ERROR
  -- test that attributes not handled by icu_set_collation_attributes()
  -- (handled by ucol_open() directly) also work
  CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=phonebook');
+NOTICE:  using standard form "de-u-co-phonebk" for locale "de@collation=phonebook"
  SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;
   ?column? | ?column? 
  ----------+----------
@@ -1218,6 +1235,7 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
  
  -- rules
  CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g');
+NOTICE:  using standard form "und" for locale ""
  CREATE TABLE test7 (a text);
  -- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax
  INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green');
@@ -1245,10 +1263,13 @@ SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1;
  
  DROP TABLE test7;
  CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!');
-ERROR:  could not open collator for locale "" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR
+NOTICE:  using standard form "und" for locale ""
+ERROR:  could not open collator for locale "und" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR
  -- nondeterministic collations
  CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);
+NOTICE:  using standard form "und" for locale ""
  CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false);
+NOTICE:  using standard form "und" for locale ""
  CREATE TABLE test6 (a int, b text);
  -- same string in different normal forms
  INSERT INTO test6 VALUES (1, U&'\00E4bc');
@@ -1298,7 +1319,9 @@ SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet;
  (2 rows)
  
  CREATE COLLATION case_sensitive (provider = icu, locale = '');
+NOTICE:  using standard form "und" for locale ""
  CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false);
+NOTICE:  using standard form "und-u-ks-level2" for locale "@colStrength=secondary"
  SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
   ?column? | ?column? 
  ----------+----------
@@ -1780,7 +1803,9 @@ SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
  (2 rows)
  
  -- accents
+SET client_min_messages=WARNING;
  CREATE COLLATION ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes', deterministic = false);
+RESET client_min_messages;
  CREATE TABLE test4 (a int, b text);
  INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
  SELECT * FROM test4 WHERE b = 'cote';
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql

index 95d96f2eb89230c9cd38263cae652288a29a875c..85e26951b626323adf575372dcedf77f886a7702 100644 (file)
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -357,6 +357,8 @@ CREATE ROLE regress_test_role;
  CREATE SCHEMA test_schema;
  
  -- We need to do this this way to cope with varying names for encodings:
+SET client_min_messages TO WARNING;
+
  do $$
  BEGIN
    EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -370,9 +372,14 @@ BEGIN
            quote_literal(current_setting('lc_collate')) || ');';
  END
  $$;
+
+RESET client_min_messages;
+
  CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
  CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
+CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
  SET icu_validation_level = WARNING;
+CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
  CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
  RESET icu_validation_level;
  
@@ -457,10 +464,14 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
  
  -- test the attributes handled by icu_set_collation_attributes()
  
+SET client_min_messages=WARNING;
  CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+RESET client_min_messages;
  SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
  
+SET client_min_messages=WARNING;
  CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+RESET client_min_messages;
  SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
  
  CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
@@ -470,7 +481,9 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol
  CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
  SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
  
+SET client_min_messages=WARNING;
  CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+RESET client_min_messages;
  SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
  
  CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
@@ -659,7 +672,9 @@ INSERT INTO inner_text VALUES ('a', NULL);
  SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
  
  -- accents
+SET client_min_messages=WARNING;
  CREATE COLLATION ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes', deterministic = false);
+RESET client_min_messages;
  
  CREATE TABLE test4 (a int, b text);
  INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
author	Jeff Davis <[email protected]>
	Tue, 4 Apr 2023 17:28:08 +0000 (10:28 -0700)
committer	Jeff Davis <[email protected]>
	Tue, 4 Apr 2023 17:38:58 +0000 (10:38 -0700)
doc/src/sgml/charset.sgml		patch \| blob \| blame \| history
src/backend/commands/collationcmds.c		patch \| blob \| blame \| history
src/backend/commands/dbcommands.c		patch \| blob \| blame \| history
src/backend/utils/adt/pg_locale.c		patch \| blob \| blame \| history
src/bin/initdb/initdb.c		patch \| blob \| blame \| history
src/bin/initdb/t/001_initdb.pl		patch \| blob \| blame \| history
src/bin/pg_dump/t/002_pg_dump.pl		patch \| blob \| blame \| history
src/include/utils/pg_locale.h		patch \| blob \| blame \| history
src/test/regress/expected/collate.icu.utf8.out		patch \| blob \| blame \| history
src/test/regress/sql/collate.icu.utf8.sql		patch \| blob \| blame \| history