Move ICU-specific code from pg_locale.c into pg_locale_icu.c.
authorJeff Davis <[email protected]>
Mon, 14 Oct 2024 19:13:20 +0000 (12:13 -0700)
committerJeff Davis <[email protected]>
Mon, 14 Oct 2024 19:13:26 +0000 (12:13 -0700)
Discussion: https://postgr.es/m/flat/2830211e1b6e6a2e26d845780b03e125281ea17b[email protected]

src/backend/utils/adt/Makefile
src/backend/utils/adt/meson.build
src/backend/utils/adt/pg_locale.c
src/backend/utils/adt/pg_locale_icu.c [new file with mode: 0644]

index edb09d4e35622346ecb943d82edaa2c173d12004..bb416c867441abd7c83eac3a5e184f924cd77473 100644 (file)
@@ -79,6 +79,7 @@ OBJS = \
        orderedsetaggs.o \
        partitionfuncs.o \
        pg_locale.o \
+       pg_locale_icu.o \
        pg_lsn.o \
        pg_upgrade_support.o \
        pgstatfuncs.o \
index 8c6fc80c373b260ae76485c1ccfa868ec5ebb824..19a27465a294f523459779fb2cda883b2ea9688f 100644 (file)
@@ -66,6 +66,7 @@ backend_sources += files(
   'orderedsetaggs.c',
   'partitionfuncs.c',
   'pg_locale.c',
+  'pg_locale_icu.c',
   'pg_lsn.c',
   'pg_upgrade_support.c',
   'pgstatfuncs.c',
index b4954959f9891f2a23f3cb8a7d70d46d6af8ecb2..313200009b8833590cc29484d56b2b0e94a9b6fb 100644 (file)
 #include "utils/pg_locale.h"
 #include "utils/syscache.h"
 
-#ifdef USE_ICU
-#include <unicode/ucnv.h>
-#include <unicode/ustring.h>
-#endif
-
 #ifdef __GLIBC__
 #include <gnu/libc-version.h>
 #endif
 
 #define                MAX_L10N_DATA           80
 
+/* pg_locale_icu.c */
+#ifdef USE_ICU
+extern UCollator *pg_ucol_open(const char *loc_str);
+extern UCollator *make_icu_collator(const char *iculocstr,
+                                                                       const char *icurules);
+extern int     strncoll_icu(const char *arg1, ssize_t len1,
+                                                const char *arg2, ssize_t len2,
+                                                pg_locale_t locale);
+extern size_t strnxfrm_icu(char *dest, size_t destsize,
+                                                  const char *src, ssize_t srclen,
+                                                  pg_locale_t locale);
+extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
+                                                                 const char *src, ssize_t srclen,
+                                                                 pg_locale_t locale);
+#endif
 
 /* GUC settings */
 char      *locale_messages;
@@ -163,25 +173,6 @@ static pg_locale_t last_collation_cache_locale = NULL;
 static char *IsoLocaleName(const char *);
 #endif
 
-#ifdef USE_ICU
-/*
- * Converter object for converting between ICU's UChar strings and C strings
- * in database encoding.  Since the database encoding doesn't change, we only
- * need one of these per session.
- */
-static UConverter *icu_converter = NULL;
-
-static UCollator *pg_ucol_open(const char *loc_str);
-static void init_icu_converter(void);
-static size_t uchar_length(UConverter *converter,
-                                                  const char *str, int32_t len);
-static int32_t uchar_convert(UConverter *converter,
-                                                        UChar *dest, int32_t destlen,
-                                                        const char *src, int32_t srclen);
-static void icu_set_collation_attributes(UCollator *collator, const char *loc,
-                                                                                UErrorCode *status);
-#endif
-
 /*
  * POSIX doesn't define _l-variants of these functions, but several systems
  * have them.  We provide our own replacements here.
@@ -1391,76 +1382,6 @@ make_libc_collator(const char *collate, const char *ctype)
        return loc;
 }
 
-/*
- * Create a UCollator with the given locale string and rules.
- *
- * Ensure that no path leaks a UCollator.
- */
-#ifdef USE_ICU
-static UCollator *
-make_icu_collator(const char *iculocstr, const char *icurules)
-{
-       if (!icurules)
-       {
-               /* simple case without rules */
-               return pg_ucol_open(iculocstr);
-       }
-       else
-       {
-               UCollator  *collator_std_rules;
-               UCollator  *collator_all_rules;
-               const UChar *std_rules;
-               UChar      *my_rules;
-               UChar      *all_rules;
-               int32_t         length;
-               int32_t         total;
-               UErrorCode      status;
-
-               /*
-                * If rules are specified, we extract the rules of the standard
-                * collation, add our own rules, and make a new collator with the
-                * combined rules.
-                */
-               icu_to_uchar(&my_rules, icurules, strlen(icurules));
-
-               collator_std_rules = pg_ucol_open(iculocstr);
-
-               std_rules = ucol_getRules(collator_std_rules, &length);
-
-               total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
-
-               /* avoid leaking collator on OOM */
-               all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
-               if (!all_rules)
-               {
-                       ucol_close(collator_std_rules);
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_OUT_OF_MEMORY),
-                                        errmsg("out of memory")));
-               }
-
-               u_strcpy(all_rules, std_rules);
-               u_strcat(all_rules, my_rules);
-
-               ucol_close(collator_std_rules);
-
-               status = U_ZERO_ERROR;
-               collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
-                                                                                       UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
-                                                                                       NULL, &status);
-               if (U_FAILURE(status))
-               {
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
-                                                       iculocstr, icurules, u_errorName(status))));
-               }
-
-               return collator_all_rules;
-       }
-}
-#endif                                                 /* not USE_ICU */
-
 /*
  * Initialize default_locale with database locale settings.
  */
@@ -1969,104 +1890,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
        return result;
 }
 
-#ifdef USE_ICU
-
-/*
- * strncoll_icu_no_utf8
- *
- * Convert the arguments from the database encoding to UChar strings, then
- * call ucol_strcoll(). An argument length of -1 means that the string is
- * NUL-terminated.
- *
- * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
- * caller should call that instead.
- */
-static int
-strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
-                                        const char *arg2, ssize_t len2, pg_locale_t locale)
-{
-       char            sbuf[TEXTBUFLEN];
-       char       *buf = sbuf;
-       int32_t         ulen1;
-       int32_t         ulen2;
-       size_t          bufsize1;
-       size_t          bufsize2;
-       UChar      *uchar1,
-                          *uchar2;
-       int                     result;
-
-       Assert(locale->provider == COLLPROVIDER_ICU);
-#ifdef HAVE_UCOL_STRCOLLUTF8
-       Assert(GetDatabaseEncoding() != PG_UTF8);
-#endif
-
-       init_icu_converter();
-
-       ulen1 = uchar_length(icu_converter, arg1, len1);
-       ulen2 = uchar_length(icu_converter, arg2, len2);
-
-       bufsize1 = (ulen1 + 1) * sizeof(UChar);
-       bufsize2 = (ulen2 + 1) * sizeof(UChar);
-
-       if (bufsize1 + bufsize2 > TEXTBUFLEN)
-               buf = palloc(bufsize1 + bufsize2);
-
-       uchar1 = (UChar *) buf;
-       uchar2 = (UChar *) (buf + bufsize1);
-
-       ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
-       ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
-
-       result = ucol_strcoll(locale->info.icu.ucol,
-                                                 uchar1, ulen1,
-                                                 uchar2, ulen2);
-
-       if (buf != sbuf)
-               pfree(buf);
-
-       return result;
-}
-
-/*
- * strncoll_icu
- *
- * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
- * database encoding. An argument length of -1 means the string is
- * NUL-terminated.
- */
-static int
-strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
-                        pg_locale_t locale)
-{
-       int                     result;
-
-       Assert(locale->provider == COLLPROVIDER_ICU);
-
-#ifdef HAVE_UCOL_STRCOLLUTF8
-       if (GetDatabaseEncoding() == PG_UTF8)
-       {
-               UErrorCode      status;
-
-               status = U_ZERO_ERROR;
-               result = ucol_strcollUTF8(locale->info.icu.ucol,
-                                                                 arg1, len1,
-                                                                 arg2, len2,
-                                                                 &status);
-               if (U_FAILURE(status))
-                       ereport(ERROR,
-                                       (errmsg("collation failed: %s", u_errorName(status))));
-       }
-       else
-#endif
-       {
-               result = strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
-       }
-
-       return result;
-}
-
-#endif                                                 /* USE_ICU */
-
 /*
  * pg_strcoll
  *
@@ -2162,143 +1985,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
        return result;
 }
 
-#ifdef USE_ICU
-
-/* 'srclen' of -1 means the strings are NUL-terminated */
-static size_t
-strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
-                        pg_locale_t locale)
-{
-       char            sbuf[TEXTBUFLEN];
-       char       *buf = sbuf;
-       UChar      *uchar;
-       int32_t         ulen;
-       size_t          uchar_bsize;
-       Size            result_bsize;
-
-       Assert(locale->provider == COLLPROVIDER_ICU);
-
-       init_icu_converter();
-
-       ulen = uchar_length(icu_converter, src, srclen);
-
-       uchar_bsize = (ulen + 1) * sizeof(UChar);
-
-       if (uchar_bsize > TEXTBUFLEN)
-               buf = palloc(uchar_bsize);
-
-       uchar = (UChar *) buf;
-
-       ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
-
-       result_bsize = ucol_getSortKey(locale->info.icu.ucol,
-                                                                  uchar, ulen,
-                                                                  (uint8_t *) dest, destsize);
-
-       /*
-        * ucol_getSortKey() counts the nul-terminator in the result length, but
-        * this function should not.
-        */
-       Assert(result_bsize > 0);
-       result_bsize--;
-
-       if (buf != sbuf)
-               pfree(buf);
-
-       /* if dest is defined, it should be nul-terminated */
-       Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
-
-       return result_bsize;
-}
-
-/* 'srclen' of -1 means the strings are NUL-terminated */
-static size_t
-strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
-                                                       const char *src, ssize_t srclen,
-                                                       pg_locale_t locale)
-{
-       char            sbuf[TEXTBUFLEN];
-       char       *buf = sbuf;
-       UCharIterator iter;
-       uint32_t        state[2];
-       UErrorCode      status;
-       int32_t         ulen = -1;
-       UChar      *uchar = NULL;
-       size_t          uchar_bsize;
-       Size            result_bsize;
-
-       Assert(locale->provider == COLLPROVIDER_ICU);
-       Assert(GetDatabaseEncoding() != PG_UTF8);
-
-       init_icu_converter();
-
-       ulen = uchar_length(icu_converter, src, srclen);
-
-       uchar_bsize = (ulen + 1) * sizeof(UChar);
-
-       if (uchar_bsize > TEXTBUFLEN)
-               buf = palloc(uchar_bsize);
-
-       uchar = (UChar *) buf;
-
-       ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
-
-       uiter_setString(&iter, uchar, ulen);
-       state[0] = state[1] = 0;        /* won't need that again */
-       status = U_ZERO_ERROR;
-       result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
-                                                                               &iter,
-                                                                               state,
-                                                                               (uint8_t *) dest,
-                                                                               destsize,
-                                                                               &status);
-       if (U_FAILURE(status))
-               ereport(ERROR,
-                               (errmsg("sort key generation failed: %s",
-                                               u_errorName(status))));
-
-       return result_bsize;
-}
-
-/* 'srclen' of -1 means the strings are NUL-terminated */
-static size_t
-strnxfrm_prefix_icu(char *dest, size_t destsize,
-                                       const char *src, ssize_t srclen,
-                                       pg_locale_t locale)
-{
-       size_t          result;
-
-       Assert(locale->provider == COLLPROVIDER_ICU);
-
-       if (GetDatabaseEncoding() == PG_UTF8)
-       {
-               UCharIterator iter;
-               uint32_t        state[2];
-               UErrorCode      status;
-
-               uiter_setUTF8(&iter, src, srclen);
-               state[0] = state[1] = 0;        /* won't need that again */
-               status = U_ZERO_ERROR;
-               result = ucol_nextSortKeyPart(locale->info.icu.ucol,
-                                                                         &iter,
-                                                                         state,
-                                                                         (uint8_t *) dest,
-                                                                         destsize,
-                                                                         &status);
-               if (U_FAILURE(status))
-                       ereport(ERROR,
-                                       (errmsg("sort key generation failed: %s",
-                                                       u_errorName(status))));
-       }
-       else
-               result = strnxfrm_prefix_icu_no_utf8(dest, destsize, src, srclen,
-                                                                                        locale);
-
-       return result;
-}
-
-#endif
-
 /*
  * Return true if the collation provider supports pg_strxfrm() and
  * pg_strnxfrm(); otherwise false.
@@ -2509,354 +2195,6 @@ builtin_validate_locale(int encoding, const char *locale)
 }
 
 
-#ifdef USE_ICU
-
-/*
- * Wrapper around ucol_open() to handle API differences for older ICU
- * versions.
- *
- * Ensure that no path leaks a UCollator.
- */
-static UCollator *
-pg_ucol_open(const char *loc_str)
-{
-       UCollator  *collator;
-       UErrorCode      status;
-       const char *orig_str = loc_str;
-       char       *fixed_str = NULL;
-
-       /*
-        * Must never open default collator, because it depends on the environment
-        * and may change at any time. Should not happen, but check here to catch
-        * bugs that might be hard to catch otherwise.
-        *
-        * NB: the default collator is not the same as the collator for the root
-        * locale. The root locale may be specified as the empty string, "und", or
-        * "root". The default collator is opened by passing NULL to ucol_open().
-        */
-       if (loc_str == NULL)
-               elog(ERROR, "opening default collator is not supported");
-
-       /*
-        * In ICU versions 54 and earlier, "und" is not a recognized spelling of
-        * the root locale. If the first component of the locale is "und", replace
-        * with "root" before opening.
-        */
-       if (U_ICU_VERSION_MAJOR_NUM < 55)
-       {
-               char            lang[ULOC_LANG_CAPACITY];
-
-               status = U_ZERO_ERROR;
-               uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
-               if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
-               {
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("could not get language from locale \"%s\": %s",
-                                                       loc_str, u_errorName(status))));
-               }
-
-               if (strcmp(lang, "und") == 0)
-               {
-                       const char *remainder = loc_str + strlen("und");
-
-                       fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
-                       strcpy(fixed_str, "root");
-                       strcat(fixed_str, remainder);
-
-                       loc_str = fixed_str;
-               }
-       }
-
-       status = U_ZERO_ERROR;
-       collator = ucol_open(loc_str, &status);
-       if (U_FAILURE(status))
-               ereport(ERROR,
-               /* use original string for error report */
-                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                errmsg("could not open collator for locale \"%s\": %s",
-                                               orig_str, u_errorName(status))));
-
-       if (U_ICU_VERSION_MAJOR_NUM < 54)
-       {
-               status = U_ZERO_ERROR;
-               icu_set_collation_attributes(collator, loc_str, &status);
-
-               /*
-                * Pretend the error came from ucol_open(), for consistent error
-                * message across ICU versions.
-                */
-               if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
-               {
-                       ucol_close(collator);
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("could not open collator for locale \"%s\": %s",
-                                                       orig_str, u_errorName(status))));
-               }
-       }
-
-       if (fixed_str != NULL)
-               pfree(fixed_str);
-
-       return collator;
-}
-
-static void
-init_icu_converter(void)
-{
-       const char *icu_encoding_name;
-       UErrorCode      status;
-       UConverter *conv;
-
-       if (icu_converter)
-               return;                                 /* already done */
-
-       icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
-       if (!icu_encoding_name)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                errmsg("encoding \"%s\" not supported by ICU",
-                                               pg_encoding_to_char(GetDatabaseEncoding()))));
-
-       status = U_ZERO_ERROR;
-       conv = ucnv_open(icu_encoding_name, &status);
-       if (U_FAILURE(status))
-               ereport(ERROR,
-                               (errmsg("could not open ICU converter for encoding \"%s\": %s",
-                                               icu_encoding_name, u_errorName(status))));
-
-       icu_converter = conv;
-}
-
-/*
- * Find length, in UChars, of given string if converted to UChar string.
- *
- * A length of -1 indicates that the input string is NUL-terminated.
- */
-static size_t
-uchar_length(UConverter *converter, const char *str, int32_t len)
-{
-       UErrorCode      status = U_ZERO_ERROR;
-       int32_t         ulen;
-
-       ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
-       if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
-               ereport(ERROR,
-                               (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
-       return ulen;
-}
-
-/*
- * Convert the given source string into a UChar string, stored in dest, and
- * return the length (in UChars).
- *
- * A srclen of -1 indicates that the input string is NUL-terminated.
- */
-static int32_t
-uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
-                         const char *src, int32_t srclen)
-{
-       UErrorCode      status = U_ZERO_ERROR;
-       int32_t         ulen;
-
-       status = U_ZERO_ERROR;
-       ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
-       if (U_FAILURE(status))
-               ereport(ERROR,
-                               (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
-       return ulen;
-}
-
-/*
- * Convert a string in the database encoding into a string of UChars.
- *
- * The source string at buff is of length nbytes
- * (it needn't be nul-terminated)
- *
- * *buff_uchar receives a pointer to the palloc'd result string, and
- * the function's result is the number of UChars generated.
- *
- * The result string is nul-terminated, though most callers rely on the
- * result length instead.
- */
-int32_t
-icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
-{
-       int32_t         len_uchar;
-
-       init_icu_converter();
-
-       len_uchar = uchar_length(icu_converter, buff, nbytes);
-
-       *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
-       len_uchar = uchar_convert(icu_converter,
-                                                         *buff_uchar, len_uchar + 1, buff, nbytes);
-
-       return len_uchar;
-}
-
-/*
- * Convert a string of UChars into the database encoding.
- *
- * The source string at buff_uchar is of length len_uchar
- * (it needn't be nul-terminated)
- *
- * *result receives a pointer to the palloc'd result string, and the
- * function's result is the number of bytes generated (not counting nul).
- *
- * The result string is nul-terminated.
- */
-int32_t
-icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
-{
-       UErrorCode      status;
-       int32_t         len_result;
-
-       init_icu_converter();
-
-       status = U_ZERO_ERROR;
-       len_result = ucnv_fromUChars(icu_converter, NULL, 0,
-                                                                buff_uchar, len_uchar, &status);
-       if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
-               ereport(ERROR,
-                               (errmsg("%s failed: %s", "ucnv_fromUChars",
-                                               u_errorName(status))));
-
-       *result = palloc(len_result + 1);
-
-       status = U_ZERO_ERROR;
-       len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
-                                                                buff_uchar, len_uchar, &status);
-       if (U_FAILURE(status) ||
-               status == U_STRING_NOT_TERMINATED_WARNING)
-               ereport(ERROR,
-                               (errmsg("%s failed: %s", "ucnv_fromUChars",
-                                               u_errorName(status))));
-
-       return len_result;
-}
-
-/*
- * Parse collation attributes from the given locale string and apply them to
- * the open collator.
- *
- * First, the locale string is canonicalized to an ICU format locale ID such
- * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
- * the key-value arguments.
- *
- * Starting with ICU version 54, the attributes are processed automatically by
- * ucol_open(), so this is only necessary for emulating this behavior on older
- * versions.
- */
-pg_attribute_unused()
-static void
-icu_set_collation_attributes(UCollator *collator, const char *loc,
-                                                        UErrorCode *status)
-{
-       int32_t         len;
-       char       *icu_locale_id;
-       char       *lower_str;
-       char       *str;
-       char       *token;
-
-       /*
-        * The input locale may be a BCP 47 language tag, e.g.
-        * "und-u-kc-ks-level1", which expresses the same attributes in a
-        * different form. It will be converted to the equivalent ICU format
-        * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
-        * uloc_canonicalize().
-        */
-       *status = U_ZERO_ERROR;
-       len = uloc_canonicalize(loc, NULL, 0, status);
-       icu_locale_id = palloc(len + 1);
-       *status = U_ZERO_ERROR;
-       len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
-       if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
-               return;
-
-       lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
-
-       pfree(icu_locale_id);
-
-       str = strchr(lower_str, '@');
-       if (!str)
-               return;
-       str++;
-
-       while ((token = strsep(&str, ";")))
-       {
-               char       *e = strchr(token, '=');
-
-               if (e)
-               {
-                       char       *name;
-                       char       *value;
-                       UColAttribute uattr;
-                       UColAttributeValue uvalue;
-
-                       *status = U_ZERO_ERROR;
-
-                       *e = '\0';
-                       name = token;
-                       value = e + 1;
-
-                       /*
-                        * See attribute name and value lists in ICU i18n/coll.cpp
-                        */
-                       if (strcmp(name, "colstrength") == 0)
-                               uattr = UCOL_STRENGTH;
-                       else if (strcmp(name, "colbackwards") == 0)
-                               uattr = UCOL_FRENCH_COLLATION;
-                       else if (strcmp(name, "colcaselevel") == 0)
-                               uattr = UCOL_CASE_LEVEL;
-                       else if (strcmp(name, "colcasefirst") == 0)
-                               uattr = UCOL_CASE_FIRST;
-                       else if (strcmp(name, "colalternate") == 0)
-                               uattr = UCOL_ALTERNATE_HANDLING;
-                       else if (strcmp(name, "colnormalization") == 0)
-                               uattr = UCOL_NORMALIZATION_MODE;
-                       else if (strcmp(name, "colnumeric") == 0)
-                               uattr = UCOL_NUMERIC_COLLATION;
-                       else
-                               /* ignore if unknown */
-                               continue;
-
-                       if (strcmp(value, "primary") == 0)
-                               uvalue = UCOL_PRIMARY;
-                       else if (strcmp(value, "secondary") == 0)
-                               uvalue = UCOL_SECONDARY;
-                       else if (strcmp(value, "tertiary") == 0)
-                               uvalue = UCOL_TERTIARY;
-                       else if (strcmp(value, "quaternary") == 0)
-                               uvalue = UCOL_QUATERNARY;
-                       else if (strcmp(value, "identical") == 0)
-                               uvalue = UCOL_IDENTICAL;
-                       else if (strcmp(value, "no") == 0)
-                               uvalue = UCOL_OFF;
-                       else if (strcmp(value, "yes") == 0)
-                               uvalue = UCOL_ON;
-                       else if (strcmp(value, "shifted") == 0)
-                               uvalue = UCOL_SHIFTED;
-                       else if (strcmp(value, "non-ignorable") == 0)
-                               uvalue = UCOL_NON_IGNORABLE;
-                       else if (strcmp(value, "lower") == 0)
-                               uvalue = UCOL_LOWER_FIRST;
-                       else if (strcmp(value, "upper") == 0)
-                               uvalue = UCOL_UPPER_FIRST;
-                       else
-                       {
-                               *status = U_ILLEGAL_ARGUMENT_ERROR;
-                               break;
-                       }
-
-                       ucol_setAttribute(collator, uattr, uvalue, status);
-               }
-       }
-
-       pfree(lower_str);
-}
-#endif
 
 /*
  * Return the BCP47 language tag representation of the requested locale.
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
new file mode 100644 (file)
index 0000000..2a87e25
--- /dev/null
@@ -0,0 +1,708 @@
+/*-----------------------------------------------------------------------
+ *
+ * PostgreSQL locale utilities for ICU
+ *
+ * Portions Copyright (c) 2002-2024, PostgreSQL Global Development Group
+ *
+ * src/backend/utils/adt/pg_locale_icu.c
+ *
+ *-----------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#ifdef USE_ICU
+
+#include <unicode/ucnv.h>
+#include <unicode/ustring.h>
+
+#include "catalog/pg_collation.h"
+#include "mb/pg_wchar.h"
+#include "utils/formatting.h"
+#include "utils/pg_locale.h"
+
+/*
+ * Size of stack buffer to use for string transformations, used to avoid heap
+ * allocations in typical cases. This should be large enough that most strings
+ * will fit, but small enough that we feel comfortable putting it on the
+ * stack.
+ */
+#define                TEXTBUFLEN                      1024
+
+extern UCollator *pg_ucol_open(const char *loc_str);
+extern UCollator *make_icu_collator(const char *iculocstr,
+                                                                       const char *icurules);
+extern int     strncoll_icu(const char *arg1, ssize_t len1,
+                                                const char *arg2, ssize_t len2,
+                                                pg_locale_t locale);
+extern size_t strnxfrm_icu(char *dest, size_t destsize,
+                                                  const char *src, ssize_t srclen,
+                                                  pg_locale_t locale);
+extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
+                                                                 const char *src, ssize_t srclen,
+                                                                 pg_locale_t locale);
+
+/*
+ * Converter object for converting between ICU's UChar strings and C strings
+ * in database encoding.  Since the database encoding doesn't change, we only
+ * need one of these per session.
+ */
+static UConverter *icu_converter = NULL;
+
+static int     strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
+                                                                const char *arg2, ssize_t len2,
+                                                                pg_locale_t locale);
+static size_t strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
+                                                                                 const char *src, ssize_t srclen,
+                                                                                 pg_locale_t locale);
+static void init_icu_converter(void);
+static size_t uchar_length(UConverter *converter,
+                                                  const char *str, int32_t len);
+static int32_t uchar_convert(UConverter *converter,
+                                                        UChar *dest, int32_t destlen,
+                                                        const char *src, int32_t srclen);
+static void icu_set_collation_attributes(UCollator *collator, const char *loc,
+                                                                                UErrorCode *status);
+
+/*
+ * Wrapper around ucol_open() to handle API differences for older ICU
+ * versions.
+ *
+ * Ensure that no path leaks a UCollator.
+ */
+UCollator *
+pg_ucol_open(const char *loc_str)
+{
+       UCollator  *collator;
+       UErrorCode      status;
+       const char *orig_str = loc_str;
+       char       *fixed_str = NULL;
+
+       /*
+        * Must never open default collator, because it depends on the environment
+        * and may change at any time. Should not happen, but check here to catch
+        * bugs that might be hard to catch otherwise.
+        *
+        * NB: the default collator is not the same as the collator for the root
+        * locale. The root locale may be specified as the empty string, "und", or
+        * "root". The default collator is opened by passing NULL to ucol_open().
+        */
+       if (loc_str == NULL)
+               elog(ERROR, "opening default collator is not supported");
+
+       /*
+        * In ICU versions 54 and earlier, "und" is not a recognized spelling of
+        * the root locale. If the first component of the locale is "und", replace
+        * with "root" before opening.
+        */
+       if (U_ICU_VERSION_MAJOR_NUM < 55)
+       {
+               char            lang[ULOC_LANG_CAPACITY];
+
+               status = U_ZERO_ERROR;
+               uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+               if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+               {
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("could not get language from locale \"%s\": %s",
+                                                       loc_str, u_errorName(status))));
+               }
+
+               if (strcmp(lang, "und") == 0)
+               {
+                       const char *remainder = loc_str + strlen("und");
+
+                       fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
+                       strcpy(fixed_str, "root");
+                       strcat(fixed_str, remainder);
+
+                       loc_str = fixed_str;
+               }
+       }
+
+       status = U_ZERO_ERROR;
+       collator = ucol_open(loc_str, &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+               /* use original string for error report */
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("could not open collator for locale \"%s\": %s",
+                                               orig_str, u_errorName(status))));
+
+       if (U_ICU_VERSION_MAJOR_NUM < 54)
+       {
+               status = U_ZERO_ERROR;
+               icu_set_collation_attributes(collator, loc_str, &status);
+
+               /*
+                * Pretend the error came from ucol_open(), for consistent error
+                * message across ICU versions.
+                */
+               if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+               {
+                       ucol_close(collator);
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("could not open collator for locale \"%s\": %s",
+                                                       orig_str, u_errorName(status))));
+               }
+       }
+
+       if (fixed_str != NULL)
+               pfree(fixed_str);
+
+       return collator;
+}
+
+/*
+ * Create a UCollator with the given locale string and rules.
+ *
+ * Ensure that no path leaks a UCollator.
+ */
+UCollator *
+make_icu_collator(const char *iculocstr, const char *icurules)
+{
+       if (!icurules)
+       {
+               /* simple case without rules */
+               return pg_ucol_open(iculocstr);
+       }
+       else
+       {
+               UCollator  *collator_std_rules;
+               UCollator  *collator_all_rules;
+               const UChar *std_rules;
+               UChar      *my_rules;
+               UChar      *all_rules;
+               int32_t         length;
+               int32_t         total;
+               UErrorCode      status;
+
+               /*
+                * If rules are specified, we extract the rules of the standard
+                * collation, add our own rules, and make a new collator with the
+                * combined rules.
+                */
+               icu_to_uchar(&my_rules, icurules, strlen(icurules));
+
+               collator_std_rules = pg_ucol_open(iculocstr);
+
+               std_rules = ucol_getRules(collator_std_rules, &length);
+
+               total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
+
+               /* avoid leaking collator on OOM */
+               all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
+               if (!all_rules)
+               {
+                       ucol_close(collator_std_rules);
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_OUT_OF_MEMORY),
+                                        errmsg("out of memory")));
+               }
+
+               u_strcpy(all_rules, std_rules);
+               u_strcat(all_rules, my_rules);
+
+               ucol_close(collator_std_rules);
+
+               status = U_ZERO_ERROR;
+               collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
+                                                                                       UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
+                                                                                       NULL, &status);
+               if (U_FAILURE(status))
+               {
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
+                                                       iculocstr, icurules, u_errorName(status))));
+               }
+
+               return collator_all_rules;
+       }
+}
+
+/*
+ * strncoll_icu
+ *
+ * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
+ * database encoding. An argument length of -1 means the string is
+ * NUL-terminated.
+ */
+int
+strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
+                        pg_locale_t locale)
+{
+       int                     result;
+
+       Assert(locale->provider == COLLPROVIDER_ICU);
+
+#ifdef HAVE_UCOL_STRCOLLUTF8
+       if (GetDatabaseEncoding() == PG_UTF8)
+       {
+               UErrorCode      status;
+
+               status = U_ZERO_ERROR;
+               result = ucol_strcollUTF8(locale->info.icu.ucol,
+                                                                 arg1, len1,
+                                                                 arg2, len2,
+                                                                 &status);
+               if (U_FAILURE(status))
+                       ereport(ERROR,
+                                       (errmsg("collation failed: %s", u_errorName(status))));
+       }
+       else
+#endif
+       {
+               result = strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
+       }
+
+       return result;
+}
+
+/* 'srclen' of -1 means the strings are NUL-terminated */
+size_t
+strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+                        pg_locale_t locale)
+{
+       char            sbuf[TEXTBUFLEN];
+       char       *buf = sbuf;
+       UChar      *uchar;
+       int32_t         ulen;
+       size_t          uchar_bsize;
+       Size            result_bsize;
+
+       Assert(locale->provider == COLLPROVIDER_ICU);
+
+       init_icu_converter();
+
+       ulen = uchar_length(icu_converter, src, srclen);
+
+       uchar_bsize = (ulen + 1) * sizeof(UChar);
+
+       if (uchar_bsize > TEXTBUFLEN)
+               buf = palloc(uchar_bsize);
+
+       uchar = (UChar *) buf;
+
+       ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
+
+       result_bsize = ucol_getSortKey(locale->info.icu.ucol,
+                                                                  uchar, ulen,
+                                                                  (uint8_t *) dest, destsize);
+
+       /*
+        * ucol_getSortKey() counts the nul-terminator in the result length, but
+        * this function should not.
+        */
+       Assert(result_bsize > 0);
+       result_bsize--;
+
+       if (buf != sbuf)
+               pfree(buf);
+
+       /* if dest is defined, it should be nul-terminated */
+       Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
+
+       return result_bsize;
+}
+
+/* 'srclen' of -1 means the strings are NUL-terminated */
+size_t
+strnxfrm_prefix_icu(char *dest, size_t destsize,
+                                       const char *src, ssize_t srclen,
+                                       pg_locale_t locale)
+{
+       size_t          result;
+
+       Assert(locale->provider == COLLPROVIDER_ICU);
+
+       if (GetDatabaseEncoding() == PG_UTF8)
+       {
+               UCharIterator iter;
+               uint32_t        state[2];
+               UErrorCode      status;
+
+               uiter_setUTF8(&iter, src, srclen);
+               state[0] = state[1] = 0;        /* won't need that again */
+               status = U_ZERO_ERROR;
+               result = ucol_nextSortKeyPart(locale->info.icu.ucol,
+                                                                         &iter,
+                                                                         state,
+                                                                         (uint8_t *) dest,
+                                                                         destsize,
+                                                                         &status);
+               if (U_FAILURE(status))
+                       ereport(ERROR,
+                                       (errmsg("sort key generation failed: %s",
+                                                       u_errorName(status))));
+       }
+       else
+               result = strnxfrm_prefix_icu_no_utf8(dest, destsize, src, srclen,
+                                                                                        locale);
+
+       return result;
+}
+
+/*
+ * Convert a string in the database encoding into a string of UChars.
+ *
+ * The source string at buff is of length nbytes
+ * (it needn't be nul-terminated)
+ *
+ * *buff_uchar receives a pointer to the palloc'd result string, and
+ * the function's result is the number of UChars generated.
+ *
+ * The result string is nul-terminated, though most callers rely on the
+ * result length instead.
+ */
+int32_t
+icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
+{
+       int32_t         len_uchar;
+
+       init_icu_converter();
+
+       len_uchar = uchar_length(icu_converter, buff, nbytes);
+
+       *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
+       len_uchar = uchar_convert(icu_converter,
+                                                         *buff_uchar, len_uchar + 1, buff, nbytes);
+
+       return len_uchar;
+}
+
+/*
+ * Convert a string of UChars into the database encoding.
+ *
+ * The source string at buff_uchar is of length len_uchar
+ * (it needn't be nul-terminated)
+ *
+ * *result receives a pointer to the palloc'd result string, and the
+ * function's result is the number of bytes generated (not counting nul).
+ *
+ * The result string is nul-terminated.
+ */
+int32_t
+icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
+{
+       UErrorCode      status;
+       int32_t         len_result;
+
+       init_icu_converter();
+
+       status = U_ZERO_ERROR;
+       len_result = ucnv_fromUChars(icu_converter, NULL, 0,
+                                                                buff_uchar, len_uchar, &status);
+       if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+               ereport(ERROR,
+                               (errmsg("%s failed: %s", "ucnv_fromUChars",
+                                               u_errorName(status))));
+
+       *result = palloc(len_result + 1);
+
+       status = U_ZERO_ERROR;
+       len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+                                                                buff_uchar, len_uchar, &status);
+       if (U_FAILURE(status) ||
+               status == U_STRING_NOT_TERMINATED_WARNING)
+               ereport(ERROR,
+                               (errmsg("%s failed: %s", "ucnv_fromUChars",
+                                               u_errorName(status))));
+
+       return len_result;
+}
+
+/*
+ * strncoll_icu_no_utf8
+ *
+ * Convert the arguments from the database encoding to UChar strings, then
+ * call ucol_strcoll(). An argument length of -1 means that the string is
+ * NUL-terminated.
+ *
+ * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
+ * caller should call that instead.
+ */
+static int
+strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
+                                        const char *arg2, ssize_t len2, pg_locale_t locale)
+{
+       char            sbuf[TEXTBUFLEN];
+       char       *buf = sbuf;
+       int32_t         ulen1;
+       int32_t         ulen2;
+       size_t          bufsize1;
+       size_t          bufsize2;
+       UChar      *uchar1,
+                          *uchar2;
+       int                     result;
+
+       Assert(locale->provider == COLLPROVIDER_ICU);
+#ifdef HAVE_UCOL_STRCOLLUTF8
+       Assert(GetDatabaseEncoding() != PG_UTF8);
+#endif
+
+       init_icu_converter();
+
+       ulen1 = uchar_length(icu_converter, arg1, len1);
+       ulen2 = uchar_length(icu_converter, arg2, len2);
+
+       bufsize1 = (ulen1 + 1) * sizeof(UChar);
+       bufsize2 = (ulen2 + 1) * sizeof(UChar);
+
+       if (bufsize1 + bufsize2 > TEXTBUFLEN)
+               buf = palloc(bufsize1 + bufsize2);
+
+       uchar1 = (UChar *) buf;
+       uchar2 = (UChar *) (buf + bufsize1);
+
+       ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
+       ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
+
+       result = ucol_strcoll(locale->info.icu.ucol,
+                                                 uchar1, ulen1,
+                                                 uchar2, ulen2);
+
+       if (buf != sbuf)
+               pfree(buf);
+
+       return result;
+}
+
+/* 'srclen' of -1 means the strings are NUL-terminated */
+static size_t
+strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
+                                                       const char *src, ssize_t srclen,
+                                                       pg_locale_t locale)
+{
+       char            sbuf[TEXTBUFLEN];
+       char       *buf = sbuf;
+       UCharIterator iter;
+       uint32_t        state[2];
+       UErrorCode      status;
+       int32_t         ulen = -1;
+       UChar      *uchar = NULL;
+       size_t          uchar_bsize;
+       Size            result_bsize;
+
+       Assert(locale->provider == COLLPROVIDER_ICU);
+       Assert(GetDatabaseEncoding() != PG_UTF8);
+
+       init_icu_converter();
+
+       ulen = uchar_length(icu_converter, src, srclen);
+
+       uchar_bsize = (ulen + 1) * sizeof(UChar);
+
+       if (uchar_bsize > TEXTBUFLEN)
+               buf = palloc(uchar_bsize);
+
+       uchar = (UChar *) buf;
+
+       ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
+
+       uiter_setString(&iter, uchar, ulen);
+       state[0] = state[1] = 0;        /* won't need that again */
+       status = U_ZERO_ERROR;
+       result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
+                                                                               &iter,
+                                                                               state,
+                                                                               (uint8_t *) dest,
+                                                                               destsize,
+                                                                               &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("sort key generation failed: %s",
+                                               u_errorName(status))));
+
+       return result_bsize;
+}
+
+static void
+init_icu_converter(void)
+{
+       const char *icu_encoding_name;
+       UErrorCode      status;
+       UConverter *conv;
+
+       if (icu_converter)
+               return;                                 /* already done */
+
+       icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
+       if (!icu_encoding_name)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("encoding \"%s\" not supported by ICU",
+                                               pg_encoding_to_char(GetDatabaseEncoding()))));
+
+       status = U_ZERO_ERROR;
+       conv = ucnv_open(icu_encoding_name, &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("could not open ICU converter for encoding \"%s\": %s",
+                                               icu_encoding_name, u_errorName(status))));
+
+       icu_converter = conv;
+}
+
+/*
+ * Find length, in UChars, of given string if converted to UChar string.
+ *
+ * A length of -1 indicates that the input string is NUL-terminated.
+ */
+static size_t
+uchar_length(UConverter *converter, const char *str, int32_t len)
+{
+       UErrorCode      status = U_ZERO_ERROR;
+       int32_t         ulen;
+
+       ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
+       if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+               ereport(ERROR,
+                               (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+       return ulen;
+}
+
+/*
+ * Convert the given source string into a UChar string, stored in dest, and
+ * return the length (in UChars).
+ *
+ * A srclen of -1 indicates that the input string is NUL-terminated.
+ */
+static int32_t
+uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
+                         const char *src, int32_t srclen)
+{
+       UErrorCode      status = U_ZERO_ERROR;
+       int32_t         ulen;
+
+       status = U_ZERO_ERROR;
+       ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
+       if (U_FAILURE(status))
+               ereport(ERROR,
+                               (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+       return ulen;
+}
+
+/*
+ * Parse collation attributes from the given locale string and apply them to
+ * the open collator.
+ *
+ * First, the locale string is canonicalized to an ICU format locale ID such
+ * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
+ * the key-value arguments.
+ *
+ * Starting with ICU version 54, the attributes are processed automatically by
+ * ucol_open(), so this is only necessary for emulating this behavior on older
+ * versions.
+ */
+pg_attribute_unused()
+static void
+icu_set_collation_attributes(UCollator *collator, const char *loc,
+                                                        UErrorCode *status)
+{
+       int32_t         len;
+       char       *icu_locale_id;
+       char       *lower_str;
+       char       *str;
+       char       *token;
+
+       /*
+        * The input locale may be a BCP 47 language tag, e.g.
+        * "und-u-kc-ks-level1", which expresses the same attributes in a
+        * different form. It will be converted to the equivalent ICU format
+        * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
+        * uloc_canonicalize().
+        */
+       *status = U_ZERO_ERROR;
+       len = uloc_canonicalize(loc, NULL, 0, status);
+       icu_locale_id = palloc(len + 1);
+       *status = U_ZERO_ERROR;
+       len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
+       if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
+               return;
+
+       lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
+
+       pfree(icu_locale_id);
+
+       str = strchr(lower_str, '@');
+       if (!str)
+               return;
+       str++;
+
+       while ((token = strsep(&str, ";")))
+       {
+               char       *e = strchr(token, '=');
+
+               if (e)
+               {
+                       char       *name;
+                       char       *value;
+                       UColAttribute uattr;
+                       UColAttributeValue uvalue;
+
+                       *status = U_ZERO_ERROR;
+
+                       *e = '\0';
+                       name = token;
+                       value = e + 1;
+
+                       /*
+                        * See attribute name and value lists in ICU i18n/coll.cpp
+                        */
+                       if (strcmp(name, "colstrength") == 0)
+                               uattr = UCOL_STRENGTH;
+                       else if (strcmp(name, "colbackwards") == 0)
+                               uattr = UCOL_FRENCH_COLLATION;
+                       else if (strcmp(name, "colcaselevel") == 0)
+                               uattr = UCOL_CASE_LEVEL;
+                       else if (strcmp(name, "colcasefirst") == 0)
+                               uattr = UCOL_CASE_FIRST;
+                       else if (strcmp(name, "colalternate") == 0)
+                               uattr = UCOL_ALTERNATE_HANDLING;
+                       else if (strcmp(name, "colnormalization") == 0)
+                               uattr = UCOL_NORMALIZATION_MODE;
+                       else if (strcmp(name, "colnumeric") == 0)
+                               uattr = UCOL_NUMERIC_COLLATION;
+                       else
+                               /* ignore if unknown */
+                               continue;
+
+                       if (strcmp(value, "primary") == 0)
+                               uvalue = UCOL_PRIMARY;
+                       else if (strcmp(value, "secondary") == 0)
+                               uvalue = UCOL_SECONDARY;
+                       else if (strcmp(value, "tertiary") == 0)
+                               uvalue = UCOL_TERTIARY;
+                       else if (strcmp(value, "quaternary") == 0)
+                               uvalue = UCOL_QUATERNARY;
+                       else if (strcmp(value, "identical") == 0)
+                               uvalue = UCOL_IDENTICAL;
+                       else if (strcmp(value, "no") == 0)
+                               uvalue = UCOL_OFF;
+                       else if (strcmp(value, "yes") == 0)
+                               uvalue = UCOL_ON;
+                       else if (strcmp(value, "shifted") == 0)
+                               uvalue = UCOL_SHIFTED;
+                       else if (strcmp(value, "non-ignorable") == 0)
+                               uvalue = UCOL_NON_IGNORABLE;
+                       else if (strcmp(value, "lower") == 0)
+                               uvalue = UCOL_LOWER_FIRST;
+                       else if (strcmp(value, "upper") == 0)
+                               uvalue = UCOL_UPPER_FIRST;
+                       else
+                       {
+                               *status = U_ILLEGAL_ARGUMENT_ERROR;
+                               break;
+                       }
+
+                       ucol_setAttribute(collator, uattr, uvalue, status);
+               }
+       }
+
+       pfree(lower_str);
+}
+
+#endif                                                 /* USE_ICU */