#include "catalog/pg_class_d.h"
#include "fe_utils/string_utils.h"
#include "pg_upgrade.h"
+#include "common/unicode_version.h"
static void check_new_cluster_is_empty(void);
static void check_is_install_user(ClusterInfo *cluster);
static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(void);
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
+static void check_for_unicode_update(ClusterInfo *cluster);
static void check_new_cluster_logical_replication_slots(void);
static void check_new_cluster_subscription_configuration(void);
static void check_old_cluster_for_valid_slots(void);
check_for_data_types_usage(&old_cluster);
+ /*
+ * Unicode updates can affect some objects that use expressions with
+ * functions dependent on Unicode.
+ */
+ check_for_unicode_update(&old_cluster);
+
/*
* PG 14 changed the function signature of encoding conversion functions.
* Conversions from older versions cannot be upgraded automatically
check_ok();
}
+/*
+ * Callback function for processing results of query for
+ * check_for_unicode_update()'s UpgradeTask. If the query returned any rows
+ * (i.e., the check failed), write the details to the report file.
+ */
+static void
+process_unicode_update(DbInfo *dbinfo, PGresult *res, void *arg)
+{
+ UpgradeTaskReport *report = (UpgradeTaskReport *) arg;
+ int ntups = PQntuples(res);
+ int i_reloid = PQfnumber(res, "reloid");
+ int i_nspname = PQfnumber(res, "nspname");
+ int i_relname = PQfnumber(res, "relname");
+
+ if (ntups == 0)
+ return;
+
+ if (report->file == NULL &&
+ (report->file = fopen_priv(report->path, "w")) == NULL)
+ pg_fatal("could not open file \"%s\": %m", report->path);
+
+ fprintf(report->file, "In database: %s\n", dbinfo->db_name);
+
+ for (int rowno = 0; rowno < ntups; rowno++)
+ fprintf(report->file, " (oid=%s) %s.%s\n",
+ PQgetvalue(res, rowno, i_reloid),
+ PQgetvalue(res, rowno, i_nspname),
+ PQgetvalue(res, rowno, i_relname));
+}
+
+/*
+ * Check if the Unicode version built into Postgres changed between the old
+ * cluster and the new cluster.
+ */
+static bool
+unicode_version_changed(ClusterInfo *cluster)
+{
+ PGconn *conn_template1 = connectToServer(cluster, "template1");
+ PGresult *res;
+ char *old_unicode_version;
+ bool unicode_updated;
+
+ res = executeQueryOrDie(conn_template1, "SELECT unicode_version()");
+ old_unicode_version = PQgetvalue(res, 0, 0);
+ unicode_updated = (strcmp(old_unicode_version, PG_UNICODE_VERSION) != 0);
+
+ PQclear(res);
+ PQfinish(conn_template1);
+
+ return unicode_updated;
+}
+
+/*
+ * check_for_unicode_update()
+ *
+ * Check if the version of Unicode in the old server and the new server
+ * differ. If so, check for indexes, partitioned tables, or constraints that
+ * use expressions with functions dependent on Unicode behavior.
+ */
+static void
+check_for_unicode_update(ClusterInfo *cluster)
+{
+ UpgradeTaskReport report;
+ UpgradeTask *task = upgrade_task_create();
+ const char *query;
+
+ /*
+ * The builtin provider did not exist prior to version 17. While there are
+ * still problems that could potentially be caught from earlier versions,
+ * such as an index on NORMALIZE(), we don't check for that here.
+ */
+ if (GET_MAJOR_VERSION(cluster->major_version) < 1700)
+ return;
+
+ prep_status("Checking for objects affected by Unicode update");
+
+ if (!unicode_version_changed(cluster))
+ {
+ check_ok();
+ return;
+ }
+
+ report.file = NULL;
+ snprintf(report.path, sizeof(report.path), "%s/%s",
+ log_opts.basedir,
+ "unicode_dependent_rels.txt");
+
+ query =
+ /* collations that use built-in Unicode for character semantics */
+ "WITH collations(collid) AS ( "
+ " SELECT oid FROM pg_collation "
+ " WHERE collprovider='b' AND colllocale IN ('C.UTF-8','PG_UNICODE_FAST') "
+ /* include default collation, if appropriate */
+ " UNION "
+ " SELECT 'pg_catalog.default'::regcollation FROM pg_database "
+ " WHERE datname = current_database() AND "
+ " datlocprovider='b' AND datlocale IN ('C.UTF-8','PG_UNICODE_FAST') "
+ "), "
+ /* functions that use built-in Unicode */
+ "functions(procid) AS ( "
+ " SELECT proc.oid FROM pg_proc proc "
+ " WHERE proname IN ('normalize','unicode_assigned','unicode_version','is_normalized') AND "
+ " pronamespace='pg_catalog'::regnamespace "
+ "), "
+ /* operators that use the input collation for character semantics */
+ "coll_operators(operid, procid, collid) AS ( "
+ " SELECT oper.oid, oper.oprcode, collid FROM pg_operator oper, collations "
+ " WHERE oprname IN ('~', '~*', '!~', '!~*', '~~*', '!~~*') AND "
+ " oprnamespace='pg_catalog'::regnamespace AND "
+ " oprright='text'::regtype "
+ "), "
+ /* functions that use the input collation for character semantics */
+ "coll_functions(procid, collid) AS ( "
+ " SELECT proc.oid, collid FROM pg_proc proc, collations "
+ " WHERE proname IN ('lower','initcap','upper') AND "
+ " pronamespace='pg_catalog'::regnamespace AND "
+ " proargtypes[0] = 'text'::regtype "
+ /* include functions behind the operators listed above */
+ " UNION "
+ " SELECT procid, collid FROM coll_operators "
+ "), "
+
+ /*
+ * Generate patterns to search a pg_node_tree for the above functions and
+ * operators.
+ */
+ "patterns(p) AS ( "
+ " SELECT '{FUNCEXPR :funcid ' || procid::text || '[ }]' FROM functions "
+ " UNION "
+ " SELECT '{OPEXPR :opno ' || operid::text || ' (:\\w+ \\w+ )*' || "
+ " ':inputcollid ' || collid::text || '[ }]' FROM coll_operators "
+ " UNION "
+ " SELECT '{FUNCEXPR :funcid ' || procid::text || ' (:\\w+ \\w+ )*' || "
+ " ':inputcollid ' || collid::text || '[ }]' FROM coll_functions "
+ ") "
+
+ /*
+ * Match the patterns against expressions used for relation contents.
+ */
+ "SELECT reloid, relkind, nspname, relname "
+ " FROM ( "
+ " SELECT conrelid "
+ " FROM pg_constraint, patterns WHERE conbin::text ~ p "
+ " UNION "
+ " SELECT indexrelid "
+ " FROM pg_index, patterns WHERE indexprs::text ~ p OR indpred::text ~ p "
+ " UNION "
+ " SELECT partrelid "
+ " FROM pg_partitioned_table, patterns WHERE partexprs::text ~ p "
+ " ) s(reloid), pg_class c, pg_namespace n, pg_database d "
+ " WHERE s.reloid = c.oid AND c.relnamespace = n.oid AND "
+ " d.datname = current_database() AND "
+ " d.encoding = pg_char_to_encoding('UTF8');";
+
+ upgrade_task_add_step(task, query,
+ process_unicode_update,
+ true, &report);
+ upgrade_task_run(task, cluster);
+ upgrade_task_free(task);
+
+ if (report.file)
+ {
+ fclose(report.file);
+ report_status(PG_WARNING, "warning");
+ pg_log(PG_WARNING, "Your installation contains relations that may be affected by a new version of Unicode.\n"
+ "A list of potentially-affected relations is in the file:\n"
+ " %s", report.path);
+ }
+ else
+ check_ok();
+}
+
/*
* check_new_cluster_logical_replication_slots()
*