pg_dump: Retrieve attribute statistics in batches.

author Nathan Bossart <[email protected]>

Fri, 4 Apr 2025 19:51:08 +0000 (14:51 -0500)

committer Nathan Bossart <[email protected]>

Fri, 4 Apr 2025 19:51:08 +0000 (14:51 -0500)
author Nathan Bossart <[email protected]>
Fri, 4 Apr 2025 19:51:08 +0000 (14:51 -0500)
committer Nathan Bossart <[email protected]>
Fri, 4 Apr 2025 19:51:08 +0000 (14:51 -0500)
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h

index 781f8fa1cc91e9e6024c69a3dc1e8dbd21bd69f2..9005b4253b402076fdb3fbff50fb34f5b9d5b3a3 100644 (file)
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -285,7 +285,10 @@ typedef int DumpId;
   * Function pointer prototypes for assorted callback methods.
   */
  
-typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
+/* forward declaration to avoid including pg_backup_archiver.h here */
+typedef struct _tocEntry TocEntry;
+
+typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg, const TocEntry *te);
  typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
  
  typedef void (*SetupWorkerPtrType) (Archive *AH);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c

index 2f902494e64e139066797f579cfe1fbc87061e3c..ed14843d182c8f0c84ce53700df2572d980f177f 100644 (file)
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2655,7 +2655,7 @@ WriteToc(ArchiveHandle *AH)
         }
         else if (te->defnDumper)
         {
-           char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+           char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
  
             te->defnLen = WriteStr(AH, defn);
             pg_free(defn);
@@ -3284,23 +3284,16 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
  
     /*
      * If statistics data is dependent on materialized view data, it must be
-    * deferred to RESTORE_PASS_POST_ACL.
+    * deferred to RESTORE_PASS_POST_ACL.  Those entries are already marked as
+    * SECTION_POST_DATA, and some other stats entries (e.g., index stats)
+    * will also be marked as SECTION_POST_DATA.  Additionally, our lookahead
+    * code in fetchAttributeStats() assumes that we dump all statistics data
+    * entries in TOC order.  To ensure this assumption holds, we move all
+    * statistics data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL.
      */
-   if (strcmp(te->desc, "STATISTICS DATA") == 0)
-   {
-       for (int i = 0; i < te->nDeps; i++)
-       {
-           DumpId      depid = te->dependencies[i];
-
-           if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
-           {
-               TocEntry   *otherte = AH->tocsByDumpId[depid];
-
-               if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
-                   return RESTORE_PASS_POST_ACL;
-           }
-       }
-   }
+   if (strcmp(te->desc, "STATISTICS DATA") == 0 &&
+       te->section == SECTION_POST_DATA)
+       return RESTORE_PASS_POST_ACL;
  
     /* All else can be handled in the main pass. */
     return RESTORE_PASS_MAIN;
@@ -3951,7 +3944,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
     }
     else if (te->defnDumper)
     {
-       char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+       char       *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
  
         te->defnLen = ahprintf(AH, "%s\n\n", defn);
         pg_free(defn);
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c

index e553cc84934595e8d1866be6f48f2aedbbb9ed16..7977d0519f11b4bec0e51828e70ca8a298b8d8ce 100644 (file)
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -209,6 +209,9 @@ static int  nbinaryUpgradeClassOids = 0;
  static SequenceItem *sequences = NULL;
  static int nsequences = 0;
  
+/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
+#define MAX_ATTR_STATS_RELS 64
+
  /*
   * The default number of rows per INSERT when
   * --inserts is specified without --rows-per-insert
@@ -10553,6 +10556,77 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
     appendPQExpBuffer(out, "::%s", argtype);
  }
  
+/*
+ * fetchAttributeStats --
+ *
+ * Fetch next batch of attribute statistics for dumpRelationStats_dumper().
+ *
+ * Scans the TOC, starting where the previous call stopped, and collects the
+ * schema and relation names of up to MAX_ATTR_STATS_RELS "STATISTICS DATA"
+ * entries that have REQ_STATS set.  Those names are passed as two parallel
+ * name[] parameters to the prepared statement getAttributeStats.
+ *
+ * Returns the query result, or NULL if the scan found no further matching
+ * entries.  The scan position is kept in function-local static variables,
+ * so successive calls walk forward through the TOC.
+ */
+static PGresult *
+fetchAttributeStats(Archive *fout)
+{
+   ArchiveHandle *AH = (ArchiveHandle *) fout;
+   PQExpBuffer nspnames = createPQExpBuffer();
+   PQExpBuffer relnames = createPQExpBuffer();
+   int         count = 0;
+   PGresult   *res = NULL;
+   static TocEntry *te;
+   static bool restarted;
+
+   /* If we're just starting, set our TOC pointer. */
+   if (!te)
+       te = AH->toc->next;
+
+   /*
+    * We can't easily avoid a second TOC scan for the tar format because it
+    * writes restore.sql separately, which means we must execute the queries
+    * twice.  This feels risky, but there is no known reason it should
+    * generate different output than the first pass.  Even if it does, the
+    * worst-case scenario is that restore.sql might have different statistics
+    * data than the archive.
+    */
+   if (!restarted && te == AH->toc && AH->format == archTar)
+   {
+       te = AH->toc->next;
+       restarted = true;
+   }
+
+   /*
+    * Build the text form of the two arrays.  The names must be quoted as
+    * array elements, not as SQL identifiers: appendPGArray() (declared in
+    * fe_utils/string_utils.h) adds the separating commas and whatever
+    * quoting/escaping the array input syntax needs.  We supply the braces.
+    */
+   appendPQExpBufferChar(nspnames, '{');
+   appendPQExpBufferChar(relnames, '{');
+
+   /*
+    * Scan the TOC for the next set of relevant stats entries.  We assume
+    * that statistics are dumped in the order they are listed in the TOC.
+    * This is perhaps not the sturdiest assumption, so we verify it matches
+    * reality in dumpRelationStats_dumper().
+    */
+   for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next)
+   {
+       if ((te->reqs & REQ_STATS) != 0 &&
+           strcmp(te->desc, "STATISTICS DATA") == 0)
+       {
+           appendPGArray(nspnames, te->namespace);
+           appendPGArray(relnames, te->tag);
+           count++;
+       }
+   }
+
+   appendPQExpBufferChar(nspnames, '}');
+   appendPQExpBufferChar(relnames, '}');
+
+   /* Execute the query for the next batch of relations. */
+   if (count > 0)
+   {
+       PQExpBuffer query = createPQExpBuffer();
+
+       /*
+        * Emit each array as a properly escaped string literal so that names
+        * containing quotes or backslashes cannot break out of the literal.
+        */
+       appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
+       appendStringLiteralAH(query, nspnames->data, fout);
+       appendPQExpBufferStr(query, "::pg_catalog.name[],");
+       appendStringLiteralAH(query, relnames->data, fout);
+       appendPQExpBufferStr(query, "::pg_catalog.name[])");
+       res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+       destroyPQExpBuffer(query);
+   }
+
+   destroyPQExpBuffer(nspnames);
+   destroyPQExpBuffer(relnames);
+   return res;
+}
+
  /*
   * dumpRelationStats_dumper --
   *
@@ -10561,14 +10635,16 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
   * dumped.
   */
  static char *
-dumpRelationStats_dumper(Archive *fout, const void *userArg)
+dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
  {
     const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
-   const DumpableObject *dobj = &rsinfo->dobj;
-   PGresult   *res;
+   static PGresult *res;
+   static int  rownum;
     PQExpBuffer query;
     PQExpBufferData out_data;
     PQExpBuffer out = &out_data;
+   int         i_schemaname;
+   int         i_tablename;
     int         i_attname;
     int         i_inherited;
     int         i_null_frac;
@@ -10584,13 +10660,31 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
     int         i_range_length_histogram;
     int         i_range_empty_frac;
     int         i_range_bounds_histogram;
+   static TocEntry *expected_te;
+
+   /*
+    * fetchAttributeStats() assumes that the statistics are dumped in the
+    * order they are listed in the TOC.  We verify that here for safety.
+    */
+   if (!expected_te)
+       expected_te = ((ArchiveHandle *) fout)->toc;
+
+   expected_te = expected_te->next;
+   while ((expected_te->reqs & REQ_STATS) == 0 ||
+          strcmp(expected_te->desc, "STATISTICS DATA") != 0)
+       expected_te = expected_te->next;
+
+   if (te != expected_te)
+       pg_fatal("stats dumped out of order (current: %d %s %s) (expected: %d %s %s)",
+                te->dumpId, te->desc, te->tag,
+                expected_te->dumpId, expected_te->desc, expected_te->tag);
  
     query = createPQExpBuffer();
     if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
     {
         appendPQExpBufferStr(query,
-                            "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
-                            "SELECT s.attname, s.inherited, "
+                            "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
+                            "SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
                              "s.null_frac, s.avg_width, s.n_distinct, "
                              "s.most_common_vals, s.most_common_freqs, "
                              "s.histogram_bounds, s.correlation, "
@@ -10608,11 +10702,21 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
                                  "NULL AS range_empty_frac,"
                                  "NULL AS range_bounds_histogram ");
  
+       /*
+        * The results must be in the order of the relations supplied in the
+        * parameters to ensure we remain in sync as we walk through the TOC.
+        * The redundant filter clause on s.tablename = ANY(...) seems
+        * sufficient to convince the planner to use
+        * pg_class_relname_nsp_index, which avoids a full scan of pg_stats.
+        * This may not work for all versions.
+        */
         appendPQExpBufferStr(query,
                              "FROM pg_catalog.pg_stats s "
-                            "WHERE s.schemaname = $1 "
-                            "AND s.tablename = $2 "
-                            "ORDER BY s.attname, s.inherited");
+                            "JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
+                            "ON s.schemaname = u.schemaname "
+                            "AND s.tablename = u.tablename "
+                            "WHERE s.tablename = ANY($2) "
+                            "ORDER BY u.ord, s.attname, s.inherited");
  
         ExecuteSqlStatement(fout, query->data);
  
@@ -10642,16 +10746,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
  
     appendPQExpBufferStr(out, "\n);\n");
  
+   /* Fetch the next batch of attribute statistics if needed. */
+   if (rownum >= PQntuples(res))
+   {
+       PQclear(res);
+       res = fetchAttributeStats(fout);
+       rownum = 0;
+   }
  
-   /* fetch attribute stats */
-   appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
-   appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
-   appendPQExpBufferStr(query, ", ");
-   appendStringLiteralAH(query, dobj->name, fout);
-   appendPQExpBufferStr(query, ");");
-
-   res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
-
+   i_schemaname = PQfnumber(res, "schemaname");
+   i_tablename = PQfnumber(res, "tablename");
     i_attname = PQfnumber(res, "attname");
     i_inherited = PQfnumber(res, "inherited");
     i_null_frac = PQfnumber(res, "null_frac");
@@ -10669,10 +10773,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
     i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram");
  
     /* restore attribute stats */
-   for (int rownum = 0; rownum < PQntuples(res); rownum++)
+   for (; rownum < PQntuples(res); rownum++)
     {
         const char *attname;
  
+       /* Stop if the next stat row in our cache isn't for this relation. */
+       if (strcmp(te->tag, PQgetvalue(res, rownum, i_tablename)) != 0 ||
+           strcmp(te->namespace, PQgetvalue(res, rownum, i_schemaname)) != 0)
+           break;
+
         appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
         appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
                           fout->remoteVersion);
@@ -10762,8 +10871,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
         appendPQExpBufferStr(out, "\n);\n");
     }
  
-   PQclear(res);
-
     destroyPQExpBuffer(query);
     return out->data;
  }
author	Nathan Bossart <[email protected]>
	Fri, 4 Apr 2025 19:51:08 +0000 (14:51 -0500)
committer	Nathan Bossart <[email protected]>
	Fri, 4 Apr 2025 19:51:08 +0000 (14:51 -0500)
src/bin/pg_dump/pg_backup.h		patch \| blob \| blame \| history
src/bin/pg_dump/pg_backup_archiver.c		patch \| blob \| blame \| history
src/bin/pg_dump/pg_dump.c		patch \| blob \| blame \| history