Have pg_rewind run crash recovery before rewinding
authorAlvaro Herrera <[email protected]>
Fri, 27 Sep 2019 19:40:01 +0000 (16:40 -0300)
committerAlvaro Herrera <[email protected]>
Fri, 27 Sep 2019 19:40:01 +0000 (16:40 -0300)
If we don't do this, the rewind fails if the server wasn't cleanly shut
down, which seems unhelpful and serves no purpose.

Also provide a new option --no-ensure-shutdown to suppress this
behavior, for alleged advanced usage that prefers to avoid the crash
recovery.

Authors: Paul Guo, Jimmy Yih, Ashwin Agrawal
Reviewed-by: Álvaro Herrera
Discussion: https://postgr.es/m/CAEET0ZEffUkXc48pg2iqARQgGRYDiiVxDu+yYek_bTwJF+q=Uw@mail.gmail.com

doc/src/sgml/ref/pg_rewind.sgml
src/bin/pg_rewind/pg_rewind.c

index ac142d22fcd11ad5d1022c0ea6ccacf87bbf790a..a06e5ac5e11b8b0ff716370c5077acaa7f74aac5 100644 (file)
@@ -165,6 +165,21 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--no-ensure-shutdown</option></term>
+      <listitem>
+       <para>
+        <application>pg_rewind</application> verifies that the target server
+        is cleanly shut down before rewinding; by default, if it isn't, it
+        starts the server in single-user mode to complete crash recovery.
+        By passing this option, <application>pg_rewind</application> skips
+        this and errors out immediately if the server is not cleanly shut
+        down.  Users are expected to handle the situation themselves in that
+        case.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-n</option></term>
       <term><option>--dry-run</option></term>
index 15e3eab55050c6328a82e4f01f42f11254fdf321..8cb0d726cfeef5a552c1385ae508735ebd1c7192 100644 (file)
@@ -40,6 +40,7 @@ static void digestControlFile(ControlFileData *ControlFile, char *source,
 static void syncTargetDirectory(void);
 static void sanityChecks(void);
 static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
+static void ensureCleanShutdown(const char *argv0);
 
 static ControlFileData ControlFile_target;
 static ControlFileData ControlFile_source;
@@ -79,6 +80,7 @@ usage(const char *progname)
        printf(_("  -N, --no-sync                  do not wait for changes to be written\n"
                         "                                 safely to disk\n"));
        printf(_("  -P, --progress                 write progress messages\n"));
+       printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
        printf(_("      --debug                    write a lot of debug messages\n"));
        printf(_("  -V, --version                  output version information, then exit\n"));
        printf(_("  -?, --help                     show this help, then exit\n"));
@@ -94,6 +96,7 @@ main(int argc, char **argv)
                {"target-pgdata", required_argument, NULL, 'D'},
                {"source-pgdata", required_argument, NULL, 1},
                {"source-server", required_argument, NULL, 2},
+               {"no-ensure-shutdown", no_argument, NULL, 4},
                {"version", no_argument, NULL, 'V'},
                {"dry-run", no_argument, NULL, 'n'},
                {"no-sync", no_argument, NULL, 'N'},
@@ -110,6 +113,7 @@ main(int argc, char **argv)
        XLogRecPtr      chkptredo;
        size_t          size;
        char       *buffer;
+       bool            no_ensure_shutdown = false;
        bool            rewind_needed;
        XLogRecPtr      endrec;
        TimeLineID      endtli;
@@ -169,6 +173,9 @@ main(int argc, char **argv)
                        case 2:                         /* --source-server */
                                connstr_source = pg_strdup(optarg);
                                break;
+                       case 4:
+                               no_ensure_shutdown = true;
+                               break;
                }
        }
 
@@ -241,6 +248,24 @@ main(int argc, char **argv)
        digestControlFile(&ControlFile_target, buffer, size);
        pg_free(buffer);
 
+       /*
+        * If the target instance was not cleanly shut down, run a single-user
+        * postgres session really quickly and reload the control file to get the
+        * new state. Note if no_ensure_shutdown is specified, pg_rewind won't do
+        * that automatically. That means users need to do that themselves in
+        * advance, else pg_rewind will soon quit, see sanityChecks().
+        */
+       if (!no_ensure_shutdown &&
+               ControlFile_target.state != DB_SHUTDOWNED &&
+               ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
+       {
+               ensureCleanShutdown(argv[0]);
+
+               buffer = slurpFile(datadir_target, "global/pg_control", &size);
+               digestControlFile(&ControlFile_target, buffer, size);
+               pg_free(buffer);
+       }
+
        buffer = fetchFile("global/pg_control", &size);
        digestControlFile(&ControlFile_source, buffer, size);
        pg_free(buffer);
@@ -748,3 +773,58 @@ syncTargetDirectory(void)
 
        fsync_pgdata(datadir_target, PG_VERSION_NUM);
 }
+
+/*
+ * Ensure clean shutdown of target instance by launching single-user mode
+ * postgres to do crash recovery.
+ *
+ * argv0 is this program's own argv[0]; it is handed to find_other_exec()
+ * and find_my_exec() to locate the "postgres" binary.
+ *
+ * Every failure is reported through pg_fatal(): the postgres binary cannot
+ * be found or has the wrong version, the command line does not fit in the
+ * local buffer, or system() returns a nonzero status.  In dry-run mode the
+ * binary is still looked up, but nothing is executed.
+ */
+static void
+ensureCleanShutdown(const char *argv0)
+{
+       int                     ret;
+       int                     len;
+#define MAXCMDLEN (2 * MAXPGPATH)
+       char            exec_path[MAXPGPATH];
+       char            cmd[MAXCMDLEN];
+
+       /* locate postgres binary */
+       if ((ret = find_other_exec(argv0, "postgres",
+                                                          PG_BACKEND_VERSIONSTR,
+                                                          exec_path)) < 0)
+       {
+               char            full_path[MAXPGPATH];
+
+               /* fall back to the bare program name for the error message */
+               if (find_my_exec(argv0, full_path) < 0)
+                       strlcpy(full_path, progname, sizeof(full_path));
+
+               if (ret == -1)
+                       pg_fatal("The program \"%s\" is needed by %s but was\n"
+                                        "not found in the same directory as \"%s\".\n"
+                                        "Check your installation.",
+                                        "postgres", progname, full_path);
+               else
+                       pg_fatal("The program \"%s\" was found by \"%s\" but was\n"
+                                        "not the same version as %s.\n"
+                                        "Check your installation.",
+                                        "postgres", full_path, progname);
+       }
+
+       pg_log_info("executing \"%s\" for target server to complete crash recovery",
+                               exec_path);
+
+       /*
+        * Skip processing if requested, but only after ensuring presence of
+        * postgres.
+        */
+       if (dry_run)
+               return;
+
+       /* finally run postgres in single-user mode */
+       len = snprintf(cmd, MAXCMDLEN, "\"%s\" --single -D \"%s\" template1 < \"%s\"",
+                                  exec_path, datadir_target, DEVNULL);
+
+       /*
+        * The length of datadir_target is not checked in this function, so the
+        * formatted command may not fit in cmd.  Refuse to hand a truncated
+        * command line to the shell.
+        */
+       if (len < 0 || len >= MAXCMDLEN)
+               pg_fatal("command to run postgres in single-user mode is too long");
+
+       if (system(cmd) != 0)
+       {
+               pg_log_error("postgres single-user mode of target instance failed");
+               pg_fatal("Command was: %s", cmd);
+       }
+}