Add IsRecoveryProcessingMode() quick exits to XLogNeedsFlush,
authorHeikki Linnakangas <[email protected]>
Mon, 9 Feb 2009 12:54:41 +0000 (14:54 +0200)
committerHeikki Linnakangas <[email protected]>
Mon, 9 Feb 2009 13:13:27 +0000 (15:13 +0200)
XLogAsyncCommitFlush and XLogBackgroundFlush. Fix restore command
invocation so that fast shutdown requests are not lost. Update
minRecoveryPoint in CreateRestartPoint when we can't create a
restart point.

src/backend/access/transam/xlog.c

index 0ace629e8ff5f0e94c2307e908b59effa74cae13..87e4551b0e306deaa5af412772b25cc438b53507 100644 (file)
@@ -429,6 +429,11 @@ static bool InRedo = false;
  * Flag set by interrupt handlers for later service in the redo loop.
  */
 static volatile sig_atomic_t shutdown_requested = false;
+/*
+ * Flag set when executing a restore command, to tell SIGTERM signal handler
+ * that it's safe to just proc_exit(0).
+ */
+static volatile sig_atomic_t in_restore_command = false;
 
 
 static void XLogArchiveNotify(const char *xlog);
@@ -460,7 +465,7 @@ static void PreallocXlogFiles(XLogRecPtr endptr);
 static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
 static void ValidateXLOGDirectoryStructure(void);
 static void CleanupBackupHistory(void);
-static void UpdateMinRecoveryPoint(XLogRecPtr lsn);
+static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
 static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode);
 static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
 static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
@@ -1766,14 +1771,16 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN)
  * Advance minRecoveryPoint in control file.
  *
  * If we crash during recovery, we must reach this point again before the
- * database is consistent. If minRecoveryPoint is already greater than or
- * equal to 'lsn', it is not updated.
+ * database is consistent. 
+ * 
+ * If 'force' is true, 'lsn' argument is ignored. Otherwise, minRecoveryPoint
+ * is only updated if it's not already greater than or equal to 'lsn'.
  */
 static void
-UpdateMinRecoveryPoint(XLogRecPtr lsn)
+UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
 {
        /* Quick check using our local copy of the variable */
-       if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint))
+       if (!updateMinRecoveryPoint || (!force && XLByteLE(lsn, minRecoveryPoint)))
                return;
 
        LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
@@ -1787,10 +1794,11 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
         */
        if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
                updateMinRecoveryPoint = false;
-       else if (XLByteLT(minRecoveryPoint, lsn))
+       else if (force || XLByteLT(minRecoveryPoint, lsn))
        {
                /* use volatile pointer to prevent code rearrangement */
                volatile XLogCtlData *xlogctl = XLogCtl;
+               XLogRecPtr newMinRecoveryPoint;
 
                /*
                 * To avoid having to update the control file too often, we update
@@ -1798,12 +1806,16 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
                 * would suffice for correctness.
                 */
                SpinLockAcquire(&xlogctl->info_lck);
-               minRecoveryPoint = xlogctl->replayEndRecPtr;
+               newMinRecoveryPoint = xlogctl->replayEndRecPtr;
                SpinLockRelease(&xlogctl->info_lck);
 
                /* update control file */
-               ControlFile->minRecoveryPoint = minRecoveryPoint;
-               UpdateControlFile();
+               if (XLByteLT(ControlFile->minRecoveryPoint, newMinRecoveryPoint))
+               {
+                       ControlFile->minRecoveryPoint = newMinRecoveryPoint;
+                       UpdateControlFile();
+                       minRecoveryPoint = newMinRecoveryPoint;
+               }
 
                elog(DEBUG2, "updated min recovery point to %X/%X",
                         minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
@@ -1829,7 +1841,7 @@ XLogFlush(XLogRecPtr record)
         */
        if (IsRecoveryProcessingMode())
        {
-               UpdateMinRecoveryPoint(record);
+               UpdateMinRecoveryPoint(record, false);
                return;
        }
 
@@ -1957,6 +1969,10 @@ XLogBackgroundFlush(void)
        XLogRecPtr      WriteRqstPtr;
        bool            flexible = true;
 
+       /* XLOG doesn't need flushing during recovery */
+       if (IsRecoveryProcessingMode())
+               return;
+
        /* read LogwrtResult and update local state */
        {
                /* use volatile pointer to prevent code rearrangement */
@@ -2028,6 +2044,10 @@ XLogAsyncCommitFlush(void)
        /* use volatile pointer to prevent code rearrangement */
        volatile XLogCtlData *xlogctl = XLogCtl;
 
+       /* There are no asynchronously committed transactions during recovery */
+       if (IsRecoveryProcessingMode())
+               return;
+
        SpinLockAcquire(&xlogctl->info_lck);
        WriteRqstPtr = xlogctl->asyncCommitLSN;
        SpinLockRelease(&xlogctl->info_lck);
@@ -2044,6 +2064,10 @@ XLogAsyncCommitFlush(void)
 bool
 XLogNeedsFlush(XLogRecPtr record)
 {
+       /* XLOG doesn't need flushing during recovery */
+       if (IsRecoveryProcessingMode())
+               return false;
+
        /* Quick exit if already known flushed */
        if (XLByteLE(record, LogwrtResult.Flush))
                return false;
@@ -2718,10 +2742,23 @@ RestoreArchivedFile(char *path, const char *xlogfname,
                        (errmsg_internal("executing restore command \"%s\"",
                                                         xlogRestoreCmd)));
 
+
+       /*
+        * Set in_restore_command to tell the signal handler that it should just
+        * exit on SIGTERM. We know that we're at a safe point to do that. Check
+        * if we had already received the signal, so that it isn't lost.
+        */
+       in_restore_command = true;
+       if (shutdown_requested)
+               proc_exit(0);
+
        /*
         * Copy xlog from archival storage to XLOGDIR
         */
        rc = system(xlogRestoreCmd);
+
+       in_restore_command = false;
+
        if (rc == 0)
        {
                /*
@@ -2774,25 +2811,22 @@ RestoreArchivedFile(char *path, const char *xlogfname,
         * assume that recovery is complete and start up the database!) It's
         * essential to abort on child SIGINT and SIGQUIT, because per spec
         * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
-        * those it's a good bet we should have gotten it too.  Aborting on other
-        * signals such as SIGTERM seems a good idea as well.
+        * those it's a good bet we should have gotten it too.
         *
-        * However, if we were requested to terminate, we don't really care what
-        * happened to the restore command, so we just exit cleanly. In fact,
-        * the restore command most likely received the SIGTERM too, and we don't
-        * want to complain about that.
+        * On SIGTERM, assume we have received a fast shutdown request, and exit
+        * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+        * child process. If we receive it first, the signal handler will call
+        * proc_exit(0), otherwise we do it here. If we received SIGTERM for any
+        * other reason, postmaster will perform an immediate shutdown when it
+        * sees us exiting unexpectedly.
         *
         * Per the Single Unix Spec, shells report exit status > 128 when a called
         * command died on a signal.  Also, 126 and 127 are used to report
         * problems such as an unfindable command; treat those as fatal errors
         * too.
         */
-       if (shutdown_requested && InRedo)
-       {
-               /* XXX: Is EndRecPtr always the right value here? */
-               UpdateMinRecoveryPoint(EndRecPtr);
+       if (WTERMSIG(rc) == SIGTERM)
                proc_exit(0);
-       }
 
        signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
 
@@ -5335,10 +5369,7 @@ StartupXLOG(void)
                                 * recovery.
                                 */
                                if (shutdown_requested)
-                               {
-                                       UpdateMinRecoveryPoint(ReadRecPtr);
                                        proc_exit(0);
-                               }
 
                                /*
                                 * Have we reached our safe starting point? If so, we can
@@ -6437,28 +6468,41 @@ CreateRestartPoint(int flags)
        memcpy(&lastCheckPoint, &XLogCtl->lastCheckPoint, sizeof(CheckPoint));
        SpinLockRelease(&xlogctl->info_lck);
 
-       /*
-        * If the last checkpoint record we've replayed is already our last
-        * restartpoint, we're done.
+       /* 
+        * Check that we're still in recovery mode. It's ok if we exit recovery
+        * mode after this check, the restart point is valid anyway.
         */
-       if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
-               XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
+       if (!IsRecoveryProcessingMode())
        {
                ereport(DEBUG2,
-                               (errmsg("skipping restartpoint, already performed at %X/%X",
-                                               lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+                               (errmsg("skipping restartpoint, recovery has already ended")));
                LWLockRelease(CheckpointLock);
                return false;
        }
 
-       /* 
-        * Check that we're still in recovery mode. It's ok if we exit recovery
-        * mode after this check, the restart point is valid anyway.
+       /*
+        * If the last checkpoint record we've replayed is already our last
+        * restartpoint, we can't perform a new restart point. We still update
+        * minRecoveryPoint in that case, so that if this is a shutdown restart
+        * point, we won't start up earlier than before. That's not strictly
+        * necessary, but when we get hot standby capability, it would be rather
+        * weird if the database opened up for read-only connections at a
+        * point-in-time before the last shutdown. Such time travel is still
+        * possible in case of immediate shutdown, though.
+        *
+        * We don't explicitly advance minRecoveryPoint when we do create a
+        * restartpoint. It's assumed that flushing the buffers will do that
+        * as a side-effect.
         */
-       if (!IsRecoveryProcessingMode())
+       if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
+               XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
        {
+               XLogRecPtr InvalidXLogRecPtr = {0, 0};
                ereport(DEBUG2,
-                               (errmsg("skipping restartpoint, recovery has already ended")));
+                               (errmsg("skipping restartpoint, already performed at %X/%X",
+                                               lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+
+               UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
                LWLockRelease(CheckpointLock);
                return false;
        }
@@ -7599,7 +7643,10 @@ startupproc_quickdie(SIGNAL_ARGS)
 static void
 StartupProcShutdownHandler(SIGNAL_ARGS)
 {
-       shutdown_requested = true;
+       if (in_restore_command)
+               proc_exit(0);
+       else
+               shutdown_requested = true;
 }
 
 /* Main entry point for startup process */