Add IsRecoveryProcessingMode() quick exits to XLogNeedsFlush, XLogBackgroundFlush and XLogAsyncCommitFlush.

author Heikki Linnakangas <[email protected]>

Mon, 9 Feb 2009 12:54:41 +0000 (14:54 +0200)

committer Heikki Linnakangas <[email protected]>

Mon, 9 Feb 2009 13:13:27 +0000 (15:13 +0200)
author Heikki Linnakangas <[email protected]>
Mon, 9 Feb 2009 12:54:41 +0000 (14:54 +0200)
committer Heikki Linnakangas <[email protected]>
Mon, 9 Feb 2009 13:13:27 +0000 (15:13 +0200)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 0ace629e8ff5f0e94c2307e908b59effa74cae13..87e4551b0e306deaa5af412772b25cc438b53507 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -429,6 +429,11 @@ static bool InRedo = false;
   * Flag set by interrupt handlers for later service in the redo loop.
   */
  static volatile sig_atomic_t shutdown_requested = false;
+/*
+ * Flag set when executing a restore command, to tell SIGTERM signal handler
+ * that it's safe to just proc_exit(0).
+ */
+static volatile sig_atomic_t in_restore_command = false;
  
  
  static void XLogArchiveNotify(const char *xlog);
@@ -460,7 +465,7 @@ static void PreallocXlogFiles(XLogRecPtr endptr);
  static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
  static void ValidateXLOGDirectoryStructure(void);
  static void CleanupBackupHistory(void);
-static void UpdateMinRecoveryPoint(XLogRecPtr lsn);
+static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
  static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode);
  static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
  static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
@@ -1766,14 +1771,16 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN)
   * Advance minRecoveryPoint in control file.
   *
   * If we crash during recovery, we must reach this point again before the
- * database is consistent. If minRecoveryPoint is already greater than or
- * equal to 'lsn', it is not updated.
+ * database is consistent. 
+ * 
+ * If 'force' is true, 'lsn' argument is ignored. Otherwise, minRecoveryPoint
+ * is only updated if it's not already greater than or equal to 'lsn'.
   */
  static void
-UpdateMinRecoveryPoint(XLogRecPtr lsn)
+UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
  {
         /* Quick check using our local copy of the variable */
-       if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint))
+       if (!updateMinRecoveryPoint || (!force && XLByteLE(lsn, minRecoveryPoint)))
                 return;
  
         LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
@@ -1787,10 +1794,11 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
          */
         if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
                 updateMinRecoveryPoint = false;
-       else if (XLByteLT(minRecoveryPoint, lsn))
+       else if (force || XLByteLT(minRecoveryPoint, lsn))
         {
                 /* use volatile pointer to prevent code rearrangement */
                 volatile XLogCtlData *xlogctl = XLogCtl;
+               XLogRecPtr newMinRecoveryPoint;
  
                 /*
                  * To avoid having to update the control file too often, we update
@@ -1798,12 +1806,16 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
                  * would suffice for correctness.
                  */
                 SpinLockAcquire(&xlogctl->info_lck);
-               minRecoveryPoint = xlogctl->replayEndRecPtr;
+               newMinRecoveryPoint = xlogctl->replayEndRecPtr;
                 SpinLockRelease(&xlogctl->info_lck);
  
                 /* update control file */
-               ControlFile->minRecoveryPoint = minRecoveryPoint;
-               UpdateControlFile();
+               if (XLByteLT(ControlFile->minRecoveryPoint, newMinRecoveryPoint))
+               {
+                       ControlFile->minRecoveryPoint = newMinRecoveryPoint;
+                       UpdateControlFile();
+                       minRecoveryPoint = newMinRecoveryPoint;
+               }
  
                 elog(DEBUG2, "updated min recovery point to %X/%X",
                          minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
@@ -1829,7 +1841,7 @@ XLogFlush(XLogRecPtr record)
          */
         if (IsRecoveryProcessingMode())
         {
-               UpdateMinRecoveryPoint(record);
+               UpdateMinRecoveryPoint(record, false);
                 return;
         }
  
@@ -1957,6 +1969,10 @@ XLogBackgroundFlush(void)
         XLogRecPtr      WriteRqstPtr;
         bool            flexible = true;
  
+       /* XLOG doesn't need flushing during recovery */
+       if (IsRecoveryProcessingMode())
+               return;
+
         /* read LogwrtResult and update local state */
         {
                 /* use volatile pointer to prevent code rearrangement */
@@ -2028,6 +2044,10 @@ XLogAsyncCommitFlush(void)
         /* use volatile pointer to prevent code rearrangement */
         volatile XLogCtlData *xlogctl = XLogCtl;
  
+       /* There are no asynchronously committed transactions during recovery */
+       if (IsRecoveryProcessingMode())
+               return;
+
         SpinLockAcquire(&xlogctl->info_lck);
         WriteRqstPtr = xlogctl->asyncCommitLSN;
         SpinLockRelease(&xlogctl->info_lck);
@@ -2044,6 +2064,10 @@ XLogAsyncCommitFlush(void)
  bool
  XLogNeedsFlush(XLogRecPtr record)
  {
+       /* XLOG doesn't need flushing during recovery */
+       if (IsRecoveryProcessingMode())
+               return false;
+
         /* Quick exit if already known flushed */
         if (XLByteLE(record, LogwrtResult.Flush))
                 return false;
@@ -2718,10 +2742,23 @@ RestoreArchivedFile(char *path, const char *xlogfname,
                         (errmsg_internal("executing restore command \"%s\"",
                                                          xlogRestoreCmd)));
  
+
+       /*
+        * Set in_restore_command to indicate that we should just exit on
+        * SIGTERM. We know that we're at a safe point to do that. Check
+        * if we had already received the signal.
+        */
+       in_restore_command = true;
+       if (shutdown_requested)
+               proc_exit(0);
+
         /*
          * Copy xlog from archival storage to XLOGDIR
          */
         rc = system(xlogRestoreCmd);
+
+       in_restore_command = false;
+
         if (rc == 0)
         {
                 /*
@@ -2774,25 +2811,22 @@ RestoreArchivedFile(char *path, const char *xlogfname,
          * assume that recovery is complete and start up the database!) It's
          * essential to abort on child SIGINT and SIGQUIT, because per spec
          * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
-        * those it's a good bet we should have gotten it too.  Aborting on other
-        * signals such as SIGTERM seems a good idea as well.
+        * those it's a good bet we should have gotten it too.
          *
-        * However, if we were requested to terminate, we don't really care what
-        * happened to the restore command, so we just exit cleanly. In fact,
-        * the restore command most likely received the SIGTERM too, and we don't
-        * want to complain about that.
+        * On SIGTERM, assume we have received a fast shutdown request, and exit
+        * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+        * child process. If we receive it first, the signal handler will call
+        * proc_exit(0), otherwise we do it here. If we received SIGTERM for any
+        * other reason, postmaster will perform an immediate shutdown when it
+        * sees us exiting unexpectedly.
          *
          * Per the Single Unix Spec, shells report exit status > 128 when a called
          * command died on a signal.  Also, 126 and 127 are used to report
          * problems such as an unfindable command; treat those as fatal errors
          * too.
          */
-       if (shutdown_requested && InRedo)
-       {
-               /* XXX: Is EndRecPtr always the right value here? */
-               UpdateMinRecoveryPoint(EndRecPtr);
+       if (WTERMSIG(rc) == SIGTERM)
                 proc_exit(0);
-       }
  
         signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
  
@@ -5335,10 +5369,7 @@ StartupXLOG(void)
                                  * recovery.
                                  */
                                 if (shutdown_requested)
-                               {
-                                       UpdateMinRecoveryPoint(ReadRecPtr);
                                         proc_exit(0);
-                               }
  
                                 /*
                                  * Have we reached our safe starting point? If so, we can
@@ -6437,28 +6468,41 @@ CreateRestartPoint(int flags)
         memcpy(&lastCheckPoint, &XLogCtl->lastCheckPoint, sizeof(CheckPoint));
         SpinLockRelease(&xlogctl->info_lck);
  
-       /*
-        * If the last checkpoint record we've replayed is already our last
-        * restartpoint, we're done.
+       /* 
+        * Check that we're still in recovery mode. It's ok if we exit recovery
+        * mode after this check, the restart point is valid anyway.
          */
-       if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
-               XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
+       if (!IsRecoveryProcessingMode())
         {
                 ereport(DEBUG2,
-                               (errmsg("skipping restartpoint, already performed at %X/%X",
-                                               lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+                               (errmsg("skipping restartpoint, recovery has already ended")));
                 LWLockRelease(CheckpointLock);
                 return false;
         }
  
-       /* 
-        * Check that we're still in recovery mode. It's ok if we exit recovery
-        * mode after this check, the restart point is valid anyway.
+       /*
+        * If the last checkpoint record we've replayed is already our last
+        * restartpoint, we can't perform a new restart point. We still update
+        * minRecoveryPoint in that case, so that if this is a shutdown restart
+        * point, we won't start up earlier than before. That's not strictly
+        * necessary, but when we get hot standby capability, it would be rather
+        * weird if the database opened up for read-only connections at a
+        * point-in-time before the last shutdown. Such time travel is still
+        * possible in case of immediate shutdown, though.
+        *
+        * We don't explicitly advance minRecoveryPoint when we do create a
+        * restartpoint. It's assumed that flushing the buffers will do that
+        * as a side-effect.
          */
-       if (!IsRecoveryProcessingMode())
+       if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
+               XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
         {
+               XLogRecPtr InvalidXLogRecPtr = {0, 0};
                 ereport(DEBUG2,
-                               (errmsg("skipping restartpoint, recovery has already ended")));
+                               (errmsg("skipping restartpoint, already performed at %X/%X",
+                                               lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+
+               UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
                 LWLockRelease(CheckpointLock);
                 return false;
         }
@@ -7599,7 +7643,10 @@ startupproc_quickdie(SIGNAL_ARGS)
  static void
  StartupProcShutdownHandler(SIGNAL_ARGS)
  {
-       shutdown_requested = true;
+       if (in_restore_command)
+               proc_exit(0);
+       else
+               shutdown_requested = true;
  }
  
  /* Main entry point for startup process */
author	Heikki Linnakangas <[email protected]>
	Mon, 9 Feb 2009 12:54:41 +0000 (14:54 +0200)
committer	Heikki Linnakangas <[email protected]>
	Mon, 9 Feb 2009 13:13:27 +0000 (15:13 +0200)