Don't rename recovery.conf out of the way until the first checkpoint, like

author Heikki Linnakangas <[email protected]>

Wed, 4 Feb 2009 18:15:44 +0000 (20:15 +0200)

committer Heikki Linnakangas <[email protected]>

Wed, 4 Feb 2009 18:15:44 +0000 (20:15 +0200)
author Heikki Linnakangas <[email protected]>
Wed, 4 Feb 2009 18:15:44 +0000 (20:15 +0200)
committer Heikki Linnakangas <[email protected]>
Wed, 4 Feb 2009 18:15:44 +0000 (20:15 +0200)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 79505a01452e58368f33d69f179fdf936544831a..961bcf3c439f1a547a4ec4436aeac2ff5db352da 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1765,6 +1765,13 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN)
         SpinLockRelease(&xlogctl->info_lck);
  }
  
+/*
+ * Advance minRecoveryPoint in control file.
+ *
+ * If we crash during recovery, we must reach this point again before
+ * the database is consistent. If minRecoveryPoint is already greater than
+ * or equal to 'lsn', it is not updated.
+ */
  static void
  UpdateMinRecoveryPoint(XLogRecPtr lsn)
  {
@@ -1772,14 +1779,6 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
         if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint))
                 return;
  
-       /* XXX
-        * Calculate and write out a new safeStartPoint. This defines
-        * the latest LSN that might appear on-disk while we apply
-        * the WAL records in this file. If we crash during recovery
-        * we must reach this point again before we can prove
-        * database consistency. Not a restartpoint! Restart points
-        * define where we should start recovery from, if we crash.
-        */
         LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
  
         /* update local copy */
@@ -1797,21 +1796,22 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
                 volatile XLogCtlData *xlogctl = XLogCtl;
  
                 /*
-                * We need to update the control file. To avoid having to update it
-                * too often, we update it all the way to EndRecPtr, even though 'lsn'
+                * To avoid having to update the control file too often, we update
+                * it all the way to the last record being replayed, even though 'lsn'
                  * would suffice for correctness.
                  */
                 SpinLockAcquire(&xlogctl->info_lck);
                 minRecoveryPoint = xlogctl->replayEndRecPtr;
                 SpinLockRelease(&xlogctl->info_lck);
  
+               /* update control file */
                 ControlFile->minRecoveryPoint = minRecoveryPoint;
                 UpdateControlFile();
+
+               elog(DEBUG2, "updated min recovery point to %X/%X",
+                        minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
         }
         LWLockRelease(ControlFileLock);
-       
-       elog(LOG, "updated min recovery point to %X/%X",
-                minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
  }
  
  /*
@@ -4835,18 +4835,13 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
         unlink(recoveryPath);           /* ignore any error */
  
         /*
-        * Rename the config file out of the way, so that we don't accidentally
-        * re-enter archive recovery mode in a subsequent crash. We have already
-        * restored all the WAL segments we need from the archive, and we trust
-        * that they are not going to go away even if we crash. (XXX: should
-        * we fsync() them all to ensure that?)
+        * As of 8.4 we no longer rename the recovery.conf file out of the
+        * way until after we have performed a full checkpoint. This ensures
+        * that any crash between now and the end of the checkpoint does not
+        * attempt to restart from a WAL file that is no longer available to us.
+        * As soon as we remove recovery.conf we lose our restore_command and
+        * cannot reaccess WAL files from the archive.
          */
-       unlink(RECOVERY_COMMAND_DONE);
-       if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
-               ereport(FATAL,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                               RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
  
         ereport(LOG,
                         (errmsg("archive recovery complete")));
@@ -5279,16 +5274,23 @@ StartupXLOG(void)
                         /* use volatile pointer to prevent code rearrangement */
                         volatile XLogCtlData *xlogctl = XLogCtl;
  
-                       InRedo = true;
-                       ereport(LOG,
-                                       (errmsg("redo starts at %X/%X",
-                                                       ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
-
                         /* Update shared copy of replayEndRecPtr */
                         SpinLockAcquire(&xlogctl->info_lck);
                         xlogctl->replayEndRecPtr = ReadRecPtr;
                         SpinLockRelease(&xlogctl->info_lck);
  
+                       InRedo = true;
+
+                       if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
+                               ereport(LOG,
+                                               (errmsg("redo starts at %X/%X",
+                                                               ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
+                       else
+                               ereport(LOG,
+                                               (errmsg("redo starts at %X/%X, consistency will be reached at %X/%X",
+                                               ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
+                                               minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
+
                         /*
                          * Let postmaster know we've started redo now.
                          *
@@ -5355,8 +5357,7 @@ StartupXLOG(void)
                                         if (InArchiveRecovery)
                                         {
                                                 ereport(LOG,
-                                                       (errmsg("consistent recovery state reached at %X/%X",
-                                                               EndRecPtr.xlogid, EndRecPtr.xrecoff)));
+                                                               (errmsg("consistent recovery state reached")));
                                                 if (IsUnderPostmaster)
                                                         SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
                                         }
@@ -6029,6 +6030,7 @@ CreateCheckPoint(int flags)
         uint32          _logSeg;
         TransactionId *inCommitXids;
         int                     nInCommit;
+       bool            leavingArchiveRecovery;
  
         /* shouldn't happen */
         if (IsRecoveryProcessingMode())
@@ -6042,6 +6044,13 @@ CreateCheckPoint(int flags)
          */
         LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
  
+       /*
+        * Find out if this is the first checkpoint after archive recovery.
+        */
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       leavingArchiveRecovery = (ControlFile->state == DB_IN_ARCHIVE_RECOVERY);
+       LWLockRelease(ControlFileLock);
+
         /*
          * Prepare to accumulate statistics.
          *
@@ -6286,6 +6295,21 @@ CreateCheckPoint(int flags)
         UpdateControlFile();
         LWLockRelease(ControlFileLock);
  
+       if (leavingArchiveRecovery)
+       {
+               /*
+                * Rename the config file out of the way, so that we don't accidentally
+                * re-enter archive recovery mode in a subsequent crash. Prior to
+                * 8.4 this step was performed at end of exitArchiveRecovery().
+                */
+               unlink(RECOVERY_COMMAND_DONE);
+               if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not rename file \"%s\" to \"%s\": %m",
+                                                       RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
+       }
+
         /* Update shared-memory copy of checkpoint XID/epoch */
         {
                 /* use volatile pointer to prevent code rearrangement */
@@ -6329,8 +6353,7 @@ CreateCheckPoint(int flags)
          * Truncate pg_subtrans if possible.  We can throw away all data before
          * the oldest XMIN of any running transaction.  No future transaction will
          * attempt to reference any pg_subtrans entry older than that (see Asserts
-        * in subtrans.c).      During recovery, though, we mustn't do this because
-        * StartupSUBTRANS hasn't been called yet.
+        * in subtrans.c).
          */
         TruncateSUBTRANS(GetOldestXmin(true, false));
  
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h

index c3b3ec7ee1e95207288011c7386da0ab1221e98f..b97a6afbf0bb9a5804f4100eaee387272f8e78f8 100644 (file)
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -168,9 +168,9 @@ extern bool XLOG_DEBUG;
  
  /* These directly affect the behavior of CreateCheckPoint and subsidiaries */
  #define CHECKPOINT_IS_SHUTDOWN 0x0001  /* Checkpoint is for shutdown */
-#define CHECKPOINT_IMMEDIATE   0x0002  /* Do it without delays */
-#define CHECKPOINT_FORCE               0x0004  /* Force even if no activity */
-#define CHECKPOINT_STARTUP             0x0008  /* Startup checkpoint */
+#define CHECKPOINT_IS_STARTUP  0x0002  /* Startup checkpoint */
+#define CHECKPOINT_IMMEDIATE   0x0004  /* Do it without delays */
+#define CHECKPOINT_FORCE               0x0008  /* Force even if no activity */
  /* These are important to RequestCheckpoint */
  #define CHECKPOINT_WAIT                        0x0010  /* Wait for completion */
  /* These indicate the cause of a checkpoint request */
author	Heikki Linnakangas <[email protected]>
	Wed, 4 Feb 2009 18:15:44 +0000 (20:15 +0200)
committer	Heikki Linnakangas <[email protected]>
	Wed, 4 Feb 2009 18:15:44 +0000 (20:15 +0200)
src/backend/access/transam/xlog.c		patch \| blob \| blame \| history
src/include/access/xlog.h		patch \| blob \| blame \| history