Bring back startup checkpoints, plus some other small changes.

author Heikki Linnakangas <[email protected]>
Thu, 5 Feb 2009 11:21:20 +0000 (13:21 +0200)
committer Heikki Linnakangas <[email protected]>
Thu, 5 Feb 2009 11:21:20 +0000 (13:21 +0200)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 961bcf3c439f1a547a4ec4436aeac2ff5db352da..0f647de5aa708d9c361cc7a7c9e5a176caf38d8d 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -422,7 +422,7 @@ static XLogRecPtr EndRecPtr;        /* end+1 of last record read. Also in shared mem */
  static XLogRecord *nextRecord = NULL;
  static TimeLineID lastPageTLI = 0;
  static XLogRecPtr minRecoveryPoint; /* local copy of ControlFile->minRecoveryPoint */
-static bool              updateMinRecoveryPoint = true;
+static bool    updateMinRecoveryPoint = true;
  
  static bool InRedo = false;
  
@@ -440,7 +440,6 @@ static void XLogArchiveCleanup(const char *xlog);
  static void readRecoveryCommandFile(void);
  static void exitArchiveRecovery(TimeLineID endTLI,
                                         uint32 endLogId, uint32 endLogSeg);
-static void exitRecovery(void);
  static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
  static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
  
@@ -526,10 +525,9 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
         bool            updrqst;
         bool            doPageWrites;
         bool            isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
-       bool            isRecoveryEnd = (rmid == RM_XLOG_ID && info == XLOG_RECOVERY_END);
  
         /* cross-check on whether we should be here or not */
-       if (IsRecoveryProcessingMode() && !isRecoveryEnd)
+       if (IsRecoveryProcessingMode())
                 elog(FATAL, "cannot make new WAL entries during recovery");
  
         /* info's high bits are reserved for use by me */
@@ -1826,7 +1824,10 @@ XLogFlush(XLogRecPtr record)
         XLogRecPtr      WriteRqstPtr;
         XLogwrtRqst WriteRqst;
  
-       /* During REDO, we don't try to flush the WAL, but update minRecoveryPoint instead */
+       /*
+        * During REDO, we don't try to flush the WAL, but update minRecoveryPoint
+        * instead.
+        */
         if (IsRecoveryProcessingMode())
         {
                 UpdateMinRecoveryPoint(record);
@@ -1930,7 +1931,7 @@ XLogFlush(XLogRecPtr record)
          * and so we will not force a restart for a bad LSN on a data page.
          */
         if (XLByteLT(LogwrtResult.Flush, record))
-               elog(ERROR,
+               elog(InRecovery ? WARNING : ERROR,
                 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
                          record.xlogid, record.xrecoff,
                          LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
@@ -2508,6 +2509,7 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
                         snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
                                          xlogfname);
                         set_ps_display(activitymsg, false);
+
                         return fd;
                 }
                 if (errno != ENOENT)    /* unexpected failure? */
@@ -2788,7 +2790,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
          */
         if (shutdown_requested && InRedo)
         {
-               /* XXX: We should update minRecoveryPoint to the exact value here */
+               /* XXX: Is EndRecPtr always the right value? */
                 UpdateMinRecoveryPoint(EndRecPtr);
                 proc_exit(0);
         }
@@ -4835,13 +4837,15 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
         unlink(recoveryPath);           /* ignore any error */
  
         /*
-        * As of 8.4 we no longer rename the recovery.conf file out of the
-        * way until after we have performed a full checkpoint. This ensures
-        * that any crash between now and the end of the checkpoint does not
-        * attempt to restart from a WAL file that is no longer available to us.
-        * As soon as we remove recovery.conf we lose our recovery_command and
-        * cannot reaccess WAL files from the archive.
+        * Rename the config file out of the way, so that we don't accidentally
+        * re-enter archive recovery mode in a subsequent crash.
          */
+       unlink(RECOVERY_COMMAND_DONE);
+       if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
+               ereport(FATAL,
+                               (errcode_for_file_access(),
+                                errmsg("could not rename file \"%s\" to \"%s\": %m",
+                                               RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
  
         ereport(LOG,
                         (errmsg("archive recovery complete")));
@@ -4977,7 +4981,6 @@ StartupXLOG(void)
         bool            wasShutdown;
         bool            reachedStopPoint = false;
         bool            reachedMinRecoveryPoint = false;
-       bool            performedRecovery = false;
         bool            haveBackupLabel = false;
         XLogRecPtr      RecPtr,
                                 LastRec,
@@ -5331,24 +5334,14 @@ StartupXLOG(void)
                                 {
                                         /*
                                          * We were requested to exit without finishing recovery.
-                                        *
-                                        * XXX: We should update minRecoveryPoint to the exact
-                                        * value here.
                                          */
-                                       UpdateMinRecoveryPoint(EndRecPtr);
+                                       UpdateMinRecoveryPoint(ReadRecPtr);
                                         proc_exit(0);
                                 }
  
                                 /*
                                  * Have we reached our safe starting point? If so, we can
-                                * signal postmaster to enter consistent recovery mode.
-                                * XXX
-                                * There are two points in the log we must pass. The first is
-                                * the minRecoveryPoint, which is the LSN at the time the
-                                * base backup was taken that we are about to rollfoward from.
-                                * If recovery has ever crashed or was stopped there is 
-                                * another point also: minSafeStartPoint, which is the
-                                * latest LSN that recovery could have reached prior to crash.
+                                * tell postmaster that the database is consistent now.
                                  */
                                 if (!reachedMinRecoveryPoint && 
                                          XLByteLE(minRecoveryPoint, EndRecPtr))
@@ -5437,7 +5430,7 @@ StartupXLOG(void)
          * Complain if we did not roll forward far enough to render the backup
          * dump consistent.
          */
-       if (InRecovery && !reachedMinRecoveryPoint)
+       if (InRecovery && XLByteLT(EndOfLog, minRecoveryPoint))
         {
                 if (reachedStopPoint)   /* stopped because of stop request */
                         ereport(FATAL,
@@ -5539,6 +5532,12 @@ StartupXLOG(void)
         /* Pre-scan prepared transactions to find out the range of XIDs present */
         oldestActiveXID = PrescanPreparedTransactions();
  
+       /*
+        * Allow writing WAL for us. But not for other backends! That's done
+        * after writing the shutdown checkpoint and finishing recovery.
+        */
+       LocalRecoveryProcessingMode = false;
+
         if (InRecovery)
         {
                 int                     rmid;
@@ -5559,14 +5558,30 @@ StartupXLOG(void)
                 XLogCheckInvalidPages();
  
                 /*
-                * Finally exit recovery and mark that in WAL. Pre-8.4 we wrote
-                * a shutdown checkpoint here, but we ask bgwriter to do that now.
+                * Perform a checkpoint to update all our recovery activity to disk.
+                *
+                * Note that we write a shutdown checkpoint rather than an on-line
+                * one. This is not particularly critical, but since we may be
+                * assigning a new TLI, using a shutdown checkpoint allows us to have
+                * the rule that TLI only changes in shutdown checkpoints, which
+                * allows some extra error checking in xlog_redo.
                  */
-               exitRecovery();
-
-               performedRecovery = true;
+               CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
         }
  
+       /*
+        * Preallocate additional log files, if wanted.
+        */
+       PreallocXlogFiles(EndOfLog);
+
+       InRecovery = false;
+
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       ControlFile->state = DB_IN_PRODUCTION;
+       ControlFile->time = (pg_time_t) time(NULL);
+       UpdateControlFile();
+       LWLockRelease(ControlFileLock);
+
         /* start the archive_timeout timer running */
         XLogCtl->Write.lastSegSwitchTime = (pg_time_t) time(NULL);
  
@@ -5605,38 +5620,9 @@ StartupXLOG(void)
         }
  
         /*
-        * If we had to replay any WAL records, request a checkpoint. This isn't
-        * strictly necessary: if we crash now, the recovery will simply restart
-        * from the same point as this time (or from the last restartpoint). The
-        * control file is left in DB_IN_*_RECOVERY state; the first checkpoint
-        * will change that to DB_IN_PRODUCTION.
+        * All done. Allow others to write WAL.
          */
-       if (performedRecovery)
-       {
-               /*
-                * Okay, we can come up now. Allow others to write WAL.
-                */
-               XLogCtl->SharedRecoveryProcessingMode = false;
-
-               RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_IMMEDIATE |
-                                                 CHECKPOINT_STARTUP);
-       }
-       else
-       {
-               /*
-                * No recovery, so let's just get on with it. 
-                */
-               LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-               ControlFile->state = DB_IN_PRODUCTION;
-               ControlFile->time = (pg_time_t) time(NULL);
-               UpdateControlFile();
-               LWLockRelease(ControlFileLock);
-
-               /*
-                * Okay, we're officially UP.
-                */
-               XLogCtl->SharedRecoveryProcessingMode = false;
-       }
+       XLogCtl->SharedRecoveryProcessingMode = false;
  }
  
  /*
@@ -5946,7 +5932,6 @@ LogCheckpointStart(int flags, bool restartpoint)
  
         elog(LOG, msg,
                  (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
-                (flags & CHECKPOINT_STARTUP) ? " startup" : "",
                  (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
                  (flags & CHECKPOINT_FORCE) ? " force" : "",
                  (flags & CHECKPOINT_WAIT) ? " wait" : "",
@@ -6030,7 +6015,6 @@ CreateCheckPoint(int flags)
         uint32          _logSeg;
         TransactionId *inCommitXids;
         int                     nInCommit;
-       bool            leavingArchiveRecovery;
  
         /* shouldn't happen */
         if (IsRecoveryProcessingMode())
@@ -6044,13 +6028,6 @@ CreateCheckPoint(int flags)
          */
         LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
  
-       /*
-        * Find out if this is the first checkpoint after archive recovery.
-        */
-       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-       leavingArchiveRecovery = (ControlFile->state == DB_IN_ARCHIVE_RECOVERY);
-       LWLockRelease(ControlFileLock);
-
         /*
          * Prepare to accumulate statistics.
          *
@@ -6284,10 +6261,6 @@ CreateCheckPoint(int flags)
          * if this is the first checkpoint after recovery.
          */
         LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-       if (shutdown)
-               ControlFile->state = DB_SHUTDOWNED;
-       else
-               ControlFile->state = DB_IN_PRODUCTION;
         ControlFile->prevCheckPoint = ControlFile->checkPoint;
         ControlFile->checkPoint = ProcLastRecPtr;
         ControlFile->checkPointCopy = checkPoint;
@@ -6295,21 +6268,6 @@ CreateCheckPoint(int flags)
         UpdateControlFile();
         LWLockRelease(ControlFileLock);
  
-       if (leavingArchiveRecovery)
-       {
-               /*
-                * Rename the config file out of the way, so that we don't accidentally
-                * re-enter archive recovery mode in a subsequent crash. Prior to
-                * 8.4 this step was performed at end of exitArchiveRecovery().
-                */
-               unlink(RECOVERY_COMMAND_DONE);
-               if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                                       RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
-       }
-
         /* Update shared-memory copy of checkpoint XID/epoch */
         {
                 /* use volatile pointer to prevent code rearrangement */
@@ -6588,39 +6546,6 @@ RequestXLogSwitch(void)
         return RecPtr;
  }
  
-/*
- * exitRecovery()
- *
- * Exit recovery state and write a XLOG_RECOVERY_END record. This is the
- * only record type that can record a change of timelineID. We assume
- * caller has already set ThisTimeLineID, if appropriate.
- */
-static void
-exitRecovery(void)
-{
-       XLogRecData rdata;
-
-       rdata.buffer = InvalidBuffer;
-       rdata.data = (char *) (&ThisTimeLineID);
-       rdata.len = sizeof(TimeLineID);
-       rdata.next = NULL;
-
-       /*
-        * This is the only type of WAL message that can be inserted during
-        * recovery. This ensures that we don't allow others to get access
-        * until after we have changed state.
-        */
-       (void) XLogInsert(RM_XLOG_ID, XLOG_RECOVERY_END, &rdata);
-
-       /*
-        * We don't XLogFlush() here otherwise we'll end up zeroing the WAL
-        * file ourselves. So just let bgwriter's forthcoming checkpoint do
-        * that for us.
-        */
-
-       InRecovery = false;
-}
-
  /*
   * XLOG resource manager's routines
   *
@@ -6669,33 +6594,6 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
  
                 RecoveryRestartPoint(&checkPoint);
         }
-       else if (info == XLOG_RECOVERY_END)
-       {
-               TimeLineID      tli;
-
-               memcpy(&tli, XLogRecGetData(record), sizeof(TimeLineID));
-
-               /*
-                * TLI may change when recovery ends, but it shouldn't decrease.
-                *
-                * This is the only WAL record that can tell us to change timelineID
-                * while we process WAL records. 
-                *
-                * We can *choose* to stop recovery at any point, generating a
-                * new timelineID which is recorded using this record type.
-                */
-               if (tli != ThisTimeLineID)
-               {
-                       if (tli < ThisTimeLineID ||
-                               !list_member_int(expectedTLIs,
-                                                                (int) tli))
-                               ereport(PANIC,
-                                               (errmsg("unexpected timeline ID %u (after %u) at recovery end record",
-                                                               tli, ThisTimeLineID)));
-                       /* Following WAL records should be run with new TLI */
-                       ThisTimeLineID = tli;
-               }
-       }
         else if (info == XLOG_CHECKPOINT_ONLINE)
         {
                 CheckPoint      checkPoint;
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c

index 4c8c54c587b8822bd2f7f02c0108f9d6003c3f7b..d38e0c645230d7bdcc9a694432cd5ca112726b25 100644 (file)
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -889,7 +889,6 @@ BgWriterShmemInit(void)
   *
   * flags is a bitwise OR of the following:
   *     CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
- *     CHECKPOINT_IS_STARTUP: checkpoint is for database startup.
   *     CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
   *             ignoring checkpoint_completion_target parameter.
   *     CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occured
@@ -956,18 +955,6 @@ RequestCheckpoint(int flags)
         {
                 if (BgWriterShmem->bgwriter_pid == 0)
                 {
-                       /*
-                        * The only difference between a startup checkpoint and a normal
-                        * online checkpoint is that it's quite normal for the bgwriter
-                        * to not be up yet when the startup checkpoint is requested.
-                        * (it might be, though). That's ok, background writer will
-                        * perform the checkpoint as soon as it starts up.
-                        */
-                       if (flags & CHECKPOINT_STARTUP)
-                       {
-                               Assert(!(flags & CHECKPOINT_WAIT));
-                               break;
-                       }
                         if (ntries >= 20)               /* max wait 2.0 sec */
                         {
                                 elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h

index b97a6afbf0bb9a5804f4100eaee387272f8e78f8..2a9ed7078ef4ad31cb015d539b6cacd6df987f68 100644 (file)
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -168,14 +168,13 @@ extern bool XLOG_DEBUG;
  
  /* These directly affect the behavior of CreateCheckPoint and subsidiaries */
  #define CHECKPOINT_IS_SHUTDOWN 0x0001  /* Checkpoint is for shutdown */
-#define CHECKPOINT_IS_STARTUP  0x0002  /* Startup checkpoint */
-#define CHECKPOINT_IMMEDIATE   0x0003  /* Do it without delays */
-#define CHECKPOINT_FORCE               0x0008  /* Force even if no activity */
+#define CHECKPOINT_IMMEDIATE   0x0002  /* Do it without delays */
+#define CHECKPOINT_FORCE               0x0004  /* Force even if no activity */
  /* These are important to RequestCheckpoint */
-#define CHECKPOINT_WAIT                        0x0010  /* Wait for completion */
+#define CHECKPOINT_WAIT                        0x0008  /* Wait for completion */
  /* These indicate the cause of a checkpoint request */
-#define CHECKPOINT_CAUSE_XLOG  0x0020  /* XLOG consumption */
-#define CHECKPOINT_CAUSE_TIME  0x0040  /* Elapsed time */
+#define CHECKPOINT_CAUSE_XLOG  0x0010  /* XLOG consumption */
+#define CHECKPOINT_CAUSE_TIME  0x0020  /* Elapsed time */
  
  /* Checkpoint statistics */
  typedef struct CheckpointStatsData
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h

index 275fc1dddf48db920190cbb47146d5be0ac7a00b..400f32c74919293aa86dea3d4eb212a314ce4aae 100644 (file)
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -46,7 +46,7 @@ typedef struct CheckPoint
  #define XLOG_NOOP                                              0x20
  #define XLOG_NEXTOID                                   0x30
  #define XLOG_SWITCH                                            0x40
-#define XLOG_RECOVERY_END                      0x50
+
  
  /* System status indicator */
  typedef enum DBState
author	Heikki Linnakangas <[email protected]>
	Thu, 5 Feb 2009 11:21:20 +0000 (13:21 +0200)
committer	Heikki Linnakangas <[email protected]>
	Thu, 5 Feb 2009 11:21:20 +0000 (13:21 +0200)
src/backend/access/transam/xlog.c		patch | blob | blame | history
src/backend/postmaster/bgwriter.c		patch | blob | blame | history
src/include/access/xlog.h		patch | blob | blame | history
src/include/catalog/pg_control.h		patch | blob | blame | history