Bring back startup checkpoints. Plus some other small changes
authorHeikki Linnakangas <[email protected]>
Thu, 5 Feb 2009 11:21:20 +0000 (13:21 +0200)
committerHeikki Linnakangas <[email protected]>
Thu, 5 Feb 2009 11:21:20 +0000 (13:21 +0200)
src/backend/access/transam/xlog.c
src/backend/postmaster/bgwriter.c
src/include/access/xlog.h
src/include/catalog/pg_control.h

index 961bcf3c439f1a547a4ec4436aeac2ff5db352da..0f647de5aa708d9c361cc7a7c9e5a176caf38d8d 100644 (file)
@@ -422,7 +422,7 @@ static XLogRecPtr EndRecPtr;        /* end+1 of last record read. Also in shared mem */
 static XLogRecord *nextRecord = NULL;
 static TimeLineID lastPageTLI = 0;
 static XLogRecPtr minRecoveryPoint; /* local copy of ControlFile->minRecoveryPoint */
-static bool              updateMinRecoveryPoint = true;
+static bool    updateMinRecoveryPoint = true;
 
 static bool InRedo = false;
 
@@ -440,7 +440,6 @@ static void XLogArchiveCleanup(const char *xlog);
 static void readRecoveryCommandFile(void);
 static void exitArchiveRecovery(TimeLineID endTLI,
                                        uint32 endLogId, uint32 endLogSeg);
-static void exitRecovery(void);
 static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
 static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
 
@@ -526,10 +525,9 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
        bool            updrqst;
        bool            doPageWrites;
        bool            isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
-       bool            isRecoveryEnd = (rmid == RM_XLOG_ID && info == XLOG_RECOVERY_END);
 
        /* cross-check on whether we should be here or not */
-       if (IsRecoveryProcessingMode() && !isRecoveryEnd)
+       if (IsRecoveryProcessingMode())
                elog(FATAL, "cannot make new WAL entries during recovery");
 
        /* info's high bits are reserved for use by me */
@@ -1826,7 +1824,10 @@ XLogFlush(XLogRecPtr record)
        XLogRecPtr      WriteRqstPtr;
        XLogwrtRqst WriteRqst;
 
-       /* During REDO, we don't try to flush the WAL, but update minRecoveryPoint instead */
+       /*
+        * During REDO, we don't try to flush the WAL, but update minRecoveryPoint
+        * instead.
+        */
        if (IsRecoveryProcessingMode())
        {
                UpdateMinRecoveryPoint(record);
@@ -1930,7 +1931,7 @@ XLogFlush(XLogRecPtr record)
         * and so we will not force a restart for a bad LSN on a data page.
         */
        if (XLByteLT(LogwrtResult.Flush, record))
-               elog(ERROR,
+               elog(InRecovery ? WARNING : ERROR,
                "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
                         record.xlogid, record.xrecoff,
                         LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
@@ -2508,6 +2509,7 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
                        snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
                                         xlogfname);
                        set_ps_display(activitymsg, false);
+
                        return fd;
                }
                if (errno != ENOENT)    /* unexpected failure? */
@@ -2788,7 +2790,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
         */
        if (shutdown_requested && InRedo)
        {
-               /* XXX: We should update minRecoveryPoint to the exact value here */
+               /* XXX: Is EndRecPtr always the right value? */
                UpdateMinRecoveryPoint(EndRecPtr);
                proc_exit(0);
        }
@@ -4835,13 +4837,15 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
        unlink(recoveryPath);           /* ignore any error */
 
        /*
-        * As of 8.4 we no longer rename the recovery.conf file out of the
-        * way until after we have performed a full checkpoint. This ensures
-        * that any crash between now and the end of the checkpoint does not
-        * attempt to restart from a WAL file that is no longer available to us.
-        * As soon as we remove recovery.conf we lose our recovery_command and
-        * cannot reaccess WAL files from the archive.
+        * Rename the config file out of the way, so that we don't accidentally
+        * re-enter archive recovery mode after a subsequent crash.
         */
+       unlink(RECOVERY_COMMAND_DONE);
+       if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
+               ereport(FATAL,
+                               (errcode_for_file_access(),
+                                errmsg("could not rename file \"%s\" to \"%s\": %m",
+                                               RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
 
        ereport(LOG,
                        (errmsg("archive recovery complete")));
@@ -4977,7 +4981,6 @@ StartupXLOG(void)
        bool            wasShutdown;
        bool            reachedStopPoint = false;
        bool            reachedMinRecoveryPoint = false;
-       bool            performedRecovery = false;
        bool            haveBackupLabel = false;
        XLogRecPtr      RecPtr,
                                LastRec,
@@ -5331,24 +5334,14 @@ StartupXLOG(void)
                                {
                                        /*
                                         * We were requested to exit without finishing recovery.
-                                        *
-                                        * XXX: We should update minRecoveryPoint to the exact
-                                        * value here.
                                         */
-                                       UpdateMinRecoveryPoint(EndRecPtr);
+                                       UpdateMinRecoveryPoint(ReadRecPtr);
                                        proc_exit(0);
                                }
 
                                /*
                                 * Have we reached our safe starting point? If so, we can
-                                * signal postmaster to enter consistent recovery mode.
-                                * XXX
-                                * There are two points in the log we must pass. The first is
-                                * the minRecoveryPoint, which is the LSN at the time the
-                                * base backup was taken that we are about to rollfoward from.
-                                * If recovery has ever crashed or was stopped there is 
-                                * another point also: minSafeStartPoint, which is the
-                                * latest LSN that recovery could have reached prior to crash.
+                                * tell postmaster that the database is consistent now.
                                 */
                                if (!reachedMinRecoveryPoint && 
                                         XLByteLE(minRecoveryPoint, EndRecPtr))
@@ -5437,7 +5430,7 @@ StartupXLOG(void)
         * Complain if we did not roll forward far enough to render the backup
         * dump consistent.
         */
-       if (InRecovery && !reachedMinRecoveryPoint)
+       if (InRecovery && XLByteLT(EndOfLog, minRecoveryPoint))
        {
                if (reachedStopPoint)   /* stopped because of stop request */
                        ereport(FATAL,
@@ -5539,6 +5532,12 @@ StartupXLOG(void)
        /* Pre-scan prepared transactions to find out the range of XIDs present */
        oldestActiveXID = PrescanPreparedTransactions();
 
+       /*
+        * Allow this process to write WAL, but not other backends yet. They are
+        * allowed only after the shutdown checkpoint is written and recovery ends.
+        */
+       LocalRecoveryProcessingMode = false;
+
        if (InRecovery)
        {
                int                     rmid;
@@ -5559,14 +5558,30 @@ StartupXLOG(void)
                XLogCheckInvalidPages();
 
                /*
-                * Finally exit recovery and mark that in WAL. Pre-8.4 we wrote
-                * a shutdown checkpoint here, but we ask bgwriter to do that now.
+                * Perform a checkpoint to flush all our recovery activity to disk.
+                *
+                * Note that we write a shutdown checkpoint rather than an on-line
+                * one. This is not particularly critical, but since we may be
+                * assigning a new TLI, using a shutdown checkpoint allows us to have
+                * the rule that TLI only changes in shutdown checkpoints, which
+                * allows some extra error checking in xlog_redo.
                 */
-               exitRecovery();
-
-               performedRecovery = true;
+               CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
        }
 
+       /*
+        * Preallocate additional log files, if wanted.
+        */
+       PreallocXlogFiles(EndOfLog);
+
+       InRecovery = false;
+
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       ControlFile->state = DB_IN_PRODUCTION;
+       ControlFile->time = (pg_time_t) time(NULL);
+       UpdateControlFile();
+       LWLockRelease(ControlFileLock);
+
        /* start the archive_timeout timer running */
        XLogCtl->Write.lastSegSwitchTime = (pg_time_t) time(NULL);
 
@@ -5605,38 +5620,9 @@ StartupXLOG(void)
        }
 
        /*
-        * If we had to replay any WAL records, request a checkpoint. This isn't
-        * strictly necessary: if we crash now, the recovery will simply restart
-        * from the same point as this time (or from the last restartpoint). The
-        * control file is left in DB_IN_*_RECOVERY state; the first checkpoint
-        * will change that to DB_IN_PRODUCTION.
+        * All done. Allow others to write WAL.
         */
-       if (performedRecovery)
-       {
-               /*
-                * Okay, we can come up now. Allow others to write WAL.
-                */
-               XLogCtl->SharedRecoveryProcessingMode = false;
-
-               RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_IMMEDIATE |
-                                                 CHECKPOINT_STARTUP);
-       }
-       else
-       {
-               /*
-                * No recovery, so let's just get on with it. 
-                */
-               LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-               ControlFile->state = DB_IN_PRODUCTION;
-               ControlFile->time = (pg_time_t) time(NULL);
-               UpdateControlFile();
-               LWLockRelease(ControlFileLock);
-
-               /*
-                * Okay, we're officially UP.
-                */
-               XLogCtl->SharedRecoveryProcessingMode = false;
-       }
+       XLogCtl->SharedRecoveryProcessingMode = false;
 }
 
 /*
@@ -5946,7 +5932,6 @@ LogCheckpointStart(int flags, bool restartpoint)
 
        elog(LOG, msg,
                 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
-                (flags & CHECKPOINT_STARTUP) ? " startup" : "",
                 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
                 (flags & CHECKPOINT_FORCE) ? " force" : "",
                 (flags & CHECKPOINT_WAIT) ? " wait" : "",
@@ -6030,7 +6015,6 @@ CreateCheckPoint(int flags)
        uint32          _logSeg;
        TransactionId *inCommitXids;
        int                     nInCommit;
-       bool            leavingArchiveRecovery;
 
        /* shouldn't happen */
        if (IsRecoveryProcessingMode())
@@ -6044,13 +6028,6 @@ CreateCheckPoint(int flags)
         */
        LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
 
-       /*
-        * Find out if this is the first checkpoint after archive recovery.
-        */
-       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-       leavingArchiveRecovery = (ControlFile->state == DB_IN_ARCHIVE_RECOVERY);
-       LWLockRelease(ControlFileLock);
-
        /*
         * Prepare to accumulate statistics.
         *
@@ -6284,10 +6261,6 @@ CreateCheckPoint(int flags)
         * if this is the first checkpoint after recovery.
         */
        LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-       if (shutdown)
-               ControlFile->state = DB_SHUTDOWNED;
-       else
-               ControlFile->state = DB_IN_PRODUCTION;
        ControlFile->prevCheckPoint = ControlFile->checkPoint;
        ControlFile->checkPoint = ProcLastRecPtr;
        ControlFile->checkPointCopy = checkPoint;
@@ -6295,21 +6268,6 @@ CreateCheckPoint(int flags)
        UpdateControlFile();
        LWLockRelease(ControlFileLock);
 
-       if (leavingArchiveRecovery)
-       {
-               /*
-                * Rename the config file out of the way, so that we don't accidentally
-                * re-enter archive recovery mode in a subsequent crash. Prior to
-                * 8.4 this step was performed at end of exitArchiveRecovery().
-                */
-               unlink(RECOVERY_COMMAND_DONE);
-               if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                                       RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
-       }
-
        /* Update shared-memory copy of checkpoint XID/epoch */
        {
                /* use volatile pointer to prevent code rearrangement */
@@ -6588,39 +6546,6 @@ RequestXLogSwitch(void)
        return RecPtr;
 }
 
-/*
- * exitRecovery()
- *
- * Exit recovery state and write a XLOG_RECOVERY_END record. This is the
- * only record type that can record a change of timelineID. We assume
- * caller has already set ThisTimeLineID, if appropriate.
- */
-static void
-exitRecovery(void)
-{
-       XLogRecData rdata;
-
-       rdata.buffer = InvalidBuffer;
-       rdata.data = (char *) (&ThisTimeLineID);
-       rdata.len = sizeof(TimeLineID);
-       rdata.next = NULL;
-
-       /*
-        * This is the only type of WAL message that can be inserted during
-        * recovery. This ensures that we don't allow others to get access
-        * until after we have changed state.
-        */
-       (void) XLogInsert(RM_XLOG_ID, XLOG_RECOVERY_END, &rdata);
-
-       /*
-        * We don't XLogFlush() here otherwise we'll end up zeroing the WAL
-        * file ourselves. So just let bgwriter's forthcoming checkpoint do
-        * that for us.
-        */
-
-       InRecovery = false;
-}
-
 /*
  * XLOG resource manager's routines
  *
@@ -6669,33 +6594,6 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 
                RecoveryRestartPoint(&checkPoint);
        }
-       else if (info == XLOG_RECOVERY_END)
-       {
-               TimeLineID      tli;
-
-               memcpy(&tli, XLogRecGetData(record), sizeof(TimeLineID));
-
-               /*
-                * TLI may change when recovery ends, but it shouldn't decrease.
-                *
-                * This is the only WAL record that can tell us to change timelineID
-                * while we process WAL records. 
-                *
-                * We can *choose* to stop recovery at any point, generating a
-                * new timelineID which is recorded using this record type.
-                */
-               if (tli != ThisTimeLineID)
-               {
-                       if (tli < ThisTimeLineID ||
-                               !list_member_int(expectedTLIs,
-                                                                (int) tli))
-                               ereport(PANIC,
-                                               (errmsg("unexpected timeline ID %u (after %u) at recovery end record",
-                                                               tli, ThisTimeLineID)));
-                       /* Following WAL records should be run with new TLI */
-                       ThisTimeLineID = tli;
-               }
-       }
        else if (info == XLOG_CHECKPOINT_ONLINE)
        {
                CheckPoint      checkPoint;
index 4c8c54c587b8822bd2f7f02c0108f9d6003c3f7b..d38e0c645230d7bdcc9a694432cd5ca112726b25 100644 (file)
@@ -889,7 +889,6 @@ BgWriterShmemInit(void)
  *
  * flags is a bitwise OR of the following:
  *     CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
- *     CHECKPOINT_IS_STARTUP: checkpoint is for database startup.
  *     CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
  *             ignoring checkpoint_completion_target parameter.
  *     CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occured
@@ -956,18 +955,6 @@ RequestCheckpoint(int flags)
        {
                if (BgWriterShmem->bgwriter_pid == 0)
                {
-                       /*
-                        * The only difference between a startup checkpoint and a normal
-                        * online checkpoint is that it's quite normal for the bgwriter
-                        * to not be up yet when the startup checkpoint is requested.
-                        * (it might be, though). That's ok, background writer will
-                        * perform the checkpoint as soon as it starts up.
-                        */
-                       if (flags & CHECKPOINT_STARTUP)
-                       {
-                               Assert(!(flags & CHECKPOINT_WAIT));
-                               break;
-                       }
                        if (ntries >= 20)               /* max wait 2.0 sec */
                        {
                                elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
index b97a6afbf0bb9a5804f4100eaee387272f8e78f8..2a9ed7078ef4ad31cb015d539b6cacd6df987f68 100644 (file)
@@ -168,14 +168,13 @@ extern bool XLOG_DEBUG;
 
 /* These directly affect the behavior of CreateCheckPoint and subsidiaries */
 #define CHECKPOINT_IS_SHUTDOWN 0x0001  /* Checkpoint is for shutdown */
-#define CHECKPOINT_IS_STARTUP  0x0002  /* Startup checkpoint */
-#define CHECKPOINT_IMMEDIATE   0x0003  /* Do it without delays */
-#define CHECKPOINT_FORCE               0x0008  /* Force even if no activity */
+#define CHECKPOINT_IMMEDIATE   0x0002  /* Do it without delays */
+#define CHECKPOINT_FORCE               0x0004  /* Force even if no activity */
 /* These are important to RequestCheckpoint */
-#define CHECKPOINT_WAIT                        0x0010  /* Wait for completion */
+#define CHECKPOINT_WAIT                        0x0008  /* Wait for completion */
 /* These indicate the cause of a checkpoint request */
-#define CHECKPOINT_CAUSE_XLOG  0x0020  /* XLOG consumption */
-#define CHECKPOINT_CAUSE_TIME  0x0040  /* Elapsed time */
+#define CHECKPOINT_CAUSE_XLOG  0x0010  /* XLOG consumption */
+#define CHECKPOINT_CAUSE_TIME  0x0020  /* Elapsed time */
 
 /* Checkpoint statistics */
 typedef struct CheckpointStatsData
index 275fc1dddf48db920190cbb47146d5be0ac7a00b..400f32c74919293aa86dea3d4eb212a314ce4aae 100644 (file)
@@ -46,7 +46,7 @@ typedef struct CheckPoint
 #define XLOG_NOOP                                              0x20
 #define XLOG_NEXTOID                                   0x30
 #define XLOG_SWITCH                                            0x40
-#define XLOG_RECOVERY_END                      0x50
+
 
 /* System status indicator */
 typedef enum DBState