Write exact xlog position of timeline switch in the timeline history file.
author Heikki Linnakangas <[email protected]>
Tue, 4 Dec 2012 13:28:58 +0000 (15:28 +0200)
committer Heikki Linnakangas <[email protected]>
Tue, 4 Dec 2012 15:29:07 +0000 (17:29 +0200)
This allows us to do some more rigorous sanity checking for various
incorrect point-in-time recovery scenarios, and provides more information
for debugging purposes. It will also come in handy in the upcoming patch to
allow timeline switches to be replicated by streaming replication.

src/backend/access/transam/timeline.c
src/backend/access/transam/xlog.c
src/include/access/timeline.h

index 225ce465f7fdf116e0dd8b77ed63d2e1426aa507..324b6c18601c2cbbdb10d2fcb580a13f90c20438 100644 (file)
  *
  * Each line in the file represents a timeline switch:
  *
- * <parentTLI> <xlogfname> <reason>
+ * <parentTLI> <switchpoint> <reason>
  *
  *     parentTLI       ID of the parent timeline
- *     xlogfname       filename of the WAL segment where the switch happened
+ *     switchpoint     XLogRecPtr of the WAL position where the switch happened
  *     reason          human-readable explanation of why the timeline was changed
  *
  * The fields are separated by tabs. Lines beginning with # are comments, and
@@ -56,10 +56,18 @@ readTimeLineHistory(TimeLineID targetTLI)
        char            histfname[MAXFNAMELEN];
        char            fline[MAXPGPATH];
        FILE       *fd;
+       TimeLineHistoryEntry *entry;
+       TimeLineID      lasttli = 0;
+       XLogRecPtr      prevend;
 
        /* Timeline 1 does not have a history file, so no need to check */
        if (targetTLI == 1)
-               return list_make1_int((int) targetTLI);
+       {
+               entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+               entry->tli = targetTLI;
+               entry->begin = entry->end = InvalidXLogRecPtr;
+               return list_make1(entry);
+       }
 
        if (InArchiveRecovery)
        {
@@ -77,7 +85,10 @@ readTimeLineHistory(TimeLineID targetTLI)
                                        (errcode_for_file_access(),
                                         errmsg("could not open file \"%s\": %m", path)));
                /* Not there, so assume no parents */
-               return list_make1_int((int) targetTLI);
+               entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+               entry->tli = targetTLI;
+               entry->begin = entry->end = InvalidXLogRecPtr;
+               return list_make1(entry);
        }
 
        result = NIL;
@@ -85,12 +96,15 @@ readTimeLineHistory(TimeLineID targetTLI)
        /*
         * Parse the file...
         */
+       prevend = InvalidXLogRecPtr;
        while (fgets(fline, sizeof(fline), fd) != NULL)
        {
                /* skip leading whitespace and check for # comment */
                char       *ptr;
-               char       *endptr;
                TimeLineID      tli;
+               uint32          switchpoint_hi;
+               uint32          switchpoint_lo;
+               int                     nfields;
 
                for (ptr = fline; *ptr; ptr++)
                {
@@ -100,38 +114,56 @@ readTimeLineHistory(TimeLineID targetTLI)
                if (*ptr == '\0' || *ptr == '#')
                        continue;
 
-               /* expect a numeric timeline ID as first field of line */
-               tli = (TimeLineID) strtoul(ptr, &endptr, 0);
-               if (endptr == ptr)
+               nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
+
+               if (nfields < 1)
+               {
+                       /* expect a numeric timeline ID as first field of line */
                        ereport(FATAL,
                                        (errmsg("syntax error in history file: %s", fline),
                                         errhint("Expected a numeric timeline ID.")));
+               }
+               if (nfields != 3)
+                       ereport(FATAL,
+                                       (errmsg("syntax error in history file: %s", fline),
+                                        errhint("Expected an XLOG switchpoint location.")));
 
-               if (result &&
-                       tli <= (TimeLineID) linitial_int(result))
+               if (result && tli <= lasttli)
                        ereport(FATAL,
                                        (errmsg("invalid data in history file: %s", fline),
                                   errhint("Timeline IDs must be in increasing sequence.")));
 
+               lasttli = tli;
+
+               entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+               entry->tli = tli;
+               entry->begin = prevend;
+               entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
+               prevend = entry->end;
+
                /* Build list with newest item first */
-               result = lcons_int((int) tli, result);
+               result = lcons(entry, result);
 
                /* we ignore the remainder of each line */
        }
 
        FreeFile(fd);
 
-       if (result &&
-               targetTLI <= (TimeLineID) linitial_int(result))
+       if (result && targetTLI <= lasttli)
                ereport(FATAL,
                                (errmsg("invalid data in history file \"%s\"", path),
                        errhint("Timeline IDs must be less than child timeline's ID.")));
 
-       result = lcons_int((int) targetTLI, result);
+       /*
+        * Create one more entry for the "tip" of the timeline, which has no
+        * entry in the history file.
+        */
+       entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+       entry->tli = targetTLI;
+       entry->begin = prevend;
+       entry->end = InvalidXLogRecPtr;
 
-       ereport(DEBUG3,
-                       (errmsg_internal("history of timeline %u is %s",
-                                                        targetTLI, nodeToString(result))));
+       result = lcons(entry, result);
 
        return result;
 }
@@ -214,7 +246,7 @@ findNewestTimeLine(TimeLineID startTLI)
  *
  *     newTLI: ID of the new timeline
  *     parentTLI: ID of its immediate parent
- *     endTLI et al: ID of the last used WAL file, for annotation purposes
+ *     switchpoint: XLOG position where the system switched to the new timeline
  *     reason: human-readable explanation of why the timeline was switched
  *
  * Currently this is only used at the end recovery, and so there are no locking
@@ -223,12 +255,11 @@ findNewestTimeLine(TimeLineID startTLI)
  */
 void
 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
-                                        TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason)
+                                        XLogRecPtr switchpoint, char *reason)
 {
        char            path[MAXPGPATH];
        char            tmppath[MAXPGPATH];
        char            histfname[MAXFNAMELEN];
-       char            xlogfname[MAXFNAMELEN];
        char            buffer[BLCKSZ];
        int                     srcfd;
        int                     fd;
@@ -313,13 +344,11 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
         * If we did have a parent file, insert an extra newline just in case the
         * parent file failed to end with one.
         */
-       XLogFileName(xlogfname, endTLI, endLogSegNo);
-
        snprintf(buffer, sizeof(buffer),
-                        "%s%u\t%s\t%s\n",
+                        "%s%u\t%X/%X\t%s\n",
                         (srcfd < 0) ? "" : "\n",
                         parentTLI,
-                        xlogfname,
+                        (uint32) (switchpoint >> 32), (uint32) (switchpoint),
                         reason);
 
        nbytes = strlen(buffer);
@@ -380,3 +409,70 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
        TLHistoryFileName(histfname, newTLI);
        XLogArchiveNotify(histfname);
 }
+
+/*
+ * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
+ */
+bool
+tliInHistory(TimeLineID tli, List *expectedTLEs)
+{
+       ListCell *cell;
+
+       foreach(cell, expectedTLEs)
+       {
+               if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Returns the ID of the timeline in use at a particular point in time, in
+ * the given timeline history.
+ */
+TimeLineID
+tliOfPointInHistory(XLogRecPtr ptr, List *history)
+{
+       ListCell *cell;
+
+       foreach(cell, history)
+       {
+               TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
+               if ((XLogRecPtrIsInvalid(tle->begin) || XLByteLE(tle->begin, ptr)) &&
+                       (XLogRecPtrIsInvalid(tle->end) || XLByteLT(ptr, tle->end)))
+               {
+                       /* found it */
+                       return tle->tli;
+               }
+       }
+
+       /* shouldn't happen. */
+       elog(ERROR, "timeline history was not contiguous");
+       return 0;       /* keep compiler quiet */
+}
+
+/*
+ * Returns the point in history where we branched off the given timeline.
+ * Returns InvalidXLogRecPtr if the timeline is current (= we have not
+ * branched off from it), and throws an error if the timeline is not part of
+ * this server's history.
+ */
+XLogRecPtr
+tliSwitchPoint(TimeLineID tli, List *history)
+{
+       ListCell   *cell;
+
+       foreach (cell, history)
+       {
+               TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
+
+               if (tle->tli == tli)
+                       return tle->end;
+       }
+
+       ereport(ERROR,
+                       (errmsg("requested timeline %u is not in this server's history",
+                                       tli)));
+       return InvalidXLogRecPtr; /* keep compiler quiet */
+}
index b3356fd43491bed3b4b732cc37d265abeb10eac7..d60c2a3bfc5bcf0e464a86511b48cd3f1a2ca9cf 100644 (file)
@@ -226,7 +226,7 @@ static bool recoveryStopAfter;
  *
  * recoveryTargetIsLatest: was the requested target timeline 'latest'?
  *
- * expectedTLIs: an integer list of recoveryTargetTLI and the TLIs of
+ * expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and the timelines of
  * its known parents, newest first (so recoveryTargetTLI is always the
  * first list member). Only these TLIs are expected to be seen in the WAL
  * segments we read, and indeed only these TLIs will be considered as
@@ -240,7 +240,7 @@ static bool recoveryStopAfter;
  */
 static TimeLineID recoveryTargetTLI;
 static bool recoveryTargetIsLatest = false;
-static List *expectedTLIs;
+static List *expectedTLEs;
 static TimeLineID curFileTLI;
 
 /*
@@ -2515,7 +2515,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
 
        /*
         * Prefer link() to rename() here just to be really sure that we don't
-        * overwrite an existing logfile.  However, there shouldn't be one, so
+        * overwrite an existing file.  However, there shouldn't be one, so
         * rename() is an acceptable substitute except for the truly paranoid.
         */
 #if HAVE_WORKING_LINK
@@ -2716,7 +2716,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
 /*
  * Open a logfile segment for reading (during recovery).
  *
- * This version searches for the segment with any TLI listed in expectedTLIs.
+ * This version searches for the segment with any TLI listed in expectedTLEs.
  */
 static int
 XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source)
@@ -2727,7 +2727,7 @@ XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source)
 
        /*
         * Loop looking for a suitable timeline ID: we might need to read any of
-        * the timelines listed in expectedTLIs.
+        * the timelines listed in expectedTLEs.
         *
         * We expect curFileTLI on entry to be the TLI of the preceding file in
         * sequence, or 0 if there was no predecessor.  We do not allow curFileTLI
@@ -2735,9 +2735,9 @@ XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source)
         * parent timeline extends to higher segment numbers than the child we
         * want to read.
         */
-       foreach(cell, expectedTLIs)
+       foreach(cell, expectedTLEs)
        {
-               TimeLineID      tli = (TimeLineID) lfirst_int(cell);
+               TimeLineID      tli = ((TimeLineHistoryEntry *) lfirst(cell))->tli;
 
                if (tli < curFileTLI)
                        break;                          /* don't bother looking at too-old TLIs */
@@ -3344,7 +3344,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
                /*
                 * Since we are going to a random position in WAL, forget any prior
                 * state about what timeline we were in, and allow it to be any
-                * timeline in expectedTLIs.  We also set a flag to allow curFileTLI
+                * timeline in expectedTLEs.  We also set a flag to allow curFileTLI
                 * to go backwards (but we can't reset that variable right here, since
                 * we might not change files at all).
                 */
@@ -3675,7 +3675,7 @@ ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly)
        /*
         * Check page TLI is one of the expected values.
         */
-       if (!list_member_int(expectedTLIs, (int) hdr->xlp_tli))
+       if (!tliInHistory(hdr->xlp_tli, expectedTLEs))
        {
                ereport(emode_for_corrupt_record(emode, recaddr),
                                (errmsg("unexpected timeline ID %u in log segment %s, offset %u",
@@ -3812,57 +3812,86 @@ ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode,
 static bool
 rescanLatestTimeLine(void)
 {
+       List       *newExpectedTLEs;
+       bool            found;
+       ListCell   *cell;
        TimeLineID      newtarget;
+       TimeLineHistoryEntry *currentTle = NULL;
+       /* use volatile pointer to prevent code rearrangement */
+       volatile XLogCtlData *xlogctl = XLogCtl;
 
        newtarget = findNewestTimeLine(recoveryTargetTLI);
-       if (newtarget != recoveryTargetTLI)
+       if (newtarget == recoveryTargetTLI)
        {
-               /*
-                * Determine the list of expected TLIs for the new TLI
-                */
-               List       *newExpectedTLIs;
-
-               newExpectedTLIs = readTimeLineHistory(newtarget);
+               /* No new timelines found */
+               return false;
+       }
 
-               /*
-                * If the current timeline is not part of the history of the new
-                * timeline, we cannot proceed to it.
-                *
-                * XXX This isn't foolproof: The new timeline might have forked from
-                * the current one, but before the current recovery location. In that
-                * case we will still switch to the new timeline and proceed replaying
-                * from it even though the history doesn't match what we already
-                * replayed. That's not good. We will likely notice at the next online
-                * checkpoint, as the TLI won't match what we expected, but it's not
-                * guaranteed. The admin needs to make sure that doesn't happen.
-                */
-               if (!list_member_int(newExpectedTLIs,
-                                                        (int) recoveryTargetTLI))
-                       ereport(LOG,
-                                       (errmsg("new timeline %u is not a child of database system timeline %u",
-                                                       newtarget,
-                                                       ThisTimeLineID)));
-               else
-               {
-                       /* use volatile pointer to prevent code rearrangement */
-                       volatile XLogCtlData *xlogctl = XLogCtl;
+       /*
+        * Determine the list of expected TLIs for the new TLI
+        */
 
-                       /* Switch target */
-                       recoveryTargetTLI = newtarget;
-                       list_free(expectedTLIs);
-                       expectedTLIs = newExpectedTLIs;
+       newExpectedTLEs = readTimeLineHistory(newtarget);
 
-                       SpinLockAcquire(&xlogctl->info_lck);
-                       xlogctl->RecoveryTargetTLI = recoveryTargetTLI;
-                       SpinLockRelease(&xlogctl->info_lck);
+       /*
+        * If the current timeline is not part of the history of the new
+        * timeline, we cannot proceed to it.
+        */
+       found = false;
+       foreach (cell, newExpectedTLEs)
+       {
+               currentTle = (TimeLineHistoryEntry *) lfirst(cell);
 
-                       ereport(LOG,
-                                       (errmsg("new target timeline is %u",
-                                                       recoveryTargetTLI)));
-                       return true;
+               if (currentTle->tli == recoveryTargetTLI)
+               {
+                       found = true;
+                       break;
                }
        }
-       return false;
+       if (!found)
+       {
+               ereport(LOG,
+                               (errmsg("new timeline %u is not a child of database system timeline %u",
+                                               newtarget,
+                                               ThisTimeLineID)));
+               return false;
+       }
+
+       /*
+        * The current timeline was found in the history file, but check that the
+        * next timeline was forked off from it *after* the current recovery
+        * location.
+        */
+       if (XLByteLT(currentTle->end, EndRecPtr))
+       {
+               ereport(LOG,
+                               (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
+                                               newtarget,
+                                               ThisTimeLineID,
+                                               (uint32) (EndRecPtr >> 32), (uint32) EndRecPtr)));
+               return false;
+       }
+
+       /* The new timeline history seems valid. Switch target */
+       recoveryTargetTLI = newtarget;
+       list_free_deep(expectedTLEs);
+       expectedTLEs = newExpectedTLEs;
+
+       SpinLockAcquire(&xlogctl->info_lck);
+       xlogctl->RecoveryTargetTLI = recoveryTargetTLI;
+       SpinLockRelease(&xlogctl->info_lck);
+
+       ereport(LOG,
+                       (errmsg("new target timeline is %u",
+                                       recoveryTargetTLI)));
+
+       /*
+        * Wake up any walsenders to notice that we have a new target timeline.
+        */
+       if (AllowCascadeReplication())
+               WalSndWakeup();
+
+       return true;
 }
 
 /*
@@ -5300,26 +5329,41 @@ StartupXLOG(void)
        readRecoveryCommandFile();
 
        /* Now we can determine the list of expected TLIs */
-       expectedTLIs = readTimeLineHistory(recoveryTargetTLI);
+       expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
 
        /*
-        * If pg_control's timeline is not in expectedTLIs, then we cannot
-        * proceed: the backup is not part of the history of the requested
-        * timeline.
+        * If the location of the checkpoint record is not on the expected
+        * timeline in the history of the requested timeline, we cannot proceed:
+        * the backup is not part of the history of the requested timeline.
         */
-       if (!list_member_int(expectedTLIs,
-                                                (int) ControlFile->checkPointCopy.ThisTimeLineID))
+       if (tliOfPointInHistory(ControlFile->checkPoint, expectedTLEs) !=
+                       ControlFile->checkPointCopy.ThisTimeLineID)
+       {
+               XLogRecPtr switchpoint;
+
+               /*
+                * tliSwitchPoint will throw an error if the checkpoint's timeline
+                * is not in expectedTLEs at all.
+                */
+               switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs);
                ereport(FATAL,
-                               (errmsg("requested timeline %u is not a child of database system timeline %u",
-                                               recoveryTargetTLI,
-                                               ControlFile->checkPointCopy.ThisTimeLineID)));
+                               (errmsg("requested timeline %u is not a child of this server's history",
+                                               recoveryTargetTLI),
+                                errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X",
+                                                  (uint32) (ControlFile->checkPoint >> 32),
+                                                  (uint32) ControlFile->checkPoint,
+                                                  ControlFile->checkPointCopy.ThisTimeLineID,
+                                                  (uint32) (switchpoint >> 32),
+                                                  (uint32) switchpoint)));
+       }
 
        /*
         * The min recovery point should be part of the requested timeline's
         * history, too.
         */
        if (!XLogRecPtrIsInvalid(ControlFile->minRecoveryPoint) &&
-               !list_member_int(expectedTLIs, ControlFile->minRecoveryPointTLI))
+               tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
+                       ControlFile->minRecoveryPointTLI)
                ereport(FATAL,
                                (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
                                                recoveryTargetTLI,
@@ -6026,8 +6070,8 @@ StartupXLOG(void)
                                (errmsg("selected new timeline ID: %u", ThisTimeLineID)));
 
                /*
-                * Write comment to history file to explain why and where timeline
-                * changed. Comment varies according to the recovery target used.
+                * Create a comment for the history file to explain why and where
+                * timeline changed.
                 */
                if (recoveryTarget == RECOVERY_TARGET_XID)
                        snprintf(reason, sizeof(reason),
@@ -6047,7 +6091,7 @@ StartupXLOG(void)
                        snprintf(reason, sizeof(reason), "no recovery target specified");
 
                writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
-                                                        curFileTLI, endLogSegNo, reason);
+                                                        EndRecPtr, reason);
        }
 
        /* Save the selected TimeLineID in shared memory, too */
@@ -7916,8 +7960,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
                         * decrease.
                         */
                        if (checkPoint.ThisTimeLineID < ThisTimeLineID ||
-                               !list_member_int(expectedTLIs,
-                                                                (int) checkPoint.ThisTimeLineID))
+                               !tliInHistory(checkPoint.ThisTimeLineID, expectedTLEs))
                                ereport(PANIC,
                                                (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
                                                                checkPoint.ThisTimeLineID, ThisTimeLineID)));
index f2a7658bc45006c4dfcdaf0f432f11bb5433d8d8..785195bd36a9caf740a77ed55f3de60869042ac8 100644 (file)
 #include "access/xlogdefs.h"
 #include "nodes/pg_list.h"
 
+/*
+ * A list of these structs describes the timeline history of the server. Each
+ * TimeLineHistoryEntry represents a piece of WAL belonging to the history,
+ * from newest to oldest. All WAL positions between 'begin' and 'end' belong to
+ * the timeline represented by the entry. Together the 'begin' and 'end'
+ * pointers of all the entries form a contiguous line from beginning of time
+ * to infinity.
+ */
+typedef struct
+{
+       TimeLineID      tli;
+       XLogRecPtr      begin;  /* inclusive */
+       XLogRecPtr      end;    /* exclusive, 0 means infinity */
+} TimeLineHistoryEntry;
+
 extern List *readTimeLineHistory(TimeLineID targetTLI);
 extern bool existsTimeLineHistory(TimeLineID probeTLI);
 extern TimeLineID findNewestTimeLine(TimeLineID startTLI);
 extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
-                                        TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason);
+                                        XLogRecPtr switchpoint, char *reason);
+extern bool tliInHistory(TimeLineID tli, List *expectedTLIs);
+extern TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history);
+extern XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history);
 
 #endif   /* TIMELINE_H */