From 1e616f639156b2431725f7823c999486ca46c1ea Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 2 Dec 2011 10:49:54 +0200 Subject: [PATCH] During recovery, if we reach consistent state and still have entries in the invalid-page hash table, PANIC immediately. Immediate PANIC is much better than waiting for end-of-recovery, which is what we did before, because the end-of-recovery might not come until months later if this is a standby server. Also refrain from creating a restartpoint if there are invalid-page entries in the hash table. Restarting recovery from such a restartpoint would not see the invalid references, and wouldn't be able to cross-check them when consistency is reached. That wouldn't matter when things are going smoothly, but the more sanity checks you have the better. Fujii Masao --- src/backend/access/transam/xlog.c | 32 +++++++++---- src/backend/access/transam/xlogutils.c | 63 ++++++++++++++++++-------- src/include/access/xlog.h | 2 + src/include/access/xlogutils.h | 1 + 4 files changed, 70 insertions(+), 28 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 20c04240b7..9bec660992 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -562,7 +562,7 @@ static TimeLineID lastPageTLI = 0; static XLogRecPtr minRecoveryPoint; /* local copy of * ControlFile->minRecoveryPoint */ static bool updateMinRecoveryPoint = true; -static bool reachedMinRecoveryPoint = false; +bool reachedMinRecoveryPoint = false; static bool InRedo = false; @@ -6758,12 +6758,6 @@ StartupXLOG(void) /* Disallow XLogInsert again */ LocalXLogInsertAllowed = -1; - /* - * Check to see if the XLOG sequence contained any unresolved - * references to uninitialized pages. - */ - XLogCheckInvalidPages(); - /* * Perform a checkpoint to update all our recovery activity to disk. * @@ -6906,6 +6900,12 @@ CheckRecoveryConsistency(void) XLByteLE(minRecoveryPoint, EndRecPtr) && XLogRecPtrIsInvalid(ControlFile->backupStartPoint)) { + /* + * Check to see if the XLOG sequence contained any unresolved + * references to uninitialized pages. + */ + XLogCheckInvalidPages(); + reachedMinRecoveryPoint = true; ereport(LOG, (errmsg("consistent recovery state reached at %X/%X", @@ -7907,7 +7907,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint) volatile XLogCtlData *xlogctl = XLogCtl; /* - * Is it safe to checkpoint? We must ask each of the resource managers + * Is it safe to restartpoint? We must ask each of the resource managers * whether they have any partial state information that might prevent a * correct restart from this point. If so, we skip this opportunity, but * return at the next checkpoint record for another try. @@ -7926,6 +7926,22 @@ RecoveryRestartPoint(const CheckPoint *checkPoint) } } + /* + * Also refrain from creating a restartpoint if we have seen any references + * to non-existent pages. Restarting recovery from the restartpoint would + * not see the references, so we would lose the cross-check that the pages + * belonged to a relation that was dropped later. + */ + if (XLogHaveInvalidPages()) + { + elog(trace_recovery(DEBUG2), + "could not record restart point at %X/%X because there " + "are unresolved references to invalid pages", + checkPoint->redo.xlogid, + checkPoint->redo.xrecoff); + return; + } + /* * Copy the checkpoint record to shared memory, so that checkpointer * can work out the next time it wants to perform a restartpoint. diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 803d390772..350d434562 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -52,6 +52,22 @@ typedef struct xl_invalid_page static HTAB *invalid_page_tab = NULL; +/* Report a reference to an invalid page */ +static void +report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno, + BlockNumber blkno, bool present) +{ + char *path = relpathperm(node, forkno); + + if (present) + elog(elevel, "page %u of relation %s is uninitialized", + blkno, path); + else + elog(elevel, "page %u of relation %s does not exist", + blkno, path); + pfree(path); +} + /* Log a reference to an invalid page */ static void log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno, @@ -61,23 +77,27 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno, xl_invalid_page *hentry; bool found; + /* + * Once recovery has reached a consistent state, the invalid-page table + * should be empty and remain so. If a reference to an invalid page is + * found after consistency is reached, PANIC immediately. This might + * seem aggressive, but it's better than letting the invalid reference + * linger in the hash table until the end of recovery and PANIC there, + * which might come only much later if this is a standby server. + */ + if (reachedMinRecoveryPoint) + { + report_invalid_page(WARNING, node, forkno, blkno, present); + elog(PANIC, "WAL contains references to invalid pages"); + } + /* * Log references to invalid pages at DEBUG1 level. This allows some * tracing of the cause (note the elog context mechanism will tell us * something about the XLOG record that generated the reference). */ if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1) - { - char *path = relpathperm(node, forkno); - - if (present) - elog(DEBUG1, "page %u of relation %s is uninitialized", - blkno, path); - else - elog(DEBUG1, "page %u of relation %s does not exist", - blkno, path); - pfree(path); - } + report_invalid_page(DEBUG1, node, forkno, blkno, present); if (invalid_page_tab == NULL) { @@ -181,6 +201,16 @@ forget_invalid_pages_db(Oid dbid) } } +/* Are there any unresolved references to invalid pages? */ +bool +XLogHaveInvalidPages(void) +{ + if (invalid_page_tab != NULL && + hash_get_num_entries(invalid_page_tab) > 0) + return true; + return false; +} + /* Complain about any remaining invalid-page entries */ void XLogCheckInvalidPages(void) @@ -200,15 +230,8 @@ XLogCheckInvalidPages(void) */ while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL) { - char *path = relpathperm(hentry->key.node, hentry->key.forkno); - - if (hentry->present) - elog(WARNING, "page %u of relation %s was uninitialized", - hentry->key.blkno, path); - else - elog(WARNING, "page %u of relation %s did not exist", - hentry->key.blkno, path); - pfree(path); + report_invalid_page(WARNING, hentry->key.node, hentry->key.forkno, + hentry->key.blkno, hentry->present); foundone = true; } diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 6344a850dc..1fb56cdfd5 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -190,6 +190,8 @@ typedef enum extern XLogRecPtr XactLastRecEnd; +extern bool reachedMinRecoveryPoint; + /* these variables are GUC parameters related to XLOG */ extern int CheckPointSegments; extern int wal_keep_segments; diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index 5a26d26889..c838c9ebda 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -14,6 +14,7 @@ #include "storage/bufmgr.h" +extern bool XLogHaveInvalidPages(void); extern void XLogCheckInvalidPages(void); extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum); -- 2.39.5