If bgwriter skips a restartpoint because we haven't seen any new checkpoint
authorHeikki Linnakangas <[email protected]>
Mon, 9 Feb 2009 08:13:19 +0000 (10:13 +0200)
committerHeikki Linnakangas <[email protected]>
Mon, 9 Feb 2009 08:14:10 +0000 (10:14 +0200)
records since last restartpoint, try again in 15 s, instead of
checkpoint_timeout. Plus some comment changes

src/backend/access/transam/xlog.c
src/backend/postmaster/bgwriter.c
src/include/access/xlog.h

index 1772f193013025bf9a9508136b124b2201f09ed3..bcc54eb1746adbd7be3359beaccd66e99f744659 100644 (file)
@@ -338,7 +338,7 @@ typedef struct XLogCtlData
         * here. It's used by the background writer when it wants to create
         * a restartpoint.
         *
-        * is info_lck spinlock a bit too light-weight to protect these?
+        * Protected by info_lck. XXX Is a spinlock too light-weight for these?
         */
        XLogRecPtr      lastCheckPointRecPtr;
        CheckPoint      lastCheckPoint;
@@ -5294,7 +5294,11 @@ StartupXLOG(void)
                                                minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
 
                        /*
-                        * Let postmaster know we've started redo now.
+                        * Let postmaster know we've started redo now, so that it can
+                        * launch bgwriter.  We don't bother during crash recovery; we can
+                        * only perform restartpoints during archive recovery anyway. And
+                        * we'd like to keep crash recovery simple, to avoid introducing
+                        * bugs into that codepath that could stop you from recovering.
                         *
                         * After this point, we can no longer assume that there's no other
                         * processes running concurrently.
@@ -5327,13 +5331,11 @@ StartupXLOG(void)
 #endif
 
                                /*
-                                * Process any requests or signals received recently.
+                                * Check if we were requested to exit without finishing
+                                * recovery.
                                 */
                                if (shutdown_requested)
                                {
-                                       /*
-                                        * We were requested to exit without finishing recovery.
-                                        */
                                        UpdateMinRecoveryPoint(ReadRecPtr);
                                        proc_exit(0);
                                }
@@ -6391,8 +6393,12 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
  * This is similar to CreateCheckPoint, but is used during WAL recovery
  * to establish a point from which recovery can roll forward without
  * replaying the entire recovery log.
+ *
+ * Returns true if a new restartpoint was established. We can only establish
+ * a restartpoint if we have replayed a checkpoint record since last
+ * restartpoint.
  */
-void
+bool
 CreateRestartPoint(int flags)
 {
        XLogRecPtr lastCheckPointRecPtr;
@@ -6423,7 +6429,7 @@ CreateRestartPoint(int flags)
                                (errmsg("skipping restartpoint, already performed at %X/%X",
                                                lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
                LWLockRelease(CheckpointLock);
-               return;
+               return false;
        }
 
        /* 
@@ -6435,7 +6441,7 @@ CreateRestartPoint(int flags)
                ereport(DEBUG2,
                                (errmsg("skipping restartpoint, recovery has already ended")));
                LWLockRelease(CheckpointLock);
-               return;
+               return false;
        }
 
        if (log_checkpoints)
@@ -6482,6 +6488,7 @@ CreateRestartPoint(int flags)
                                        timestamptz_to_str(recoveryLastXTime))));
 
        LWLockRelease(CheckpointLock);
+       return true;
 }
 
 /*
index 58b982dac1410d2e3d01fbd9a419dbdaf3cb1668..c1f29189ea2fce471f0b7e36954463dcca3c373a 100644 (file)
@@ -439,6 +439,8 @@ BackgroundWriterMain(void)
                 */
                if (do_checkpoint)
                {
+                       bool    ckpt_performed = false;
+
                        /* use volatile pointer to prevent code rearrangement */
                        volatile BgWriterShmemStruct *bgs = BgWriterShmem;
 
@@ -481,9 +483,12 @@ BackgroundWriterMain(void)
                         * Do the checkpoint.
                         */
                        if (!BgWriterRecoveryMode)
+                       {
                                CreateCheckPoint(flags);
+                               ckpt_performed = true;
+                       }
                        else
-                               CreateRestartPoint(flags);
+                               ckpt_performed = CreateRestartPoint(flags);
 
                        /*
                         * After any checkpoint, close all smgr files.  This is so we
@@ -494,18 +499,31 @@ BackgroundWriterMain(void)
                        /*
                         * Indicate checkpoint completion to any waiting backends.
                         */
-                       SpinLockAcquire(&bgs->ckpt_lck);
-                       bgs->ckpt_done = bgs->ckpt_started;
-                       SpinLockRelease(&bgs->ckpt_lck);
+                       if (ckpt_performed)
+                       {
+                               SpinLockAcquire(&bgs->ckpt_lck);
+                               bgs->ckpt_done = bgs->ckpt_started;
+                               SpinLockRelease(&bgs->ckpt_lck);
+
+                               /*
+                                * Note we record the checkpoint start time not end time as
+                                * last_checkpoint_time.  This is so that time-driven
+                                * checkpoints happen at a predictable spacing.
+                                */
+                               last_checkpoint_time = now;
+                       }
+                       else
+                       {
+                               /*
+                                * We were not able to perform the restartpoint (checkpoints
+                                * throw an ERROR in case of error).  Most likely because we
+                                * have not received a new checkpoint WAL record since the
+                                * last restartpoint. Try again in 15 s.
+                                */
+                               last_checkpoint_time = now - CheckPointTimeout - 15;
+                       }
 
                        ckpt_active = false;
-
-                       /*
-                        * Note we record the checkpoint start time not end time as
-                        * last_checkpoint_time.  This is so that time-driven checkpoints
-                        * happen at a predictable spacing.
-                        */
-                       last_checkpoint_time = now;
                }
                else
                        BgBufferSync();
index 2a9ed7078ef4ad31cb015d539b6cacd6df987f68..d0155bd2de2ed3e1810fb56f6df7007538dba5a9 100644 (file)
@@ -218,7 +218,7 @@ extern void StartupXLOG(void);
 extern void ShutdownXLOG(int code, Datum arg);
 extern void InitXLOGAccess(void);
 extern void CreateCheckPoint(int flags);
-extern void CreateRestartPoint(int flags);
+extern bool CreateRestartPoint(int flags);
 extern void XLogPutNextOid(Oid nextOid);
 extern XLogRecPtr GetRedoRecPtr(void);
 extern XLogRecPtr GetInsertRecPtr(void);