Also trigger restartpoints based on max_wal_size on standby.

author Heikki Linnakangas <[email protected]>

Sun, 28 Jun 2015 21:09:10 +0000 (00:09 +0300)

committer Heikki Linnakangas <[email protected]>

Sun, 28 Jun 2015 21:09:10 +0000 (00:09 +0300)
author Heikki Linnakangas <[email protected]>
Sun, 28 Jun 2015 21:09:10 +0000 (00:09 +0300)
committer Heikki Linnakangas <[email protected]>
Sun, 28 Jun 2015 21:09:10 +0000 (00:09 +0300)
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml

index f4083c3fe570ebac9a6d172854c86fb0809603f3..e3941c9391b7fe4f277e673e7a2f31ba7a168714 100644 (file)
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -590,7 +590,11 @@
     A restartpoint is triggered when a checkpoint record is reached if at
     least <varname>checkpoint_timeout</> seconds have passed since the last
     restartpoint, or if WAL size is about to exceed
-   <varname>max_wal_size</>.
+   <varname>max_wal_size</>. However, because of limitations on when a
+   restartpoint can be performed, <varname>max_wal_size</> is often exceeded
+   during recovery, by up to one checkpoint cycle's worth of WAL.
+   (<varname>max_wal_size</> is never a hard limit anyway, so you should
+   always leave plenty of headroom to avoid running out of disk space.)
    </para>
  
    <para>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 7830b47c8d1cb27ff6c55a73e0e4d4f112a48144..0def47d6ed5bb71e75c5d3b341d96f986082e7f0 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10943,7 +10943,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
          * Request a restartpoint if we've replayed too much xlog since the
          * last one.
          */
-       if (StandbyModeRequested && bgwriterLaunched)
+       if (bgwriterLaunched)
         {
             if (XLogCheckpointNeeded(readSegNo))
             {
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c

index 0dce6a8ffaa3f23a88bc55b22c0cbc98fff13268..3b3a09ef8860b0497a415cec0ca1d823aaa78841 100644 (file)
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -475,10 +475,12 @@ CheckpointerMain(void)
  
             /*
              * Initialize checkpointer-private variables used during
-            * checkpoint
+            * checkpoint.
              */
             ckpt_active = true;
-           if (!do_restartpoint)
+           if (do_restartpoint)
+               ckpt_start_recptr = GetXLogReplayRecPtr(NULL);
+           else
                 ckpt_start_recptr = GetInsertRecPtr();
             ckpt_start_time = now;
             ckpt_cached_elapsed = 0;
@@ -720,7 +722,7 @@ CheckpointWriteDelay(int flags, double progress)
  
  /*
   * IsCheckpointOnSchedule -- are we on schedule to finish this checkpoint
- *      in time?
+ *      (or restartpoint) in time?
   *
   * Compares the current progress against the time/segments elapsed since last
   * checkpoint, and returns true if the progress we've made this far is greater
@@ -757,17 +759,27 @@ IsCheckpointOnSchedule(double progress)
      * compares against RedoRecptr, so this is not completely accurate.
      * However, it's good enough for our purposes, we're only calculating an
      * estimate anyway.
+    *
+    * During recovery, we compare last replayed WAL record's location with
+    * the location computed before calling CreateRestartPoint. That maintains
+    * the same pacing as we have during checkpoints in normal operation, but
+    * we might exceed max_wal_size by a fair amount. That's because there can
+    * be a large gap between a checkpoint's redo-pointer and the checkpoint
+    * record itself, and we only start the restartpoint after we've seen the
+    * checkpoint record. (The gap is typically up to CheckPointSegments *
+    * checkpoint_completion_target where checkpoint_completion_target is the
+    * value that was in effect when the WAL was generated).
      */
-   if (!RecoveryInProgress())
-   {
+   if (RecoveryInProgress())
+       recptr = GetXLogReplayRecPtr(NULL);
+   else
         recptr = GetInsertRecPtr();
-       elapsed_xlogs = (((double) (recptr - ckpt_start_recptr)) / XLogSegSize) / CheckPointSegments;
+   elapsed_xlogs = (((double) (recptr - ckpt_start_recptr)) / XLogSegSize) / CheckPointSegments;
  
-       if (progress < elapsed_xlogs)
-       {
-           ckpt_cached_elapsed = elapsed_xlogs;
-           return false;
-       }
+   if (progress < elapsed_xlogs)
+   {
+       ckpt_cached_elapsed = elapsed_xlogs;
+       return false;
     }
  
     /*
author	Heikki Linnakangas <[email protected]>
	Sun, 28 Jun 2015 21:09:10 +0000 (00:09 +0300)
committer	Heikki Linnakangas <[email protected]>
	Sun, 28 Jun 2015 21:09:10 +0000 (00:09 +0300)
doc/src/sgml/wal.sgml		patch | blob | blame | history
src/backend/access/transam/xlog.c		patch | blob | blame | history
src/backend/postmaster/checkpointer.c		patch | blob | blame | history