Detect the deadlocks between backends and the startup process.
authorFujii Masao <[email protected]>
Wed, 6 Jan 2021 03:29:43 +0000 (12:29 +0900)
committerFujii Masao <[email protected]>
Wed, 6 Jan 2021 03:31:23 +0000 (12:31 +0900)
The deadlocks that the recovery conflict on lock is involved in can
happen between hot-standby backends and the startup process.
If a backend takes an access exclusive lock on the table and which
finally triggers the deadlock, that deadlock can be detected
as expected. On the other hand, previously, if the startup process
took an access exclusive lock and which finally triggered the deadlock,
that deadlock could not be detected and could remain even after
deadlock_timeout passed. This is a bug.

The cause of this bug was that the code for handling the recovery
conflict on lock didn't take care of deadlock case at all. It assumed
that deadlocks involving the startup process and backends were able
to be detected by the deadlock detector invoked within backends.
But this assumption was incorrect. The startup process also should
have invoked the deadlock detector if necessary.

To fix this bug, this commit makes the startup process invoke
the deadlock detector if deadlock_timeout is reached while handling
the recovery conflict on lock. Specifically, in that case, the startup
process requests all the backends holding the conflicting locks to
check themselves for deadlocks.

Back-patch to v9.6. v9.5 has also this bug, but per discussion we decided
not to back-patch the fix to v9.5. Because v9.5 doesn't have some
infrastructure codes (e.g., 37c54863cf) that this bug fix patch depends on.
We can apply those codes for the back-patch, but since the next minor
version release is the final one for v9.5, it's risky to do that. If we
unexpectedly introduce new bug to v9.5 by the back-patch, there is no
chance to fix that. We determined that the back-patch to v9.5 would give
more risk than gain.

Author: Fujii Masao
Reviewed-by: Bertrand Drouvot, Masahiko Sawada, Kyotaro Horiguchi
Discussion: https://postgr.es/m/4041d6b6-cf24-a120-36fa-1294220f8243@oss.nttdata.com

src/backend/storage/ipc/procarray.c
src/backend/storage/ipc/standby.c
src/backend/storage/lmgr/proc.c
src/backend/tcop/postgres.c
src/include/storage/procarray.h

index 18a0f62ba67ca931babc60a3fe9a0db626e58045..fc5bb6dc26336205244c7c18338a5477372a433b 100644 (file)
@@ -2654,6 +2654,13 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
  */
 pid_t
 CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
+{
+   return SignalVirtualTransaction(vxid, sigmode, true);
+}
+
+pid_t
+SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
+                        bool conflictPending)
 {
    ProcArrayStruct *arrayP = procArray;
    int         index;
@@ -2672,7 +2679,7 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
        if (procvxid.backendId == vxid.backendId &&
            procvxid.localTransactionId == vxid.localTransactionId)
        {
-           proc->recoveryConflictPending = true;
+           proc->recoveryConflictPending = conflictPending;
            pid = proc->pid;
            if (pid != 0)
            {
index 5fb0eb78111a8f5d430aa57a4c8de3a619cd5d7d..1bef7b666d4677418f82e5007d6c3289e4322a9a 100644 (file)
@@ -42,6 +42,10 @@ int          max_standby_streaming_delay = 30 * 1000;
 
 static HTAB *RecoveryLockLists;
 
+/* Flags set by timeout handlers */
+static volatile sig_atomic_t got_standby_deadlock_timeout = false;
+static volatile sig_atomic_t got_standby_lock_timeout = false;
+
 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                                                   ProcSignalReason reason, bool report_waiting);
 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
@@ -391,8 +395,10 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
  * lock.  As we are already queued to be granted the lock, no new lock
  * requests conflicting with ours will be granted in the meantime.
  *
- * Deadlocks involving the Startup process and an ordinary backend process
- * will be detected by the deadlock detector within the ordinary backend.
+ * We also must check for deadlocks involving the Startup process and
+ * hot-standby backend processes. If deadlock_timeout is reached in
+ * this function, all the backends holding the conflicting locks are
+ * requested to check themselves for deadlocks.
  */
 void
 ResolveRecoveryConflictWithLock(LOCKTAG locktag)
@@ -403,7 +409,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 
    ltime = GetStandbyLimitTime();
 
-   if (GetCurrentTimestamp() >= ltime)
+   if (GetCurrentTimestamp() >= ltime && ltime != 0)
    {
        /*
         * We're already behind, so clear a path as quickly as possible.
@@ -424,19 +430,76 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
    else
    {
        /*
-        * Wait (or wait again) until ltime
+        * Wait (or wait again) until ltime, and check for deadlocks as well
+        * if we will be waiting longer than deadlock_timeout
         */
-       EnableTimeoutParams timeouts[1];
+       EnableTimeoutParams timeouts[2];
+       int         cnt = 0;
+
+       if (ltime != 0)
+       {
+           got_standby_lock_timeout = false;
+           timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
+           timeouts[cnt].type = TMPARAM_AT;
+           timeouts[cnt].fin_time = ltime;
+           cnt++;
+       }
 
-       timeouts[0].id = STANDBY_LOCK_TIMEOUT;
-       timeouts[0].type = TMPARAM_AT;
-       timeouts[0].fin_time = ltime;
-       enable_timeouts(timeouts, 1);
+       got_standby_deadlock_timeout = false;
+       timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+       timeouts[cnt].type = TMPARAM_AFTER;
+       timeouts[cnt].delay_ms = DeadlockTimeout;
+       cnt++;
+
+       enable_timeouts(timeouts, cnt);
    }
 
    /* Wait to be signaled by the release of the Relation Lock */
    ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
 
+   /*
+    * Exit if ltime is reached. Then all the backends holding conflicting
+    * locks will be canceled in the next ResolveRecoveryConflictWithLock()
+    * call.
+    */
+   if (got_standby_lock_timeout)
+       goto cleanup;
+
+   if (got_standby_deadlock_timeout)
+   {
+       VirtualTransactionId *backends;
+
+       backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
+
+       /* Quick exit if there's no work to be done */
+       if (!VirtualTransactionIdIsValid(*backends))
+           goto cleanup;
+
+       /*
+        * Send signals to all the backends holding the conflicting locks, to
+        * ask them to check themselves for deadlocks.
+        */
+       while (VirtualTransactionIdIsValid(*backends))
+       {
+           SignalVirtualTransaction(*backends,
+                                    PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+                                    false);
+           backends++;
+       }
+
+       /*
+        * Wait again here to be signaled by the release of the Relation Lock,
+        * to prevent the subsequent RecoveryConflictWithLock() from causing
+        * deadlock_timeout and sending a request for deadlocks check again.
+        * Otherwise the request continues to be sent every deadlock_timeout
+        * until the relation locks are released or ltime is reached.
+        */
+       got_standby_deadlock_timeout = false;
+       ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
+   }
+
+cleanup:
+
    /*
     * Clear any timeout requests established above.  We assume here that the
     * Startup process doesn't have any other outstanding timeouts than those
@@ -444,6 +507,8 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
     * timeouts individually, but that'd be slower.
     */
    disable_all_timeouts(false);
+   got_standby_lock_timeout = false;
+   got_standby_deadlock_timeout = false;
 }
 
 /*
@@ -482,15 +547,7 @@ ResolveRecoveryConflictWithBufferPin(void)
 
    ltime = GetStandbyLimitTime();
 
-   if (ltime == 0)
-   {
-       /*
-        * We're willing to wait forever for conflicts, so set timeout for
-        * deadlock check only
-        */
-       enable_timeout_after(STANDBY_DEADLOCK_TIMEOUT, DeadlockTimeout);
-   }
-   else if (GetCurrentTimestamp() >= ltime)
+   if (GetCurrentTimestamp() >= ltime && ltime != 0)
    {
        /*
         * We're already behind, so clear a path as quickly as possible.
@@ -504,19 +561,47 @@ ResolveRecoveryConflictWithBufferPin(void)
         * waiting longer than deadlock_timeout
         */
        EnableTimeoutParams timeouts[2];
+       int         cnt = 0;
 
-       timeouts[0].id = STANDBY_TIMEOUT;
-       timeouts[0].type = TMPARAM_AT;
-       timeouts[0].fin_time = ltime;
-       timeouts[1].id = STANDBY_DEADLOCK_TIMEOUT;
-       timeouts[1].type = TMPARAM_AFTER;
-       timeouts[1].delay_ms = DeadlockTimeout;
-       enable_timeouts(timeouts, 2);
+       if (ltime != 0)
+       {
+           timeouts[cnt].id = STANDBY_TIMEOUT;
+           timeouts[cnt].type = TMPARAM_AT;
+           timeouts[cnt].fin_time = ltime;
+           cnt++;
+       }
+
+       got_standby_deadlock_timeout = false;
+       timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+       timeouts[cnt].type = TMPARAM_AFTER;
+       timeouts[cnt].delay_ms = DeadlockTimeout;
+       cnt++;
+
+       enable_timeouts(timeouts, cnt);
    }
 
    /* Wait to be signaled by UnpinBuffer() */
    ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
 
+   if (got_standby_deadlock_timeout)
+   {
+       /*
+        * Send out a request for hot-standby backends to check themselves for
+        * deadlocks.
+        *
+        * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
+        * to be signaled by UnpinBuffer() again and send a request for
+        * deadlocks check if deadlock_timeout happens. This causes the
+        * request to continue to be sent every deadlock_timeout until the
+        * buffer is unpinned or ltime is reached. This would increase the
+        * workload in the startup process and backends. In practice it may
+        * not be so harmful because the period that the buffer is kept pinned
+        * is basically no so long. But we should fix this?
+        */
+       SendRecoveryConflictWithBufferPin(
+                                         PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+   }
+
    /*
     * Clear any timeout requests established above.  We assume here that the
     * Startup process doesn't have any other timeouts than what this function
@@ -524,6 +609,7 @@ ResolveRecoveryConflictWithBufferPin(void)
     * individually, but that'd be slower.
     */
    disable_all_timeouts(false);
+   got_standby_deadlock_timeout = false;
 }
 
 static void
@@ -583,13 +669,12 @@ CheckRecoveryConflictDeadlock(void)
 
 /*
  * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT
- * occurs before STANDBY_TIMEOUT.  Send out a request for hot-standby
- * backends to check themselves for deadlocks.
+ * occurs before STANDBY_TIMEOUT.
  */
 void
 StandbyDeadLockHandler(void)
 {
-   SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+   got_standby_deadlock_timeout = true;
 }
 
 /*
@@ -608,11 +693,11 @@ StandbyTimeoutHandler(void)
 
 /*
  * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
- * This doesn't need to do anything, simply waking up is enough.
  */
 void
 StandbyLockTimeoutHandler(void)
 {
+   got_standby_lock_timeout = true;
 }
 
 /*
index b15e576e449f798cf2e4f4e4be05c9a7bc970189..4850df2e14e8c069a2d15ea923a98c2cc8ba3d5a 100644 (file)
@@ -1783,6 +1783,9 @@ CheckDeadLockAlert(void)
     * Have to set the latch again, even if handle_sig_alarm already did. Back
     * then got_deadlock_timeout wasn't yet set... It's unlikely that this
     * ever would be a problem, but setting a set latch again is cheap.
+    *
+    * Note that, when this function runs inside procsignal_sigusr1_handler(),
+    * the handler function sets the latch again after the latch is set here.
     */
    SetLatch(MyLatch);
    errno = save_errno;
index ee1b4f685709a0707f16b245d1c421640b5b954b..5501f2aaec68801c58454c1fbb965938d98287f2 100644 (file)
@@ -2852,11 +2852,23 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
            case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
 
                /*
-                * If we aren't blocking the Startup process there is nothing
-                * more to do.
+                * If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we
+                * aren't blocking the Startup process there is nothing more
+                * to do.
+                *
+                * When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is
+                * requested, if we're waiting for locks and the startup
+                * process is not waiting for buffer pin (i.e., also waiting
+                * for locks), we set the flag so that ProcSleep() will check
+                * for deadlocks.
                 */
                if (!HoldingBufferPinThatDelaysRecovery())
+               {
+                   if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK &&
+                       GetStartupBufferPinWaitBufId() < 0)
+                       CheckDeadLockAlert();
                    return;
+               }
 
                MyProc->recoveryConflictPending = true;
 
index da8b672096f3d90e95b06b8337d4456976b90125..d1dc0ffe28b94e90b21ad2e3285f42d03ed62d8f 100644 (file)
@@ -105,6 +105,8 @@ extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
                                                   int *nvxids);
 extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid);
 extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
+extern pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
+                                     bool conflictPending);
 
 extern bool MinimumActiveBackends(int min);
 extern int CountDBBackends(Oid databaseid);