From 8eeb4a0f7c061ece7a8836e738ea8b7764617d3b Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 12 Nov 2025 20:59:36 +0200 Subject: [PATCH] Fix bug where we truncated CLOG that was still needed by LISTEN/NOTIFY MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The async notification queue contains the XID of the sender, and when processing notifications we call TransactionIdDidCommit() on the XID. But we had no safeguards to prevent the CLOG segments containing those XIDs from being truncated away. As a result, if a backend didn't for some reason process its notifications for a long time, or when a new backend issued LISTEN, you could get an error like: test=# listen c21; ERROR: 58P01: could not access status of transaction 14279685 DETAIL: Could not open file "pg_xact/000D": No such file or directory. LOCATION: SlruReportIOError, slru.c:1087 To fix, make VACUUM "freeze" the XIDs in the async notification queue before truncating the CLOG. Old XIDs are replaced with FrozenTransactionId or InvalidTransactionId. Note: This commit is not a full fix. A race condition remains, where a backend is executing asyncQueueReadAllNotifications() and has just made a local copy of an async SLRU page which contains old XIDs, while vacuum concurrently truncates the CLOG covering those XIDs. When the backend then calls TransactionIdDidCommit() on those XIDs from the local copy, you still get the error. The next commit will fix that remaining race condition. This was first reported by Sergey Zhuravlev in 2021, with many other people hitting the same issue later. Thanks to: - Alexandra Wang, Daniil Davydov, Andrei Varashen and Jacques Combrink for investigating and providing reproducable test cases, - Matheus Alcantara and Arseniy Mukhin for review and earlier proposed patches to fix this, - Álvaro Herrera and Masahiko Sawada for reviews, - Yura Sokolov aka funny-falcon for the idea of marking transactions as committed in the notification queue, and - Joel Jacobson for the final patch version. I hope I didn't forget anyone. Backpatch to all supported versions. I believe the bug goes back all the way to commit d1e027221d, which introduced the SLRU-based async notification queue. Discussion: https://www.postgresql.org/message-id/16961-25f29f95b3604a8a@postgresql.org Discussion: https://www.postgresql.org/message-id/18804-bccbbde5e77a68c2@postgresql.org Discussion: https://www.postgresql.org/message-id/CAK98qZ3wZLE-RZJN_Y%2BTFjiTRPPFPBwNBpBi5K5CU8hUHkzDpw@mail.gmail.com Backpatch-through: 14 --- src/backend/commands/async.c | 114 ++++++++++++++++++ src/backend/commands/vacuum.c | 7 ++ src/include/commands/async.h | 3 + src/test/modules/xid_wraparound/meson.build | 1 + .../xid_wraparound/t/004_notify_freeze.pl | 71 +++++++++++ 5 files changed, 196 insertions(+) create mode 100644 src/test/modules/xid_wraparound/t/004_notify_freeze.pl diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 59c5109ea48..c5be15bf223 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -2176,6 +2176,120 @@ asyncQueueAdvanceTail(void) LWLockRelease(NotifyQueueTailLock); } +/* + * AsyncNotifyFreezeXids + * + * Prepare the async notification queue for CLOG truncation by freezing + * transaction IDs that are about to become inaccessible. + * + * This function is called by VACUUM before advancing datfrozenxid. It scans + * the notification queue and replaces XIDs that would become inaccessible + * after CLOG truncation with special markers: + * - Committed transactions are set to FrozenTransactionId + * - Aborted/crashed transactions are set to InvalidTransactionId + * + * Only XIDs < newFrozenXid are processed, as those are the ones whose CLOG + * pages will be truncated. If XID < newFrozenXid, it cannot still be running + * (or it would have held back newFrozenXid through ProcArray). + * Therefore, if TransactionIdDidCommit returns false, we know the transaction + * either aborted explicitly or crashed, and we can safely mark it invalid. + */ +void +AsyncNotifyFreezeXids(TransactionId newFrozenXid) +{ + QueuePosition pos; + QueuePosition head; + int64 curpage = -1; + int slotno = -1; + char *page_buffer = NULL; + bool page_dirty = false; + + /* + * Acquire locks in the correct order to avoid deadlocks. As per the + * locking protocol: NotifyQueueTailLock, then NotifyQueueLock, then SLRU + * bank locks. + * + * We only need SHARED mode since we're just reading the head/tail + * positions, not modifying them. + */ + LWLockAcquire(NotifyQueueTailLock, LW_SHARED); + LWLockAcquire(NotifyQueueLock, LW_SHARED); + + pos = QUEUE_TAIL; + head = QUEUE_HEAD; + + /* Release NotifyQueueLock early, we only needed to read the positions */ + LWLockRelease(NotifyQueueLock); + + /* + * Scan the queue from tail to head, freezing XIDs as needed. We hold + * NotifyQueueTailLock throughout to ensure the tail doesn't move while + * we're working. + */ + while (!QUEUE_POS_EQUAL(pos, head)) + { + AsyncQueueEntry *qe; + TransactionId xid; + int64 pageno = QUEUE_POS_PAGE(pos); + int offset = QUEUE_POS_OFFSET(pos); + + /* If we need a different page, release old lock and get new one */ + if (pageno != curpage) + { + LWLock *lock; + + /* Release previous page if any */ + if (slotno >= 0) + { + if (page_dirty) + { + NotifyCtl->shared->page_dirty[slotno] = true; + page_dirty = false; + } + LWLockRelease(SimpleLruGetBankLock(NotifyCtl, curpage)); + } + + lock = SimpleLruGetBankLock(NotifyCtl, pageno); + LWLockAcquire(lock, LW_EXCLUSIVE); + slotno = SimpleLruReadPage(NotifyCtl, pageno, true, + InvalidTransactionId); + page_buffer = NotifyCtl->shared->page_buffer[slotno]; + curpage = pageno; + } + + qe = (AsyncQueueEntry *) (page_buffer + offset); + xid = qe->xid; + + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, newFrozenXid)) + { + if (TransactionIdDidCommit(xid)) + { + qe->xid = FrozenTransactionId; + page_dirty = true; + } + else + { + qe->xid = InvalidTransactionId; + page_dirty = true; + } + } + + /* Advance to next entry */ + asyncQueueAdvance(&pos, qe->length); + } + + /* Release final page lock if we acquired one */ + if (slotno >= 0) + { + if (page_dirty) + NotifyCtl->shared->page_dirty[slotno] = true; + LWLockRelease(SimpleLruGetBankLock(NotifyCtl, curpage)); + } + + LWLockRelease(NotifyQueueTailLock); +} + /* * ProcessIncomingNotify * diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index ed03e3bd50d..e785dd55ce5 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -37,6 +37,7 @@ #include "catalog/namespace.h" #include "catalog/pg_database.h" #include "catalog/pg_inherits.h" +#include "commands/async.h" #include "commands/cluster.h" #include "commands/defrem.h" #include "commands/progress.h" @@ -1941,6 +1942,12 @@ vac_truncate_clog(TransactionId frozenXID, return; } + /* + * Freeze any old transaction IDs in the async notification queue before + * CLOG truncation. + */ + AsyncNotifyFreezeXids(frozenXID); + /* * Advance the oldest value for commit timestamps before truncating, so * that if a user requests a timestamp for a transaction we're truncating diff --git a/src/include/commands/async.h b/src/include/commands/async.h index f75c3df9556..aaec7314c10 100644 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -46,4 +46,7 @@ extern void HandleNotifyInterrupt(void); /* process interrupts */ extern void ProcessNotifyInterrupt(bool flush); +/* freeze old transaction IDs in notify queue (called by VACUUM) */ +extern void AsyncNotifyFreezeXids(TransactionId newFrozenXid); + #endif /* ASYNC_H */ diff --git a/src/test/modules/xid_wraparound/meson.build b/src/test/modules/xid_wraparound/meson.build index f7dada67f67..3aec430df8c 100644 --- a/src/test/modules/xid_wraparound/meson.build +++ b/src/test/modules/xid_wraparound/meson.build @@ -30,6 +30,7 @@ tests += { 't/001_emergency_vacuum.pl', 't/002_limits.pl', 't/003_wraparounds.pl', + 't/004_notify_freeze.pl', ], }, } diff --git a/src/test/modules/xid_wraparound/t/004_notify_freeze.pl b/src/test/modules/xid_wraparound/t/004_notify_freeze.pl new file mode 100644 index 00000000000..7241bd602ba --- /dev/null +++ b/src/test/modules/xid_wraparound/t/004_notify_freeze.pl @@ -0,0 +1,71 @@ +# Copyright (c) 2024-2025, PostgreSQL Global Development Group +# +# Test freezing XIDs in the async notification queue. This isn't +# really wraparound-related, but the test depends on the +# consume_xids() helper function. + +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init; +$node->start; + +if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bxid_wraparound\b/) +{ + plan skip_all => "test xid_wraparound not enabled in PG_TEST_EXTRA"; +} + +# Setup +$node->safe_psql('postgres', 'CREATE EXTENSION xid_wraparound'); +$node->safe_psql('postgres', + 'ALTER DATABASE template0 WITH ALLOW_CONNECTIONS true'); + +# Start Session 1 and leave it idle in transaction +my $psql_session1 = $node->background_psql('postgres'); +$psql_session1->query_safe('listen s;', "Session 1 listens to 's'"); +$psql_session1->query_safe('begin;', "Session 1 starts a transaction"); + +# Send some notifys from other sessions +for my $i (1 .. 10) +{ + $node->safe_psql('postgres', "NOTIFY s, '$i'"); +} + +# Consume enough XIDs to trigger truncation, and one more with +# 'txid_current' to bump up the freeze horizon. +$node->safe_psql('postgres', 'select consume_xids(10000000);'); +$node->safe_psql('postgres', 'select txid_current()'); + +# Remember current datfrozenxid before vacuum freeze so that we can +# check that it is advanced. (Taking the min() this way assumes that +# XID wraparound doesn't happen.) +my $datafronzenxid = $node->safe_psql('postgres', + "select min(datfrozenxid::text::bigint) from pg_database"); + +# Execute vacuum freeze on all databases +$node->command_ok([ 'vacuumdb', '--all', '--freeze', '--port', $node->port ], + "vacuumdb --all --freeze"); + +# Check that vacuumdb advanced datfrozenxid +my $datafronzenxid_freeze = $node->safe_psql('postgres', + "select min(datfrozenxid::text::bigint) from pg_database"); +ok($datafronzenxid_freeze > $datafronzenxid, 'datfrozenxid advanced'); + +# On Session 1, commit and ensure that the all the notifications are +# received. This depends on correctly freezing the XIDs in the pending +# notification entries. +my $res = $psql_session1->query_safe('commit;', "commit listen s;"); +my $notifications_count = 0; +foreach my $i (split('\n', $res)) +{ + $notifications_count++; + like($i, + qr/Asynchronous notification "s" with payload "$notifications_count" received/ + ); +} +is($notifications_count, 10, 'received all committed notifications'); + +done_testing(); -- 2.39.5