At update of non-LP_NORMAL TID, fail instead of corrupting page header.
author: Noah Misch <[email protected]>
Sat, 25 Jan 2025 19:28:14 +0000 (11:28 -0800)
committer: Noah Misch <[email protected]>
Sat, 25 Jan 2025 19:28:18 +0000 (11:28 -0800)

The right mix of DDL and VACUUM could corrupt a catalog page header such
that PageIsVerified() durably fails, requiring a restore from backup.
This affects only catalogs that both have a syscache and have DDL code
that uses syscache tuples to construct updates.  One of the test
permutations shows a variant not yet fixed.

This makes !TransactionIdIsValid(TM_FailureData.xmax) possible with
TM_Deleted.  I think core and PGXN are indifferent to that.

Per bug #17821 from Alexander Lakhin.  Back-patch to v13 (all supported
versions).  The test case is v17+, since it uses INJECTION_POINT.

Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org

src/backend/access/heap/heapam.c
src/backend/utils/cache/inval.c
src/include/access/tableam.h
src/test/modules/injection_points/Makefile
src/test/modules/injection_points/expected/syscache-update-pruned.out [new file with mode: 0644]
src/test/modules/injection_points/expected/syscache-update-pruned_1.out [new file with mode: 0644]
src/test/modules/injection_points/injection_points--1.0.sql
src/test/modules/injection_points/meson.build
src/test/modules/injection_points/regress_injection.c [new file with mode: 0644]
src/test/modules/injection_points/specs/syscache-update-pruned.spec [new file with mode: 0644]

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index bbe64b1e53f59bedfed40d4620c9f473a3ed637d..95e3be524a7f8c2528d0c6273ae6bb17c5375228 100644 (file)
 #include "storage/procarray.h"
 #include "storage/standby.h"
 #include "utils/datum.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/relcache.h"
 #include "utils/snapmgr.h"
 #include "utils/spccache.h"
+#include "utils/syscache.h"
 
 
 static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
@@ -3251,6 +3253,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
    interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
 
    block = ItemPointerGetBlockNumber(otid);
+   INJECTION_POINT("heap_update-before-pin");
    buffer = ReadBuffer(relation, block);
    page = BufferGetPage(buffer);
 
@@ -3266,7 +3269,51 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
    lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
-   Assert(ItemIdIsNormal(lp));
+
+   /*
+    * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
+    * we see LP_NORMAL here.  When the otid origin is a syscache, we may have
+    * neither a pin nor a snapshot.  Hence, we may see other LP_ states, each
+    * of which indicates concurrent pruning.
+    *
+    * Failing with TM_Updated would be most accurate.  However, unlike other
+    * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
+    * LP_DEAD cases.  While the distinction between TM_Updated and TM_Deleted
+    * does matter to SQL statements UPDATE and MERGE, those SQL statements
+    * hold a snapshot that ensures LP_NORMAL.  Hence, the choice between
+    * TM_Updated and TM_Deleted affects only the wording of error messages.
+    * Settle on TM_Deleted, for two reasons.  First, it avoids complicating
+    * the specification of when tmfd->ctid is valid.  Second, it creates
+    * error log evidence that we took this branch.
+    *
+    * Since it's possible to see LP_UNUSED at otid, it's also possible to see
+    * LP_NORMAL for a tuple that replaced LP_UNUSED.  If it's a tuple for an
+    * unrelated row, we'll fail with "duplicate key value violates unique".
+    * XXX if otid is the live, newer version of the newtup row, we'll discard
+    * changes originating in versions of this catalog row after the version
+    * the caller got from syscache.  See syscache-update-pruned.spec.
+    */
+   if (!ItemIdIsNormal(lp))
+   {
+       Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
+
+       UnlockReleaseBuffer(buffer);
+       Assert(!have_tuple_lock);
+       if (vmbuffer != InvalidBuffer)
+           ReleaseBuffer(vmbuffer);
+       tmfd->ctid = *otid;
+       tmfd->xmax = InvalidTransactionId;
+       tmfd->cmax = InvalidCommandId;
+       *update_indexes = TU_None;
+
+       bms_free(hot_attrs);
+       bms_free(sum_attrs);
+       bms_free(key_attrs);
+       bms_free(id_attrs);
+       /* modified_attrs not yet initialized */
+       bms_free(interesting_attrs);
+       return TM_Deleted;
+   }
 
    /*
     * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 6772db24b50138d2c71685db92bb40850819878b..3ce4775b829b3101ed8b78c8ad429bf807e60ad7 100644 (file)
 #include "storage/sinval.h"
 #include "storage/smgr.h"
 #include "utils/catcache.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
@@ -1031,6 +1032,8 @@ AtEOXact_Inval(bool isCommit)
    /* Must be at top of stack */
    Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL);
 
+   INJECTION_POINT("AtEOXact_Inval-with-transInvalInfo");
+
    if (isCommit)
    {
        /*
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index da661289c1fd47f842c14595f11361104536d0ee..7be7887b4a846031876fcb969086576a9c5815d8 100644 (file)
@@ -137,7 +137,8 @@ typedef enum TU_UpdateIndexes
  *
  * xmax is the outdating transaction's XID.  If the caller wants to visit the
  * replacement tuple, it must check that this matches before believing the
- * replacement is really a match.
+ * replacement is really a match.  This is InvalidTransactionId if the target
+ * was !LP_NORMAL (expected only for a TID retrieved from syscache).
  *
  * cmax is the outdating command's CID, but only when the failure code is
  * TM_SelfModified (i.e., something in the current transaction outdated the
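diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile
index d1375f78f7eb8b5a63c8744eac1d057fc6e19b41..f19c9643ba968db9099770d284817f0e72375941 100644 (file)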
@@ -1,7 +1,10 @@
 # src/test/modules/injection_points/Makefile
 
-MODULES = injection_points
-
+MODULE_big = injection_points
+OBJS = \
+   $(WIN32RES) \
+   injection_points.o \
+   regress_injection.o
 EXTENSION = injection_points
 DATA = injection_points--1.0.sql
 PGFILEDESC = "injection_points - facility for injection points"
@@ -9,7 +12,7 @@ PGFILEDESC = "injection_points - facility for injection points"
 REGRESS = injection_points reindex_conc
 REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress
 
-ISOLATION = inplace
+ISOLATION = inplace syscache-update-pruned
 
 # The injection points are cluster-wide, so disable installcheck
 NO_INSTALLCHECK = 1
diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned.out b/src/test/modules/injection_points/expected/syscache-update-pruned.out
new file mode 100644 (file)
index 0000000..5dc5a1d
--- /dev/null
@@ -0,0 +1,87 @@
+Parsed test spec with 4 sessions
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+   CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+       FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+   SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+   SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step wakegrant4: 
+   SELECT FROM injection_points_detach('heap_update-before-pin');
+   SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+ERROR:  tuple concurrently deleted
+step wakegrant4: <... completed>
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+   CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+       FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+   SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+   SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step mkrels4: 
+   SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step wakegrant4: 
+   SELECT FROM injection_points_detach('heap_update-before-pin');
+   SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+ERROR:  duplicate key value violates unique constraint "pg_class_oid_index"
+step wakegrant4: <... completed>
+
+starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4
+step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT;
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+   CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+       FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step mkrels4: 
+   SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step r3: ROLLBACK;
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+   SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+   SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50;
+step wakegrant4: 
+   SELECT FROM injection_points_detach('heap_update-before-pin');
+   SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+step wakegrant4: <... completed>
+step inspect4: 
+   SELECT relhastriggers, relhassubclass FROM pg_class
+       WHERE oid = 'vactest.orig50'::regclass;
+
+relhastriggers|relhassubclass
+--------------+--------------
+f             |f             
+(1 row)
+
diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned_1.out b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out
new file mode 100644 (file)
index 0000000..b18857c
--- /dev/null
@@ -0,0 +1,86 @@
+Parsed test spec with 4 sessions
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+   CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+       FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+   SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+   SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step wakegrant4: 
+   SELECT FROM injection_points_detach('heap_update-before-pin');
+   SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+step wakegrant4: <... completed>
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+   CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+       FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+   SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+   SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step mkrels4: 
+   SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step wakegrant4: 
+   SELECT FROM injection_points_detach('heap_update-before-pin');
+   SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+step wakegrant4: <... completed>
+
+starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4
+step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT;
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+   CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+       FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step mkrels4: 
+   SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step r3: ROLLBACK;
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+   SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+   SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50;
+step wakegrant4: 
+   SELECT FROM injection_points_detach('heap_update-before-pin');
+   SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+ERROR:  tuple concurrently updated
+step wakegrant4: <... completed>
+step inspect4: 
+   SELECT relhastriggers, relhassubclass FROM pg_class
+       WHERE oid = 'vactest.orig50'::regclass;
+
+relhastriggers|relhassubclass
+--------------+--------------
+t             |t             
+(1 row)
+
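diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql
index c16a33b08dbc87e436d2204d41b04e748a483503..519641e6d04150d74c8d189607b5ad634461450b 100644 (file)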
@@ -54,3 +54,11 @@ CREATE FUNCTION injection_points_detach(IN point_name TEXT)
 RETURNS void
 AS 'MODULE_PATHNAME', 'injection_points_detach'
 LANGUAGE C STRICT PARALLEL UNSAFE;
+
+--
+-- regress_injection.c functions
+--
+CREATE FUNCTION removable_cutoff(rel regclass)
+RETURNS xid8
+AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT;
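diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build
index 33303089cffa4de1302d5441fa52853f03429c76..169c415f9c478819962a069c37d1e39322f30822 100644 (file)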
@@ -6,6 +6,7 @@ endif
 
 injection_points_sources = files(
   'injection_points.c',
+  'regress_injection.c',
 )
 
 if host_system == 'windows'
@@ -41,7 +42,8 @@ tests += {
   'isolation': {
     'specs': [
       'inplace',
+      'syscache-update-pruned',
     ],
-    'runningcheck': false, # align with GNU make build system
+    'runningcheck': false, # see syscache-update-pruned
   },
 }
diff --git a/src/test/modules/injection_points/regress_injection.c b/src/test/modules/injection_points/regress_injection.c
new file mode 100644 (file)
index 0000000..422f416
--- /dev/null
@@ -0,0 +1,71 @@
+/*--------------------------------------------------------------------------
+ *
+ * regress_injection.c
+ *     Functions supporting test-specific subject matter.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *     src/test/modules/injection_points/regress_injection.c
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/table.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "storage/procarray.h"
+#include "utils/xid8.h"
+
+/*
+ * removable_cutoff - for syscache-update-pruned.spec
+ *
+ * Wrapper around GetOldestNonRemovableTransactionId().  In general, this can
+ * move backward.  runningcheck=false isolation tests can reasonably prevent
+ * that.  For the causes of backward movement, see
+ * postgr.es/m/CAEze2Wj%2BV0kTx86xB_YbyaqTr5hnE_igdWAwuhSyjXBYscf5-Q%40mail.gmail.com
+ * and the header comment for ComputeXidHorizons().  One can assume this
+ * doesn't move backward if one arranges for concurrent activity not to reach
+ * AbortTransaction() and not to allocate an XID while connected to another
+ * database.  Non-runningcheck tests can control most concurrent activity,
+ * except autovacuum and the isolationtester control connection.  Neither
+ * allocates XIDs, and AbortTransaction() in those would justify test failure.
+ *
+ * Argument 0 is the OID of the relation whose cutoff is wanted.  It is
+ * opened with AccessShareLock and closed again before returning.  If the
+ * argument is NULL, no relation is opened and the callee receives a NULL
+ * Relation; see GetOldestNonRemovableTransactionId() for what that selects.
+ *
+ * The result is the callee's TransactionId converted to a FullTransactionId
+ * by FullTransactionIdFromAllowableAt(), with the nextXid reading that stayed
+ * unchanged across the callee's run passed as its other argument.
+ */
+PG_FUNCTION_INFO_V1(removable_cutoff);
+Datum
+removable_cutoff(PG_FUNCTION_ARGS)
+{
+   Relation    rel = NULL;
+   TransactionId xid;
+   FullTransactionId next_fxid_before,
+               next_fxid;
+
+   /*
+    * A NULL argument leaves rel as NULL.  We could take other relkinds the
+    * callee takes, but we've not yet needed it.
+    */
+   if (!PG_ARGISNULL(0))
+       rel = table_open(PG_GETARG_OID(0), AccessShareLock);
+
+   /*
+    * No lock or snapshot necessarily prevents oldestXid from advancing past
+    * "xid" while this function runs.  That concerns us only in that we must
+    * not ascribe "xid" to the wrong epoch.  (That may never arise in
+    * isolation testing, but let's set a good example.)  As a crude solution,
+    * retry until nextXid doesn't change.
+    *
+    * Each iteration saves the previous nextXid reading, calls the callee,
+    * and reads nextXid again; the loop ends once the two readings are equal.
+    * CHECK_FOR_INTERRUPTS() keeps the retries responsive to pending
+    * interrupts.
+    */
+   next_fxid = ReadNextFullTransactionId();
+   do
+   {
+       CHECK_FOR_INTERRUPTS();
+       next_fxid_before = next_fxid;
+       xid = GetOldestNonRemovableTransactionId(rel);
+       next_fxid = ReadNextFullTransactionId();
+   } while (!FullTransactionIdEquals(next_fxid, next_fxid_before));
+
+   if (rel)
+       table_close(rel, AccessShareLock);
+
+   PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromAllowableAt(next_fxid,
+                                                                xid));
+}
diff --git a/src/test/modules/injection_points/specs/syscache-update-pruned.spec b/src/test/modules/injection_points/specs/syscache-update-pruned.spec
new file mode 100644 (file)
index 0000000..b48e897
--- /dev/null
@@ -0,0 +1,179 @@
+# Test race conditions involving:
+# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin
+# - s2: CREATE TRIGGER making $FROM_SYSCACHE a dead tuple
+# - s4: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED
+
+# This is a derivative work of inplace.spec, which exercises the corresponding
+# race condition for inplace updates.
+
+# Despite local injection points, this is incompatible with runningcheck.
+# First, removable_cutoff() could move backward, per its header comment.
+# Second, other activity could trigger sinval queue overflow, negating our
+# efforts to delay inval.  Third, this deadlock emerges:
+#
+# - step at2 waits at an injection point, with interrupts held
+# - an unrelated backend waits for at2 to do PROCSIGNAL_BARRIER_SMGRRELEASE
+# - step waitprunable4 waits for the unrelated backend to release its xmin
+
+# The alternative expected output is for -DCATCACHE_FORCE_RELEASE, a setting
+# that thwarts testing the race conditions this spec seeks.
+
+
+# Need s2 to make a non-HOT update.  Otherwise, "VACUUM pg_class" would leave
+# an LP_REDIRECT that persists.  To get non-HOT, make rels so the pg_class row
+# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192).  Just to save
+# on filesystem syscalls, use relkind=c for every other rel.
+setup
+{
+   CREATE EXTENSION injection_points;
+   CREATE SCHEMA vactest;
+   -- Ensure a leader RELOID catcache entry.  PARALLEL RESTRICTED since a
+   -- parallel worker running pg_relation_filenode() would lack that effect.
+   CREATE FUNCTION vactest.reloid_catcache_set(regclass) RETURNS int
+       LANGUAGE sql PARALLEL RESTRICTED
+       AS 'SELECT 0 FROM pg_relation_filenode($1)';
+   CREATE FUNCTION vactest.mkrels(text, int, int) RETURNS void
+       LANGUAGE plpgsql SET search_path = vactest AS $$
+   DECLARE
+       tname text;
+   BEGIN
+       FOR i in $2 .. $3 LOOP
+           tname := $1 || i;
+           EXECUTE FORMAT('CREATE TYPE ' || tname || ' AS ()');
+           RAISE DEBUG '% at %', tname, ctid
+               FROM pg_class WHERE oid = tname::regclass;
+       END LOOP;
+   END
+   $$;
+   CREATE PROCEDURE vactest.wait_prunable() LANGUAGE plpgsql AS $$
+   DECLARE
+       barrier xid8;
+       cutoff xid8;
+   BEGIN
+       barrier := pg_current_xact_id();
+       -- autovacuum worker RelationCacheInitializePhase3() or the
+       -- isolationtester control connection might hold a snapshot that
+       -- limits pruning.  Sleep until that clears.
+       LOOP
+           ROLLBACK;  -- release MyProc->xmin, which could be the oldest
+           cutoff := removable_cutoff('pg_class');
+           EXIT WHEN cutoff >= barrier;
+           RAISE LOG 'removable cutoff %; waiting for %', cutoff, barrier;
+           PERFORM pg_sleep(.1);
+       END LOOP;
+   END
+   $$;
+}
+setup  { CALL vactest.wait_prunable();  -- maximize next two VACUUMs }
+setup  { VACUUM FULL pg_class;  -- reduce free space }
+setup  { VACUUM FREEZE pg_class;  -- populate fsm etc. }
+setup
+{
+   SELECT FROM vactest.mkrels('orig', 1, 49);
+   CREATE TABLE vactest.orig50 (c int) WITH (autovacuum_enabled = off);
+   CREATE TABLE vactest.child50 (c int) WITH (autovacuum_enabled = off);
+   SELECT FROM vactest.mkrels('orig', 51, 100);
+}
+teardown
+{
+   DROP SCHEMA vactest CASCADE;
+   DROP EXTENSION injection_points;
+}
+
+# Wait during GRANT.  Disable debug_discard_caches, since we're here to
+# exercise an outcome that happens under permissible cache staleness.
+session s1
+setup  {
+   SET debug_discard_caches = 0;
+   SELECT FROM injection_points_set_local();
+   SELECT FROM injection_points_attach('heap_update-before-pin', 'wait');
+}
+step cachefill1    { SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); }
+step grant1    { GRANT SELECT ON vactest.orig50 TO PUBLIC; }
+
+# Update of the tuple that grant1 will update.  Wait before sending invals, so
+# s1 will not get a cache miss.  Choose the commands for making such updates
+# from among those whose heavyweight locking does not conflict with GRANT's
+# heavyweight locking.  (GRANT will see our XID as committed, so observing
+# that XID in the tuple xmax also won't block GRANT.)
+session s2
+setup  {
+   SELECT FROM injection_points_set_local();
+   SELECT FROM
+       injection_points_attach('AtEOXact_Inval-with-transInvalInfo', 'wait');
+}
+step at2   {
+   CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+       FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+}
+
+# Hold snapshot to block pruning.
+session s3
+step snap3 { BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; }
+step r3        { ROLLBACK; }
+
+# Non-blocking actions.
+session s4
+step waitprunable4 { CALL vactest.wait_prunable(); }
+step vac4      { VACUUM pg_class; }
+# Reuse the lp that s1 is waiting to change.  I've observed reuse at the 1st
+# or 18th CREATE, so create excess.
+step mkrels4   {
+   SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+}
+step wakegrant4    {
+   SELECT FROM injection_points_detach('heap_update-before-pin');
+   SELECT FROM injection_points_wakeup('heap_update-before-pin');
+}
+step at4   { ALTER TABLE vactest.child50 INHERIT vactest.orig50; }
+step wakeinval4    {
+   SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+   SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+}
+# Witness effects of steps at2 and/or at4.
+step inspect4  {
+   SELECT relhastriggers, relhassubclass FROM pg_class
+       WHERE oid = 'vactest.orig50'::regclass;
+}
+
+# TID from syscache becomes LP_UNUSED.  Before the bug fix, this permutation
+# made s1 fail with "attempted to update invisible tuple" or an assert.
+# However, suppose a pd_lsn value such that (pd_lsn.xlogid, pd_lsn.xrecoff)
+# passed for (xmin, xmax) with xmin known-committed and xmax known-aborted.
+# Persistent page header corruption ensued.  For example, s1 overwrote
+# pd_lower, pd_upper, and pd_special as though they were t_ctid.
+permutation
+   cachefill1          # reads pg_class tuple T0, xmax invalid
+   at2                 # T0 dead, T1 live
+   waitprunable4       # T0 prunable
+   vac4                # T0 becomes LP_UNUSED
+   grant1              # pauses at heap_update(T0)
+   wakeinval4(at2)     # at2 sends inval message
+   wakegrant4(grant1)  # s1 wakes: "tuple concurrently deleted"
+
+# add mkrels4: LP_UNUSED becomes a different rel's row
+permutation
+   cachefill1          # reads pg_class tuple T0, xmax invalid
+   at2                 # T0 dead, T1 live
+   waitprunable4       # T0 prunable
+   vac4                # T0 becomes LP_UNUSED
+   grant1              # pauses at heap_update(T0)
+   wakeinval4(at2)     # at2 sends inval message
+   mkrels4             # T0 becomes a new rel
+   wakegrant4(grant1)  # s1 wakes: "duplicate key value violates unique"
+
+# TID from syscache becomes LP_UNUSED, then becomes a newer version of the
+# original rel's row.
+permutation
+   snap3               # sets MyProc->xmin
+   cachefill1          # reads pg_class tuple T0, xmax invalid
+   at2                 # T0 dead, T1 live
+   mkrels4             # T1's page becomes full
+   r3                  # clears MyProc->xmin
+   waitprunable4       # T0 prunable
+   vac4                # T0 becomes LP_UNUSED
+   grant1              # pauses at heap_update(T0)
+   wakeinval4(at2)     # at2 sends inval message
+   at4                 # T1 dead, T0 live
+   wakegrant4(grant1)  # s1 wakes: T0 dead, T2 live
+   inspect4            # observe loss of at2+at4 changes XXX is an extant bug