Remove redundant snapshot copying from parallel leader to workers
authorHeikki Linnakangas <[email protected]>
Thu, 14 Mar 2024 13:18:10 +0000 (15:18 +0200)
committerHeikki Linnakangas <[email protected]>
Thu, 14 Mar 2024 13:18:10 +0000 (15:18 +0200)
The parallel query infrastructure copies the leader backend's active
snapshot to the worker processes. But BitmapHeapScan node also had
bespoken code to pass the snapshot from leader to the worker. That was
redundant, so remove it.

The removed code was analogous to the snapshot serialization in
table_parallelscan_initialize(), but that was the wrong role model. A
parallel bitmap heap scan is more like an independent non-parallel
bitmap heap scan in each parallel worker as far as the table AM is
concerned, because the coordination is done in nodeBitmapHeapscan.c,
and the table AM doesn't need to know anything about it.

This relies on the assumption that es_snapshot ==
GetActiveSnapshot(). That's not a new assumption, things would get
weird if you used the QueryDesc's snapshot for visibility checks in
the scans, but the active snapshot for evaluating quals, for
example. This could use some refactoring and cleanup, but for now,
just add some assertions.

Reviewed-by: Dilip Kumar, Robert Haas
Discussion: https://www.postgresql.org/message-id/5f3b9d59-0f43-419d-80ca-6d04c07cf61a@iki.fi

src/backend/access/table/tableam.c
src/backend/executor/execMain.c
src/backend/executor/execParallel.c
src/backend/executor/nodeBitmapHeapscan.c
src/include/access/tableam.h
src/include/nodes/execnodes.h

index 6ed8cca05a1af50a83411918fe7ab6288a29d3c5..e57a0b7ea310bce2c8459090e58591f0c522b4e0 100644 (file)
@@ -120,16 +120,6 @@ table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
                                                                                        NULL, flags);
 }
 
-void
-table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
-{
-       Assert(IsMVCCSnapshot(snapshot));
-
-       RegisterSnapshot(snapshot);
-       scan->rs_snapshot = snapshot;
-       scan->rs_flags |= SO_TEMP_SNAPSHOT;
-}
-
 
 /* ----------------------------------------------------------------------------
  * Parallel table scan related functions.
index 940499cc61af03507e630292a985b522eb3cf305..7eb1f7d020959f0ef9a1a1beaee4d233e83ad447 100644 (file)
@@ -147,6 +147,9 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
        Assert(queryDesc != NULL);
        Assert(queryDesc->estate == NULL);
 
+       /* caller must ensure the query's snapshot is active */
+       Assert(GetActiveSnapshot() == queryDesc->snapshot);
+
        /*
         * If the transaction is read-only, we need to check if any writes are
         * planned to non-temporary tables.  EXPLAIN is considered read-only.
@@ -319,6 +322,9 @@ standard_ExecutorRun(QueryDesc *queryDesc,
        Assert(estate != NULL);
        Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
 
+       /* caller must ensure the query's snapshot is active */
+       Assert(GetActiveSnapshot() == estate->es_snapshot);
+
        /*
         * Switch into per-query memory context
         */
index 3f84c002dc132c1f77a109a15c830855e0659983..8c53d1834e9f89f975cb6f1fe3c40fdafbc9e658 100644 (file)
@@ -720,6 +720,13 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
        shm_toc_estimate_chunk(&pcxt->estimator, dsa_minsize);
        shm_toc_estimate_keys(&pcxt->estimator, 1);
 
+       /*
+        * InitializeParallelDSM() passes the active snapshot to the parallel
+        * worker, which uses it to set es_snapshot.  Make sure we don't set
+        * es_snapshot differently in the child.
+        */
+       Assert(GetActiveSnapshot() == estate->es_snapshot);
+
        /* Everyone's had a chance to ask for space, so now create the DSM. */
        InitializeParallelDSM(pcxt);
 
index 345b67649ea876353a7947b1b37b18f63b34e265..ca548e44eb4594bbe44f90255b258c93b82e722c 100644 (file)
@@ -721,7 +721,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
        scanstate->prefetch_iterator = NULL;
        scanstate->prefetch_pages = 0;
        scanstate->prefetch_target = 0;
-       scanstate->pscan_len = 0;
        scanstate->initialized = false;
        scanstate->shared_tbmiterator = NULL;
        scanstate->shared_prefetch_iterator = NULL;
@@ -841,13 +840,7 @@ void
 ExecBitmapHeapEstimate(BitmapHeapScanState *node,
                                           ParallelContext *pcxt)
 {
-       EState     *estate = node->ss.ps.state;
-
-       node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
-                                                                               phs_snapshot_data),
-                                                          EstimateSnapshotSpace(estate->es_snapshot));
-
-       shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
+       shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelBitmapHeapState));
        shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
 
@@ -862,14 +855,13 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
                                                        ParallelContext *pcxt)
 {
        ParallelBitmapHeapState *pstate;
-       EState     *estate = node->ss.ps.state;
        dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
 
        /* If there's no DSA, there are no workers; initialize nothing. */
        if (dsa == NULL)
                return;
 
-       pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
+       pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelBitmapHeapState));
 
        pstate->tbmiterator = 0;
        pstate->prefetch_iterator = 0;
@@ -881,7 +873,6 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
        pstate->state = BM_INITIAL;
 
        ConditionVariableInit(&pstate->cv);
-       SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
 
        shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
        node->pstate = pstate;
@@ -927,13 +918,9 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
                                                           ParallelWorkerContext *pwcxt)
 {
        ParallelBitmapHeapState *pstate;
-       Snapshot        snapshot;
 
        Assert(node->ss.ps.state->es_query_dsa != NULL);
 
        pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
        node->pstate = pstate;
-
-       snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
-       table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
 }
index 5f8474871d209e31cb2054dc779ff20d0de49d6e..8249b37bbf16266a4918a8787005395ce63beb2e 100644 (file)
@@ -1038,11 +1038,6 @@ table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
                                                                                 allow_pagemode);
 }
 
-/*
- * Update snapshot used by the scan.
- */
-extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);
-
 /*
  * Return next tuple from `scan`, store in slot.
  */
index 444a5f0fd5786a0b213e7d8905b1bb786a4f7989..27614ab50fbab6894ecc1a1980f8ad60d0bc29b8 100644 (file)
@@ -1689,7 +1689,6 @@ typedef enum
  *             prefetch_target                 current target prefetch distance
  *             state                                   current state of the TIDBitmap
  *             cv                                              conditional wait variable
- *             phs_snapshot_data               snapshot data shared to workers
  * ----------------
  */
 typedef struct ParallelBitmapHeapState
@@ -1701,7 +1700,6 @@ typedef struct ParallelBitmapHeapState
        int                     prefetch_target;
        SharedBitmapState state;
        ConditionVariable cv;
-       char            phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
 } ParallelBitmapHeapState;
 
 /* ----------------
@@ -1721,7 +1719,6 @@ typedef struct ParallelBitmapHeapState
  *             prefetch_pages     # pages prefetch iterator is ahead of current
  *             prefetch_target    current target prefetch distance
  *             prefetch_maximum   maximum value for prefetch_target
- *             pscan_len                  size of the shared memory for parallel bitmap
  *             initialized                is node is ready to iterate
  *             shared_tbmiterator         shared iterator
  *             shared_prefetch_iterator shared iterator for prefetching
@@ -1745,7 +1742,6 @@ typedef struct BitmapHeapScanState
        int                     prefetch_pages;
        int                     prefetch_target;
        int                     prefetch_maximum;
-       Size            pscan_len;
        bool            initialized;
        TBMSharedIterator *shared_tbmiterator;
        TBMSharedIterator *shared_prefetch_iterator;