hio: Take number of prior relation extensions into account
author: Andres Freund <[email protected]>
Mon, 14 Aug 2023 16:54:03 +0000 (09:54 -0700)
committer: Andres Freund <[email protected]>
Mon, 14 Aug 2023 18:33:09 +0000 (11:33 -0700)
The new relation extension logic, introduced in 00d1e02be24, could lead to
slowdowns in some scenarios. E.g., when loading narrow rows into a table using
COPY, the caller of RelationGetBufferForTuple() will only request a small
number of pages. Without concurrency, we just extended using pwritev() in that
case. However, if there is *some* concurrency, we switched between extending
by a small number of pages and a larger number of pages, depending on the
number of waiters for the relation extension logic.  However, some
filesystems, XFS in particular, do not perform well when switching between
extending files using fallocate() and pwritev().

To avoid that issue, remember the number of prior relation extensions in
BulkInsertState and extend more aggressively if there were prior relation
extensions. That not only avoids the aforementioned slowdown, but also leads
to noticeable performance gains in other situations, primarily due to
extending more aggressively when there is no concurrency. I should have done
it this way from the get go.

Reported-by: Masahiko Sawada <[email protected]>
Author: Andres Freund <[email protected]>
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940=Kp6mszNGK3bq9yRN6g@mail.gmail.com
Backpatch: 16-, where the new relation extension code was added

src/backend/access/heap/heapam.c
src/backend/access/heap/hio.c
src/include/access/hio.h

index 7ed72abe597a47a031ee56c06d0743ec464aa279..6a66214a58041ef5850af0064f8af47159c088d8 100644 (file)
@@ -1776,6 +1776,7 @@ GetBulkInsertState(void)
        bistate->current_buf = InvalidBuffer;
        bistate->next_free = InvalidBlockNumber;
        bistate->last_free = InvalidBlockNumber;
+       bistate->already_extended_by = 0;
        return bistate;
 }
 
index c275b08494d02c87fa00df36d28db67d8e2525bd..21f808fecb5343f03d878a1121c66669c6bab0a6 100644 (file)
@@ -283,6 +283,24 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate,
                 */
                extend_by_pages += extend_by_pages * waitcount;
 
+               /* ---
+                * If we previously extended using the same bistate, it's very likely
+                * we'll extend some more. Try to extend by as many pages as
+                * before. This can be important for performance for several reasons,
+                * including:
+                *
+                * - It prevents mdzeroextend() switching between extending the
+                *   relation in different ways, which is inefficient for some
+                *   filesystems.
+                *
+                * - Contention is often intermittent. Even if we currently don't see
+                *   other waiters (see above), extending by larger amounts can
+                *   prevent future contention.
+                * ---
+                */
+               if (bistate)
+                       extend_by_pages = Max(extend_by_pages, bistate->already_extended_by);
+
                /*
                 * Can't extend by more than MAX_BUFFERS_TO_EXTEND_BY, we need to pin
                 * them all concurrently.
@@ -409,6 +427,7 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate,
                /* maintain bistate->current_buf */
                IncrBufferRefCount(buffer);
                bistate->current_buf = buffer;
+               bistate->already_extended_by += extend_by_pages;
        }
 
        return buffer;
index 228433ee4a2cd086ac006748b2f82b1dd7a3ef17..9bc563b7628fc83f64e266a0a40ae86604a31e75 100644 (file)
@@ -32,15 +32,22 @@ typedef struct BulkInsertStateData
        Buffer          current_buf;    /* current insertion target page */
 
        /*
-        * State for bulk extensions. Further pages that were unused at the time
-        * of the extension. They might be in use by the time we use them though,
-        * so rechecks are needed.
+        * State for bulk extensions.
+        *
+        * last_free..next_free are further pages that were unused at the time of
+        * the last extension. They might be in use by the time we use them
+        * though, so rechecks are needed.
         *
         * XXX: Eventually these should probably live in RelationData instead,
         * alongside targetblock.
+        *
+        * already_extended_by is the number of pages this bulk insert has
+        * extended by so far. If we already extended by a significant number
+        * of pages, we can be more aggressive about extending going forward.
         */
        BlockNumber next_free;
        BlockNumber last_free;
+       uint32          already_extended_by;
 } BulkInsertStateData;