* BULKWRITE buffer selection strategy object to the buffer manager.
* Passing NULL for bistate selects the default behavior.
*
- * We always try to avoid filling existing pages further than the fillfactor.
- * This is OK since this routine is not consulted when updating a tuple and
- * keeping it on the same page, which is the scenario fillfactor is meant
- * to reserve space for.
+ * We don't fill existing pages further than the fillfactor, except for large
+ * tuples in nearly-empty pages. This is OK since this routine is not
+ * consulted when updating a tuple and keeping it on the same page, which is
+ * the scenario fillfactor is meant to reserve space for.
*
* ereport(ERROR) is allowed here, so this routine *must* be called
* before any (unlogged) changes are made in buffer pool.
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
Buffer buffer = InvalidBuffer;
Page page;
- Size pageFreeSpace = 0,
- saveFreeSpace = 0;
+ Size nearlyEmptyFreeSpace,
+ pageFreeSpace = 0,
+ saveFreeSpace = 0,
+ targetFreeSpace = 0;
BlockNumber targetBlock,
otherBlock;
bool needLock;
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
HEAP_DEFAULT_FILLFACTOR);
+ /*
+ * Since pages without tuples can still have line pointers, we consider
+ * pages "empty" when the unavailable space is slight. This threshold is
+ * somewhat arbitrary, but it should prevent most unnecessary relation
+ * extensions while inserting large tuples into low-fillfactor tables.
+ */
+ nearlyEmptyFreeSpace = MaxHeapTupleSize -
+ (MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
+ if (len + saveFreeSpace > nearlyEmptyFreeSpace)
+ targetFreeSpace = Max(len, nearlyEmptyFreeSpace);
+ else
+ targetFreeSpace = len + saveFreeSpace;
+
if (otherBuffer != InvalidBuffer)
otherBlock = BufferGetBlockNumber(otherBuffer);
else
* When use_fsm is false, we either put the tuple onto the existing target
* page or extend the relation.
*/
- if (len + saveFreeSpace > MaxHeapTupleSize)
- {
- /* can't fit, don't bother asking FSM */
- targetBlock = InvalidBlockNumber;
- use_fsm = false;
- }
- else if (bistate && bistate->current_buf != InvalidBuffer)
+ if (bistate && bistate->current_buf != InvalidBuffer)
targetBlock = BufferGetBlockNumber(bistate->current_buf);
else
targetBlock = RelationGetTargetBlock(relation);
* We have no cached target page, so ask the FSM for an initial
* target.
*/
- targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
+ targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
}
/*
}
pageFreeSpace = PageGetHeapFreeSpace(page);
- if (len + saveFreeSpace <= pageFreeSpace)
+ if (targetFreeSpace <= pageFreeSpace)
{
/* use this page as future insert target, too */
RelationSetTargetBlock(relation, targetBlock);
targetBlock = RecordAndGetPageWithFreeSpace(relation,
targetBlock,
pageFreeSpace,
- len + saveFreeSpace);
+ targetFreeSpace);
}
/*
* Check if some other backend has extended a block for us while
* we were waiting on the lock.
*/
- targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
+ targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
/*
* If some other waiter has already extended the relation, we
drop table inserttest;
--
+-- tuple larger than fillfactor
+--
+CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
+ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
+-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize)
+INSERT INTO large_tuple_test (select 1, NULL);
+-- should still fit on the page
+INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
+SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
+ pg_size_pretty
+----------------
+ 8192 bytes
+(1 row)
+
+-- add small record to the second page
+INSERT INTO large_tuple_test (select 3, NULL);
+-- now this tuple won't fit on the second page, but the insert should
+-- still succeed by extending the relation
+INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+DROP TABLE large_tuple_test;
+--
-- check indirection (field/array assignment), cf bug #14265
--
-- these tests are aware that transformInsertStmt has 3 separate code paths
drop table inserttest;
+--
+-- tuple larger than fillfactor
+--
+CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
+ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
+
+-- create page w/ free space in range [nearlyEmptyFreeSpace, MaxHeapTupleSize)
+INSERT INTO large_tuple_test (select 1, NULL);
+
+-- should still fit on the page
+INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
+SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
+
+-- add small record to the second page
+INSERT INTO large_tuple_test (select 3, NULL);
+
+-- now this tuple won't fit on the second page, but the insert should
+-- still succeed by extending the relation
+INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+
+DROP TABLE large_tuple_test;
+
--
-- check indirection (field/array assignment), cf bug #14265
--