* SLAB allocator definitions.
*
* SLAB is a MemoryContext implementation designed for cases where large
- * numbers of equally-sized objects are allocated (and freed).
+ * numbers of equally-sized objects can be allocated and freed efficiently
+ * with minimal memory wastage and fragmentation.
*
*
* Portions Copyright (c) 2017-2022, PostgreSQL Global Development Group
* NOTE:
* The constant allocation size allows significant simplification and various
* optimizations over more general purpose allocators. The blocks are carved
- * into chunks of exactly the right size (plus alignment), not wasting any
- * memory.
+ * into chunks of exactly the right size, wasting only the space required to
+ * MAXALIGN the allocated chunks.
*
- * The information about free chunks is maintained both at the block level and
- * global (context) level. This is possible as the chunk size (and thus also
- * the number of chunks per block) is fixed.
+ * Slab can also help reduce memory fragmentation in cases where longer-lived
+ * chunks remain stored on blocks while most of the other chunks have already
+ * been pfree'd. We give priority to putting new allocations into the
+ * "fullest" block. This help avoid having too many sparsely used blocks
+ * around and allows blocks to more easily become completely unused which
+ * allows them to be eventually free'd.
*
- * On each block, free chunks are tracked in a simple linked list. Contents
- * of free chunks is replaced with an index of the next free chunk, forming
- * a very simple linked list. Each block also contains a counter of free
- * chunks. Combined with the local block-level freelist, it makes it trivial
- * to eventually free the whole block.
+ * We identify the "fullest" block to put new allocations on by using a block
+ * from the lowest populated element of the context's "blocklist" array.
+ * This is an array of dlists containing blocks which we partition by the
+ * number of free chunks each block has. Blocks with fewer free chunks are
+ * stored in a lower-indexed dlist array slot. Full blocks go on the 0th
+ * element of the blocklist array. So that we don't need too many
+ * elements in the array, each dlist in the array is responsible for a range
+ * of free chunks. When a chunk is palloc'd or pfree'd we may need to move
+ * the block onto another dlist if the number of free chunks crosses the
+ * range boundary that the current list is responsible for. Having just a
+ * few blocklist elements reduces the number of times we must move the block
+ * onto another dlist element.
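+ *
+ * As an illustrative example (editorial, with the numbers derived from the
+ * code below): with SLAB_BLOCKLIST_COUNT = 3 and 50 chunks per block,
+ * blocklist[0] holds full blocks, blocklist[1] holds blocks with 1-32 free
+ * chunks and blocklist[2] holds blocks with 33-50 free chunks.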
*
- * At the context level, we use 'freelist' to track blocks ordered by number
- * of free chunks, starting with blocks having a single allocated chunk, and
- * with completely full blocks on the tail.
+ * We keep track of free chunks within each block by using a block-level free
+ * list. We consult this list when we allocate a new chunk in the block.
+ * The free list is a linked list, the head of which is pointed to with
+ * SlabBlock's freehead field. Each subsequent list item is stored in the
+ * free chunk's memory. We ensure chunks are large enough to store this
+ * address.
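+ *
+ * For illustration (editorial): a block's free list might look like
+ * freehead -> chunk 5 -> chunk 2 -> NULL, where each "next" pointer is
+ * stored in the user-data area of the preceding free chunk.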
*
- * This also allows various optimizations - for example when searching for
- * free chunk, the allocator reuses space from the fullest blocks first, in
- * the hope that some of the less full blocks will get completely empty (and
- * returned back to the OS).
- *
- * For each block, we maintain pointer to the first free chunk - this is quite
- * cheap and allows us to skip all the preceding used chunks, eliminating
- * a significant number of lookups in many common usage patterns. In the worst
- * case this performs as if the pointer was not maintained.
- *
- * We cache the freelist index for the blocks with the fewest free chunks
- * (minFreeChunks), so that we don't have to search the freelist on every
- * SlabAlloc() call, which is quite expensive.
+ * When we allocate a new block, technically all chunks are free. However,
+ * to avoid having to write out the entire block to set up the linked list
+ * of free chunks, we instead store a pointer to
+ * the next "unused" chunk on the block and keep track of how many of these
+ * unused chunks there are. When a new block is malloc'd, all chunks are
+ * unused. The unused pointer starts with the first chunk on the block and
+ * as chunks are allocated, the unused pointer is incremented. As chunks are
+ * pfree'd, the unused pointer never goes backwards. The unused pointer can
+ * be thought of as a high watermark for the maximum number of chunks in the
+ * block which have been in use concurrently. When a chunk is pfree'd the
+ * chunk is put onto the head of the free list and the unused pointer is not
+ * changed. We only consume more unused chunks if we run out of free chunks
+ * on the free list. This method effectively gives priority to using
+ * previously used chunks over previously unused chunks, which should perform
+ * better due to CPU caching effects.
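+ *
+ * A short worked example (editorial): on a block with 4 chunks, initially
+ * nfree = 4, nunused = 4, freehead = NULL and unused points at chunk 0.
+ * Three pallocs consume chunks 0-2 via the unused pointer (nunused = 1).
+ * pfree'ing chunk 1 pushes it onto freehead (nfree becomes 2) and leaves
+ * unused untouched; the next palloc then reuses chunk 1 from the free list
+ * instead of consuming chunk 3.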
*
*-------------------------------------------------------------------------
*/
#define Slab_BLOCKHDRSZ MAXALIGN(sizeof(SlabBlock))
+#ifdef MEMORY_CONTEXT_CHECKING
+/*
+ * Size of the memory required to store the SlabContext.
+ * MEMORY_CONTEXT_CHECKING builds need some extra memory for the isChunkFree
+ * array.
+ */
+#define Slab_CONTEXT_HDRSZ(chunksPerBlock) \
+ (sizeof(SlabContext) + ((chunksPerBlock) * sizeof(bool)))
+#else
+#define Slab_CONTEXT_HDRSZ(chunksPerBlock) sizeof(SlabContext)
+#endif
+
+/*
+ * The number of partitions to divide the blocklist into based on the number
+ * of free chunks. There must be at least 2.
+ */
+#define SLAB_BLOCKLIST_COUNT 3
+
+/* The maximum number of completely empty blocks to keep around for reuse. */
+#define SLAB_MAXIMUM_EMPTY_BLOCKS 10
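+
+/*
+ * Typical usage (an editorial sketch; MyFixedSizeStruct is a placeholder
+ * type, everything else is the regular memory context API):
+ *
+ *	cxt = SlabContextCreate(CurrentMemoryContext, "my slab",
+ *							SLAB_DEFAULT_BLOCK_SIZE,
+ *							sizeof(MyFixedSizeStruct));
+ *	item = MemoryContextAlloc(cxt, sizeof(MyFixedSizeStruct));
+ *	...
+ *	pfree(item);
+ */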
+
/*
* SlabContext is a specialized implementation of MemoryContext.
*/
{
MemoryContextData header; /* Standard memory-context fields */
/* Allocation parameters for this context: */
- Size chunkSize; /* chunk size */
- Size fullChunkSize; /* chunk size including header and alignment */
- Size blockSize; /* block size */
- Size headerSize; /* allocated size of context header */
- int chunksPerBlock; /* number of chunks per block */
- int minFreeChunks; /* min number of free chunks in any block */
- int nblocks; /* number of blocks allocated */
+ Size chunkSize; /* the requested (non-aligned) chunk size */
+ Size fullChunkSize; /* chunk size with chunk header and alignment */
+ Size blockSize; /* the size to make each block of chunks */
+ int32 chunksPerBlock; /* number of chunks that fit in 1 block */
+ int32 curBlocklistIndex; /* index into the blocklist[] element
+ * containing the fullest blocks */
#ifdef MEMORY_CONTEXT_CHECKING
- bool *freechunks; /* bitmap of free chunks in a block */
+ bool *isChunkFree; /* array to mark free chunks in a block during
+ * SlabCheck */
#endif
- /* blocks with free space, grouped by number of free chunks: */
- dlist_head freelist[FLEXIBLE_ARRAY_MEMBER];
+
+ int32 blocklist_shift; /* number of bits to shift the nfree count
+ * by to get the index into blocklist[] */
+ dclist_head emptyblocks; /* empty blocks to use up first instead of
+ * mallocing new blocks */
+
+ /*
+ * Blocks with free space, grouped by the number of free chunks they
+ * contain. Completely full blocks are stored in the 0th element.
+ * Completely empty blocks are stored in emptyblocks or free'd if we have
+ * enough empty blocks already.
+ */
+ dlist_head blocklist[SLAB_BLOCKLIST_COUNT];
} SlabContext;
/*
* SlabBlock
- * Structure of a single block in SLAB allocator.
+ * Structure of a single slab block.
*
- * node: doubly-linked list of blocks in global freelist
- * nfree: number of free chunks in this block
- * firstFreeChunk: index of the first free chunk
+ * slab: pointer back to the owning MemoryContext
+ * nfree: number of chunks on the block which are unallocated
+ * nunused: number of chunks on the block which are unallocated and not on
+ * the block's freelist.
+ * freehead: linked-list header storing a pointer to the first free chunk on
+ * the block. Subsequent pointers are stored in the chunk's memory. NULL
+ * indicates the end of the list.
+ * unused: pointer to the next chunk which has yet to be used.
+ * node: doubly-linked list node for the context's blocklist
*/
typedef struct SlabBlock
{
- dlist_node node; /* doubly-linked list */
- int nfree; /* number of free chunks */
- int firstFreeChunk; /* index of the first free chunk in the block */
SlabContext *slab; /* owning context */
+ int32 nfree; /* number of chunks on the freelist + unused chunks */
+ int32 nunused; /* number of unused chunks */
+ MemoryChunk *freehead; /* pointer to the first free chunk */
+ MemoryChunk *unused; /* pointer to the next unused chunk */
+ dlist_node node; /* doubly-linked list for blocklist[] */
} SlabBlock;
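+
+/*
+ * Illustrative layout of a single slab block (editorial sketch):
+ *
+ *	+-----------+---------+-----+----------------------------+
+ *	| SlabBlock | chunk 0 | ... | chunk (chunksPerBlock - 1) |
+ *	+-----------+---------+-----+----------------------------+
+ *
+ * Each chunk occupies fullChunkSize bytes: a MemoryChunk header followed by
+ * the MAXALIGN'd user space. SlabBlockGetChunk() below does the arithmetic
+ * to locate the nth chunk.
+ */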
#define Slab_CHUNKHDRSZ sizeof(MemoryChunk)
-#define SlabPointerGetChunk(ptr) \
- ((MemoryChunk *)(((char *)(ptr)) - sizeof(MemoryChunk)))
#define SlabChunkGetPointer(chk) \
- ((void *)(((char *)(chk)) + sizeof(MemoryChunk)))
-#define SlabBlockGetChunk(slab, block, idx) \
+ ((void *) (((char *) (chk)) + sizeof(MemoryChunk)))
+
+/*
+ * SlabBlockGetChunk
+ * Obtain a pointer to the nth (0-based) chunk in the block
+ */
+#define SlabBlockGetChunk(slab, block, n) \
((MemoryChunk *) ((char *) (block) + Slab_BLOCKHDRSZ \
- + (idx * slab->fullChunkSize)))
-#define SlabBlockStart(block) \
- ((char *) block + Slab_BLOCKHDRSZ)
+ + ((n) * (slab)->fullChunkSize)))
+
+#if defined(MEMORY_CONTEXT_CHECKING) || defined(USE_ASSERT_CHECKING)
+
+/*
+ * SlabChunkIndex
+ * Get the 0-based index of how many chunks into the block the given
+ * chunk is.
+ */
#define SlabChunkIndex(slab, block, chunk) \
- (((char *) chunk - SlabBlockStart(block)) / slab->fullChunkSize)
+ (((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) / \
+ (slab)->fullChunkSize)
+
+/*
+ * SlabChunkMod
+ * A MemoryChunk should always be at an address which is a multiple of
+ * fullChunkSize starting from the 0th chunk position. This will return
+ * non-zero if it's not.
+ */
+#define SlabChunkMod(slab, block, chunk) \
+ (((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) % \
+ (slab)->fullChunkSize)
+
+#endif
/*
* SlabIsValid
- * True iff set is valid slab allocation set.
+ * True iff set is a valid slab allocation set.
*/
-#define SlabIsValid(set) \
- (PointerIsValid(set) && IsA(set, SlabContext))
+#define SlabIsValid(set) (PointerIsValid(set) && IsA(set, SlabContext))
/*
* SlabBlockIsValid
- * True iff block is valid block of slab allocation set.
+ * True iff block is a valid block of slab allocation set.
*/
#define SlabBlockIsValid(block) \
(PointerIsValid(block) && SlabIsValid((block)->slab))
+/*
+ * SlabBlocklistIndex
+ * Determine the blocklist index that a block should be in for the given
+ * number of free chunks.
+ */
+static inline int32
+SlabBlocklistIndex(SlabContext *slab, int nfree)
+{
+ int32 index;
+ int32 blocklist_shift = slab->blocklist_shift;
+
+ Assert(nfree >= 0 && nfree <= slab->chunksPerBlock);
+
+ /*
+ * Determine the blocklist index based on the number of free chunks. We
+ * must ensure that 0 free chunks is dedicated to index 0. Everything
+ * else must be >= 1 and < SLAB_BLOCKLIST_COUNT.
+ *
+ * To make this as efficient as possible, we exploit some two's complement
+ * arithmetic where we reverse the sign before bit shifting. This results
+ * in an nfree of 0 using index 0 and anything non-zero staying non-zero.
+ * This is exploiting 0 and -0 being the same in two's complement. When
+ * we're done, we just need to flip the sign back over again for a
+ * positive index.
+ */
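+
+	/*
+	 * Worked example (editorial): with chunksPerBlock = 50 we get
+	 * blocklist_shift = 5, so the expression below computes
+	 * ceil(nfree / 32): nfree = 0 -> 0, nfree 1-32 -> 1, nfree 33-50 -> 2.
+	 */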
+ index = -((-nfree) >> blocklist_shift);
+
+ if (nfree == 0)
+ Assert(index == 0);
+ else
+ Assert(index >= 1 && index < SLAB_BLOCKLIST_COUNT);
+
+ return index;
+}
+
+/*
+ * SlabFindNextBlockListIndex
+ * Search the blocklist for blocks which have free chunks and return the
+ * index of the first blocklist found to contain at least 1 block with
+ * free chunks. If no such block can be found, return 0.
+ *
+ * Note: We give priority to fuller blocks so that these are filled before
+ * emptier blocks. This is done to increase the chances that mostly-empty
+ * blocks will eventually become completely empty so they can be free'd.
+ */
+static int32
+SlabFindNextBlockListIndex(SlabContext *slab)
+{
+ /* start at 1 as blocklist[0] is for full blocks. */
+ for (int i = 1; i < SLAB_BLOCKLIST_COUNT; i++)
+ {
+ /* return the first found non-empty index */
+ if (!dlist_is_empty(&slab->blocklist[i]))
+ return i;
+ }
+
+ /* no blocks with free space */
+ return 0;
+}
+
+/*
+ * SlabGetNextFreeChunk
+ * Return the next free chunk in block and update the block to account
+ * for the returned chunk now being used.
+ */
+static inline MemoryChunk *
+SlabGetNextFreeChunk(SlabContext *slab, SlabBlock *block)
+{
+ MemoryChunk *chunk;
+
+ Assert(block->nfree > 0);
+
+ if (block->freehead != NULL)
+ {
+ chunk = block->freehead;
+
+ /*
+ * Pop the chunk from the linked list of free chunks. The pointer to
+ * the next free chunk is stored in the chunk itself.
+ */
+ VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(MemoryChunk *));
+ block->freehead = *(MemoryChunk **) SlabChunkGetPointer(chunk);
+
+ /* check nothing stomped on the free chunk's memory */
+ Assert(block->freehead == NULL ||
+ (block->freehead >= SlabBlockGetChunk(slab, block, 0) &&
+ block->freehead <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) &&
+ SlabChunkMod(slab, block, block->freehead) == 0));
+ }
+ else
+ {
+ Assert(block->nunused > 0);
+
+ chunk = block->unused;
+ block->unused = (MemoryChunk *) (((char *) block->unused) + slab->fullChunkSize);
+ block->nunused--;
+ }
+
+ block->nfree--;
+
+ return chunk;
+}
/*
* SlabContextCreate
{
int chunksPerBlock;
Size fullChunkSize;
- Size freelistSize;
- Size headerSize;
SlabContext *slab;
int i;
"sizeof(MemoryChunk) is not maxaligned");
Assert(MAXALIGN(chunkSize) <= MEMORYCHUNK_MAX_VALUE);
- /* Make sure the linked list node fits inside a freed chunk */
- if (chunkSize < sizeof(int))
- chunkSize = sizeof(int);
+ /*
+ * Ensure there's enough space to store the pointer to the next free chunk
+ * in the memory of the (otherwise) unused allocation.
+ */
+ if (chunkSize < sizeof(MemoryChunk *))
+ chunkSize = sizeof(MemoryChunk *);
- /* chunk, including SLAB header (both addresses nicely aligned) */
+ /* length of the maxaligned chunk including the chunk header */
#ifdef MEMORY_CONTEXT_CHECKING
/* ensure there's always space for the sentinel byte */
fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize + 1);
fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize);
#endif
- /* Make sure the block can store at least one chunk. */
- if (blockSize < fullChunkSize + Slab_BLOCKHDRSZ)
- elog(ERROR, "block size %zu for slab is too small for %zu chunks",
- blockSize, chunkSize);
-
- /* Compute maximum number of chunks per block */
+ /* compute the number of chunks that will fit on each block */
chunksPerBlock = (blockSize - Slab_BLOCKHDRSZ) / fullChunkSize;
- /* The freelist starts with 0, ends with chunksPerBlock. */
- freelistSize = sizeof(dlist_head) * (chunksPerBlock + 1);
-
- /*
- * Allocate the context header. Unlike aset.c, we never try to combine
- * this with the first regular block; not worth the extra complication.
- */
+ /* Make sure the block can store at least one chunk. */
+ if (chunksPerBlock == 0)
+ elog(ERROR, "block size %zu for slab is too small for %zu-byte chunks",
+ blockSize, chunkSize);
- /* Size of the memory context header */
- headerSize = offsetof(SlabContext, freelist) + freelistSize;
-#ifdef MEMORY_CONTEXT_CHECKING
-
- /*
- * With memory checking, we need to allocate extra space for the bitmap of
- * free chunks. The bitmap is an array of bools, so we don't need to worry
- * about alignment.
- */
- headerSize += chunksPerBlock * sizeof(bool);
-#endif
- slab = (SlabContext *) malloc(headerSize);
+ slab = (SlabContext *) malloc(Slab_CONTEXT_HDRSZ(chunksPerBlock));
if (slab == NULL)
{
MemoryContextStats(TopMemoryContext);
slab->chunkSize = chunkSize;
slab->fullChunkSize = fullChunkSize;
slab->blockSize = blockSize;
- slab->headerSize = headerSize;
slab->chunksPerBlock = chunksPerBlock;
- slab->minFreeChunks = 0;
- slab->nblocks = 0;
+ slab->curBlocklistIndex = 0;
- /* initialize the freelist slots */
- for (i = 0; i < (slab->chunksPerBlock + 1); i++)
- dlist_init(&slab->freelist[i]);
+ /*
+ * Compute a shift that guarantees that shifting chunksPerBlock right by it
+ * gives a value < SLAB_BLOCKLIST_COUNT - 1. The reason that we subtract 1 from
+ * SLAB_BLOCKLIST_COUNT in this calculation is that we reserve the 0th
+ * blocklist element for blocks which have no free chunks.
+ *
+ * We calculate the number of bits to shift by rather than a divisor to
+ * divide by as performing division each time we need to find the
+ * blocklist index would be much slower.
+ */
+ slab->blocklist_shift = 0;
+ while ((slab->chunksPerBlock >> slab->blocklist_shift) >= (SLAB_BLOCKLIST_COUNT - 1))
+ slab->blocklist_shift++;
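+
+	/*
+	 * For example (editorial note): with chunksPerBlock = 50 the loop stops
+	 * at blocklist_shift = 5, the smallest shift where 50 >> shift drops
+	 * below SLAB_BLOCKLIST_COUNT - 1, bucketing non-full blocks in ranges
+	 * of 32 free chunks.
+	 */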
+
+ /* initialize the list to store empty blocks to be reused */
+ dclist_init(&slab->emptyblocks);
+
+ /* initialize each blocklist slot */
+ for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
+ dlist_init(&slab->blocklist[i]);
#ifdef MEMORY_CONTEXT_CHECKING
- /* set the freechunks pointer right after the freelists array */
- slab->freechunks
- = (bool *) slab + offsetof(SlabContext, freelist) + freelistSize;
+ /* set the isChunkFree pointer right after the end of the context */
+ slab->isChunkFree = (bool *) ((char *) slab + sizeof(SlabContext));
#endif
/* Finally, do the type-independent part of context creation */
SlabReset(MemoryContext context)
{
SlabContext *slab = (SlabContext *) context;
+ dlist_mutable_iter miter;
int i;
Assert(SlabIsValid(slab));
SlabCheck(context);
#endif
- /* walk over freelists and free the blocks */
- for (i = 0; i <= slab->chunksPerBlock; i++)
+ /* release any retained empty blocks */
+ dclist_foreach_modify(miter, &slab->emptyblocks)
{
- dlist_mutable_iter miter;
+ SlabBlock *block = dlist_container(SlabBlock, node, miter.cur);
+
+ dclist_delete_from(&slab->emptyblocks, miter.cur);
- dlist_foreach_modify(miter, &slab->freelist[i])
+#ifdef CLOBBER_FREED_MEMORY
+ wipe_mem(block, slab->blockSize);
+#endif
+ free(block);
+ context->mem_allocated -= slab->blockSize;
+ }
+
+ /* walk over blocklist and free the blocks */
+ for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
+ {
+ dlist_foreach_modify(miter, &slab->blocklist[i])
{
SlabBlock *block = dlist_container(SlabBlock, node, miter.cur);
wipe_mem(block, slab->blockSize);
#endif
free(block);
- slab->nblocks--;
context->mem_allocated -= slab->blockSize;
}
}
- slab->minFreeChunks = 0;
+ slab->curBlocklistIndex = 0;
- Assert(slab->nblocks == 0);
Assert(context->mem_allocated == 0);
}
/*
* SlabAlloc
- * Returns pointer to allocated memory of given size or NULL if
+ * Returns a pointer to allocated memory of given size or NULL if
* request could not be completed; memory is added to the slab.
*/
void *
SlabContext *slab = (SlabContext *) context;
SlabBlock *block;
MemoryChunk *chunk;
- int idx;
Assert(SlabIsValid(slab));
- Assert((slab->minFreeChunks >= 0) &&
- (slab->minFreeChunks < slab->chunksPerBlock));
+ /* sanity check that this is pointing to a valid blocklist */
+ Assert(slab->curBlocklistIndex >= 0);
+ Assert(slab->curBlocklistIndex <= SlabBlocklistIndex(slab, slab->chunksPerBlock));
/* make sure we only allow correct request size */
- if (size != slab->chunkSize)
+ if (unlikely(size != slab->chunkSize))
elog(ERROR, "unexpected alloc chunk size %zu (expected %zu)",
size, slab->chunkSize);
/*
- * If there are no free chunks in any existing block, create a new block
- * and put it to the last freelist bucket.
- *
- * slab->minFreeChunks == 0 means there are no blocks with free chunks,
- * thanks to how minFreeChunks is updated at the end of SlabAlloc().
+ * Handle the case when there are no partially filled blocks available.
+ * SlabFree() will have updated the curBlocklistIndex, setting it to zero
+ * to indicate that it has freed the final block. Also later in
+ * SlabAlloc() we will set the curBlocklistIndex to zero if we end up
+ * filling the final block.
*/
- if (slab->minFreeChunks == 0)
+ if (unlikely(slab->curBlocklistIndex == 0))
{
- block = (SlabBlock *) malloc(slab->blockSize);
+ dlist_head *blocklist;
+ int blocklist_idx;
+
+ /* to save allocating a new one, first check the empty blocks list */
+ if (dclist_count(&slab->emptyblocks) > 0)
+ {
+ dlist_node *node = dclist_pop_head_node(&slab->emptyblocks);
- if (block == NULL)
- return NULL;
+ block = dlist_container(SlabBlock, node, node);
- block->nfree = slab->chunksPerBlock;
- block->firstFreeChunk = 0;
- block->slab = slab;
+ /*
+ * SlabFree() should have left this block in a valid state with
+ * all chunks free. Ensure that's the case.
+ */
+ Assert(block->nfree == slab->chunksPerBlock);
- /*
- * Put all the chunks on a freelist. Walk the chunks and point each
- * one to the next one.
- */
- for (idx = 0; idx < slab->chunksPerBlock; idx++)
- {
- chunk = SlabBlockGetChunk(slab, block, idx);
- *(int32 *) MemoryChunkGetPointer(chunk) = (idx + 1);
+ /* fetch the next chunk from this block */
+ chunk = SlabGetNextFreeChunk(slab, block);
}
+ else
+ {
+ block = (SlabBlock *) malloc(slab->blockSize);
- /*
- * And add it to the last freelist with all chunks empty.
- *
- * We know there are no blocks in the freelist, otherwise we wouldn't
- * need a new block.
- */
- Assert(dlist_is_empty(&slab->freelist[slab->chunksPerBlock]));
+ if (unlikely(block == NULL))
+ return NULL;
- dlist_push_head(&slab->freelist[slab->chunksPerBlock], &block->node);
+ block->slab = slab;
+ context->mem_allocated += slab->blockSize;
- slab->minFreeChunks = slab->chunksPerBlock;
- slab->nblocks += 1;
- context->mem_allocated += slab->blockSize;
- }
+ /* use the first chunk in the new block */
+ chunk = SlabBlockGetChunk(slab, block, 0);
- /* grab the block from the freelist (even the new block is there) */
- block = dlist_head_element(SlabBlock, node,
- &slab->freelist[slab->minFreeChunks]);
+ block->nfree = slab->chunksPerBlock - 1;
+ block->unused = SlabBlockGetChunk(slab, block, 1);
+ block->freehead = NULL;
+ block->nunused = slab->chunksPerBlock - 1;
+ }
- /* make sure we actually got a valid block, with matching nfree */
- Assert(block != NULL);
- Assert(slab->minFreeChunks == block->nfree);
- Assert(block->nfree > 0);
+ /* find the blocklist element for storing blocks with 1 used chunk */
+ blocklist_idx = SlabBlocklistIndex(slab, block->nfree);
+ blocklist = &slab->blocklist[blocklist_idx];
- /* we know index of the first free chunk in the block */
- idx = block->firstFreeChunk;
+ /* this had better be empty; we just added a block on the assumption it was */
+ Assert(dlist_is_empty(blocklist));
- /* make sure the chunk index is valid, and that it's marked as empty */
- Assert((idx >= 0) && (idx < slab->chunksPerBlock));
+ dlist_push_head(blocklist, &block->node);
- /* compute the chunk location block start (after the block header) */
- chunk = SlabBlockGetChunk(slab, block, idx);
+ slab->curBlocklistIndex = blocklist_idx;
+ }
+ else
+ {
+ dlist_head *blocklist = &slab->blocklist[slab->curBlocklistIndex];
+ int new_blocklist_idx;
- /*
- * Update the block nfree count, and also the minFreeChunks as we've
- * decreased nfree for a block with the minimum number of free chunks
- * (because that's how we chose the block).
- */
- block->nfree--;
- slab->minFreeChunks = block->nfree;
+ Assert(!dlist_is_empty(blocklist));
- /*
- * Remove the chunk from the freelist head. The index of the next free
- * chunk is stored in the chunk itself.
- */
- VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(chunk), sizeof(int32));
- block->firstFreeChunk = *(int32 *) MemoryChunkGetPointer(chunk);
+ /* grab the block from the blocklist */
+ block = dlist_head_element(SlabBlock, node, blocklist);
- Assert(block->firstFreeChunk >= 0);
- Assert(block->firstFreeChunk <= slab->chunksPerBlock);
+ /* make sure we actually got a valid block, with matching nfree */
+ Assert(block != NULL);
+ Assert(slab->curBlocklistIndex == SlabBlocklistIndex(slab, block->nfree));
+ Assert(block->nfree > 0);
- Assert((block->nfree != 0 &&
- block->firstFreeChunk < slab->chunksPerBlock) ||
- (block->nfree == 0 &&
- block->firstFreeChunk == slab->chunksPerBlock));
+ /* fetch the next chunk from this block */
+ chunk = SlabGetNextFreeChunk(slab, block);
- /* move the whole block to the right place in the freelist */
- dlist_delete(&block->node);
- dlist_push_head(&slab->freelist[block->nfree], &block->node);
+ /* get the new blocklist index based on the new free chunk count */
+ new_blocklist_idx = SlabBlocklistIndex(slab, block->nfree);
- /*
- * And finally update minFreeChunks, i.e. the index to the block with the
- * lowest number of free chunks. We only need to do that when the block
- * got full (otherwise we know the current block is the right one). We'll
- * simply walk the freelist until we find a non-empty entry.
- */
- if (slab->minFreeChunks == 0)
- {
- for (idx = 1; idx <= slab->chunksPerBlock; idx++)
+ /*
+ * Handle the case where the blocklist index changes. This also deals
+ * with blocks becoming full as only full blocks go at index 0.
+ */
+ if (unlikely(slab->curBlocklistIndex != new_blocklist_idx))
{
- if (dlist_is_empty(&slab->freelist[idx]))
- continue;
+ dlist_delete_from(blocklist, &block->node);
+ dlist_push_head(&slab->blocklist[new_blocklist_idx], &block->node);
- /* found a non-empty freelist */
- slab->minFreeChunks = idx;
- break;
+ if (dlist_is_empty(blocklist))
+ slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
}
}
- if (slab->minFreeChunks == slab->chunksPerBlock)
- slab->minFreeChunks = 0;
+ /*
+ * Check that the chunk pointer is actually somewhere on the block and is
+ * aligned as expected.
+ */
+ Assert(chunk >= SlabBlockGetChunk(slab, block, 0));
+ Assert(chunk <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1));
+ Assert(SlabChunkMod(slab, block, chunk) == 0);
/* Prepare to initialize the chunk header. */
VALGRIND_MAKE_MEM_UNDEFINED(chunk, Slab_CHUNKHDRSZ);
randomize_mem((char *) MemoryChunkGetPointer(chunk), size);
#endif
- Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
-
return MemoryChunkGetPointer(chunk);
}
MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
SlabBlock *block = MemoryChunkGetBlock(chunk);
SlabContext *slab;
- int idx;
+ int curBlocklistIdx;
+ int newBlocklistIdx;
/*
* For speed reasons we just Assert that the referenced block is good.
slab->header.name, chunk);
#endif
- /* compute index of the chunk with respect to block start */
- idx = SlabChunkIndex(slab, block, chunk);
+ /* push this chunk onto the head of the block's free list */
+ *(MemoryChunk **) pointer = block->freehead;
+ block->freehead = chunk;
- /* add chunk to freelist, and update block nfree count */
- *(int32 *) pointer = block->firstFreeChunk;
- block->firstFreeChunk = idx;
block->nfree++;
Assert(block->nfree > 0);
Assert(block->nfree <= slab->chunksPerBlock);
#ifdef CLOBBER_FREED_MEMORY
- /* XXX don't wipe the int32 index, used for block-level freelist */
- wipe_mem((char *) pointer + sizeof(int32),
- slab->chunkSize - sizeof(int32));
+ /* don't wipe the free list MemoryChunk pointer stored in the chunk */
+ wipe_mem((char *) pointer + sizeof(MemoryChunk *),
+ slab->chunkSize - sizeof(MemoryChunk *));
#endif
- /* remove the block from a freelist */
- dlist_delete(&block->node);
+ curBlocklistIdx = SlabBlocklistIndex(slab, block->nfree - 1);
+ newBlocklistIdx = SlabBlocklistIndex(slab, block->nfree);
/*
- * See if we need to update the minFreeChunks field for the slab - we only
- * need to do that if there the block had that number of free chunks
- * before we freed one. In that case, we check if there still are blocks
- * in the original freelist and we either keep the current value (if there
- * still are blocks) or increment it by one (the new block is still the
- * one with minimum free chunks).
- *
- * The one exception is when the block will get completely free - in that
- * case we will free it, se we can't use it for minFreeChunks. It however
- * means there are no more blocks with free chunks.
+ * Check if the block needs to be moved to another element on the
+ * blocklist based on it now having 1 more free chunk.
*/
- if (slab->minFreeChunks == (block->nfree - 1))
+ if (unlikely(curBlocklistIdx != newBlocklistIdx))
{
- /* Have we removed the last chunk from the freelist? */
- if (dlist_is_empty(&slab->freelist[slab->minFreeChunks]))
+ /* do the move */
+ dlist_delete_from(&slab->blocklist[curBlocklistIdx], &block->node);
+ dlist_push_head(&slab->blocklist[newBlocklistIdx], &block->node);
+
+ /*
+		 * It's possible that there are now no blocks in the blocklist at the
+ * curBlocklistIndex position. When this happens we must find the
+ * next blocklist index which contains blocks. We can be certain
+ * we'll find a block as at least one must exist for the chunk we're
+ * currently freeing.
+ */
+ if (slab->curBlocklistIndex == curBlocklistIdx &&
+ dlist_is_empty(&slab->blocklist[curBlocklistIdx]))
{
- /* but if we made the block entirely free, we'll free it */
- if (block->nfree == slab->chunksPerBlock)
- slab->minFreeChunks = 0;
- else
- slab->minFreeChunks++;
+ slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
+ Assert(slab->curBlocklistIndex > 0);
}
}
- /* If the block is now completely empty, free it. */
- if (block->nfree == slab->chunksPerBlock)
+ /* Handle when a block becomes completely empty */
+ if (unlikely(block->nfree == slab->chunksPerBlock))
{
- free(block);
- slab->nblocks--;
- slab->header.mem_allocated -= slab->blockSize;
- }
- else
- dlist_push_head(&slab->freelist[block->nfree], &block->node);
+ /* remove the block */
+ dlist_delete_from(&slab->blocklist[newBlocklistIdx], &block->node);
+
+ /*
+ * To avoid thrashing malloc/free, we keep a list of empty blocks that
+		 * we can reuse instead of having to malloc a new one.
+ */
+ if (dclist_count(&slab->emptyblocks) < SLAB_MAXIMUM_EMPTY_BLOCKS)
+ dclist_push_head(&slab->emptyblocks, &block->node);
+ else
+ {
+ /*
+ * When we have enough empty blocks stored already, we actually
+ * free the block.
+ */
+#ifdef CLOBBER_FREED_MEMORY
+ wipe_mem(block, slab->blockSize);
+#endif
+ free(block);
+ slab->header.mem_allocated -= slab->blockSize;
+ }
- Assert(slab->nblocks >= 0);
- Assert(slab->nblocks * slab->blockSize == slab->header.mem_allocated);
+ /*
+ * Check if we need to reset the blocklist index. This is required
+	 * when the blocklist this block was on has become completely empty.
+ */
+ if (slab->curBlocklistIndex == newBlocklistIdx &&
+ dlist_is_empty(&slab->blocklist[newBlocklistIdx]))
+ slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
+ }
}
/*
/*
* SlabIsEmpty
- * Is an Slab empty of any allocated space?
+ * Is the slab empty of any allocated space?
*/
bool
SlabIsEmpty(MemoryContext context)
{
- SlabContext *slab = (SlabContext *) context;
-
- Assert(SlabIsValid(slab));
+ Assert(SlabIsValid((SlabContext *) context));
- return (slab->nblocks == 0);
+ return (context->mem_allocated == 0);
}
/*
Assert(SlabIsValid(slab));
/* Include context header in totalspace */
- totalspace = slab->headerSize;
+ totalspace = Slab_CONTEXT_HDRSZ(slab->chunksPerBlock);
- for (i = 0; i <= slab->chunksPerBlock; i++)
+ /* Add the space consumed by blocks in the emptyblocks list */
+ totalspace += dclist_count(&slab->emptyblocks) * slab->blockSize;
+
+ for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
{
dlist_iter iter;
- dlist_foreach(iter, &slab->freelist[i])
+ dlist_foreach(iter, &slab->blocklist[i])
{
SlabBlock *block = dlist_container(SlabBlock, node, iter.cur);
{
char stats_string[200];
+ /* XXX should we include free chunks on empty blocks? */
snprintf(stats_string, sizeof(stats_string),
- "%zu total in %zu blocks; %zu free (%zu chunks); %zu used",
- totalspace, nblocks, freespace, freechunks,
- totalspace - freespace);
+ "%zu total in %zu blocks; %u empty blocks; %zu free (%zu chunks); %zu used",
+ totalspace, nblocks, dclist_count(&slab->emptyblocks),
+ freespace, freechunks, totalspace - freespace);
printfunc(context, passthru, stats_string, print_to_stderr);
}
/*
* SlabCheck
- * Walk through chunks and check consistency of memory.
+ * Walk through all blocks looking for inconsistencies.
*
* NOTE: report errors as WARNING, *not* ERROR or FATAL. Otherwise you'll
* find yourself in an infinite loop when trouble occurs, because this
{
SlabContext *slab = (SlabContext *) context;
int i;
+ int nblocks = 0;
const char *name = slab->header.name;
+ dlist_iter iter;
Assert(SlabIsValid(slab));
Assert(slab->chunksPerBlock > 0);
- /* walk all the freelists */
- for (i = 0; i <= slab->chunksPerBlock; i++)
+ /*
+ * Have a look at the empty blocks. These should have all their chunks
+ * marked as free. Ensure that's the case.
+ */
+ dclist_foreach(iter, &slab->emptyblocks)
+ {
+ SlabBlock *block = dlist_container(SlabBlock, node, iter.cur);
+
+ if (block->nfree != slab->chunksPerBlock)
+ elog(WARNING, "problem in slab %s: empty block %p should have %d free chunks but has %d chunks free",
+ name, block, slab->chunksPerBlock, block->nfree);
+ }
+
+ /* walk the non-empty block lists */
+ for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
{
int j,
nfree;
- dlist_iter iter;
- /* walk all blocks on this freelist */
- dlist_foreach(iter, &slab->freelist[i])
+ /* walk all blocks on this blocklist */
+ dlist_foreach(iter, &slab->blocklist[i])
{
- int idx;
SlabBlock *block = dlist_container(SlabBlock, node, iter.cur);
+ MemoryChunk *cur_chunk;
/*
* Make sure the number of free chunks (in the block header)
- * matches position in the freelist.
+ * matches the position in the blocklist.
*/
- if (block->nfree != i)
- elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match freelist %d",
- name, block->nfree, block, i);
+ if (SlabBlocklistIndex(slab, block->nfree) != i)
+ elog(WARNING, "problem in slab %s: block %p is on blocklist %d but should be on blocklist %d",
+ name, block, i, SlabBlocklistIndex(slab, block->nfree));
+
+ /* make sure the block is not empty */
+ if (block->nfree >= slab->chunksPerBlock)
+ elog(WARNING, "problem in slab %s: empty block %p incorrectly stored on blocklist element %d",
+ name, block, i);
/* make sure the slab pointer correctly points to this context */
if (block->slab != slab)
elog(WARNING, "problem in slab %s: bogus slab link in block %p",
name, block);
- /* reset the bitmap of free chunks for this block */
- memset(slab->freechunks, 0, (slab->chunksPerBlock * sizeof(bool)));
- idx = block->firstFreeChunk;
+ /* reset the array of free chunks for this block */
+ memset(slab->isChunkFree, 0, (slab->chunksPerBlock * sizeof(bool)));
+ nfree = 0;
+
+ /* walk through the block's free list chunks */
+ cur_chunk = block->freehead;
+ while (cur_chunk != NULL)
+ {
+ int chunkidx = SlabChunkIndex(slab, block, cur_chunk);
+
+ /*
+ * Ensure the free list link points to something on the block
+ * at an address aligned according to the full chunk size.
+ */
+ if (cur_chunk < SlabBlockGetChunk(slab, block, 0) ||
+ cur_chunk > SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) ||
+ SlabChunkMod(slab, block, cur_chunk) != 0)
+ elog(WARNING, "problem in slab %s: bogus free list link %p in block %p",
+ name, cur_chunk, block);
+
+			/* count the chunk and mark it free in the free chunk array */
+ nfree++;
+ slab->isChunkFree[chunkidx] = true;
+
+ /* read pointer of the next free chunk */
+ VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(cur_chunk), sizeof(MemoryChunk *));
+ cur_chunk = *(MemoryChunk **) SlabChunkGetPointer(cur_chunk);
+ }
+
+ /* check that the unused pointer matches what nunused claims */
+ if (SlabBlockGetChunk(slab, block, slab->chunksPerBlock - block->nunused) !=
+ block->unused)
+ elog(WARNING, "problem in slab %s: mismatch detected between nunused chunks and unused pointer in block %p",
+ name, block);
/*
- * Now walk through the chunks, count the free ones and also
- * perform some additional checks for the used ones. As the chunk
- * freelist is stored within the chunks themselves, we have to
- * walk through the chunks and construct our own bitmap.
+			 * Count the remaining free chunks that have yet to make it onto
+ * the block's free list.
*/
-
- nfree = 0;
- while (idx < slab->chunksPerBlock)
+ cur_chunk = block->unused;
+ for (j = 0; j < block->nunused; j++)
{
- MemoryChunk *chunk;
+ int chunkidx = SlabChunkIndex(slab, block, cur_chunk);
+
- /* count the chunk as free, add it to the bitmap */
+			/* count the chunk as free and mark it as such in the array */
nfree++;
- slab->freechunks[idx] = true;
+ if (chunkidx < slab->chunksPerBlock)
+ slab->isChunkFree[chunkidx] = true;
- /* read index of the next free chunk */
- chunk = SlabBlockGetChunk(slab, block, idx);
- VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(chunk), sizeof(int32));
- idx = *(int32 *) MemoryChunkGetPointer(chunk);
+ /* move forward 1 chunk */
+ cur_chunk = (MemoryChunk *) (((char *) cur_chunk) + slab->fullChunkSize);
}
for (j = 0; j < slab->chunksPerBlock; j++)
{
- /* non-zero bit in the bitmap means chunk the chunk is used */
- if (!slab->freechunks[j])
+ if (!slab->isChunkFree[j])
{
MemoryChunk *chunk = SlabBlockGetChunk(slab, block, j);
SlabBlock *chunkblock = (SlabBlock *) MemoryChunkGetBlock(chunk);
* in the block header).
*/
if (nfree != block->nfree)
- elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match bitmap %d",
- name, block->nfree, block, nfree);
+			elog(WARNING, "problem in slab %s: nfree in block %p is %d but %d chunks were found as free",
+ name, block, block->nfree, nfree);
+
+ nblocks++;
}
}
- Assert(slab->nblocks * slab->blockSize == context->mem_allocated);
+ /* the stored empty blocks are tracked in mem_allocated too */
+ nblocks += dclist_count(&slab->emptyblocks);
+
+ Assert(nblocks * slab->blockSize == context->mem_allocated);
}
#endif /* MEMORY_CONTEXT_CHECKING */