* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.2 2001/06/29 21:08:24 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.3 2001/07/02 20:50:46 tgl Exp $
+ *
+ *
+ * NOTES:
+ *
+ * The only really interesting aspect of this code is the heuristics for
+ * deciding how much information we can afford to keep about each relation,
+ * given that we have a limited amount of workspace in shared memory.
+ * These currently work as follows:
+ *
+ * The number of distinct relations tracked is limited by a configuration
+ * variable (MaxFSMRelations). When this would be exceeded, we discard the
+ * least frequently used relation. A previously-unknown relation is always
+ * entered into the map with useCount 1 on first reference, even if this
+ * causes an existing entry with higher useCount to be discarded. This may
+ * cause a little bit of thrashing among the bottom entries in the list,
+ * but if we didn't do it then there'd be no way for a relation not in the
+ * map to get in once the map is full. Note we allow a relation to be in the
+ * map even if no pages are currently stored for it: this allows us to track
+ * its useCount & threshold, which may eventually go high enough to give it
+ * priority for page storage.
+ *
+ * The total number of pages tracked is also set by a configuration variable
+ * (MaxFSMPages). We allocate these dynamically among the known relations,
+ * giving preference to the more-frequently-referenced relations and those
+ * with smaller tuples. This is done by a thresholding mechanism: for each
+ * relation we keep track of a current target threshold, and allow only pages
+ * with free space >= threshold to be stored in the map. The threshold starts
+ * at BLCKSZ/2 (a somewhat arbitrary choice) for a newly entered relation.
+ * On each GetFreeSpace reference, the threshold is moved towards the
+ * request size using a slow exponential moving average; this means that
+ * for heavily used relations, the threshold will approach the average
+ * freespace request size (average tuple size). Whenever we run out of
+ * storage space in the map, we double the threshold of all existing relations
+ * (but not to more than BLCKSZ, so as to prevent runaway thresholds).
+ * Infrequently used relations will thus tend to have large thresholds.
+ *
+ * XXX this thresholding mechanism is experimental; need to see how well
+ * it works in practice.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include <limits.h>
+
#include "storage/freespace.h"
#include "storage/itemid.h"
#include "storage/shmem.h"
/* Header for whole map */
struct FSMHeader
{
- HTAB *relationHash; /* hashtable of FSMRelation entries */
- FSMRelation *relationList; /* FSMRelations in order by recency of use */
- int numRelations; /* number of FSMRelations now in use */
+ HTAB *relHash; /* hashtable of FSMRelation entries */
+ FSMRelation *relList; /* FSMRelations in useCount order */
+ FSMRelation *relListTail; /* tail of FSMRelation list */
+ int numRels; /* number of FSMRelations now in use */
FSMChunk *freeChunks; /* linked list of currently-free chunks */
+ int numFreeChunks; /* number of free chunks */
};
/*
struct FSMRelation
{
RelFileNode key; /* hash key (must be first) */
- FSMRelation *nextRel; /* next rel in order by recency of use */
- FSMRelation *priorRel; /* prior rel in order by recency of use */
+ FSMRelation *nextRel; /* next rel in useCount order */
+ FSMRelation *priorRel; /* prior rel in useCount order */
+ int useCount; /* use count for prioritizing rels */
+ Size threshold; /* minimum amount of free space to keep */
+ int nextPage; /* index (from 0) to start next search at */
+ int numPages; /* total number of pages we have info about */
+ int numChunks; /* number of FSMChunks allocated to rel */
FSMChunk *relChunks; /* linked list of page info chunks */
};
#define SHMEM_FSMHASH_KEYSIZE sizeof(RelFileNode)
#define SHMEM_FSMHASH_DATASIZE (sizeof(FSMRelation) - SHMEM_FSMHASH_KEYSIZE)
+/*
+ * Info about individual pages in a relation is stored in chunks to reduce
+ * allocation overhead. Note that we allow any chunk of a relation's list
+ * to be partly full, not only the last chunk; this speeds deletion of
+ * individual page entries. When the total number of unused slots reaches
+ * CHUNKPAGES, we compact out the unused slots so that we can return a chunk
+ * to the freelist; but there's no point in doing the compaction before that.
+ */
+
#define CHUNKPAGES 32 /* each chunk can store this many pages */
struct FSMChunk
static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
+static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
+static FSMRelation *create_fsm_rel(RelFileNode *rel);
+static void delete_fsm_rel(FSMRelation *fsmrel);
+static void link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel);
+static void unlink_fsm_rel(FSMRelation *fsmrel);
+static void free_chunk_chain(FSMChunk *fchunk);
+static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
+static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
+ Size spaceAvail);
+static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
+ FSMChunk **outChunk, int *outChunkRelIndex);
+static bool insert_fsm_page_entry(FSMRelation *fsmrel,
+ BlockNumber page, Size spaceAvail,
+ FSMChunk *chunk, int chunkRelIndex);
+static bool push_fsm_page_entry(BlockNumber page, Size spaceAvail,
+ FSMChunk *chunk, int chunkRelIndex);
+static void delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk,
+ int chunkRelIndex);
+static void compact_fsm_page_list(FSMRelation *fsmrel);
+static void acquire_fsm_free_space(void);
+
+
+/*
+ * Exported routines
+ */
+
+
/*
* InitFreeSpaceMap -- Initialize the freespace module.
*
* It builds the empty free space map table. FreeSpaceLock must also be
* initialized at some point, but is not touched here --- we assume there is
* no need for locking, since only the calling process can be accessing shared
- * memory as yet. FreeSpaceShmemSize() was called previously while computing
- * the space needed for shared memory.
+ * memory as yet.
*/
void
InitFreeSpaceMap(void)
info.datasize = SHMEM_FSMHASH_DATASIZE;
info.hash = tag_hash;
- FreeSpaceMap->relationHash = ShmemInitHash("Free Space Map Hash",
- MaxFSMRelations / 10,
- MaxFSMRelations,
- &info,
- (HASH_ELEM | HASH_FUNCTION));
+ FreeSpaceMap->relHash = ShmemInitHash("Free Space Map Hash",
+ MaxFSMRelations / 10,
+ MaxFSMRelations,
+ &info,
+ (HASH_ELEM | HASH_FUNCTION));
- if (!FreeSpaceMap->relationHash)
+ if (!FreeSpaceMap->relHash)
elog(FATAL, "Insufficient shared memory for free space map");
/* Allocate FSMChunks and fill up the free-chunks list */
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
+ FreeSpaceMap->numFreeChunks = nchunks;
chunks = (FSMChunk *) ShmemAlloc(nchunks * sizeof(FSMChunk));
if (chunks == NULL)
FreeSpaceMap->freeChunks = prevchunk;
}
-
+/*
+ * Estimate amount of shmem space needed for FSM.
+ */
int
FreeSpaceShmemSize(void)
{
int size;
int nchunks;
- /*
- * There is no point in allowing less than one "chunk" per relation,
- * so force MaxFSMPages to be at least CHUNKPAGES * MaxFSMRelations.
- */
- Assert(MaxFSMRelations > 0);
- if (MaxFSMPages < CHUNKPAGES * MaxFSMRelations)
- MaxFSMPages = CHUNKPAGES * MaxFSMRelations;
-
/* table header */
size = MAXALIGN(sizeof(FSMHeader));
return size;
}
+/*
+ * GetPageWithFreeSpace - try to find a page in the given relation with
+ * at least the specified amount of free space.
+ *
+ * If successful, return the block number; if not, return InvalidBlockNumber.
+ *
+ * The caller must be prepared for the possibility that the returned page
+ * will turn out to have too little space available by the time the caller
+ * gets a lock on it. In that case, the caller should report the actual
+ * amount of free space available on that page (via RecordFreeSpace) and
+ * then try again. If InvalidBlockNumber is returned, extend the relation.
+ */
BlockNumber
GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
{
- return InvalidBlockNumber; /* stub */
+ FSMRelation *fsmrel;
+ BlockNumber freepage;
+
+ SpinAcquire(FreeSpaceLock);
+ /*
+ * We always add a rel to the hashtable when it is inquired about.
+ */
+ fsmrel = create_fsm_rel(rel);
+ /*
+ * Adjust the threshold towards the space request. This essentially
+ * implements an exponential moving average with an equivalent period
+ * of about 63 requests. Ignore silly requests, however, to ensure
+ * that the average stays in bounds.
+ *
+ * In theory, if the threshold increases here we should immediately
+ * delete any pages that fall below the new threshold. In practice
+ * it seems OK to wait until we have a need to compact space.
+ */
+ if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
+ {
+ int cur_avg = (int) fsmrel->threshold;
+
+ cur_avg += ((int) spaceNeeded - cur_avg) / 32;
+ fsmrel->threshold = (Size) cur_avg;
+ }
+ freepage = find_free_space(fsmrel, spaceNeeded);
+ SpinRelease(FreeSpaceLock);
+ return freepage;
}
+/*
+ * RecordFreeSpace - record the amount of free space available on a page.
+ *
+ * The FSM is at liberty to forget about the page instead, if the amount of
+ * free space is too small to be interesting. The only guarantee is that
+ * a subsequent request to get a page with more than spaceAvail space free
+ * will not return this page.
+ */
void
RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
{
- /* stub */
+ FSMRelation *fsmrel;
+
+ /* Sanity check: ensure spaceAvail will fit into ItemLength */
+ AssertArg(spaceAvail < BLCKSZ);
+
+ SpinAcquire(FreeSpaceLock);
+ /*
+ * We choose not to add rels to the hashtable unless they've been
+ * inquired about with GetPageWithFreeSpace. Also, a Record operation
+ * does not increase the useCount or change threshold, only Get does.
+ */
+ fsmrel = lookup_fsm_rel(rel);
+ if (fsmrel)
+ fsm_record_free_space(fsmrel, page, spaceAvail);
+ SpinRelease(FreeSpaceLock);
}
+/*
+ * RecordAndGetPageWithFreeSpace - combo form to save one spinlock and
+ * hash table lookup cycle.
+ */
BlockNumber
RecordAndGetPageWithFreeSpace(RelFileNode *rel,
BlockNumber oldPage,
Size oldSpaceAvail,
Size spaceNeeded)
{
- return InvalidBlockNumber; /* stub */
+ FSMRelation *fsmrel;
+ BlockNumber freepage;
+
+ /* Sanity check: ensure spaceAvail will fit into ItemLength */
+ AssertArg(oldSpaceAvail < BLCKSZ);
+
+ SpinAcquire(FreeSpaceLock);
+ /*
+ * We always add a rel to the hashtable when it is inquired about.
+ */
+ fsmrel = create_fsm_rel(rel);
+ /*
+ * Adjust the threshold towards the space request, same as in
+ * GetPageWithFreeSpace.
+ *
+ * Note that we do this before storing data for oldPage, which means
+ * this isn't exactly equivalent to Record followed by Get; but it
+ * seems appropriate to adjust the threshold first.
+ */
+ if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
+ {
+ int cur_avg = (int) fsmrel->threshold;
+
+ cur_avg += ((int) spaceNeeded - cur_avg) / 32;
+ fsmrel->threshold = (Size) cur_avg;
+ }
+ /* Do the Record */
+ fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
+ /* Do the Get */
+ freepage = find_free_space(fsmrel, spaceNeeded);
+ SpinRelease(FreeSpaceLock);
+ return freepage;
}
+/*
+ * MultiRecordFreeSpace - record available-space info about multiple pages
+ * of a relation in one call.
+ *
+ * First, if minPage <= maxPage, the FSM must discard any entries it has for
+ * pages in that page number range (inclusive). This allows obsolete info
+ * to be discarded. Second, if nPages > 0, record the page numbers and free
+ * space amounts in the given arrays. As with RecordFreeSpace, the FSM is at
+ * liberty to discard some of the information. However, it *must* discard
+ * previously stored info in the minPage..maxPage range (for example, this
+ * case is used to remove info about deleted pages during relation truncation).
+ */
void
MultiRecordFreeSpace(RelFileNode *rel,
BlockNumber minPage,
BlockNumber *pages,
Size *spaceAvail)
{
- /* stub */
+ FSMRelation *fsmrel;
+ int i;
+
+ SpinAcquire(FreeSpaceLock);
+ fsmrel = lookup_fsm_rel(rel);
+ if (fsmrel)
+ {
+ /*
+ * Remove entries in specified range
+ */
+ if (minPage <= maxPage)
+ {
+ FSMChunk *chunk;
+ int chunkRelIndex;
+ bool done;
+
+ /* Use lookup to locate first entry >= minPage */
+ lookup_fsm_page_entry(fsmrel, minPage, &chunk, &chunkRelIndex);
+ /* Set free space to 0 for each page within range */
+ done = false;
+ while (chunk && !done)
+ {
+ int numPages = chunk->numPages;
+
+ for (; chunkRelIndex < numPages; chunkRelIndex++)
+ {
+ if (chunk->pages[chunkRelIndex] > maxPage)
+ {
+ done = true;
+ break;
+ }
+ chunk->bytes[chunkRelIndex] = 0;
+ }
+ chunk = chunk->next;
+ chunkRelIndex = 0;
+ }
+ /* Now compact out the zeroed entries */
+ compact_fsm_page_list(fsmrel);
+ }
+ /*
+ * Add new entries, if appropriate.
+ *
+ * XXX we could probably be smarter about this than doing it
+ * completely separately for each one. FIXME later.
+ */
+ for (i = 0; i < nPages; i++)
+ fsm_record_free_space(fsmrel, pages[i], spaceAvail[i]);
+ }
+ SpinRelease(FreeSpaceLock);
}
+/*
+ * FreeSpaceMapForgetRel - forget all about a relation.
+ *
+ * This is called when a relation is deleted. Although we could just let
+ * the rel age out of the map, it's better to reclaim and reuse the space
+ * sooner.
+ */
void
FreeSpaceMapForgetRel(RelFileNode *rel)
{
- /* stub */
+ FSMRelation *fsmrel;
+
+ SpinAcquire(FreeSpaceLock);
+ fsmrel = lookup_fsm_rel(rel);
+ if (fsmrel)
+ delete_fsm_rel(fsmrel);
+ SpinRelease(FreeSpaceLock);
+}
+
+/*
+ * FreeSpaceMapForgetDatabase - forget all relations of a database.
+ *
+ * This is called during DROP DATABASE. As above, might as well reclaim
+ * map space sooner instead of later.
+ *
+ * XXX when we implement tablespaces, target Oid will need to be tablespace
+ * ID not database ID.
+ */
+void
+FreeSpaceMapForgetDatabase(Oid dbid)
+{
+ FSMRelation *fsmrel,
+ *nextrel;
+
+ SpinAcquire(FreeSpaceLock);
+ for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel)
+ {
+ nextrel = fsmrel->nextRel; /* in case we delete it */
+ if (fsmrel->key.tblNode == dbid)
+ delete_fsm_rel(fsmrel);
+ }
+ SpinRelease(FreeSpaceLock);
+}
+
+
+/*
+ * Internal routines. These all assume the caller holds the FreeSpaceLock.
+ */
+
+/*
+ * Lookup a relation in the hash table. If not present, return NULL.
+ * The relation's useCount is not changed.
+ */
+static FSMRelation *
+lookup_fsm_rel(RelFileNode *rel)
+{
+ FSMRelation *fsmrel;
+ bool found;
+
+ fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
+ (Pointer) rel,
+ HASH_FIND,
+ &found);
+ if (!fsmrel)
+ elog(ERROR, "FreeSpaceMap hashtable corrupted");
+
+ if (!found)
+ return NULL;
+
+ return fsmrel;
+}
+
+/*
+ * Lookup a relation in the hash table, creating an entry if not present.
+ *
+ * On successful lookup, the relation's useCount is incremented, possibly
+ * causing it to move up in the priority list.
+ */
+static FSMRelation *
+create_fsm_rel(RelFileNode *rel)
+{
+ FSMRelation *fsmrel,
+ *oldrel;
+ bool found;
+
+ fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
+ (Pointer) rel,
+ HASH_ENTER,
+ &found);
+ if (!fsmrel)
+ elog(ERROR, "FreeSpaceMap hashtable corrupted");
+
+ if (!found)
+ {
+ /* New hashtable entry, initialize it (hash_search set the key) */
+ fsmrel->useCount = 1;
+ fsmrel->threshold = BLCKSZ/2; /* starting point for new entry */
+ fsmrel->nextPage = 0;
+ fsmrel->numPages = 0;
+ fsmrel->numChunks = 0;
+ fsmrel->relChunks = NULL;
+ /* Discard lowest-priority existing rel, if we are over limit */
+ if (FreeSpaceMap->numRels >= MaxFSMRelations)
+ delete_fsm_rel(FreeSpaceMap->relListTail);
+ /*
+ * Add new entry in front of any others with useCount 1 (since it
+ * is more recently used than them).
+ */
+ oldrel = FreeSpaceMap->relListTail;
+ while (oldrel && oldrel->useCount <= 1)
+ oldrel = oldrel->priorRel;
+ link_fsm_rel_after(fsmrel, oldrel);
+ FreeSpaceMap->numRels++;
+ }
+ else
+ {
+ int myCount;
+
+ /* Existing entry, advance its useCount */
+ if (++(fsmrel->useCount) >= INT_MAX/2)
+ {
+ /* When useCounts threaten to overflow, reduce 'em all 2X */
+ for (oldrel = FreeSpaceMap->relList;
+ oldrel != NULL;
+ oldrel = oldrel->nextRel)
+ {
+ oldrel->useCount >>= 1;
+ }
+ }
+ /* If warranted, move it up the priority list */
+ oldrel = fsmrel->priorRel;
+ myCount = fsmrel->useCount;
+ if (oldrel && oldrel->useCount <= myCount)
+ {
+ unlink_fsm_rel(fsmrel);
+ while (oldrel && oldrel->useCount <= myCount)
+ oldrel = oldrel->priorRel;
+ link_fsm_rel_after(fsmrel, oldrel);
+ }
+ }
+
+ return fsmrel;
+}
+
+/*
+ * Remove an existing FSMRelation entry.
+ */
+static void
+delete_fsm_rel(FSMRelation *fsmrel)
+{
+ FSMRelation *result;
+ bool found;
+
+ free_chunk_chain(fsmrel->relChunks);
+ unlink_fsm_rel(fsmrel);
+ FreeSpaceMap->numRels--;
+ result = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
+ (Pointer) &(fsmrel->key),
+ HASH_REMOVE,
+ &found);
+ if (!result || !found)
+ elog(ERROR, "FreeSpaceMap hashtable corrupted");
+}
+
+/*
+ * Link a FSMRelation into the priority list just after the given existing
+ * entry (or at the head of the list, if oldrel is NULL).
+ */
+static void
+link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel)
+{
+ if (oldrel == NULL)
+ {
+ /* insert at head */
+ fsmrel->priorRel = NULL;
+ fsmrel->nextRel = FreeSpaceMap->relList;
+ FreeSpaceMap->relList = fsmrel;
+ if (fsmrel->nextRel != NULL)
+ fsmrel->nextRel->priorRel = fsmrel;
+ else
+ FreeSpaceMap->relListTail = fsmrel;
+ }
+ else
+ {
+ /* insert after oldrel */
+ fsmrel->priorRel = oldrel;
+ fsmrel->nextRel = oldrel->nextRel;
+ oldrel->nextRel = fsmrel;
+ if (fsmrel->nextRel != NULL)
+ fsmrel->nextRel->priorRel = fsmrel;
+ else
+ FreeSpaceMap->relListTail = fsmrel;
+ }
+}
+
+/*
+ * Delink a FSMRelation from the priority list.
+ */
+static void
+unlink_fsm_rel(FSMRelation *fsmrel)
+{
+ if (fsmrel->priorRel != NULL)
+ fsmrel->priorRel->nextRel = fsmrel->nextRel;
+ else
+ FreeSpaceMap->relList = fsmrel->nextRel;
+ if (fsmrel->nextRel != NULL)
+ fsmrel->nextRel->priorRel = fsmrel->priorRel;
+ else
+ FreeSpaceMap->relListTail = fsmrel->priorRel;
+}
+
+/*
+ * Return all the FSMChunks in the chain starting at fchunk to the freelist.
+ * (Caller must handle unlinking them from wherever they were.)
+ */
+static void
+free_chunk_chain(FSMChunk *fchunk)
+{
+ int nchunks;
+ FSMChunk *lchunk;
+
+ if (fchunk == NULL)
+ return;
+ nchunks = 1;
+ lchunk = fchunk;
+ while (lchunk->next != NULL)
+ {
+ nchunks++;
+ lchunk = lchunk->next;
+ }
+ lchunk->next = FreeSpaceMap->freeChunks;
+ FreeSpaceMap->freeChunks = fchunk;
+ FreeSpaceMap->numFreeChunks += nchunks;
+}
+
+/*
+ * Look to see if a page with at least the specified amount of space is
+ * available in the given FSMRelation. If so, return its page number,
+ * and advance the nextPage counter so that the next inquiry will return
+ * a different page if possible. Return InvalidBlockNumber if no success.
+ */
+static BlockNumber
+find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
+{
+ int pagesToCheck, /* outer loop counter */
+ pageIndex, /* current page index relative to relation */
+ chunkRelIndex; /* current page index relative to curChunk */
+ FSMChunk *curChunk;
+
+ pageIndex = fsmrel->nextPage;
+ /* Last operation may have left nextPage pointing past end */
+ if (pageIndex >= fsmrel->numPages)
+ pageIndex = 0;
+ curChunk = fsmrel->relChunks;
+ chunkRelIndex = pageIndex;
+
+ for (pagesToCheck = fsmrel->numPages; pagesToCheck > 0; pagesToCheck--)
+ {
+ /*
+ * Make sure we are in the right chunk. First time through, we
+ * may have to advance through multiple chunks; subsequent loops
+ * should do this at most once.
+ */
+ while (chunkRelIndex >= curChunk->numPages)
+ {
+ chunkRelIndex -= curChunk->numPages;
+ curChunk = curChunk->next;
+ }
+ /* Check the next page */
+ if ((Size) curChunk->bytes[chunkRelIndex] >= spaceNeeded)
+ {
+ fsmrel->nextPage = pageIndex+1;
+ return curChunk->pages[chunkRelIndex];
+ }
+ /* Advance pageIndex and chunkRelIndex, wrapping around if needed */
+ if (++pageIndex >= fsmrel->numPages)
+ {
+ pageIndex = 0;
+ curChunk = fsmrel->relChunks;
+ chunkRelIndex = 0;
+ }
+ else
+ chunkRelIndex++;
+ }
+
+ return InvalidBlockNumber; /* nothing found */
+}
+
+/*
+ * fsm_record_free_space - guts of RecordFreeSpace, which is also used by
+ * RecordAndGetPageWithFreeSpace.
+ */
+static void
+fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
+{
+ FSMChunk *chunk;
+ int chunkRelIndex;
+
+ if (lookup_fsm_page_entry(fsmrel, page, &chunk, &chunkRelIndex))
+ {
+ /* Found an existing entry for page; update or delete it */
+ if (spaceAvail >= fsmrel->threshold)
+ chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
+ else
+ delete_fsm_page_entry(fsmrel, chunk, chunkRelIndex);
+ }
+ else
+ {
+ /*
+ * No existing entry; add one if spaceAvail exceeds threshold.
+ *
+ * CORNER CASE: if we have to do acquire_fsm_free_space then
+ * our own threshold will increase, possibly meaning that we
+ * shouldn't store the page after all. Loop to redo the test
+ * if that happens. The loop also covers the possibility that
+ * acquire_fsm_free_space must be executed more than once to
+ * free any space (ie, thresholds must be more than doubled).
+ */
+ while (spaceAvail >= fsmrel->threshold)
+ {
+ if (insert_fsm_page_entry(fsmrel, page, spaceAvail,
+ chunk, chunkRelIndex))
+ break;
+ /* No space, acquire some and recheck threshold */
+ acquire_fsm_free_space();
+ if (spaceAvail < fsmrel->threshold)
+ break;
+ /*
+ * Need to redo the lookup since our own page list may well
+ * have lost entries, so position is not correct anymore.
+ */
+ if (lookup_fsm_page_entry(fsmrel, page,
+ &chunk, &chunkRelIndex))
+ elog(ERROR, "fsm_record_free_space: unexpected match");
+ }
+ }
+}
+
+/*
+ * Look for an entry for a specific page (block number) in a FSMRelation.
+ * Returns TRUE if a matching entry exists, else FALSE.
+ *
+ * The output arguments *outChunk, *outChunkRelIndex are set to indicate where
+ * the entry exists (if TRUE result) or could be inserted (if FALSE result).
+ * *chunk is set to NULL if there is no place to insert, ie, the entry would
+ * need to be added to a new chunk.
+ */
+static bool
+lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
+ FSMChunk **outChunk, int *outChunkRelIndex)
+{
+ FSMChunk *chunk;
+ int chunkRelIndex;
+
+ for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
+ {
+ int numPages = chunk->numPages;
+
+ /* Can skip the chunk quickly if page must be after last in chunk */
+ if (numPages > 0 && page <= chunk->pages[numPages-1])
+ {
+ for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
+ {
+ if (page <= chunk->pages[chunkRelIndex])
+ {
+ *outChunk = chunk;
+ *outChunkRelIndex = chunkRelIndex;
+ return (page == chunk->pages[chunkRelIndex]);
+ }
+ }
+ /* Should not get here, given above test */
+ Assert(false);
+ }
+ /*
+ * If we are about to fall off the end, and there's space available
+ * in the end chunk, return a pointer to it.
+ */
+ if (chunk->next == NULL && numPages < CHUNKPAGES)
+ {
+ *outChunk = chunk;
+ *outChunkRelIndex = numPages;
+ return false;
+ }
+ }
+ /*
+ * Adding the page would require a new chunk (or, perhaps, compaction
+ * of available free space --- not my problem here).
+ */
+ *outChunk = NULL;
+ *outChunkRelIndex = 0;
+ return false;
+}
+
+/*
+ * Insert a new page entry into a FSMRelation's list at given position
+ * (chunk == NULL implies at end).
+ *
+ * If there is no space available to insert the entry, return FALSE.
+ */
+static bool
+insert_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail,
+ FSMChunk *chunk, int chunkRelIndex)
+{
+ FSMChunk *newChunk;
+ int newChunkRelIndex;
+
+ if (fsmrel->numPages >= fsmrel->numChunks * CHUNKPAGES)
+ {
+ /* No free space within chunk list, so need another chunk */
+ if ((newChunk = FreeSpaceMap->freeChunks) == NULL)
+ return false; /* can't do it */
+ FreeSpaceMap->freeChunks = newChunk->next;
+ FreeSpaceMap->numFreeChunks--;
+ newChunk->next = NULL;
+ newChunk->numPages = 0;
+ if (fsmrel->relChunks == NULL)
+ fsmrel->relChunks = newChunk;
+ else
+ {
+ FSMChunk *priorChunk = fsmrel->relChunks;
+
+ while (priorChunk->next != NULL)
+ priorChunk = priorChunk->next;
+ priorChunk->next = newChunk;
+ }
+ fsmrel->numChunks++;
+ if (chunk == NULL)
+ {
+ /* Original search found that new page belongs at end */
+ chunk = newChunk;
+ chunkRelIndex = 0;
+ }
+ }
+
+ /* Try to insert it the easy way, ie, just move down subsequent data */
+ if (chunk &&
+ push_fsm_page_entry(page, spaceAvail, chunk, chunkRelIndex))
+ {
+ fsmrel->numPages++;
+ fsmrel->nextPage++; /* don't return same page twice running */
+ return true;
+ }
+ /*
+ * There is space available, but evidently it's before the place
+ * where the page entry needs to go. Compact the list and try again.
+ * This will require us to redo the search for the appropriate place.
+ */
+ compact_fsm_page_list(fsmrel);
+ if (lookup_fsm_page_entry(fsmrel, page, &newChunk, &newChunkRelIndex))
+ elog(ERROR, "insert_fsm_page_entry: entry already exists!");
+ if (newChunk &&
+ push_fsm_page_entry(page, spaceAvail, newChunk, newChunkRelIndex))
+ {
+ fsmrel->numPages++;
+ fsmrel->nextPage++; /* don't return same page twice running */
+ return true;
+ }
+ /* Shouldn't get here given the initial if-test for space available */
+ elog(ERROR, "insert_fsm_page_entry: failed to insert entry!");
+ return false; /* keep compiler quiet */
+}
+
+/*
+ * Auxiliary routine for insert_fsm_page_entry: try to push entries to the
+ * right to insert at chunk/chunkRelIndex. Return TRUE if successful.
+ * Note that the FSMRelation's own fields are not updated.
+ */
+static bool
+push_fsm_page_entry(BlockNumber page, Size spaceAvail,
+ FSMChunk *chunk, int chunkRelIndex)
+{
+ int i;
+
+ if (chunk->numPages >= CHUNKPAGES)
+ {
+ if (chunk->next == NULL)
+ return false; /* no space */
+ /* try to push chunk's last item to next chunk */
+ if (! push_fsm_page_entry(chunk->pages[CHUNKPAGES-1],
+ chunk->bytes[CHUNKPAGES-1],
+ chunk->next, 0))
+ return false;
+ /* successfully pushed it */
+ chunk->numPages--;
+ }
+ for (i = chunk->numPages; i > chunkRelIndex; i--)
+ {
+ chunk->pages[i] = chunk->pages[i-1];
+ chunk->bytes[i] = chunk->bytes[i-1];
+ }
+ chunk->numPages++;
+ chunk->pages[chunkRelIndex] = page;
+ chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
+ return true;
+}
+
+/*
+ * Delete one page entry from a FSMRelation's list. Compact free space
+ * in the list, but only if a chunk can be returned to the freelist.
+ */
+static void
+delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk, int chunkRelIndex)
+{
+ int i,
+ lim;
+
+ Assert(chunk && chunkRelIndex >= 0 && chunkRelIndex < chunk->numPages);
+ /* Compact out space in this chunk */
+ lim = --chunk->numPages;
+ for (i = chunkRelIndex; i < lim; i++)
+ {
+ chunk->pages[i] = chunk->pages[i+1];
+ chunk->bytes[i] = chunk->bytes[i+1];
+ }
+ /* Compact the whole list if a chunk can be freed */
+ fsmrel->numPages--;
+ if (fsmrel->numPages <= (fsmrel->numChunks-1) * CHUNKPAGES)
+ compact_fsm_page_list(fsmrel);
+}
+
+/*
+ * Remove any pages with free space less than the current threshold for the
+ * FSMRelation, compact out free slots in non-last chunks, and return any
+ * completely freed chunks to the freelist.
+ */
+static void
+compact_fsm_page_list(FSMRelation *fsmrel)
+{
+ Size threshold = fsmrel->threshold;
+ FSMChunk *srcChunk,
+ *dstChunk;
+ int srcIndex,
+ dstIndex,
+ dstPages,
+ dstChunkCnt;
+
+ srcChunk = dstChunk = fsmrel->relChunks;
+ srcIndex = dstIndex = 0;
+ dstPages = 0; /* total pages kept */
+ dstChunkCnt = 1; /* includes current dstChunk */
+
+ while (srcChunk != NULL)
+ {
+ int srcPages = srcChunk->numPages;
+
+ while (srcIndex < srcPages)
+ {
+ if ((Size) srcChunk->bytes[srcIndex] >= threshold)
+ {
+ if (dstIndex >= CHUNKPAGES)
+ {
+ /*
+ * At this point srcChunk must be pointing to a later
+ * chunk, so it's OK to overwrite dstChunk->numPages.
+ */
+ dstChunk->numPages = dstIndex;
+ dstChunk = dstChunk->next;
+ dstChunkCnt++;
+ dstIndex = 0;
+ }
+ dstChunk->pages[dstIndex] = srcChunk->pages[srcIndex];
+ dstChunk->bytes[dstIndex] = srcChunk->bytes[srcIndex];
+ dstIndex++;
+ dstPages++;
+ }
+ srcIndex++;
+ }
+ srcChunk = srcChunk->next;
+ srcIndex = 0;
+ }
+
+ if (dstPages == 0)
+ {
+ /* No chunks to be kept at all */
+ fsmrel->nextPage = 0;
+ fsmrel->numPages = 0;
+ fsmrel->numChunks = 0;
+ fsmrel->relChunks = NULL;
+ free_chunk_chain(dstChunk);
+ }
+ else
+ {
+ /* we deliberately don't change nextPage here */
+ fsmrel->numPages = dstPages;
+ fsmrel->numChunks = dstChunkCnt;
+ dstChunk->numPages = dstIndex;
+ free_chunk_chain(dstChunk->next);
+ dstChunk->next = NULL;
+ }
+}
+
+/*
+ * Acquire some free space by raising the thresholds of all FSMRelations.
+ *
+ * Note there is no guarantee as to how much space will be freed by a single
+ * invocation; caller may repeat if necessary.
+ */
+static void
+acquire_fsm_free_space(void)
+{
+ FSMRelation *fsmrel;
+
+ for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
+ {
+ fsmrel->threshold *= 2;
+ /* Limit thresholds to BLCKSZ so they can't get ridiculously large */
+ if (fsmrel->threshold > BLCKSZ)
+ fsmrel->threshold = BLCKSZ;
+ /* Release any pages that don't meet the new threshold */
+ compact_fsm_page_list(fsmrel);
+ }
}
void
DumpFreeSpace(void)
{
- /* stub */
+ FSMRelation *fsmrel;
+ FSMRelation *prevrel = NULL;
+ int relNum = 0;
+ FSMChunk *chunk;
+ int chunkRelIndex;
+ int nChunks;
+ int nPages;
+
+ for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
+ {
+ relNum++;
+ fprintf(stderr, "Map %d: rel %u/%u useCount %d threshold %u nextPage %d\nMap= ",
+ relNum, fsmrel->key.tblNode, fsmrel->key.relNode,
+ fsmrel->useCount, fsmrel->threshold, fsmrel->nextPage);
+ nChunks = nPages = 0;
+ for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
+ {
+ int numPages = chunk->numPages;
+
+ nChunks++;
+ for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
+ {
+ nPages++;
+ fprintf(stderr, " %u:%u",
+ chunk->pages[chunkRelIndex],
+ chunk->bytes[chunkRelIndex]);
+ }
+ }
+ fprintf(stderr, "\n");
+ /* Cross-check local counters and list links */
+ if (nPages != fsmrel->numPages)
+ fprintf(stderr, "DumpFreeSpace: %d pages in rel, but numPages = %d\n",
+ nPages, fsmrel->numPages);
+ if (nChunks != fsmrel->numChunks)
+ fprintf(stderr, "DumpFreeSpace: %d chunks in rel, but numChunks = %d\n",
+ nChunks, fsmrel->numChunks);
+ if (prevrel != fsmrel->priorRel)
+ fprintf(stderr, "DumpFreeSpace: broken list links\n");
+ prevrel = fsmrel;
+ }
+ if (prevrel != FreeSpaceMap->relListTail)
+ fprintf(stderr, "DumpFreeSpace: broken list links\n");
+ /* Cross-check global counters */
+ if (relNum != FreeSpaceMap->numRels)
+ fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
+ relNum, FreeSpaceMap->numRels);
+ nChunks = 0;
+ for (chunk = FreeSpaceMap->freeChunks; chunk; chunk = chunk->next)
+ nChunks++;
+ if (nChunks != FreeSpaceMap->numFreeChunks)
+ fprintf(stderr, "DumpFreeSpace: %d chunks in list, but numFreeChunks = %d\n",
+ nChunks, FreeSpaceMap->numFreeChunks);
}
#endif /* FREESPACE_DEBUG */