* gistbuild.c
* build algorithm for GiST indexes implementation.
*
+ * There are two different strategies:
+ *
+ * 1. Sort all input tuples, pack them into GiST leaf pages in the sorted
+ * order, and create downlinks and internal pages as we go. This builds
+ * the index from the bottom up, similar to how B-tree index build
+ * works.
+ *
+ * 2. Start with an empty index, and insert all tuples one by one.
+ *
+ * The sorted method is used if the operator classes for all columns have
+ * a 'sortsupport' defined. Otherwise, we resort to the second strategy.
+ *
+ * The second strategy can optionally use buffers at different levels of
+ * the tree to reduce I/O, see "Buffering build algorithm" in the README
+ * for a more detailed explanation. It initially calls insert over and
+ * over, but switches to the buffered algorithm after a certain number of
+ * tuples (unless buffering mode is disabled).
+ *
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
#include "storage/smgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
+#include "utils/tuplesort.h"
/* Interval of index tuples between checks of whether to switch to buffering build mode */
#define BUFFERING_MODE_SWITCH_CHECK_STEP 256
/*
 * Number of tuples to process in the slow way before switching to buffering
 * mode, when buffering is explicitly turned on. Also the interval at which
 * the target buffer size is readjusted while in buffering mode.
 */
#define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
+/*
+ * Strategy used to build the index. It can change between the
+ * GIST_BUFFERING_* modes on the fly, but if the Sorted method is used,
+ * that needs to be decided up-front and cannot be changed afterwards.
+ */
typedef enum
{
+ GIST_SORTED_BUILD, /* bottom-up build by sorting */
GIST_BUFFERING_DISABLED, /* in regular build mode and aren't going to
* switch */
GIST_BUFFERING_STATS, /* gathering statistics of index tuple size
* before switching to the buffering build
* mode */
GIST_BUFFERING_AUTO, /* in regular build mode, but will switch to
* buffering build mode if the index grows
* too big */
GIST_BUFFERING_ACTIVE /* in buffering build mode */
-} GistBufferingMode;
+} GistBuildMode;
/* Working state for gistbuild and its callback */
typedef struct
Relation heaprel;
GISTSTATE *giststate;
- int64 indtuples; /* number of tuples indexed */
- int64 indtuplesSize; /* total size of all indexed tuples */
-
Size freespace; /* amount of free space to leave on pages */
+ GistBuildMode buildMode;
+
+ int64 indtuples; /* number of tuples indexed */
+
/*
* Extra data structures used during a buffering build. 'gfbb' contains
* information related to managing the build buffers. 'parentMap' is a
* lookup table of the parent of each internal page.
*/
+ int64 indtuplesSize; /* total size of all indexed tuples */
GISTBuildBuffers *gfbb;
HTAB *parentMap;
- GistBufferingMode bufferingMode;
+ /*
+ * Extra data structures used during a sorting build.
+ */
+ Tuplesortstate *sortstate; /* state data for tuplesort.c */
+
+ BlockNumber pages_allocated; /* # of blocks assigned block numbers so far */
+ BlockNumber pages_written; /* # of blocks already written out to disk */
+
+ int ready_num_pages; /* # of completed pages held in 'ready_pages' */
+ BlockNumber ready_blknos[XLR_MAX_BLOCK_ID]; /* their block numbers */
+ Page ready_pages[XLR_MAX_BLOCK_ID]; /* finished pages awaiting write and WAL-log */
} GISTBuildState;
+/*
+ * In sorted build, we use a stack of these structs, one for each level,
+ * to hold an in-memory buffer of the rightmost page at that level. When the
+ * page fills up, it is written out and a new page is allocated.
+ */
+typedef struct GistSortedBuildPageState
+{
+ Page page;
+ struct GistSortedBuildPageState *parent; /* Upper level, if any */
+} GistSortedBuildPageState;
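+
+/*
+ * Conceptually, the page states form a linked stack, leaf level at the
+ * bottom:
+ *
+ *   leafstate          rightmost leaf page being filled
+ *     ->parent         rightmost internal page, one level up
+ *       ->parent       rightmost internal page, two levels up
+ *         ->parent     NULL for the topmost level built so far
+ *
+ * Flushing the rightmost page of a level inserts a downlink into the level
+ * above; if no level exists above yet, a new one (the future root) is
+ * created on demand.
+ */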
+
/* prototypes for private functions */
+
+static void gistSortedBuildCallback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *state);
+static void gist_indexsortbuild(GISTBuildState *state);
+static void gist_indexsortbuild_pagestate_add(GISTBuildState *state,
+ GistSortedBuildPageState *pagestate,
+ IndexTuple itup);
+static void gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
+ GistSortedBuildPageState *pagestate);
+static void gist_indexsortbuild_flush_ready_pages(GISTBuildState *state);
+
static void gistInitBuffering(GISTBuildState *buildstate);
static int calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
static void gistBuildCallback(Relation index,
static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
+
/*
- * Main entry point to GiST index build. Initially calls insert over and over,
- * but switches to more efficient buffering build algorithm after a certain
- * number of tuples (unless buffering mode is disabled).
+ * Main entry point to GiST index build.
*/
IndexBuildResult *
gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
IndexBuildResult *result;
double reltuples;
GISTBuildState buildstate;
- Buffer buffer;
- Page page;
MemoryContext oldcxt = CurrentMemoryContext;
int fillfactor;
+ Oid SortSupportFnOids[INDEX_MAX_KEYS];
+ bool hasallsortsupports;
+ int keyscount = IndexRelationGetNumberOfKeyAttributes(index);
+ GiSTOptions *options = NULL;
+
+ /*
+ * We expect to be called exactly once for any index relation. If that's
+ * not the case, big trouble's what we have.
+ */
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "index \"%s\" already contains data",
+ RelationGetRelationName(index));
+
+ if (index->rd_options)
+ options = (GiSTOptions *) index->rd_options;
buildstate.indexrel = index;
buildstate.heaprel = heap;
+ buildstate.sortstate = NULL;
+ buildstate.giststate = initGISTstate(index);
- if (index->rd_options)
+ /*
+ * Create a temporary memory context that is reset once for each tuple
+ * processed. (Note: we don't bother to make this a child of the
+ * giststate's scanCxt, so we have to delete it separately at the end.)
+ */
+ buildstate.giststate->tempCxt = createTempGistContext();
+
+ /*
+ * Choose build strategy. If all keys support sorting, do that. Otherwise
+ * the default strategy is to switch to buffering mode when the index grows
+ * too large to fit in cache.
+ */
+ hasallsortsupports = true;
+ for (int i = 0; i < keyscount; i++)
{
- /* Get buffering mode from the options string */
- GiSTOptions *options = (GiSTOptions *) index->rd_options;
+ SortSupportFnOids[i] = index_getprocid(index, i + 1,
+ GIST_SORTSUPPORT_PROC);
+ if (!OidIsValid(SortSupportFnOids[i]))
+ {
+ hasallsortsupports = false;
+ break;
+ }
+ }
+ if (hasallsortsupports)
+ {
+ buildstate.buildMode = GIST_SORTED_BUILD;
+ }
+ else if (options)
+ {
if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
- buildstate.bufferingMode = GIST_BUFFERING_STATS;
+ buildstate.buildMode = GIST_BUFFERING_STATS;
else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
- buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
+ buildstate.buildMode = GIST_BUFFERING_DISABLED;
else
- buildstate.bufferingMode = GIST_BUFFERING_AUTO;
-
- fillfactor = options->fillfactor;
+ buildstate.buildMode = GIST_BUFFERING_AUTO;
}
else
{
- /*
- * By default, switch to buffering mode when the index grows too large
- * to fit in cache.
- */
- buildstate.bufferingMode = GIST_BUFFERING_AUTO;
- fillfactor = GIST_DEFAULT_FILLFACTOR;
+ buildstate.buildMode = GIST_BUFFERING_AUTO;
}
- /* Calculate target amount of free space to leave on pages */
+
+ /*
+ * Calculate target amount of free space to leave on pages.
+ */
+ fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR;
buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
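+ /* e.g. with BLCKSZ 8192 and the default fillfactor of 90, this reserves
+  * 819 bytes of free space per page. */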
/*
- * We expect to be called exactly once for any index relation. If that's
- * not the case, big trouble's what we have.
+ * Build the index using the chosen strategy.
*/
- if (RelationGetNumberOfBlocks(index) != 0)
- elog(ERROR, "index \"%s\" already contains data",
- RelationGetRelationName(index));
+ buildstate.indtuples = 0;
+ buildstate.indtuplesSize = 0;
- /* no locking is needed */
- buildstate.giststate = initGISTstate(index);
+ if (buildstate.buildMode == GIST_SORTED_BUILD)
+ {
+ /*
+ * Sort all data, build the index from bottom up.
+ */
+ buildstate.sortstate = tuplesort_begin_index_gist(heap,
+ index,
+ maintenance_work_mem,
+ NULL,
+ false);
+
+ /* Scan the table, adding all tuples to the tuplesort */
+ reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+ gistSortedBuildCallback,
+ (void *) &buildstate, NULL);
+
+ /*
+ * Perform the sort and build index pages.
+ */
+ tuplesort_performsort(buildstate.sortstate);
+
+ gist_indexsortbuild(&buildstate);
+
+ tuplesort_end(buildstate.sortstate);
+ }
+ else
+ {
+ /*
+ * Initialize an empty index and insert all tuples, possibly using
+ * buffers on intermediate levels.
+ */
+ Buffer buffer;
+ Page page;
+
+ /* initialize the root page */
+ buffer = gistNewBuffer(index);
+ Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
+ page = BufferGetPage(buffer);
+
+ START_CRIT_SECTION();
+
+ GISTInitBuffer(buffer, F_LEAF);
+
+ MarkBufferDirty(buffer);
+ PageSetLSN(page, GistBuildLSN);
+
+ UnlockReleaseBuffer(buffer);
+
+ END_CRIT_SECTION();
+
+ /* Scan the table, inserting all the tuples to the index. */
+ reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+ gistBuildCallback,
+ (void *) &buildstate, NULL);
+
+ /*
+ * If buffering was used, flush out all the tuples that are still in
+ * the buffers.
+ */
+ if (buildstate.buildMode == GIST_BUFFERING_ACTIVE)
+ {
+ elog(DEBUG1, "all tuples processed, emptying buffers");
+ gistEmptyAllBuffers(&buildstate);
+ gistFreeBuildBuffers(buildstate.gfbb);
+ }
+
+ /*
+ * We didn't write WAL records as we built the index, so if
+ * WAL-logging is required, write all pages to the WAL now.
+ */
+ if (RelationNeedsWAL(index))
+ {
+ log_newpage_range(index, MAIN_FORKNUM,
+ 0, RelationGetNumberOfBlocks(index),
+ true);
+ }
+ }
+
+ /* okay, all heap tuples are indexed */
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(buildstate.giststate->tempCxt);
+
+ freeGISTstate(buildstate.giststate);
/*
- * Create a temporary memory context that is reset once for each tuple
- * processed. (Note: we don't bother to make this a child of the
- * giststate's scanCxt, so we have to delete it separately at the end.)
+ * Return statistics
*/
- buildstate.giststate->tempCxt = createTempGistContext();
+ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
- /* initialize the root page */
- buffer = gistNewBuffer(index);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = BufferGetPage(buffer);
+ result->heap_tuples = reltuples;
+ result->index_tuples = (double) buildstate.indtuples;
- START_CRIT_SECTION();
+ return result;
+}
- GISTInitBuffer(buffer, F_LEAF);
+/*-------------------------------------------------------------------------
+ * Routines for sorted build
+ *-------------------------------------------------------------------------
+ */
- MarkBufferDirty(buffer);
- PageSetLSN(page, GistBuildLSN);
+/*
+ * Per-tuple callback for table_index_build_scan.
+ */
+static void
+gistSortedBuildCallback(Relation index,
+ ItemPointer tid,
+ Datum *values,
+ bool *isnull,
+ bool tupleIsAlive,
+ void *state)
+{
+ GISTBuildState *buildstate = (GISTBuildState *) state;
+ MemoryContext oldCtx;
+ Datum compressed_values[INDEX_MAX_KEYS];
- UnlockReleaseBuffer(buffer);
+ oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
- END_CRIT_SECTION();
+ /* Form an index tuple and point it at the heap tuple */
+ gistCompressValues(buildstate->giststate, index,
+ values, isnull,
+ true, compressed_values);
- /* build the index */
- buildstate.indtuples = 0;
- buildstate.indtuplesSize = 0;
+ tuplesort_putindextuplevalues(buildstate->sortstate,
+ buildstate->indexrel,
+ tid,
+ compressed_values, isnull);
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(buildstate->giststate->tempCxt);
+
+ /* Update tuple count. */
+ buildstate->indtuples += 1;
+}
+
+/*
+ * Build the GiST index bottom-up from the pre-sorted tuples.
+ */
+static void
+gist_indexsortbuild(GISTBuildState *state)
+{
+ IndexTuple itup;
+ GistSortedBuildPageState *leafstate;
+ GistSortedBuildPageState *pagestate;
+ Page page;
+
+ state->pages_allocated = 0;
+ state->pages_written = 0;
+ state->ready_num_pages = 0;
/*
- * Do the heap scan.
+ * Write an empty page as a placeholder for the root page. It will be
+ * replaced with the real root page at the end.
*/
- reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
- gistBuildCallback,
- (void *) &buildstate, NULL);
+ page = palloc0(BLCKSZ);
+ smgrextend(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+ page, true);
+ state->pages_allocated++;
+ state->pages_written++;
+
+ /* Allocate a temporary buffer for the first leaf page. */
+ leafstate = palloc(sizeof(GistSortedBuildPageState));
+ leafstate->page = page;
+ leafstate->parent = NULL;
+ gistinitpage(page, F_LEAF);
/*
- * If buffering was used, flush out all the tuples that are still in the
- * buffers.
+ * Fill index pages with tuples in the sorted order.
*/
- if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
+ while ((itup = tuplesort_getindextuple(state->sortstate, true)) != NULL)
{
- elog(DEBUG1, "all tuples processed, emptying buffers");
- gistEmptyAllBuffers(&buildstate);
- gistFreeBuildBuffers(buildstate.gfbb);
+ gist_indexsortbuild_pagestate_add(state, leafstate, itup);
+ MemoryContextReset(state->giststate->tempCxt);
}
- /* okay, all heap tuples are indexed */
- MemoryContextSwitchTo(oldcxt);
- MemoryContextDelete(buildstate.giststate->tempCxt);
-
- freeGISTstate(buildstate.giststate);
-
/*
- * We didn't write WAL records as we built the index, so if WAL-logging is
- * required, write all pages to the WAL now.
+ * Write out the partially full non-root pages.
+ *
+ * Keep in mind that a flush can create a new root page.
*/
- if (RelationNeedsWAL(index))
+ pagestate = leafstate;
+ while (pagestate->parent != NULL)
{
- log_newpage_range(index, MAIN_FORKNUM,
- 0, RelationGetNumberOfBlocks(index),
- true);
+ GistSortedBuildPageState *parent;
+
+ gist_indexsortbuild_pagestate_flush(state, pagestate);
+ parent = pagestate->parent;
+ pfree(pagestate->page);
+ pfree(pagestate);
+ pagestate = parent;
}
+ gist_indexsortbuild_flush_ready_pages(state);
+
+ /* Write out the root */
+ PageSetLSN(pagestate->page, GistBuildLSN);
+ smgrwrite(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+ pagestate->page, true);
+ if (RelationNeedsWAL(state->indexrel))
+ log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+ pagestate->page, true);
+
+ pfree(pagestate->page);
+ pfree(pagestate);
+}
+
+/*
+ * Add a tuple to the page. If the page is full, write it out and initialize
+ * a new page first.
+ */
+static void
+gist_indexsortbuild_pagestate_add(GISTBuildState *state,
+ GistSortedBuildPageState *pagestate,
+ IndexTuple itup)
+{
+ Size sizeNeeded;
+
+ /* Does the tuple fit? If not, flush */
+ sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace;
+ if (PageGetFreeSpace(pagestate->page) < sizeNeeded)
+ gist_indexsortbuild_pagestate_flush(state, pagestate);
+
+ gistfillbuffer(pagestate->page, &itup, 1, InvalidOffsetNumber);
+}
+
+static void
+gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
+ GistSortedBuildPageState *pagestate)
+{
+ GistSortedBuildPageState *parent;
+ IndexTuple *itvec;
+ IndexTuple union_tuple;
+ int vect_len;
+ bool isleaf;
+ BlockNumber blkno;
+ MemoryContext oldCtx;
+
+ /* check once per page */
+ CHECK_FOR_INTERRUPTS();
+
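+ /*
+ * If the array of finished pages is full, write them out now. The batch
+ * size matches XLR_MAX_BLOCK_ID, which is as many block references as the
+ * flush routine puts into a single WAL record.
+ */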
+ if (state->ready_num_pages == XLR_MAX_BLOCK_ID)
+ gist_indexsortbuild_flush_ready_pages(state);
+
/*
- * Return statistics
+ * The page is now complete. Assign a block number to it, and add it to
+ * the list of finished pages. (We don't write it out immediately, because
+ * we want to WAL-log the pages in batches.)
*/
- result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+ blkno = state->pages_allocated++;
+ state->ready_blknos[state->ready_num_pages] = blkno;
+ state->ready_pages[state->ready_num_pages] = pagestate->page;
+ state->ready_num_pages++;
- result->heap_tuples = reltuples;
- result->index_tuples = (double) buildstate.indtuples;
+ isleaf = GistPageIsLeaf(pagestate->page);
- return result;
+ /*
+ * Form a downlink tuple to represent all the tuples on the page.
+ */
+ oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
+ itvec = gistextractpage(pagestate->page, &vect_len);
+ union_tuple = gistunion(state->indexrel, itvec, vect_len,
+ state->giststate);
+ ItemPointerSetBlockNumber(&(union_tuple->t_tid), blkno);
+ MemoryContextSwitchTo(oldCtx);
+
+ /*
+ * Insert the downlink to the parent page. If this was the root, create a
+ * new page as the parent, which becomes the new root.
+ */
+ parent = pagestate->parent;
+ if (parent == NULL)
+ {
+ parent = palloc(sizeof(GistSortedBuildPageState));
+ parent->page = (Page) palloc(BLCKSZ);
+ parent->parent = NULL;
+ gistinitpage(parent->page, 0);
+
+ pagestate->parent = parent;
+ }
+ gist_indexsortbuild_pagestate_add(state, parent, union_tuple);
+
+ /* Re-initialize the page buffer for next page on this level. */
+ pagestate->page = palloc(BLCKSZ);
+ gistinitpage(pagestate->page, isleaf ? F_LEAF : 0);
+}
+
+static void
+gist_indexsortbuild_flush_ready_pages(GISTBuildState *state)
+{
+ if (state->ready_num_pages == 0)
+ return;
+
+ for (int i = 0; i < state->ready_num_pages; i++)
+ {
+ Page page = state->ready_pages[i];
+
+ /* Currently, the blocks must be buffered in order. */
+ if (state->ready_blknos[i] != state->pages_written)
+ elog(ERROR, "unexpected block number to flush GiST sorting build");
+
+ PageSetLSN(page, GistBuildLSN);
+
+ smgrextend(state->indexrel->rd_smgr,
+ MAIN_FORKNUM,
+ state->pages_written++,
+ page,
+ true);
+ }
+
+ if (RelationNeedsWAL(state->indexrel))
+ log_newpages(&state->indexrel->rd_node, MAIN_FORKNUM, state->ready_num_pages,
+ state->ready_blknos, state->ready_pages, true);
+
+ for (int i = 0; i < state->ready_num_pages; i++)
+ pfree(state->ready_pages[i]);
+
+ state->ready_num_pages = 0;
}
+
+/*-------------------------------------------------------------------------
+ * Routines for non-sorted build
+ *-------------------------------------------------------------------------
+ */
+
/*
* Attempt to switch to buffering mode.
*
if (levelStep <= 0)
{
elog(DEBUG1, "failed to switch to buffered GiST build");
- buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
+ buildstate->buildMode = GIST_BUFFERING_DISABLED;
return;
}
gistInitParentMap(buildstate);
- buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;
+ buildstate->buildMode = GIST_BUFFERING_ACTIVE;
elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
levelStep, pagesPerBuffer);
oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
/* form an index tuple and point it at the heap tuple */
- itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
+ itup = gistFormTuple(buildstate->giststate, index,
+ values, isnull,
+ true);
itup->t_tid = *tid;
- if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
+ if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
{
/* We have buffers, so use them. */
gistBufferingBuildInsert(buildstate, itup);
MemoryContextSwitchTo(oldCtx);
MemoryContextReset(buildstate->giststate->tempCxt);
- if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
+ if (buildstate->buildMode == GIST_BUFFERING_ACTIVE &&
buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
{
/* Adjust the target buffer size now */
* To avoid excessive calls to smgrnblocks(), only check this every
* BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
*/
- if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
+ if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
- (buildstate->bufferingMode == GIST_BUFFERING_STATS &&
+ (buildstate->buildMode == GIST_BUFFERING_STATS &&
buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
{
/*
#include "utils/builtins.h"
#include "utils/float.h"
#include "utils/geo_decls.h"
+#include "utils/sortsupport.h"
static bool gist_box_leaf_consistent(BOX *key, BOX *query,
static bool rtree_internal_consistent(BOX *key, BOX *query,
StrategyNumber strategy);
+static uint64 point_zorder_internal(float4 x, float4 y);
+static uint64 part_bits32_by2(uint32 x);
+static uint32 ieee_float32_to_uint32(float f);
+static int gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup);
+static Datum gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup);
+static int gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup);
+static bool gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup);
+
+
/* Minimum accepted ratio of split */
#define LIMIT_RATIO 0.3
PG_RETURN_FLOAT8(distance);
}
+
+/*
+ * Z-order routines for fast index build
+ */
+
+/*
+ * Compute Z-value of a point
+ *
+ * Z-order (also known as Morton Code) maps a two-dimensional point to a
+ * single integer, in a way that preserves locality. Points that are close in
+ * the two-dimensional space are mapped to integer that are not far from each
+ * other. We do that by interleaving the bits in the X and Y components.
+ *
+ * Morton Code is normally defined only for integers, but the X and Y values
+ * of a point are floating point. We expect floats to be in IEEE format.
+ */
+static uint64
+point_zorder_internal(float4 x, float4 y)
+{
+ uint32 ix = ieee_float32_to_uint32(x);
+ uint32 iy = ieee_float32_to_uint32(y);
+
+ /* Interleave the bits */
+ return part_bits32_by2(ix) | (part_bits32_by2(iy) << 1);
+}
+
+/* Interleave 32 bits with zeroes */
+static uint64
+part_bits32_by2(uint32 x)
+{
+ uint64 n = x;
+
+ n = (n | (n << 16)) & UINT64CONST(0x0000FFFF0000FFFF);
+ n = (n | (n << 8)) & UINT64CONST(0x00FF00FF00FF00FF);
+ n = (n | (n << 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
+ n = (n | (n << 2)) & UINT64CONST(0x3333333333333333);
+ n = (n | (n << 1)) & UINT64CONST(0x5555555555555555);
+
+ return n;
+}
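+
+/*
+ * A small worked example with hypothetical 4-bit inputs: part_bits32_by2(0xB)
+ * spreads binary 1011 into 01000101 (0x45), and part_bits32_by2(0x3) spreads
+ * 0011 into 00000101 (0x05). Interleaving them as point_zorder_internal()
+ * does gives 0x45 | (0x05 << 1) = 0x4F, i.e. binary 01001111: the X bits
+ * occupy the even positions and the Y bits the odd positions.
+ */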
+
+/*
+ * Convert a 32-bit IEEE float to uint32 in a way that preserves the ordering
+ */
+static uint32
+ieee_float32_to_uint32(float f)
+{
+ /*----
+ *
+ * IEEE 754 floating point format
+ * ------------------------------
+ *
+ * IEEE 754 floating point numbers have this format:
+ *
+ * exponent (8 bits)
+ * |
+ * s eeeeeeee mmmmmmmmmmmmmmmmmmmmmmm
+ * | |
+ * sign mantissa (23 bits)
+ *
+ * Infinity has all bits in the exponent set and the mantissa is all
+ * zeros. Negative infinity is the same but with the sign bit set.
+ *
+ * NaNs are represented with all bits in the exponent set, and the least
+ * significant bit in the mantissa also set. The rest of the mantissa bits
+ * can be used to distinguish different kinds of NaNs.
+ *
+ * The IEEE format has the nice property that when you take the bit
+ * representation and interpret it as an integer, the order is preserved,
+ * except for the sign. That holds for the +-Infinity values too.
+ *
+ * Mapping to uint32
+ * -----------------
+ *
+ * In order to have a smooth transition from negative to positive numbers,
+ * we map floats to unsigned integers like this:
+ *
+ * x < 0 to range 0-7FFFFFFF
+ * x = 0 to value 80000000 (both positive and negative zero)
+ * x > 0 to range 80000001-FFFFFFFF
+ *
+ * We don't care to distinguish different kind of NaNs, so they are all
+ * mapped to the same arbitrary value, FFFFFFFF. Because of the IEEE bit
+ * representation of NaNs, there aren't any non-NaN values that would be
+ * mapped to FFFFFFFF. In fact, there is a range of unused values on both
+ * ends of the uint32 space.
+ */
+ if (isnan(f))
+ return 0xFFFFFFFF;
+ else
+ {
+ union
+ {
+ float f;
+ uint32 i;
+ } u;
+
+ u.f = f;
+
+ /* Check the sign bit */
+ if ((u.i & 0x80000000) != 0)
+ {
+ /*
+ * Map the negative value to range 0-7FFFFFFF. This flips the sign
+ * bit to 0 in the same instruction.
+ */
+ Assert(f <= 0); /* can be -0 */
+ u.i ^= 0xFFFFFFFF;
+ }
+ else
+ {
+ /* Map the positive value (or 0) to range 80000000-FFFFFFFF */
+ u.i |= 0x80000000;
+ }
+
+ return u.i;
+ }
+}
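+
+/*
+ * A worked example: 1.0f has bit pattern 0x3F800000; its sign bit is clear,
+ * so it maps to 0x3F800000 | 0x80000000 = 0xBF800000. -1.0f has pattern
+ * 0xBF800000; its sign bit is set, so it maps to 0xBF800000 ^ 0xFFFFFFFF =
+ * 0x407FFFFF. The resulting order 0x407FFFFF < 0x80000000 < 0xBF800000
+ * matches -1.0 < 0.0 < 1.0.
+ */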
+
+/*
+ * Compare the Z-order of points
+ */
+static int
+gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup)
+{
+ Point *p1 = &(DatumGetBoxP(a)->low);
+ Point *p2 = &(DatumGetBoxP(b)->low);
+ uint64 z1;
+ uint64 z2;
+
+ /*
+ * Do a quick check for equality first. It's not clear if this is worth it
+ * in general, but certainly is when used as tie-breaker with abbreviated
+ * keys,
+ */
+ if (p1->x == p2->x && p1->y == p2->y)
+ return 0;
+
+ z1 = point_zorder_internal(p1->x, p1->y);
+ z2 = point_zorder_internal(p2->x, p2->y);
+ if (z1 > z2)
+ return 1;
+ else if (z1 < z2)
+ return -1;
+ else
+ return 0;
+}
+
+/*
+ * Abbreviated version of Z-order comparison
+ *
+ * The abbreviated format is a Z-order value computed from the two 32-bit
+ * floats. If SIZEOF_DATUM == 8, the 64-bit Z-order value fits fully in the
+ * abbreviated Datum, otherwise use its most significant bits.
+ */
+static Datum
+gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
+{
+ Point *p = &(DatumGetBoxP(original)->low);
+ uint64 z;
+
+ z = point_zorder_internal(p->x, p->y);
+
+#if SIZEOF_DATUM == 8
+ return (Datum) z;
+#else
+ return (Datum) (z >> 32);
+#endif
+}
+
+static int
+gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup)
+{
+ /*
+ * Compare the pre-computed Z-orders as unsigned integers. Datum is a
+ * typedef for 'uintptr_t', so no casting is required.
+ */
+ if (z1 > z2)
+ return 1;
+ else if (z1 < z2)
+ return -1;
+ else
+ return 0;
+}
+
+/*
+ * We never consider aborting the abbreviation.
+ *
+ * On 64-bit systems, the abbreviation is not lossy so it is always
+ * worthwhile. (Perhaps it's not on 32-bit systems, but we don't bother
+ * with logic to decide.)
+ */
+static bool
+gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+ return false;
+}
+
+/*
+ * Sort support routine for fast GiST index build by sorting.
+ */
+Datum
+gist_point_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ if (ssup->abbreviate)
+ {
+ ssup->comparator = gist_bbox_zorder_cmp_abbrev;
+ ssup->abbrev_converter = gist_bbox_zorder_abbrev_convert;
+ ssup->abbrev_abort = gist_bbox_zorder_abbrev_abort;
+ ssup->abbrev_full_comparator = gist_bbox_zorder_cmp;
+ }
+ else
+ {
+ ssup->comparator = gist_bbox_zorder_cmp;
+ }
+ PG_RETURN_VOID();
+}
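+
+/*
+ * This is the support function that gistbuild() looks up through the
+ * GIST_SORTSUPPORT_PROC slot; an operator class that provides one for every
+ * indexed column makes the sorted build strategy available.
+ */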