Add support for building GiST index by sorting.

author Heikki Linnakangas <[email protected]>

Thu, 17 Sep 2020 08:33:40 +0000 (11:33 +0300)

committer Heikki Linnakangas <[email protected]>

Thu, 17 Sep 2020 08:33:40 +0000 (11:33 +0300)
author Heikki Linnakangas <[email protected]>
Thu, 17 Sep 2020 08:33:40 +0000 (11:33 +0300)
committer Heikki Linnakangas <[email protected]>
Thu, 17 Sep 2020 08:33:40 +0000 (11:33 +0300)
diff --git a/doc/src/sgml/gist.sgml b/doc/src/sgml/gist.sgml

index f9226e7a35cbbae69cce918f5b17662d0114fc19..192338be88105af2db0839d8d071f88e9a00f8da 100644 (file)
--- a/doc/src/sgml/gist.sgml
+++ b/doc/src/sgml/gist.sgml
@@ -259,6 +259,8 @@ CREATE INDEX ON my_table USING GIST (my_inet_column inet_ops);
     <function>compress</function> method is omitted. The optional tenth method
     <function>options</function> is needed if the operator class provides
     the user-specified parameters.
+   The <function>sortsupport</function> method is also optional and is used to
+   speed up building a <acronym>GiST</acronym> index.
   </para>
  
   <variablelist>
@@ -1065,6 +1067,74 @@ my_compress(PG_FUNCTION_ARGS)
        </para>
       </listitem>
      </varlistentry>
+
+    <varlistentry>
+     <term><function>sortsupport</function></term>
+     <listitem>
+      <para>
+       Returns a comparator function to sort data in a way that preserves
+       locality. It is used by <command>CREATE INDEX</command> and
+       <command>REINDEX</command> commands. The quality of the created index
+       depends on how well the sort order determined by the comparator function
+       preserves locality of the inputs.
+      </para>
+      <para>
+       The <function>sortsupport</function> method is optional. If it is not
+       provided, <command>CREATE INDEX</command> builds the index by inserting
+       each tuple into the tree using the <function>penalty</function> and
+       <function>picksplit</function> functions, which is much slower.
+      </para>
+
+      <para>
+       The <acronym>SQL</acronym> declaration of the function must look like
+       this:
+
+<programlisting>
+CREATE OR REPLACE FUNCTION my_sortsupport(internal)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+</programlisting>
+
+       The argument is a pointer to a <structname>SortSupport</structname>
+       struct. At a minimum, the function must fill in its comparator field.
+       The comparator takes three arguments: two Datums to compare, and
+       a pointer to the <structname>SortSupport</structname> struct. The
+       Datums are the two indexed values in the format that they are stored
+       in the index; that is, in the format returned by the
+       <function>compress</function> method. The full API is defined in
+       <filename>src/include/utils/sortsupport.h</filename>.
+       </para>
+
+       <para>
+        The matching code in the C module could then follow this skeleton:
+
+<programlisting>
+PG_FUNCTION_INFO_V1(my_sortsupport);
+
+static int
+my_fastcmp(Datum x, Datum y, SortSupport ssup)
+{
+  /* establish order between x and y by computing some sorting value z */
+
+  int z1 = ComputeSpatialCode(x);
+  int z2 = ComputeSpatialCode(y);
+
+  return z1 == z2 ? 0 : z1 > z2 ? 1 : -1;
+}
+
+Datum
+my_sortsupport(PG_FUNCTION_ARGS)
+{
+  SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+  ssup->comparator = my_fastcmp;
+  PG_RETURN_VOID();
+}
+</programlisting>
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
  
    <para>
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c

index 671b5e9186ff0972e3f0af0284f0c87a4d1c7050..230625cf1e2c680e05801708340b164e0c885d43 100644 (file)
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -3,6 +3,24 @@
   * gistbuild.c
   *   build algorithm for GiST indexes implementation.
   *
+ * There are two different strategies:
+ *
+ * 1. Sort all input tuples, pack them into GiST leaf pages in the sorted
+ *    order, and create downlinks and internal pages as we go. This builds
+ *    the index from the bottom up, similar to how B-tree index build
+ *    works.
+ *
+ * 2. Start with an empty index, and insert all tuples one by one.
+ *
+ * The sorted method is used if the operator classes for all columns have
+ * a 'sortsupport' defined. Otherwise, we resort to the second strategy.
+ *
+ * The second strategy can optionally use buffers at different levels of
+ * the tree to reduce I/O, see "Buffering build algorithm" in the README
+ * for a more detailed explanation. It initially calls insert over and
+ * over, but switches to the buffered algorithm after a certain number of
+ * tuples (unless buffering mode is disabled).
+ *
   *
   * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
@@ -28,6 +46,7 @@
  #include "storage/smgr.h"
  #include "utils/memutils.h"
  #include "utils/rel.h"
+#include "utils/tuplesort.h"
  
  /* Step of index tuples for check whether to switch to buffering build mode */
  #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
@@ -40,8 +59,14 @@
   */
  #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
  
+/*
+ * Strategy used to build the index. It can change between the
+ * GIST_BUFFERING_* modes on the fly, but if the Sorted method is used,
+ * that needs to be decided up-front and cannot be changed afterwards.
+ */
  typedef enum
  {
+   GIST_SORTED_BUILD,          /* bottom-up build by sorting */
     GIST_BUFFERING_DISABLED,    /* in regular build mode and aren't going to
                                  * switch */
     GIST_BUFFERING_AUTO,        /* in regular build mode, but will switch to
@@ -51,7 +76,7 @@ typedef enum
                                  * before switching to the buffering build
                                  * mode */
     GIST_BUFFERING_ACTIVE       /* in buffering build mode */
-} GistBufferingMode;
+} GistBuildMode;
  
  /* Working state for gistbuild and its callback */
  typedef struct
@@ -60,23 +85,58 @@ typedef struct
     Relation    heaprel;
     GISTSTATE  *giststate;
  
-   int64       indtuples;      /* number of tuples indexed */
-   int64       indtuplesSize;  /* total size of all indexed tuples */
-
     Size        freespace;      /* amount of free space to leave on pages */
  
+   GistBuildMode buildMode;
+
+   int64       indtuples;      /* number of tuples indexed */
+
     /*
      * Extra data structures used during a buffering build. 'gfbb' contains
      * information related to managing the build buffers. 'parentMap' is a
      * lookup table of the parent of each internal page.
      */
+   int64       indtuplesSize;  /* total size of all indexed tuples */
     GISTBuildBuffers *gfbb;
     HTAB       *parentMap;
  
-   GistBufferingMode bufferingMode;
+   /*
+    * Extra data structures used during a sorting build.
+    */
+   Tuplesortstate *sortstate;  /* state data for tuplesort.c */
+
+   BlockNumber pages_allocated;
+   BlockNumber pages_written;
+
+   int         ready_num_pages;
+   BlockNumber ready_blknos[XLR_MAX_BLOCK_ID];
+   Page        ready_pages[XLR_MAX_BLOCK_ID];
  } GISTBuildState;
  
+/*
+ * In sorted build, we use a stack of these structs, one for each level,
+ * to hold an in-memory buffer of the rightmost page at the level. When the
+ * page fills up, it is written out and a new page is allocated.
+ */
+typedef struct GistSortedBuildPageState
+{
+   Page        page;
+   struct GistSortedBuildPageState *parent;    /* Upper level, if any */
+} GistSortedBuildPageState;
+
  /* prototypes for private functions */
+
+static void gistSortedBuildCallback(Relation index, ItemPointer tid,
+                                   Datum *values, bool *isnull,
+                                   bool tupleIsAlive, void *state);
+static void gist_indexsortbuild(GISTBuildState *state);
+static void gist_indexsortbuild_pagestate_add(GISTBuildState *state,
+                                             GistSortedBuildPageState *pagestate,
+                                             IndexTuple itup);
+static void gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
+                                               GistSortedBuildPageState *pagestate);
+static void gist_indexsortbuild_flush_ready_pages(GISTBuildState *state);
+
  static void gistInitBuffering(GISTBuildState *buildstate);
  static int calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
  static void gistBuildCallback(Relation index,
@@ -107,10 +167,9 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
  static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
  static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
  
+
  /*
- * Main entry point to GiST index build. Initially calls insert over and over,
- * but switches to more efficient buffering build algorithm after a certain
- * number of tuples (unless buffering mode is disabled).
+ * Main entry point to GiST index build.
   */
  IndexBuildResult *
  gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
@@ -118,124 +177,407 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     IndexBuildResult *result;
     double      reltuples;
     GISTBuildState buildstate;
-   Buffer      buffer;
-   Page        page;
     MemoryContext oldcxt = CurrentMemoryContext;
     int         fillfactor;
+   Oid         SortSupportFnOids[INDEX_MAX_KEYS];
+   bool        hasallsortsupports;
+   int         keyscount = IndexRelationGetNumberOfKeyAttributes(index);
+   GiSTOptions *options = NULL;
+
+   /*
+    * We expect to be called exactly once for any index relation. If that's
+    * not the case, big trouble's what we have.
+    */
+   if (RelationGetNumberOfBlocks(index) != 0)
+       elog(ERROR, "index \"%s\" already contains data",
+            RelationGetRelationName(index));
+
+   if (index->rd_options)
+       options = (GiSTOptions *) index->rd_options;
  
     buildstate.indexrel = index;
     buildstate.heaprel = heap;
+   buildstate.sortstate = NULL;
+   buildstate.giststate = initGISTstate(index);
  
-   if (index->rd_options)
+   /*
+    * Create a temporary memory context that is reset once for each tuple
+    * processed.  (Note: we don't bother to make this a child of the
+    * giststate's scanCxt, so we have to delete it separately at the end.)
+    */
+   buildstate.giststate->tempCxt = createTempGistContext();
+
+   /*
+    * Choose build strategy. If all keys support sorting, do that. Otherwise
+    * the default strategy is to switch to buffering mode when the index grows
+    * too large to fit in cache.
+    */
+   hasallsortsupports = true;
+   for (int i = 0; i < keyscount; i++)
     {
-       /* Get buffering mode from the options string */
-       GiSTOptions *options = (GiSTOptions *) index->rd_options;
+       SortSupportFnOids[i] = index_getprocid(index, i + 1,
+                                              GIST_SORTSUPPORT_PROC);
+       if (!OidIsValid(SortSupportFnOids[i]))
+       {
+           hasallsortsupports = false;
+           break;
+       }
+   }
  
+   if (hasallsortsupports)
+   {
+       buildstate.buildMode = GIST_SORTED_BUILD;
+   }
+   else if (options)
+   {
         if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
-           buildstate.bufferingMode = GIST_BUFFERING_STATS;
+           buildstate.buildMode = GIST_BUFFERING_STATS;
         else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
-           buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
+           buildstate.buildMode = GIST_BUFFERING_DISABLED;
         else
-           buildstate.bufferingMode = GIST_BUFFERING_AUTO;
-
-       fillfactor = options->fillfactor;
+           buildstate.buildMode = GIST_BUFFERING_AUTO;
     }
     else
     {
-       /*
-        * By default, switch to buffering mode when the index grows too large
-        * to fit in cache.
-        */
-       buildstate.bufferingMode = GIST_BUFFERING_AUTO;
-       fillfactor = GIST_DEFAULT_FILLFACTOR;
+       buildstate.buildMode = GIST_BUFFERING_AUTO;
     }
-   /* Calculate target amount of free space to leave on pages */
+
+   /*
+    * Calculate target amount of free space to leave on pages.
+    */
+   fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR;
     buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
  
     /*
-    * We expect to be called exactly once for any index relation. If that's
-    * not the case, big trouble's what we have.
+    * Build the index using the chosen strategy.
      */
-   if (RelationGetNumberOfBlocks(index) != 0)
-       elog(ERROR, "index \"%s\" already contains data",
-            RelationGetRelationName(index));
+   buildstate.indtuples = 0;
+   buildstate.indtuplesSize = 0;
  
-   /* no locking is needed */
-   buildstate.giststate = initGISTstate(index);
+   if (buildstate.buildMode == GIST_SORTED_BUILD)
+   {
+       /*
+        * Sort all data, build the index from bottom up.
+        */
+       buildstate.sortstate = tuplesort_begin_index_gist(heap,
+                                                         index,
+                                                         maintenance_work_mem,
+                                                         NULL,
+                                                         false);
+
+       /* Scan the table, adding all tuples to the tuplesort */
+       reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+                                          gistSortedBuildCallback,
+                                          (void *) &buildstate, NULL);
+
+       /*
+        * Perform the sort and build index pages.
+        */
+       tuplesort_performsort(buildstate.sortstate);
+
+       gist_indexsortbuild(&buildstate);
+
+       tuplesort_end(buildstate.sortstate);
+   }
+   else
+   {
+       /*
+        * Initialize an empty index and insert all tuples, possibly using
+        * buffers on intermediate levels.
+        */
+       Buffer      buffer;
+       Page        page;
+
+       /* initialize the root page */
+       buffer = gistNewBuffer(index);
+       Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
+       page = BufferGetPage(buffer);
+
+       START_CRIT_SECTION();
+
+       GISTInitBuffer(buffer, F_LEAF);
+
+       MarkBufferDirty(buffer);
+       PageSetLSN(page, GistBuildLSN);
+
+       UnlockReleaseBuffer(buffer);
+
+       END_CRIT_SECTION();
+
+       /* Scan the table, inserting all the tuples into the index. */
+       reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+                                          gistBuildCallback,
+                                          (void *) &buildstate, NULL);
+
+       /*
+        * If buffering was used, flush out all the tuples that are still in
+        * the buffers.
+        */
+       if (buildstate.buildMode == GIST_BUFFERING_ACTIVE)
+       {
+           elog(DEBUG1, "all tuples processed, emptying buffers");
+           gistEmptyAllBuffers(&buildstate);
+           gistFreeBuildBuffers(buildstate.gfbb);
+       }
+
+       /*
+        * We didn't write WAL records as we built the index, so if
+        * WAL-logging is required, write all pages to the WAL now.
+        */
+       if (RelationNeedsWAL(index))
+       {
+           log_newpage_range(index, MAIN_FORKNUM,
+                             0, RelationGetNumberOfBlocks(index),
+                             true);
+       }
+   }
+
+   /* okay, all heap tuples are indexed */
+   MemoryContextSwitchTo(oldcxt);
+   MemoryContextDelete(buildstate.giststate->tempCxt);
+
+   freeGISTstate(buildstate.giststate);
  
     /*
-    * Create a temporary memory context that is reset once for each tuple
-    * processed.  (Note: we don't bother to make this a child of the
-    * giststate's scanCxt, so we have to delete it separately at the end.)
+    * Return statistics
      */
-   buildstate.giststate->tempCxt = createTempGistContext();
+   result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
  
-   /* initialize the root page */
-   buffer = gistNewBuffer(index);
-   Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
-   page = BufferGetPage(buffer);
+   result->heap_tuples = reltuples;
+   result->index_tuples = (double) buildstate.indtuples;
  
-   START_CRIT_SECTION();
+   return result;
+}
  
-   GISTInitBuffer(buffer, F_LEAF);
+/*-------------------------------------------------------------------------
+ * Routines for sorted build
+ *-------------------------------------------------------------------------
+ */
  
-   MarkBufferDirty(buffer);
-   PageSetLSN(page, GistBuildLSN);
+/*
+ * Per-tuple callback for table_index_build_scan.
+ */
+static void
+gistSortedBuildCallback(Relation index,
+                       ItemPointer tid,
+                       Datum *values,
+                       bool *isnull,
+                       bool tupleIsAlive,
+                       void *state)
+{
+   GISTBuildState *buildstate = (GISTBuildState *) state;
+   MemoryContext oldCtx;
+   Datum       compressed_values[INDEX_MAX_KEYS];
  
-   UnlockReleaseBuffer(buffer);
+   oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
  
-   END_CRIT_SECTION();
+   /* Form an index tuple and point it at the heap tuple */
+   gistCompressValues(buildstate->giststate, index,
+                      values, isnull,
+                      true, compressed_values);
  
-   /* build the index */
-   buildstate.indtuples = 0;
-   buildstate.indtuplesSize = 0;
+   tuplesort_putindextuplevalues(buildstate->sortstate,
+                                 buildstate->indexrel,
+                                 tid,
+                                 compressed_values, isnull);
+
+   MemoryContextSwitchTo(oldCtx);
+   MemoryContextReset(buildstate->giststate->tempCxt);
+
+   /* Update tuple count. */
+   buildstate->indtuples += 1;
+}
+
+/*
+ * Build GiST index from bottom up from pre-sorted tuples.
+ */
+static void
+gist_indexsortbuild(GISTBuildState *state)
+{
+   IndexTuple  itup;
+   GistSortedBuildPageState *leafstate;
+   GistSortedBuildPageState *pagestate;
+   Page        page;
+
+   state->pages_allocated = 0;
+   state->pages_written = 0;
+   state->ready_num_pages = 0;
  
     /*
-    * Do the heap scan.
+    * Write an empty page as a placeholder for the root page. It will be
+    * replaced with the real root page at the end.
      */
-   reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
-                                      gistBuildCallback,
-                                      (void *) &buildstate, NULL);
+   page = palloc0(BLCKSZ);
+   smgrextend(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+              page, true);
+   state->pages_allocated++;
+   state->pages_written++;
+
+   /* Allocate a temporary buffer for the first leaf page. */
+   leafstate = palloc(sizeof(GistSortedBuildPageState));
+   leafstate->page = page;
+   leafstate->parent = NULL;
+   gistinitpage(page, F_LEAF);
  
     /*
-    * If buffering was used, flush out all the tuples that are still in the
-    * buffers.
+    * Fill index pages with tuples in the sorted order.
      */
-   if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
+   while ((itup = tuplesort_getindextuple(state->sortstate, true)) != NULL)
     {
-       elog(DEBUG1, "all tuples processed, emptying buffers");
-       gistEmptyAllBuffers(&buildstate);
-       gistFreeBuildBuffers(buildstate.gfbb);
+       gist_indexsortbuild_pagestate_add(state, leafstate, itup);
+       MemoryContextReset(state->giststate->tempCxt);
     }
  
-   /* okay, all heap tuples are indexed */
-   MemoryContextSwitchTo(oldcxt);
-   MemoryContextDelete(buildstate.giststate->tempCxt);
-
-   freeGISTstate(buildstate.giststate);
-
     /*
-    * We didn't write WAL records as we built the index, so if WAL-logging is
-    * required, write all pages to the WAL now.
+    * Write out the partially full non-root pages.
+    *
+    * Keep in mind that flush can build a new root.
      */
-   if (RelationNeedsWAL(index))
+   pagestate = leafstate;
+   while (pagestate->parent != NULL)
     {
-       log_newpage_range(index, MAIN_FORKNUM,
-                         0, RelationGetNumberOfBlocks(index),
-                         true);
+       GistSortedBuildPageState *parent;
+
+       gist_indexsortbuild_pagestate_flush(state, pagestate);
+       parent = pagestate->parent;
+       pfree(pagestate->page);
+       pfree(pagestate);
+       pagestate = parent;
     }
  
+   gist_indexsortbuild_flush_ready_pages(state);
+
+   /* Write out the root */
+   PageSetLSN(pagestate->page, GistBuildLSN);
+   smgrwrite(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+             pagestate->page, true);
+   if (RelationNeedsWAL(state->indexrel))
+       log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+                   pagestate->page, true);
+
+   pfree(pagestate->page);
+   pfree(pagestate);
+}
+
+/*
+ * Add tuple to a page. If the page is full, write it out and re-initialize
+ * a new page first.
+ */
+static void
+gist_indexsortbuild_pagestate_add(GISTBuildState *state,
+                                 GistSortedBuildPageState *pagestate,
+                                 IndexTuple itup)
+{
+   Size        sizeNeeded;
+
+   /* Does the tuple fit? If not, flush */
+   sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace;
+   if (PageGetFreeSpace(pagestate->page) < sizeNeeded)
+       gist_indexsortbuild_pagestate_flush(state, pagestate);
+
+   gistfillbuffer(pagestate->page, &itup, 1, InvalidOffsetNumber);
+}
+
+static void
+gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
+                                   GistSortedBuildPageState *pagestate)
+{
+   GistSortedBuildPageState *parent;
+   IndexTuple *itvec;
+   IndexTuple  union_tuple;
+   int         vect_len;
+   bool        isleaf;
+   BlockNumber blkno;
+   MemoryContext oldCtx;
+
+   /* check once per page */
+   CHECK_FOR_INTERRUPTS();
+
+   if (state->ready_num_pages == XLR_MAX_BLOCK_ID)
+       gist_indexsortbuild_flush_ready_pages(state);
+
     /*
-    * Return statistics
+    * The page is now complete. Assign a block number to it, and add it to
+    * the list of finished pages. (We don't write it out immediately, because
+    * we want to WAL-log the pages in batches.)
      */
-   result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+   blkno = state->pages_allocated++;
+   state->ready_blknos[state->ready_num_pages] = blkno;
+   state->ready_pages[state->ready_num_pages] = pagestate->page;
+   state->ready_num_pages++;
  
-   result->heap_tuples = reltuples;
-   result->index_tuples = (double) buildstate.indtuples;
+   isleaf = GistPageIsLeaf(pagestate->page);
  
-   return result;
+   /*
+    * Form a downlink tuple to represent all the tuples on the page.
+    */
+   oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
+   itvec = gistextractpage(pagestate->page, &vect_len);
+   union_tuple = gistunion(state->indexrel, itvec, vect_len,
+                           state->giststate);
+   ItemPointerSetBlockNumber(&(union_tuple->t_tid), blkno);
+   MemoryContextSwitchTo(oldCtx);
+
+   /*
+    * Insert the downlink to the parent page. If this was the root, create a
+    * new page as the parent, which becomes the new root.
+    */
+   parent = pagestate->parent;
+   if (parent == NULL)
+   {
+       parent = palloc(sizeof(GistSortedBuildPageState));
+       parent->page = (Page) palloc(BLCKSZ);
+       parent->parent = NULL;
+       gistinitpage(parent->page, 0);
+
+       pagestate->parent = parent;
+   }
+   gist_indexsortbuild_pagestate_add(state, parent, union_tuple);
+
+   /* Re-initialize the page buffer for next page on this level. */
+   pagestate->page = palloc(BLCKSZ);
+   gistinitpage(pagestate->page, isleaf ? F_LEAF : 0);
+}
+
+static void
+gist_indexsortbuild_flush_ready_pages(GISTBuildState *state)
+{
+   if (state->ready_num_pages == 0)
+       return;
+
+   for (int i = 0; i < state->ready_num_pages; i++)
+   {
+       Page        page = state->ready_pages[i];
+
+       /* Currently, the blocks must be buffered in order. */
+       if (state->ready_blknos[i] != state->pages_written)
+           elog(ERROR, "unexpected block number to flush GiST sorting build");
+
+       PageSetLSN(page, GistBuildLSN);
+
+       smgrextend(state->indexrel->rd_smgr,
+                  MAIN_FORKNUM,
+                  state->pages_written++,
+                  page,
+                  true);
+   }
+
+   if (RelationNeedsWAL(state->indexrel))
+       log_newpages(&state->indexrel->rd_node, MAIN_FORKNUM, state->ready_num_pages,
+                    state->ready_blknos, state->ready_pages, true);
+
+   for (int i = 0; i < state->ready_num_pages; i++)
+       pfree(state->ready_pages[i]);
+
+   state->ready_num_pages = 0;
  }
  
+
+/*-------------------------------------------------------------------------
+ * Routines for non-sorted build
+ *-------------------------------------------------------------------------
+ */
+
  /*
   * Attempt to switch to buffering mode.
   *
@@ -375,7 +717,7 @@ gistInitBuffering(GISTBuildState *buildstate)
     if (levelStep <= 0)
     {
         elog(DEBUG1, "failed to switch to buffered GiST build");
-       buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
+       buildstate->buildMode = GIST_BUFFERING_DISABLED;
         return;
     }
  
@@ -392,7 +734,7 @@ gistInitBuffering(GISTBuildState *buildstate)
  
     gistInitParentMap(buildstate);
  
-   buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;
+   buildstate->buildMode = GIST_BUFFERING_ACTIVE;
  
     elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
          levelStep, pagesPerBuffer);
@@ -453,10 +795,12 @@ gistBuildCallback(Relation index,
     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
  
     /* form an index tuple and point it at the heap tuple */
-   itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
+   itup = gistFormTuple(buildstate->giststate, index,
+                        values, isnull,
+                        true);
     itup->t_tid = *tid;
  
-   if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
+   if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
     {
         /* We have buffers, so use them. */
         gistBufferingBuildInsert(buildstate, itup);
@@ -478,7 +822,7 @@ gistBuildCallback(Relation index,
     MemoryContextSwitchTo(oldCtx);
     MemoryContextReset(buildstate->giststate->tempCxt);
  
-   if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
+   if (buildstate->buildMode == GIST_BUFFERING_ACTIVE &&
         buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
     {
         /* Adjust the target buffer size now */
@@ -493,10 +837,10 @@ gistBuildCallback(Relation index,
      * To avoid excessive calls to smgrnblocks(), only check this every
      * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
      */
-   if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
+   if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
          buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
          effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
-       (buildstate->bufferingMode == GIST_BUFFERING_STATS &&
+       (buildstate->buildMode == GIST_BUFFERING_STATS &&
          buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
     {
         /*
diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c

index 9ace64c3c4a9fc1a96c39c9617cca9de482990f1..27d9c0f77c3065fe93f38c8cf1c1684e967dbcb3 100644 (file)
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@@ -24,6 +24,7 @@
  #include "utils/builtins.h"
  #include "utils/float.h"
  #include "utils/geo_decls.h"
+#include "utils/sortsupport.h"
  
  
  static bool gist_box_leaf_consistent(BOX *key, BOX *query,
@@ -31,6 +32,15 @@ static bool gist_box_leaf_consistent(BOX *key, BOX *query,
  static bool rtree_internal_consistent(BOX *key, BOX *query,
                                       StrategyNumber strategy);
  
+static uint64 point_zorder_internal(float4 x, float4 y);
+static uint64 part_bits32_by2(uint32 x);
+static uint32 ieee_float32_to_uint32(float f);
+static int gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup);
+static Datum gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup);
+static int gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup);
+static bool gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup);
+
+
  /* Minimum accepted ratio of split */
  #define LIMIT_RATIO 0.3
  
@@ -1540,3 +1550,222 @@ gist_poly_distance(PG_FUNCTION_ARGS)
  
     PG_RETURN_FLOAT8(distance);
  }
+
+/*
+ * Z-order routines for fast index build
+ */
+
+/*
+ * Compute Z-value of a point
+ *
+ * Z-order (also known as Morton Code) maps a two-dimensional point to a
+ * single integer, in a way that preserves locality. Points that are close in
+ * the two-dimensional space are mapped to integers that are not far from each
+ * other. We do that by interleaving the bits in the X and Y components.
+ *
+ * Morton Code is normally defined only for integers, but the X and Y values
+ * of a point are floating point. We expect floats to be in IEEE format.
+ */
+static uint64
+point_zorder_internal(float4 x, float4 y)
+{
+   uint32      ix = ieee_float32_to_uint32(x);
+   uint32      iy = ieee_float32_to_uint32(y);
+
+   /* Interleave the bits: X lands in the even positions, Y in the odd ones */
+   return part_bits32_by2(ix) | (part_bits32_by2(iy) << 1);
+}
+
+/* Spread the 32 bits of x over the even bit positions, zeroes in between */
+static uint64
+part_bits32_by2(uint32 x)
+{
+   uint64      n = x;
+
+   n = (n | (n << 16)) & UINT64CONST(0x0000FFFF0000FFFF);
+   n = (n | (n << 8)) & UINT64CONST(0x00FF00FF00FF00FF);
+   n = (n | (n << 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
+   n = (n | (n << 2)) & UINT64CONST(0x3333333333333333);
+   n = (n | (n << 1)) & UINT64CONST(0x5555555555555555);
+
+   return n;
+}
+
+/*
+ * Convert a 32-bit IEEE float to uint32 in a way that preserves the ordering
+ */
+static uint32
+ieee_float32_to_uint32(float f)
+{
+   /*----
+    *
+    * IEEE 754 floating point format
+    * ------------------------------
+    *
+    * IEEE 754 floating point numbers have this format:
+    *
+    *   exponent (8 bits)
+    *   |
+    * s eeeeeeee mmmmmmmmmmmmmmmmmmmmmmm
+    * |          |
+    * sign       mantissa (23 bits)
+    *
+    * Infinity has all bits in the exponent set and the mantissa is all
+    * zeros. Negative infinity is the same but with the sign bit set.
+    *
+    * NaNs are represented with all bits in the exponent set, and at least
+    * one bit in the mantissa also set. The mantissa bits can be used to
+    * distinguish different kinds of NaNs.
+    *
+    * The IEEE format has the nice property that when you take the bit
+    * representation and interpret it as an integer, the order is preserved,
+    * except for the sign. That holds for the +-Infinity values too.
+    *
+    * Mapping to uint32
+    * -----------------
+    *
+    * In order to have a smooth transition from negative to positive numbers,
+    * we map floats to unsigned integers like this:
+    *
+    * x < 0 to range 0-7FFFFFFF (negative zero maps to 7FFFFFFF)
+    * x = 0 to value 80000000 (positive zero)
+    * x > 0 to range 80000001-FFFFFFFF
+    *
+    * We don't care to distinguish different kinds of NaNs, so they are all
+    * mapped to the same arbitrary value, FFFFFFFF. Because of the IEEE bit
+    * representation of NaNs, there aren't any non-NaN values that would be
+    * mapped to FFFFFFFF. In fact, there is a range of unused values on both
+    * ends of the uint32 space.
+    */
+   if (isnan(f))
+       return 0xFFFFFFFF;
+   else
+   {
+       union
+       {
+           float       f;
+           uint32      i;
+       }           u;
+
+       u.f = f;
+
+       /* Check the sign bit */
+       if ((u.i & 0x80000000) != 0)
+       {
+           /*
+            * Map the negative value to range 0-7FFFFFFF. This flips the sign
+            * bit to 0 in the same instruction.
+            */
+           Assert(f <= 0);     /* can be -0 */
+           u.i ^= 0xFFFFFFFF;
+       }
+       else
+       {
+           /* Map the positive value (or 0) to range 80000000-FFFFFFFF */
+           u.i |= 0x80000000;
+       }
+
+       return u.i;
+   }
+}
+
+/*
+ * Compare the Z-order of the low corners of two box datums
+ */
+static int
+gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup)
+{
+   Point      *p1 = &(DatumGetBoxP(a)->low);
+   Point      *p2 = &(DatumGetBoxP(b)->low);
+   uint64      z1;
+   uint64      z2;
+
+   /*
+    * Do a quick check for equality first. It's not clear if this is worth it
+    * in general, but certainly is when used as tie-breaker with abbreviated
+    * keys.
+    */
+   if (p1->x == p2->x && p1->y == p2->y)
+       return 0;
+
+   z1 = point_zorder_internal(p1->x, p1->y);
+   z2 = point_zorder_internal(p2->x, p2->y);
+   if (z1 > z2)
+       return 1;
+   else if (z1 < z2)
+       return -1;
+   else
+       return 0;
+}
+
+/*
+ * Abbreviated version of Z-order comparison
+ *
+ * The abbreviated format is a Z-order value computed from the two 32-bit
+ * floats. If SIZEOF_DATUM == 8, the 64-bit Z-order value fits fully in the
+ * abbreviated Datum, otherwise use its most significant bits.
+ */
+static Datum
+gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
+{
+   Point      *p = &(DatumGetBoxP(original)->low);
+   uint64      z;
+
+   z = point_zorder_internal(p->x, p->y);
+
+#if SIZEOF_DATUM == 8
+   return (Datum) z;
+#else
+   return (Datum) (z >> 32);
+#endif
+}
+
+static int
+gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup)
+{
+   /*
+    * Compare the pre-computed Z-orders as unsigned integers. Datum is a
+    * typedef for 'uintptr_t', so no casting is required.
+    */
+   if (z1 > z2)
+       return 1;
+   else if (z1 < z2)
+       return -1;
+   else
+       return 0;
+}
+
+/*
+ * We never consider aborting the abbreviation.
+ *
+ * On 64-bit systems, the abbreviation is not lossy so it is always
+ * worthwhile. (Perhaps it's not on 32-bit systems, but we don't bother
+ * with logic to decide.)
+ */
+static bool
+gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+   return false;
+}
+
+/*
+ * Sort support for fast GiST index build: installs the Z-order comparators.
+ */
+Datum
+gist_point_sortsupport(PG_FUNCTION_ARGS)
+{
+   SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+   if (ssup->abbreviate)
+   {
+       ssup->comparator = gist_bbox_zorder_cmp_abbrev;
+       ssup->abbrev_converter = gist_bbox_zorder_abbrev_convert;
+       ssup->abbrev_abort = gist_bbox_zorder_abbrev_abort;
+       ssup->abbrev_full_comparator = gist_bbox_zorder_cmp;    /* tie-breaker */
+   }
+   else
+   {
+       ssup->comparator = gist_bbox_zorder_cmp;
+   }
+   PG_RETURN_VOID();
+}
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c

index 0516059e3ddc9d6b7705e6eb3f580e1054f863b7..615b5ade23310cdcf79959bb697f31e12467e4d0 100644 (file)
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -572,12 +572,31 @@ gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
  
  IndexTuple
  gistFormTuple(GISTSTATE *giststate, Relation r,
-             Datum attdata[], bool isnull[], bool isleaf)
+             Datum *attdata, bool *isnull, bool isleaf)
  {
     Datum       compatt[INDEX_MAX_KEYS];
-   int         i;
     IndexTuple  res;
  
+   gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
+
+   res = index_form_tuple(isleaf ? giststate->leafTupdesc :
+                          giststate->nonLeafTupdesc,
+                          compatt, isnull);
+
+   /*
+    * The offset number on tuples on internal pages is unused. For historical
+    * reasons, it is set to 0xffff.
+    */
+   ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
+   return res;
+}
+
+void
+gistCompressValues(GISTSTATE *giststate, Relation r,
+                  Datum *attdata, bool *isnull, bool isleaf, Datum *compatt)
+{
+   int         i;
+
     /*
      * Call the compress method on each attribute.
      */
@@ -617,17 +636,6 @@ gistFormTuple(GISTSTATE *giststate, Relation r,
                 compatt[i] = attdata[i];
         }
     }
-
-   res = index_form_tuple(isleaf ? giststate->leafTupdesc :
-                          giststate->nonLeafTupdesc,
-                          compatt, isnull);
-
-   /*
-    * The offset number on tuples on internal pages is unused. For historical
-    * reasons, it is set to 0xffff.
-    */
-   ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
-   return res;
  }
  
  /*
@@ -745,14 +753,11 @@ gistpenalty(GISTSTATE *giststate, int attno,
   * Initialize a new index page
   */
  void
-GISTInitBuffer(Buffer b, uint32 f)
+gistinitpage(Page page, uint32 f)
  {
     GISTPageOpaque opaque;
-   Page        page;
-   Size        pageSize;
+   Size        pageSize = BLCKSZ;
  
-   pageSize = BufferGetPageSize(b);
-   page = BufferGetPage(b);
     PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
  
     opaque = GistPageGetOpaque(page);
@@ -763,6 +768,18 @@ GISTInitBuffer(Buffer b, uint32 f)
     opaque->gist_page_id = GIST_PAGE_ID;
  }
  
+/*
+ * Initialize a new index buffer
+ */
+void
+GISTInitBuffer(Buffer b, uint32 f)
+{
+   Page        page;
+
+   page = BufferGetPage(b);
+   gistinitpage(page, f);
+}
+
  /*
   * Verify that a freshly-read page looks sane.
   */
diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c

index 2b9ab693be18871c9343d277c8ae7a617db57f74..8a14620fab2785f1db95028fbbfca6e9d8e73f3e 100644 (file)
--- a/src/backend/access/gist/gistvalidate.c
+++ b/src/backend/access/gist/gistvalidate.c
@@ -143,6 +143,10 @@ gistvalidate(Oid opclassoid)
             case GIST_OPTIONS_PROC:
                 ok = check_amoptsproc_signature(procform->amproc);
                 break;
+           case GIST_SORTSUPPORT_PROC:
+               ok = check_amproc_signature(procform->amproc, VOIDOID, true,
+                                           1, 1, INTERNALOID);
+               break;
             default:
                 ereport(INFO,
                         (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -263,7 +267,7 @@ gistvalidate(Oid opclassoid)
             continue;           /* got it */
         if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
             i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
-           i == GIST_OPTIONS_PROC)
+           i == GIST_OPTIONS_PROC  || i == GIST_SORTSUPPORT_PROC)
             continue;           /* optional methods */
         ereport(INFO,
                 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c

index c526bb19281e05d9957071cd70219d80a3155ca9..1f0e4e01e69b1ea16da8545bdd56782fb86c8f3a 100644 (file)
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -1019,6 +1019,63 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
     return recptr;
  }
  
+/*
+ * Like log_newpage(), but allows logging multiple pages in one operation.
+ * It is more efficient than calling log_newpage() for each page separately,
+ * because we can write multiple pages in a single WAL record.
+ */
+void
+log_newpages(RelFileNode *rnode, ForkNumber forkNum, int num_pages,
+            BlockNumber *blknos, Page *pages, bool page_std)
+{
+   int         flags;
+   XLogRecPtr  recptr;
+   int         i;
+   int         j;
+
+   flags = REGBUF_FORCE_IMAGE;
+   if (page_std)
+       flags |= REGBUF_STANDARD;
+
+   /*
+    * Iterate over all the pages in batches of up to XLR_MAX_BLOCK_ID pages,
+    * writing a single WAL record per batch. Reserve room for the block
+    * references of a full batch (ids 0 .. XLR_MAX_BLOCK_ID - 1) up front.
+    */
+   XLogEnsureRecordSpace(XLR_MAX_BLOCK_ID - 1, 0);
+
+   i = 0;
+   while (i < num_pages)
+   {
+       int         batch_start = i;
+       int         nbatch;
+
+       XLogBeginInsert();
+
+       nbatch = 0;
+       while (nbatch < XLR_MAX_BLOCK_ID && i < num_pages)
+       {
+           XLogRegisterBlock(nbatch, rnode, forkNum, blknos[i], pages[i], flags);
+           i++;
+           nbatch++;
+       }
+
+       recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
+
+       for (j = batch_start; j < i; j++)
+       {
+           /*
+            * The page may be uninitialized. If so, we can't set the LSN because that
+            * would corrupt the page.
+            */
+           if (!PageIsNew(pages[j]))
+           {
+               PageSetLSN(pages[j], recptr);
+           }
+       }
+   }
+}
+
  /*
   * Write a WAL record containing a full image of a page.
   *
diff --git a/src/backend/utils/sort/sortsupport.c b/src/backend/utils/sort/sortsupport.c

index fcfe6e831a19b5c73c6a5b31311748029d7bbc05..c436fbb4ce1e877ad7b9708e416e0d81ca00bc18 100644 (file)
--- a/src/backend/utils/sort/sortsupport.c
+++ b/src/backend/utils/sort/sortsupport.c
@@ -15,6 +15,7 @@
  
  #include "postgres.h"
  
+#include "access/gist.h"
  #include "access/nbtree.h"
  #include "catalog/pg_am.h"
  #include "fmgr.h"
@@ -175,3 +176,36 @@ PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy,
  
     FinishSortSupportFunction(opfamily, opcintype, ssup);
  }
+
+/*
+ * Fill in SortSupport given a GiST index relation
+ *
+ * Caller must previously have zeroed the SortSupportData structure and then
+ * filled in ssup_cxt, ssup_attno, ssup_collation, and ssup_nulls_first.  This
+ * will fill in ssup_reverse (always false for GiST index build), as well as
+ * the comparator function pointer.
+ */
+void
+PrepareSortSupportFromGistIndexRel(Relation indexRel, SortSupport ssup)
+{
+   Oid         opfamily = indexRel->rd_opfamily[ssup->ssup_attno - 1];
+   Oid         opcintype = indexRel->rd_opcintype[ssup->ssup_attno - 1];
+   Oid         sortSupportFunction;
+
+   Assert(ssup->comparator == NULL);
+
+   if (indexRel->rd_rel->relam != GIST_AM_OID)
+       elog(ERROR, "unexpected non-gist AM: %u", indexRel->rd_rel->relam);
+   ssup->ssup_reverse = false;
+
+   /*
+    * Look up the sort support function. This is simpler than for B-tree
+    * indexes because we don't support the old-style btree comparators.
+    */
+   sortSupportFunction = get_opfamily_proc(opfamily, opcintype, opcintype,
+                                           GIST_SORTSUPPORT_PROC);
+   if (!OidIsValid(sortSupportFunction))
+       elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
+            GIST_SORTSUPPORT_PROC, opcintype, opcintype, opfamily);
+   OidFunctionCall1(sortSupportFunction, PointerGetDatum(ssup));
+}
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index cbda911f465288084a3ec143b33e186be3c528a2..d0cc04a878a1485d3b92a4fd8d865fd5ea36e5df 100644 (file)
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -1167,6 +1167,63 @@ tuplesort_begin_index_hash(Relation heapRel,
     return state;
  }
  
+Tuplesortstate *
+tuplesort_begin_index_gist(Relation heapRel,
+                          Relation indexRel,
+                          int workMem,
+                          SortCoordinate coordinate,
+                          bool randomAccess)
+{
+   Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+                                                  randomAccess);
+   MemoryContext oldcontext;
+   int         i;
+
+   oldcontext = MemoryContextSwitchTo(state->sortcontext);
+
+#ifdef TRACE_SORT
+   if (trace_sort)
+       elog(LOG,
+            "begin index sort: workMem = %d, randomAccess = %c",
+            workMem, randomAccess ? 't' : 'f');
+#endif
+
+   state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+   state->comparetup = comparetup_index_btree; /* NOTE(review): btree comparator reused for GiST - confirm intended */
+   state->copytup = copytup_index;
+   state->writetup = writetup_index;
+   state->readtup = readtup_index;
+
+   state->heapRel = heapRel;
+   state->indexRel = indexRel;
+
+   /* Prepare SortSupport data for each column */
+   state->sortKeys = (SortSupport) palloc0(state->nKeys *
+                                           sizeof(SortSupportData));
+
+   for (i = 0; i < state->nKeys; i++)
+   {
+       SortSupport sortKey = state->sortKeys + i;
+
+       sortKey->ssup_cxt = CurrentMemoryContext;
+       sortKey->ssup_collation = indexRel->rd_indcollation[i];
+       sortKey->ssup_nulls_first = false;
+       sortKey->ssup_attno = i + 1;
+       /* Abbreviation is only offered for the leading column */
+       sortKey->abbreviate = (i == 0);
+
+       AssertState(sortKey->ssup_attno != 0);
+
+       /* Look for a sort support function */
+       PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
+   }
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return state;
+}
+
  Tuplesortstate *
  tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
                       bool nullsFirstFlag, int workMem,
diff --git a/src/include/access/gist.h b/src/include/access/gist.h

index 4994351697c3456809b6f6cc7cf0398a00648614..4f6dae9a76b0cdd4e2f37263664cbbbadcf51ce5 100644 (file)
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -37,7 +37,8 @@
  #define GIST_DISTANCE_PROC             8
  #define GIST_FETCH_PROC                    9
  #define GIST_OPTIONS_PROC              10
-#define GISTNProcs                     10
+#define GIST_SORTSUPPORT_PROC          11
+#define GISTNProcs                 11
  
  /*
   * Page opaque data in a GiST index page.
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h

index 02e985549f635f61adc30de85fd7209349fa626a..b68c01a5f246ae8ca4fa166ff40bc8ffae38c9aa 100644 (file)
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -501,12 +501,15 @@ extern IndexTuple gistgetadjusted(Relation r,
                                   GISTSTATE *giststate);
  extern IndexTuple gistFormTuple(GISTSTATE *giststate,
                                 Relation r, Datum *attdata, bool *isnull, bool isleaf);
+extern void gistCompressValues(GISTSTATE *giststate, Relation r,
+                              Datum *attdata, bool *isnull, bool isleaf, Datum *compatt);
  
  extern OffsetNumber gistchoose(Relation r, Page p,
                                IndexTuple it,
                                GISTSTATE *giststate);
  
  extern void GISTInitBuffer(Buffer b, uint32 f);
+extern void gistinitpage(Page page, uint32 f);
  extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
                            Datum k, Relation r, Page pg, OffsetNumber o,
                            bool l, bool isNull);
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h

index 63df25ae90fefbf54de49a32ed4f6e4439f685b8..4ba2c56be60be4daef20ec4dfebaa58a05eb459a 100644 (file)
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -54,6 +54,8 @@ extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
  
  extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
                               BlockNumber blk, char *page, bool page_std);
+extern void log_newpages(RelFileNode *rnode, ForkNumber forkNum, int num_pages,
+                        BlockNumber *blknos, char **pages, bool page_std);
  extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
  extern void log_newpage_range(Relation rel, ForkNumber forkNum,
                               BlockNumber startblk, BlockNumber endblk, bool page_std);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 0bbe0a122afd3c0cd79579023fe82e09e3642e90..06ddb1f16b43083403d645c7d0b2d8ac4dbf52e4 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,7 @@
   */
  
  /*                         yyyymmddN */
+/* FIXME: bump this before pushing! */
  #define CATALOG_VERSION_NO 202009031
  
  #endif
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat

index 37b580883fcb90cb5807fa55332bde14fe086be4..a8e0c4ff8a5279614c276e8557b4ea507f3d0270 100644 (file)
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -480,6 +480,8 @@
    amproc => 'gist_point_distance' },
  { amprocfamily => 'gist/point_ops', amproclefttype => 'point',
    amprocrighttype => 'point', amprocnum => '9', amproc => 'gist_point_fetch' },
+{ amprocfamily => 'gist/point_ops', amproclefttype => 'point',
+  amprocrighttype => 'point', amprocnum => '11', amproc => 'gist_point_sortsupport' },
  { amprocfamily => 'gist/box_ops', amproclefttype => 'box',
    amprocrighttype => 'box', amprocnum => '1', amproc => 'gist_box_consistent' },
  { amprocfamily => 'gist/box_ops', amproclefttype => 'box',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat

index 687509ba9265b04f0315602c902db8d6585a0841..96d7efd4270cf70a68f88be504b59c1e1206ff65 100644 (file)
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8062,6 +8062,9 @@
    proname => 'gist_poly_distance', prorettype => 'float8',
    proargtypes => 'internal polygon int2 oid internal',
    prosrc => 'gist_poly_distance' },
+{ oid => '3435', descr => 'sort support',
+  proname => 'gist_point_sortsupport', prorettype => 'void',
+  proargtypes => 'internal', prosrc => 'gist_point_sortsupport' },
  
  # GIN array support
  { oid => '2743', descr => 'GIN array support',
diff --git a/src/include/utils/sortsupport.h b/src/include/utils/sortsupport.h

index 264aec820b1cc34a1a2eb4b766ca22ed4bbd153f..fb262c6e8d42ffbe78ccf7b8e2cdc940cacd189b 100644 (file)
--- a/src/include/utils/sortsupport.h
+++ b/src/include/utils/sortsupport.h
@@ -272,5 +272,6 @@ extern void PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup);
  extern void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup);
  extern void PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy,
                                            SortSupport ssup);
+extern void PrepareSortSupportFromGistIndexRel(Relation indexRel, SortSupport ssup);
  
  #endif                         /* SORTSUPPORT_H */
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 9e76666fe9483701e7cfb0acf0c0fabd06b7f720..c69b36e209ad6774f3140e6f0e1f5693a5ae1e01 100644 (file)
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -217,6 +217,10 @@ extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel,
                                                   uint32 max_buckets,
                                                   int workMem, SortCoordinate coordinate,
                                                   bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
+                                                 Relation indexRel,
+                                                 int workMem, SortCoordinate coordinate,
+                                                 bool randomAccess);
  extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
                                              Oid sortOperator, Oid sortCollation,
                                              bool nullsFirstFlag,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out

index 64c0c668593e8d1c9b75577f74cca00bfbe53a14..6ace7662ee1f7c9105772708e7c334de5b7d0da6 100644 (file)
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -523,8 +523,8 @@ SELECT * FROM point_tbl ORDER BY f1 <-> '0,1';
  SELECT * FROM point_tbl ORDER BY f1 <-> '0,1';
          f1         
  -------------------
- (0,0)
   (1e-300,-1e-300)
+ (0,0)
   (-3,4)
   (-10,0)
   (10,10)
@@ -561,8 +561,8 @@ SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1';
  SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1';
          f1         
  -------------------
- (0,0)
   (1e-300,-1e-300)
+ (0,0)
   (-3,4)
   (-10,0)
   (10,10)
@@ -584,8 +584,8 @@ SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0
  SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1';
          f1        
  ------------------
- (0,0)
   (1e-300,-1e-300)
+ (0,0)
   (-3,4)
   (-10,0)
   (10,10)
author	Heikki Linnakangas <[email protected]>
	Thu, 17 Sep 2020 08:33:40 +0000 (11:33 +0300)
committer	Heikki Linnakangas <[email protected]>
	Thu, 17 Sep 2020 08:33:40 +0000 (11:33 +0300)
doc/src/sgml/gist.sgml		patch \| blob \| blame \| history
src/backend/access/gist/gistbuild.c		patch \| blob \| blame \| history
src/backend/access/gist/gistproc.c		patch \| blob \| blame \| history
src/backend/access/gist/gistutil.c		patch \| blob \| blame \| history
src/backend/access/gist/gistvalidate.c		patch \| blob \| blame \| history
src/backend/access/transam/xloginsert.c		patch \| blob \| blame \| history
src/backend/utils/sort/sortsupport.c		patch \| blob \| blame \| history
src/backend/utils/sort/tuplesort.c		patch \| blob \| blame \| history
src/include/access/gist.h		patch \| blob \| blame \| history
src/include/access/gist_private.h		patch \| blob \| blame \| history
src/include/access/xloginsert.h		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_amproc.dat		patch \| blob \| blame \| history
src/include/catalog/pg_proc.dat		patch \| blob \| blame \| history
src/include/utils/sortsupport.h		patch \| blob \| blame \| history
src/include/utils/tuplesort.h		patch \| blob \| blame \| history
src/test/regress/expected/create_index.out		patch \| blob \| blame \| history