An attempt at making insertion work.
author Robert Haas <[email protected]>
Tue, 24 Jul 2012 16:30:41 +0000 (12:30 -0400)
committer Robert Haas <[email protected]>
Tue, 24 Jul 2012 16:31:53 +0000 (12:31 -0400)
src/backend/utils/hash/chash.c

index cb370dfcae7f929861dab00826d611c9f9f129f9..8caf516f9b9d67ebc6109dac2e2ef0967a62a769 100644 (file)
@@ -121,8 +121,12 @@ typedef struct
  */
 typedef struct
 {
-       CHashPtr        next;                           /* arena offset of next element */
-       uint32          hashcode;                       /* hash(key) */
+       CHashPtr                next;                   /* arena offset of next element */
+       union
+       {
+               uint32          hashcode;               /* hash(key) */
+               CHashPtr        gcnext;                 /* arena offset of next garbage item */
+       } un;
 } CHashNode;
 
 #define SizeOfCHashNode                MAXALIGN(sizeof(CHashNode))
@@ -176,6 +180,11 @@ typedef struct CHashTableData
                MyProc->chash_bucket = 0; \
        } while (0)
 
+/* Function prototypes. */
+static CHashPtr CHashAllocate(CHashTable table);
+static bool CHashRemoveMarked(CHashTable table, uint32 bucket,
+                                 CHashPtr *cp, volatile CHashPtr *p);
+
 /*
  * First stage of CHashTable initialization.  We fill in all the constants
  * here, but not the pointers.
@@ -335,7 +344,7 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc)
                CHashBucket        *f = &table->freelist[i % table->nfreelists];
                CHashNode          *n = CHashTableGetNode(table, i);
 
-               n->next = f->head;
+               n->un.gcnext = f->head;
                f->head = i;
        }
 
@@ -361,7 +370,7 @@ CHashSearch(CHashTable table, void *entry)
        uint32  hashcode = hash_any(entry, table->desc.key_size);
        uint32  bucket = hashcode & table->bucket_mask;
        CHashPtr        c;
-       CHashNode  *n;
+       volatile CHashNode  *n;
        bool    found = false;
 
        /* Suppress garbage collection for target bucket. */
@@ -377,18 +386,12 @@ CHashSearch(CHashTable table, void *entry)
                if (c == InvalidCHashPtr)
                        break;
 
-               /*
-                * A dependency barrier is needed after reading a pointer value and
-                * before dereferencing it.  c is, in effect, a pointer which we're
-                * about to deference.
-                */
-               pg_read_barrier_depends();
-
                /* Compare current node by hashcode, then by memcmp. */
                n = CHashTableGetNode(table, CHashPtrGetOffset(c));
-               if (n->hashcode == hashcode)
+               pg_read_barrier_depends();
+               if (n->un.hashcode == hashcode)
                        cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
-               else if (n->hashcode > hashcode)
+               else if (n->un.hashcode > hashcode)
                        cmp = 1;
                else
                        cmp = -1;
@@ -424,9 +427,6 @@ CHashSearch(CHashTable table, void *entry)
                }
        }
 
-       /* Done scanning bucket. */
-       CHashTableUnsuppressGC();
-
        /* Return result to caller. */
        return found;
 }
@@ -441,6 +441,109 @@ CHashSearch(CHashTable table, void *entry)
 bool
 CHashInsert(CHashTable table, void *entry)
 {
+       uint32  hashcode = hash_any(entry, table->desc.key_size);
+       uint32  bucket = hashcode & table->bucket_mask;
+       CHashPtr        new;
+       CHashPtr        c;
+       volatile CHashPtr   *p;
+       volatile CHashNode  *n;
+       volatile CHashNode  *nnew;
+       bool            found = false;
+
+       /*
+        * Allocate and initialize a new entry, on the assumption that the insert
+        * will succeed.  If it ends up failing, we must put this back on some
+        * free list, lest it leak.  XXX: the duplicate-key path doesn't do so.
+        */
+       new = CHashAllocate(table);
+       nnew = CHashTableGetNode(table, new);
+       nnew->un.hashcode = hashcode;
+       memcpy(CHashNodeGetItem(nnew), entry, table->desc.element_size);
+
+       /* Suppress garbage collection for target bucket. */
+       CHashTableSuppressGC(table, bucket);
+
+       /* Scan bucket for the key, or for the position where it belongs. */
+retry:
+       p = &table->bucket[bucket].head;
+       c = *p;
+       for (;;)
+       {
+               int             cmp;
+
+               /*
+                * We can't safely update a delete-marked pointer, so remove any
+                * such pointers we find from the bucket chain.  Sometimes, concurrent
+                * activity may force us to restart the whole scan, but that should
+                * be rare.
+                */
+               if (CHashPtrIsMarked(c) && CHashRemoveMarked(table, bucket, &c, p))
+                       goto retry;
+
+               /* If we reach the end of the bucket chain, stop. */
+               if (c == InvalidCHashPtr)
+                       break;
+
+               /* Compare current node by hashcode, then by memcmp. */
+               n = CHashTableGetNode(table, CHashPtrGetOffset(c));
+               pg_read_barrier_depends();
+               if (n->un.hashcode == hashcode)
+                       cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
+               else if (n->un.hashcode > hashcode)
+                       cmp = 1;
+               else
+                       cmp = -1;
+
+               /* If we found the key, or passed where it should be, we're done. */
+               if (cmp >= 0)
+               {
+                       found = (cmp == 0);
+                       break;
+               }
+
+               /* Move to next node. */
+               p = &n->next;
+               c = *p;
+       }
+
+       if (!found)
+       {
+               bool    fail = false;
+
+               /* Key absent: link new node ahead of c, if *p still equals c. */
+               nnew->next = c;
+               SpinLockAcquire(&table->bucket[bucket].mutex);
+               if (*p == c)
+                       *p = new;
+               else
+                       fail = true;
+               SpinLockRelease(&table->bucket[bucket].mutex);
+
+               /*
+                * If we failed, it means that somebody concurrently inserted or
+                * deleted an element.  The correct insertion point might have changed,
+                * or the key we're trying to insert might now be present when it
+                * wasn't before, so we'll have to search the bucket chain anew.
+                */
+               if (fail)
+                       goto retry;
+       }
+       else
+       {
+               /*
+                * If we did find the key, copy the non-key portion of the existing
+                * element back into the caller's entry.
+                */
+               memcpy(((char *) entry) + table->desc.key_size,
+                          CHashNodeGetItem(n) + table->desc.key_size,
+                          table->desc.element_size - table->desc.key_size);
+       }
+
+       /* Done scanning bucket. */
+       CHashTableUnsuppressGC();
+
+       /* The insert succeeded if and only if no duplicate was found. */
+       return !found;
 }
 
 /*
@@ -503,6 +606,7 @@ CHashAllocate(CHashTable table)
                if (vtable->garbage[table->gc_next].head != InvalidCHashPtr)
                {
                        volatile CHashBucket *b = &vtable->freelist[f_current];
+                       volatile CHashNode  *n;
                        uint32          i;
                        uint64          chash_bucket;
 
@@ -515,6 +619,10 @@ CHashAllocate(CHashTable table)
                        /* Anything to recycle? */
                        if (garbage != InvalidCHashPtr)
                        {
+                               CHashPtr        fhead;
+                               CHashPtr        fcurrent;
+                               CHashPtr        fnext;
+
                                /*
                                 * Be certain that the writes associated with popping the
                                 * garbage list are complete before we start checking whether
@@ -545,8 +653,101 @@ CHashAllocate(CHashTable table)
                                 */
                                pg_memory_barrier();
 
-                               /* XXX.  Recycle garbage here! */
+                               /* Remove one item from list to satisfy current allocation. */
+                               new = garbage;
+                               n = CHashTableGetNode(table, CHashPtrGetOffset(new));
+                               fhead = n->un.gcnext;
+
+                               /* If that's all there was, we're done. */
+                               if (fhead == InvalidCHashPtr)
+                                       return new;
+
+                               /* Walk list of reclaimed elements to end. */
+                               fcurrent = fhead;
+                               for (;;)
+                               {
+                                       n = CHashTableGetNode(table, CHashPtrGetOffset(fcurrent));
+                                       fnext = n->un.gcnext;
+                                       if (fnext == InvalidCHashPtr)
+                                               break;
+                                       fcurrent = fnext;
+                               }
+
+                               /* Push reclaimed elements onto home free list. */
+                               b = &vtable->freelist[f_current];
+                               SpinLockAcquire(&b->mutex);
+                               n->next = b->head;
+                               b->head = fhead;
+                               SpinLockRelease(&b->mutex);
+
+                               /* Return the element we saved for ourselves. */
+                               return new;
                        }
                }
        }
 }
+
+/*
+ * Attempt to remove marked elements from a bucket chain.
+ *
+ * p is a pointer into shared memory; it points to the CHashPtr that must be
+ * updated to remove deleted elements from the chain.
+ *
+ * cp is a pointer into backend-private memory; on entry, *cp must hold the
+ * delete-marked value that the caller previously read from *p.
+ *
+ * The return value is true if the caller must retry, or false if the caller
+ * may continue the scan.  In the latter case, *cp is updated to contain the
+ * first unmarked pointer found, from which the scan should be resumed.
+ */
+static bool
+CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp,
+                                 volatile CHashPtr *p)
+{
+       CHashPtr        c = *cp;
+       CHashPtr        cc;
+       bool            retry_needed = false;
+
+       do
+       {
+               volatile CHashNode  *n;
+
+               /*
+                * c is logically a pointer, so we must insert a dependency barrier
+                * before dereferencing it.
+                */
+               pg_read_barrier_depends();
+
+               /* Read next-pointer of deleted node. */
+               n = CHashTableGetNode(table, CHashPtrGetOffset(c));
+               cc = n->next;
+
+               /*
+                * Under the bucket's spinlock, redirect the prior next-pointer
+                * (*p) to the deleted node's successor, unless someone else has
+                * meanwhile changed *p.
+                */
+               SpinLockAcquire(&table->bucket[bucket].mutex);
+               if (*p == c)
+                       *p = cc;
+               else
+                       retry_needed = true;
+               SpinLockRelease(&table->bucket[bucket].mutex);
+
+               /*
+                * If we failed to update the logical pointer, caller must rescan
+                * the bucket.  There's no intelligent way to continue the scan,
+                * because for all we know the node that contains the pointer we're
+                * trying to update may itself be deleted by now.
+                */
+               if (retry_needed)
+                       return true;
+
+               /* The new target of the pointer may also be delete-marked, so loop. */
+               c = cc;
+       } while (CHashPtrIsMarked(c));
+
+       /* Success: hand the unmarked pointer back to the caller. */
+       *cp = c;
+       return false;
+}