First attempt at CHashSearch.
authorRobert Haas <[email protected]>
Fri, 13 Jul 2012 20:25:35 +0000 (16:25 -0400)
committerRobert Haas <[email protected]>
Tue, 24 Jul 2012 16:31:53 +0000 (12:31 -0400)
src/backend/utils/hash/chash.c
src/include/storage/proc.h

index 6f1effd839d715d46f90a77c0078b5ad71c7842d..3d9adca098f3bde3e61b0204e1a88ee3a4873864 100644 (file)
@@ -81,6 +81,9 @@
 #include "postgres.h"
 
 #include "miscadmin.h"
+#include "access/hash.h"
+#include "storage/barrier.h"
+#include "storage/proc.h"
 #include "storage/shmem.h"
 #include "storage/spin.h"
 #include "utils/chash.h"
@@ -91,8 +94,7 @@
  * used to implement concurrent deletion.
  */
 typedef uint32 CHashPtr;
-#define InvalidCHashPtr             ((uint32) -1)
-#define ReclaimCHashPtr                                ((uint32) -2)
+#define InvalidCHashPtr             ((uint32) -2)
 #define CHashPtrIsMarked(x)         ((x) & 1)
 #define CHashPtrGetOffset(x)        ((x) >> 1)
 #define CHashPtrMark(x)             ((x) | 1)
@@ -118,11 +120,11 @@ typedef struct
 typedef struct
 {
        CHashPtr        next;                           /* arena offset of next element */
-       uint32          hash_value;                     /* hash(key) */
+       uint32          hashcode;                       /* hash(key) */
 } CHashNode;
 
 #define SizeOfCHashNode                MAXALIGN(sizeof(CHashNode))
-#define CHashNodeGetItem(x)    ((void *) (((char *) x) + SizeOfCHashNode))
+#define CHashNodeGetItem(x)    (((char *) x) + SizeOfCHashNode)
 
 /*
  * CHashTableData stores all the information that we need in order to access
@@ -144,9 +146,13 @@ typedef struct CHashTableData
        CHashBucket        *bucket;                     /* array of size nbuckets */
        CHashBucket        *garbage;            /* array of size ngarbage */
        CHashBucket        *freelist;           /* array of size nfreelists */
-       void               *arena;                      /* arena */
+       char               *arena;                      /* arena */
 } CHashTableData;
 
+#define CHashTableGetNode(table, offset) \
+       (AssertMacro((offset) < (table)->arena_limit), \
+        (CHashNode *) ((table)->arena + (table)->arena_stride * (offset)))
+
 /*
  * First stage of CHashTable initialization.  We fill in all the constants
  * here, but not the pointers.
@@ -162,6 +168,8 @@ CHashBootstrap(CHashDescriptor *desc)
        memcpy(&table->desc, desc, sizeof(CHashDescriptor)); 
 
        /* Sanity checks. */
+       if (desc->id == 0)
+               elog(ERROR, "concurrent hash table id must not be zero");
        if (desc->capacity < 1 || desc->capacity > CHashMaxCapacity)
                elog(ERROR, "invalid capacity for concurrent hash");
        if (desc->key_size < 1 || desc->key_size > desc->element_size)
@@ -278,6 +286,9 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc)
        /* Arena follows the various lists. */
        table->arena = (void *) (&table->freelist[table->nfreelists]);
 
+       /* XXX. Must initialize spinlocks, set lists to empty, and then put
+        * all arena nodes on free lists. */
+
        /*
         * Copy table (with pointers now filled in) to shared memory.  This is
         * arguably unnecessary when not using EXEC_BACKEND, but we do it anyway.
@@ -286,3 +297,97 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc)
 
        return table;
 }
+
+/*
+ * Search a concurrent hash table.  entry should be a block of memory large
+ * enough to hold a complete entry, with just the key portion filled in.  If
+ * a matching entry is found, this function will fill in the rest of the entry
+ * from the data in the hash table and return true.  If not, it will return
+ * false.
+ */
+bool
+CHashSearch(CHashTable table, void *entry)
+{
+       uint32  hashcode = hash_any(entry, table->desc.key_size);
+       uint32  bucket = hashcode & table->bucket_mask;
+       CHashPtr        c;
+       CHashNode  *n;
+       bool    found = false;
+
+       /*
+        * Suppress garbage collection for target bucket.  We need a memory
+        * barrier to make sure that our bucket advertisement is committed to
+        * memory before we begin scanning the bucket.
+        */
+       MyProc->chash_bucket = ((uint64) table->desc.id) << 32 | bucket;
+       pg_memory_barrier();
+
+       /* Scan bucket. */
+       c = table->bucket[bucket].head;
+       for (;;)
+       {
+               int             cmp;
+
+               /* If we've reached the end of the bucket chain, stop. */
+               if (c == InvalidCHashPtr)
+                       break;
+
+               /*
+                * A dependency barrier is needed after reading a pointer value and
+                * before dereferencing it.  c is, in effect, a pointer which we're
+                * about to deference.
+                */
+               pg_read_barrier_depends();
+
+               /* Compare current node by hashcode, then by memcmp. */
+               n = CHashTableGetNode(table, c);
+               if (n->hashcode == hashcode)
+                       cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
+               else if (n->hashcode > hashcode)
+                       cmp = 1;
+               else
+                       cmp = -1;
+
+               /* Stop if we've passed the list position where the entry should be. */
+               if (cmp > 0)
+                       break;
+
+               /* Fetch next-pointer. */
+               c = n->next;
+
+               /* Is it the item we're looking for? */
+               if (cmp == 0)
+               {
+                       /*
+                        * If the pointer is marked, it will be followed (if at all) by
+                        * a node which is "later" than this one in terms of either
+                        * hashcode or memcmp ordering; we won't find a duplicate of the
+                        * current key.  This is because a marked CHashPtr is never
+                        * further updated (or at least, not until after garbage
+                        * collection, which we've already prevented for this bucket),
+                        * so the successor element must have been present before this
+                        * one was deleted.
+                        */
+                       if (!CHashPtrIsMarked(c))
+                       {
+                               memcpy(((char *) entry) + table->desc.key_size,
+                                          CHashNodeGetItem(n) + table->desc.key_size,
+                                          table->desc.element_size - table->desc.key_size);
+                               found = true;
+                       }
+                       break;
+               }
+       }
+
+       /*
+        * Once we're done scanning the bucket, we can permit garbage collection
+        * of that bucket to resume.  The memory barrier ensures that all the reads
+        * that are part of the bucket scan finish before we allow garbage
+        * collection.
+        */
+       pg_memory_barrier();
+       MyProc->chash_bucket = 0;
+
+       /* Return result to caller. */
+       return found;
+}
index e10aafe99e43d19d2346e5a43b240ffa339808dc..1e91eedc2b88c26548cc76d724e6d9b8e8f88601 100644 (file)
@@ -139,6 +139,9 @@ struct PGPROC
        bool            fpVXIDLock;             /* are we holding a fast-path VXID lock? */
        LocalTransactionId fpLocalTransactionId;        /* lxid for fast-path VXID
                                                                                                 * lock */
+
+       /* chash bucket currently being scanned. */
+       uint64          chash_bucket;
 };
 
 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */