#include "postgres.h"
#include "miscadmin.h"
+#include "access/hash.h"
+#include "storage/barrier.h"
+#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/chash.h"
* used to implement concurrent deletion.
*/
typedef uint32 CHashPtr;
-#define InvalidCHashPtr ((uint32) -1)
-#define ReclaimCHashPtr ((uint32) -2)
+#define InvalidCHashPtr ((uint32) -2)
#define CHashPtrIsMarked(x) ((x) & 1)
#define CHashPtrGetOffset(x) ((x) >> 1)
#define CHashPtrMark(x) ((x) | 1)
typedef struct
{
CHashPtr next; /* arena offset of next element */
- uint32 hash_value; /* hash(key) */
+ uint32 hashcode; /* hash(key) */
} CHashNode;
#define SizeOfCHashNode MAXALIGN(sizeof(CHashNode))
-#define CHashNodeGetItem(x) ((void *) (((char *) x) + SizeOfCHashNode))
+#define CHashNodeGetItem(x) (((char *) x) + SizeOfCHashNode)
/*
* CHashTableData stores all the information that we need in order to access
CHashBucket *bucket; /* array of size nbuckets */
CHashBucket *garbage; /* array of size ngarbage */
CHashBucket *freelist; /* array of size nfreelists */
- void *arena; /* arena */
+ char *arena; /* arena */
} CHashTableData;
+/*
+ * Translate an arena offset into a pointer to the CHashNode stored there:
+ * the node lives arena_stride * offset bytes past the start of the arena.
+ * In assert-enabled builds, the offset is checked against arena_limit.
+ *
+ * Both macro arguments appear more than once in the expansion, so they
+ * should be free of side effects.
+ *
+ * NOTE(review): the argument is named "offset", and CHashPtrGetOffset()
+ * exists to strip the mark bit from a CHashPtr; presumably callers holding
+ * a CHashPtr must convert it before using this macro -- confirm.
+ */
+#define CHashTableGetNode(table, offset) \
+ (AssertMacro((offset) < (table)->arena_limit), \
+ (CHashNode *) ((table)->arena + (table)->arena_stride * (offset)))
+
/*
* First stage of CHashTable initialization. We fill in all the constants
* here, but not the pointers.
memcpy(&table->desc, desc, sizeof(CHashDescriptor));
/* Sanity checks. */
+ if (desc->id == 0)
+ elog(ERROR, "concurrent hash table id must not be zero");
if (desc->capacity < 1 || desc->capacity > CHashMaxCapacity)
elog(ERROR, "invalid capacity for concurrent hash");
if (desc->key_size < 1 || desc->key_size > desc->element_size)
/* Arena follows the various lists. */
table->arena = (void *) (&table->freelist[table->nfreelists]);
+ /* XXX. Must initialize spinlocks, set lists to empty, and then put
+ * all arena nodes on free lists. */
+
/*
* Copy table (with pointers now filled in) to shared memory. This is
* arguably unnecessary when not using EXEC_BACKEND, but we do it anyway.
return table;
}
+
+/*
+ * Search a concurrent hash table.  entry should be a block of memory large
+ * enough to hold a complete entry (table->desc.element_size bytes), with
+ * just the key portion (the leading table->desc.key_size bytes) filled in.
+ * If a matching entry is found, this function will fill in the rest of the
+ * entry from the data in the hash table and return true.  If not, it will
+ * return false without modifying entry.
+ *
+ * The scan relies on each bucket chain being ordered by hashcode and then by
+ * memcmp() of the key, which lets it stop as soon as it has passed the
+ * position where the key would have to be.
+ */
+bool
+CHashSearch(CHashTable table, void *entry)
+{
+    uint32      hashcode = hash_any(entry, table->desc.key_size);
+    uint32      bucket = hashcode & table->bucket_mask;
+    CHashPtr    c;
+    CHashNode  *n;
+    bool        found = false;
+
+    /*
+     * Suppress garbage collection for target bucket.  We need a memory
+     * barrier to make sure that our bucket advertisement is committed to
+     * memory before we begin scanning the bucket.
+     */
+    MyProc->chash_bucket = (((uint64) table->desc.id) << 32) | bucket;
+    pg_memory_barrier();
+
+    /* Scan bucket. */
+    c = table->bucket[bucket].head;
+    for (;;)
+    {
+        int         cmp;
+
+        /*
+         * If we've reached the end of the bucket chain, stop.  A CHashPtr
+         * keeps its mark in the low bit, so a marked end-of-chain pointer
+         * is not equal to InvalidCHashPtr; compare the offset portions
+         * only, so that both the marked and unmarked forms terminate the
+         * scan.
+         */
+        if (CHashPtrGetOffset(c) == CHashPtrGetOffset(InvalidCHashPtr))
+            break;
+
+        /*
+         * A dependency barrier is needed after reading a pointer value and
+         * before dereferencing it.  c is, in effect, a pointer which we're
+         * about to dereference.
+         */
+        pg_read_barrier_depends();
+
+        /*
+         * Compare current node by hashcode, then by memcmp.  The mark bit
+         * must be stripped from c before it is used as an arena offset.
+         */
+        n = CHashTableGetNode(table, CHashPtrGetOffset(c));
+        if (n->hashcode == hashcode)
+            cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
+        else if (n->hashcode > hashcode)
+            cmp = 1;
+        else
+            cmp = -1;
+
+        /* Stop if we've passed the list position where the entry should be. */
+        if (cmp > 0)
+            break;
+
+        /* Fetch next-pointer. */
+        c = n->next;
+
+        /* Is it the item we're looking for? */
+        if (cmp == 0)
+        {
+            /*
+             * If the pointer is marked, it will be followed (if at all) by
+             * a node which is "later" than this one in terms of either
+             * hashcode or memcmp ordering; we won't find a duplicate of the
+             * current key.  This is because a marked CHashPtr is never
+             * further updated (or at least, not until after garbage
+             * collection, which we've already prevented for this bucket),
+             * so the successor element must have been present before this
+             * one was deleted.
+             */
+            if (!CHashPtrIsMarked(c))
+            {
+                /* Copy out everything that follows the key. */
+                memcpy(((char *) entry) + table->desc.key_size,
+                       CHashNodeGetItem(n) + table->desc.key_size,
+                       table->desc.element_size - table->desc.key_size);
+                found = true;
+            }
+            break;
+        }
+    }
+
+    /*
+     * Once we're done scanning the bucket, we can permit garbage collection
+     * of that bucket to resume.  The memory barrier ensures that all the reads
+     * that are part of the bucket scan finish before we allow garbage
+     * collection.
+     */
+    pg_memory_barrier();
+    MyProc->chash_bucket = 0;
+
+    /* Return result to caller. */
+    return found;
+}