--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * chash.c
+ * concurrent hash tables
+ *
+ * The goal of this module is to implement a hash table that can be
+ * searched without any locking at all and updated with minimal locking.
+ * While a fully lock-free (or, better still, wait-free) hash table seems
+ * very desirable, currently known techniques require memory management
+ * techniques that are either very complex or difficult to implement in
+ * the context of a fixed-size shared memory segment.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/hash/chash.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "storage/shmem.h"
+#include "storage/spin.h"
+#include "utils/chash.h"
+
+/*
+ * The memory needed to store the entries in a hash table is preallocated in
+ * a single chunk called the arena. We refer to entries using a CHashPtr
+ * rather than an ordinary pointer. One bit of each CHashPtr is reserved for
+ * use as a "mark" bit, which is used to implement concurrent deletion.
+ * The remaining bits form an offset into the arena. By storing offsets
+ * rather than pointers, we can reduce the memory footprint of the hash table
+ * considerably, at the cost of limiting the maximum number of elements in a
+ * single concurrent hash table to 2^31 - 1. That limitation appears acceptable
+ * for now, and we can always switch to pointers or 64-bit integers here in
+ * the future, if needed.
+ */
+/*
+ * CHashPtr layout: bit 0 is the mark bit; the remaining upper bits hold the
+ * arena offset.
+ */
+typedef uint32 CHashPtr;
+/* All bits set; the "no entry" value for bucket heads and next links. */
+#define InvalidCHashPtr ((uint32) -1)
+/*
+ * NOTE(review): the use of this sentinel is not visible in this chunk --
+ * presumably it tags entries awaiting reclamation; confirm against callers.
+ */
+#define ReclaimCHashPtr ((uint32) -2)
+/* Nonzero if the mark (low) bit of x is set. */
+#define CHashPtrIsMarked(x) ((x) & 1)
+/* Drop the mark bit, yielding the arena offset stored in x. */
+#define CHashPtrGetOffset(x) ((x) >> 1)
+/* x with the mark bit set. */
+#define CHashPtrMark(x) ((x) | 1)
+/* x with the mark bit cleared. */
+#define CHashPtrUnmark(x) ((x) & ~1)
+/* Build an unmarked CHashPtr from an arena offset. */
+#define MakeCHashPtr(x) ((x) << 1)
+
+/*
+ * Upper bound on table capacity: the offset portion of InvalidCHashPtr
+ * (0x7FFFFFFF for a 32-bit CHashPtr).
+ */
+static uint32 CHashMaxCapacity = CHashPtrGetOffset(InvalidCHashPtr);
+
+/*
+ * Each hash bucket is implemented as a pointer to the first item in the
+ * bucket, or InvalidCHashPtr if the bucket is empty. Each item contains a
+ * pointer to the next item in the bucket, or InvalidCHashPtr if there are no
+ * more items.
+ *
+ * Each bucket also has a spinlock which is used to serialize modifications
+ * to the bucket, but need not be taken when searching it.
+ */
+/* One hash bucket: the head of a chain of entries plus its writer lock. */
+typedef struct
+{
+ CHashPtr head; /* arena offset of first element in bucket, or InvalidCHashPtr if empty */
+ slock_t mutex; /* mutual exclusion for modifications; not taken by searches */
+} CHashBucket;
+
+/*
+ * Each free list is implemented as a pointer to the first item on the
+ * free list, or InvalidCHashPtr if the free list is empty. Each free list
+ * is protected by a spinlock.
+ */
+/* One free list: the head of a chain of unused entries plus its lock. */
+typedef struct
+{
+ CHashPtr head; /* arena offset of first element on free list, or InvalidCHashPtr if empty */
+ slock_t mutex; /* mutual exclusion for modifications */
+} CHashFreeList;
+
+/*
+ * Each item stored in the hash table is represented by a CHashNode, which
+ * stores a pointer to the next item in the same bucket, and the exact hash
+ * value of the current item. Each CHashNode is followed by space for the
+ * item itself.
+ */
+typedef struct
+{
+    CHashPtr    next;           /* arena offset of next element in bucket */
+    uint32      hash_value;     /* hash(key) */
+} CHashNode;
+
+/*
+ * Return a pointer to the caller's item, which is stored immediately after
+ * the CHashNode header.  The argument is fully parenthesized so that an
+ * expression such as "node + 1" is evaluated before the cast to char *,
+ * rather than having the cast bind to "node" alone.
+ */
+#define CHashNodeGetItem(x) ((void *) (((char *) (x)) + sizeof(CHashNode)))
+
+/*
+ * CHashTableData stores all the information that we need in order to access
+ * a concurrent hash table. We store one copy of this data in shared memory,
+ * and an additional copy in the private memory of each backend accessing the
+ * table. None of this information changes after the initial setup of the
+ * hash table.
+ */
+/*
+ * The struct carries the tag "CHashTableData" so that it completes the
+ * forward declaration "struct CHashTableData" in utils/chash.h, on which the
+ * public CHashTable handle type is built.  Without the tag, CHashTable would
+ * point to a type that is never defined and could not be dereferenced here.
+ */
+typedef struct CHashTableData
+{
+    CHashDescriptor desc;       /* descriptor for this hash table */
+    uint32      nbuckets;       /* # of buckets; must be a power of two */
+    uint32      bucket_mask;    /* # of buckets, minus one */
+    uint32      nfreelists;     /* # of freelists, also a power of two */
+    void       *arena;          /* arena */
+    CHashBucket *bucket;        /* array of size nbuckets */
+    CHashFreeList *freelist;    /* array of size nfreelists */
+} CHashTableData;
+
+/*
+ * Compute the number of buckets and the number of freelists for a hash table
+ * with a given capacity.
+ *
+ * capacity must be between 1 and CHashMaxCapacity inclusive; otherwise an
+ * ERROR is raised.  On return, *nbuckets is the smallest power of two that is
+ * greater than or equal to capacity, and *nfreelists is a power of two no
+ * larger than *nbuckets.
+ */
+static void
+CHashSizingParameters(uint32 capacity, uint32 *nbuckets, uint32 *nfreelists)
+{
+    uint32      bucket_shift;
+    uint32      freelist_shift;
+
+    if (capacity < 1 || capacity > CHashMaxCapacity)
+        elog(ERROR, "invalid capacity for concurrent hash");
+
+    /*
+     * The number of buckets must be a power of two.  To avoid (as much as
+     * possible) having to traverse long bucket chains, we aim for a load
+     * factor <= 1.0, so we want the smallest power of two greater than or
+     * equal to the target capacity.
+     *
+     * fls(capacity) - 1 is floor(log2(capacity)), which on its own rounds
+     * *down*; bump the shift by one whenever capacity is not already an
+     * exact power of two.
+     */
+    bucket_shift = fls(capacity) - 1;
+    if ((capacity & (capacity - 1)) != 0)
+        bucket_shift++;
+
+    /*
+     * Shift an unsigned 1: bucket_shift can be as large as 31, which would
+     * overflow a signed int.
+     */
+    *nbuckets = ((uint32) 1) << bucket_shift;
+
+    /*
+     * The number of freelists must also be a power of two, and must be no
+     * larger than the number of buckets.  Halving the shift gives roughly
+     * the square root of the bucket count.
+     */
+    freelist_shift = bucket_shift / 2;
+    *nfreelists = ((uint32) 1) << freelist_shift;
+}
+
+/*
+ * Estimate the amount of shared memory needed for a concurrent hash table
+ * described by desc.
+ *
+ * The estimate covers the CHashTableData header, the bucket array, the
+ * freelist array, and the arena that holds the entries themselves.
+ * add_size/mul_size are used throughout so that arithmetic overflow is
+ * reported rather than silently wrapping.
+ */
+Size
+CHashEstimateSize(CHashDescriptor *desc)
+{
+    uint32      nbuckets,
+                nfreelists;
+    Size        arena_stride;
+    Size        size;
+
+    CHashSizingParameters(desc->capacity, &nbuckets, &nfreelists);
+
+    size = MAXALIGN(sizeof(CHashTableData));
+    size = add_size(size, mul_size(MAXALIGN(sizeof(CHashBucket)), nbuckets));
+    size = add_size(size,
+                    mul_size(MAXALIGN(sizeof(CHashFreeList)), nfreelists));
+
+    /*
+     * Each arena slot is a CHashNode header followed by the caller's
+     * element (see CHashNodeGetItem).
+     *
+     * NOTE(review): the slot is assumed to be MAXALIGN'd; the code that
+     * actually carves up the arena must use the same stride.  Rounding up
+     * here can only over-estimate, which is the safe direction.
+     */
+    arena_stride = MAXALIGN(sizeof(CHashNode) + desc->element_size);
+    size = add_size(size, mul_size(arena_stride, desc->capacity));
+
+    return size;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * chash.h
+ * Concurrent shared-memory hash table.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/chash.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef CHASH_H
+#define CHASH_H
+
+/*
+ * A concurrent hash table stores a bounded number of fixed-size elements,
+ * each of which begins with a fixed-size key. This structure provides just
+ * enough information about a proposed concurrent hash table to estimate its
+ * size, or create it.
+ */
+/* Caller-supplied description of a concurrent hash table. */
+typedef struct
+{
+ uint32 id; /* unique identifier for this hash table */
+ uint32 capacity; /* maximum size of hash table, in elements */
+ uint16 element_size; /* size of each element, in bytes (key included) */
+ uint16 key_size; /* size of each key, in bytes; the key is the leading part of the element */
+} CHashDescriptor;
+
+/*
+ * Opaque handle for a concurrent hash table.  Only a forward declaration of
+ * the struct is given here, so code including this header can hold and pass
+ * a CHashTable but cannot look inside it.
+ */
+struct CHashTableData;
+typedef struct CHashTableData *CHashTable;
+
+/*
+ * Initialization functions.
+ *
+ * CHashEstimateSize returns the number of bytes of memory the table described
+ * by desc is expected to need.
+ *
+ * NOTE(review): CHashInitialize and CHashAttach are not defined in the
+ * portion of chash.c visible here -- presumably the former creates the table
+ * and the latter connects to an existing one; confirm.
+ */
+extern Size CHashEstimateSize(CHashDescriptor *desc);
+extern CHashTable CHashInitialize(CHashDescriptor *desc);
+extern CHashTable CHashAttach(CHashDescriptor *desc);
+
+/*
+ * Accessor functions.
+ *
+ * CHashInsert and CHashSearch take a pointer to a whole entry, while
+ * CHashDelete takes a pointer to a key only.
+ *
+ * NOTE(review): the meaning of the bool results is not documented here --
+ * presumably true means the entry was inserted / deleted / found; confirm
+ * against the implementation.
+ */
+extern bool CHashInsert(CHashTable table, void *entry);
+extern bool CHashDelete(CHashTable table, void *key);
+extern bool CHashSearch(CHashTable table, void *entry);
+
+#endif /* CHASH_H */