From 4723083de74f22f4022a154dc01a9c28d55d5985 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Wed, 4 Jul 2012 14:50:19 -0400
Subject: [PATCH] Start of work on chash.

---
 src/backend/utils/hash/Makefile |   2 +-
 src/backend/utils/hash/chash.c  | 151 ++++++++++++++++++++++++++++++++
 src/include/utils/chash.h       |  44 ++++++++++
 3 files changed, 196 insertions(+), 1 deletion(-)
 create mode 100644 src/backend/utils/hash/chash.c
 create mode 100644 src/include/utils/chash.h

diff --git a/src/backend/utils/hash/Makefile b/src/backend/utils/hash/Makefile
index 05d347c856..5d5338266d 100644
--- a/src/backend/utils/hash/Makefile
+++ b/src/backend/utils/hash/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/hash
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = dynahash.o hashfn.o
+OBJS = chash.o dynahash.o hashfn.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/hash/chash.c b/src/backend/utils/hash/chash.c
new file mode 100644
index 0000000000..c5d04e1b12
--- /dev/null
+++ b/src/backend/utils/hash/chash.c
@@ -0,0 +1,151 @@
+/*-------------------------------------------------------------------------
+ *
+ * chash.c
+ *	  concurrent hash tables
+ *
+ * The goal of this module is to implement a hash table that can be
+ * searched without any locking at all and updated with minimal locking.
+ * While a fully lock-free (or, better still, wait-free) hash table seems
+ * very desirable, currently known techniques require memory management
+ * techniques that are either very complex or difficult to implement in
+ * the context of a fixed-size shared memory segment.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/hash/chash.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "storage/shmem.h"
+#include "storage/spin.h"
+#include "utils/chash.h"
+
+/*
+ * The memory needed to store the entries in a hash table is preallocated in
+ * a single chunk called the arena.  We refer to entries using a CHashPtr
+ * rather than an ordinary pointer.  One bit of each CHashPtr is reserved for
+ * use as a "mark" bit, which is used to implement concurrent deletion.
+ * The remaining bits form an offset into the arena.  By storing offset
+ * rather than pointers, we can reduce the memory footprint of the hash table
+ * considerably, at the cost of limiting the maximum number of elements in a
+ * single concurrent hash table to 2^31.  That limitation appears acceptable
+ * for now, and we can always switch to pointers or 64-bit integers here in
+ * the future, if needed.
+ */
+typedef uint32 CHashPtr;
+#define InvalidCHashPtr             ((uint32) -1)
+#define ReclaimCHashPtr				((uint32) -2)
+#define CHashPtrIsMarked(x)         ((x) & 1)
+#define CHashPtrGetOffset(x)        ((x) >> 1)
+#define CHashPtrMark(x)             ((x) | 1)
+#define CHashPtrUnmark(x)           ((x) & ~1)
+#define MakeCHashPtr(x)				((x) << 1)
+
+static uint32 CHashMaxCapacity = CHashPtrGetOffset(InvalidCHashPtr);
+
+/*
+ * Each hash bucket is implemented as a pointer to the first item in the
+ * bucket, or InvalidCHashPtr if the bucket is empty.  Each item contains a
+ * pointer to the next item in the bucket, or InvalidCHashPtr if there are no
+ * more items.
+ *
+ * Each bucket also has a spinlock which is used to serialize modifications
+ * to the bucket, but need not be taken when searching it.
+ */
+typedef struct
+{
+	CHashPtr	head;		/* arena offset of first element in bucket */
+	slock_t		mutex;		/* mutual exclusion for modifications */
+} CHashBucket;
+
+/*
+ * Each free list is implemented as a pointer to the first item on the
+ * free list, or InvalidCHashPtr if the free list is empty.  Each free list
+ * is protected by a spinlock.
+ */
+typedef struct
+{
+	CHashPtr	head;		/* arena offset of first element in bucket */
+	slock_t		mutex;		/* mutual exclusion for modifications */
+} CHashFreeList;
+
+/*
+ * Each item stored in the hash table is represented by a CHashNode, which
+ * stores a pointer to the next item in the same bucket, and the exact hash
+ * value of the current item.  Each CHashNode is followed by space for the
+ * item itself.
+ */
+typedef struct
+{
+	CHashPtr	next;		/* arena offset of next element in bucket */
+	uint32		hash_value;	/* hash(key) */
+} CHashNode;
+#define CHashNodeGetItem(x)		((void *) (((char *) x) + sizeof(CHashNode)))
+
+/*
+ * CHashTableData stores all the information that we need in order to access
+ * a concurrent hash table.  We store one copy of this data in shared memory,
+ * and an additional copy in the private memory of each backend accessing the
+ * table.  None of this information changes after the initial setup of the
+ * hash table.
+ */
+typedef struct
+{
+	CHashDescriptor	desc;			/* descriptor for this hash table */
+	uint32			nbuckets;		/* # of buckets; must be a power of two */
+	uint32			bucket_mask;	/* # of buckets, minus one */
+	uint32			nfreelists;		/* # of freelists, also a power of two */
+	void		   *arena;			/* arena */
+	CHashBucket	   *bucket;			/* array of size nbuckets */
+	CHashFreeList  *freelist;		/* array of size nfreelists */
+} CHashTableData;
+
+/*
+ * Compute the number of buckets and the number of freelists for a hash table
+ * with a given capacity.
+ */
+static void
+CHashSizingParameters(uint32 capacity, uint32 *nbuckets, uint32 *nfreelists)
+{
+	uint32	bucket_shift;
+	uint32	freelist_shift;
+
+	if (capacity < 1 || capacity > CHashMaxCapacity)
+		elog(ERROR, "invalid capacity for concurrent hash");
+
+ 	/*
+	 * The number of buckets must be a power of two.  To avoid (as much as
+	 * possible) having to traverse long bucket chains, we aim for a load
+	 * factor <= 1.0, so this is a pretty simple calculation: we just find the
+	 * smallest power of two greater than or equal to the target capacity.
+	 */
+	bucket_shift = fls(capacity) - 1;
+	*nbuckets = 1 << bucket_shift;
+
+	/*
+	 * The number of freelists must also be a power of two, and must be no
+	 * larger than the number of buckets.
+	 */
+	freelist_shift = bucket_shift / 2;
+	*nfreelists = 1 << freelist_shift;
+}
+
+Size
+CHashEstimateSize(CHashDescriptor *desc)
+{
+	uint32	nbuckets,
+			nfreelists;
+	Size	size;
+
+	CHashSizingParameters(desc->capacity, &nbuckets, &nfreelists);
+
+	size = MAXALIGN(sizeof(CHashTableData));
+	size = add_size(size, mul_size(MAXALIGN(sizeof(CHashBucket)), nbuckets));
+
+	return size;
+}
diff --git a/src/include/utils/chash.h b/src/include/utils/chash.h
new file mode 100644
index 0000000000..c0adfd9a5f
--- /dev/null
+++ b/src/include/utils/chash.h
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * chash.h
+ *	  Concurrent shared-memory hash table.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/chash.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef CHASH_H 
+#define CHASH_H
+
+/*
+ * A concurrent hash table stores a bounded number of fixed-size elements,
+ * each of which begins with a fixed-size key.  This structure provides just
+ * enough information about a proposed concurrent hash table to estimate its
+ * size, or create it.
+ */
+typedef struct
+{
+	uint32		id;				/* unique identifier for this hash table */
+	uint32		capacity;		/* maximum size of hash table */
+	uint16		element_size;	/* size of each element */
+	uint16		key_size;		/* size of each key */
+} CHashDescriptor;
+
+/* Opaque handle for a concurrent hash table. */
+struct CHashTableData;
+typedef struct CHashTableData *CHashTable;
+
+/* Initialization functions. */
+extern Size CHashEstimateSize(CHashDescriptor *desc);
+extern CHashTable CHashInitialize(CHashDescriptor *desc);
+extern CHashTable CHashAttach(CHashDescriptor *desc);
+
+/* Accessor functions. */
+extern bool CHashInsert(CHashTable table, void *entry);
+extern bool CHashDelete(CHashTable table, void *key);
+extern bool CHashSearch(CHashTable table, void *entry);
+
+#endif   /* CHASH_H */
-- 
2.39.5