From 4723083de74f22f4022a154dc01a9c28d55d5985 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 4 Jul 2012 14:50:19 -0400 Subject: [PATCH] Start of work on chash. --- src/backend/utils/hash/Makefile | 2 +- src/backend/utils/hash/chash.c | 151 ++++++++++++++++++++++++++++++++ src/include/utils/chash.h | 44 ++++++++++ 3 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 src/backend/utils/hash/chash.c create mode 100644 src/include/utils/chash.h diff --git a/src/backend/utils/hash/Makefile b/src/backend/utils/hash/Makefile index 05d347c856..5d5338266d 100644 --- a/src/backend/utils/hash/Makefile +++ b/src/backend/utils/hash/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/utils/hash top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = dynahash.o hashfn.o +OBJS = chash.o dynahash.o hashfn.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/hash/chash.c b/src/backend/utils/hash/chash.c new file mode 100644 index 0000000000..c5d04e1b12 --- /dev/null +++ b/src/backend/utils/hash/chash.c @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * chash.c + * concurrent hash tables + * + * The goal of this module is to implement a hash table that can be + * searched without any locking at all and updated with minimal locking. + * While a fully lock-free (or, better still, wait-free) hash table seems + * very desirable, currently known techniques require memory management + * techniques that are either very complex or difficult to implement in + * the context of a fixed-size shared memory segment. + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/hash/chash.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "storage/shmem.h" +#include "storage/spin.h" +#include "utils/chash.h" + +/* + * The memory needed to store the entries in a hash table is preallocated in + * a single chunk called the arena. We refer to entries using a CHashPtr + * rather than an ordinary pointer. One bit of each CHashPtr is reserved for + * use as a "mark" bit, which is used to implement concurrent deletion. + * The remaining bits form an offset into the arena. By storing offset + * rather than pointers, we can reduce the memory footprint of the hash table + * considerably, at the cost of limiting the maximum number of elements in a + * single concurrent hash table to 2^31. That limitation appears acceptable + * for now, and we can always switch to pointers or 64-bit integers here in + * the future, if needed. + */ +typedef uint32 CHashPtr; +#define InvalidCHashPtr ((uint32) -1) +#define ReclaimCHashPtr ((uint32) -2) +#define CHashPtrIsMarked(x) ((x) & 1) +#define CHashPtrGetOffset(x) ((x) >> 1) +#define CHashPtrMark(x) ((x) | 1) +#define CHashPtrUnmark(x) ((x) & ~1) +#define MakeCHashPtr(x) ((x) << 1) + +static uint32 CHashMaxCapacity = CHashPtrGetOffset(InvalidCHashPtr); + +/* + * Each hash bucket is implemented as a pointer to the first item in the + * bucket, or InvalidCHashPtr if the bucket is empty. Each item contains a + * pointer to the next item in the bucket, or InvalidCHashPtr if there are no + * more items. + * + * Each bucket also has a spinlock which is used to serialize modifications + * to the bucket, but need not be taken when searching it. + */ +typedef struct +{ + CHashPtr head; /* arena offset of first element in bucket */ + slock_t mutex; /* mutual exclusion for modifications */ +} CHashBucket; + +/* + * Each free list is implemented as a pointer to the first item on the + * free list, or InvalidCHashPtr if the free list is empty. Each free list + * is protected by a spinlock. + */ +typedef struct +{ + CHashPtr head; /* arena offset of first element in bucket */ + slock_t mutex; /* mutual exclusion for modifications */ +} CHashFreeList; + +/* + * Each item stored in the hash table is represented by a CHashNode, which + * stores a pointer to the next item in the same bucket, and the exact hash + * value of the current item. Each CHashNode is followed by space for the + * item itself. + */ +typedef struct +{ + CHashPtr next; /* arena offset of next element in bucket */ + uint32 hash_value; /* hash(key) */ +} CHashNode; +#define CHashNodeGetItem(x) ((void *) (((char *) x) + sizeof(CHashNode))) + +/* + * CHashTableData stores all the information that we need in order to access + * a concurrent hash table. We store one copy of this data in shared memory, + * and an additional copy in the private memory of each backend accessing the + * table. None of this information changes after the initial setup of the + * hash table. + */ +typedef struct +{ + CHashDescriptor desc; /* descriptor for this hash table */ + uint32 nbuckets; /* # of buckets; must be a power of two */ + uint32 bucket_mask; /* # of buckets, minus one */ + uint32 nfreelists; /* # of freelists, also a power of two */ + void *arena; /* arena */ + CHashBucket *bucket; /* array of size nbuckets */ + CHashFreeList *freelist; /* array of size nfreelists */ +} CHashTableData; + +/* + * Compute the number of buckets and the number of freelists for a hash table + * with a given capacity. + */ +static void +CHashSizingParameters(uint32 capacity, uint32 *nbuckets, uint32 *nfreelists) +{ + uint32 bucket_shift; + uint32 freelist_shift; + + if (capacity < 1 || capacity > CHashMaxCapacity) + elog(ERROR, "invalid capacity for concurrent hash"); + + /* + * The number of buckets must be a power of two. To avoid (as much as + * possible) having to traverse long bucket chains, we aim for a load + * factor <= 1.0, so this is a pretty simple calculation: we just find the + * smallest power of two greater than or equal to the target capacity. + */ + bucket_shift = fls(capacity) - 1; + *nbuckets = 1 << bucket_shift; + + /* + * The number of freelists must also be a power of two, and must be no + * larger than the number of buckets. + */ + freelist_shift = bucket_shift / 2; + *nfreelists = 1 << freelist_shift; +} + +Size +CHashEstimateSize(CHashDescriptor *desc) +{ + uint32 nbuckets, + nfreelists; + Size size; + + CHashSizingParameters(desc->capacity, &nbuckets, &nfreelists); + + size = MAXALIGN(sizeof(CHashTableData)); + size = add_size(size, mul_size(MAXALIGN(sizeof(CHashBucket)), nbuckets)); + + return size; +} diff --git a/src/include/utils/chash.h b/src/include/utils/chash.h new file mode 100644 index 0000000000..c0adfd9a5f --- /dev/null +++ b/src/include/utils/chash.h @@ -0,0 +1,44 @@ +/*------------------------------------------------------------------------- + * + * chash.h + * Concurrent shared-memory hash table. + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/utils/chash.h + * + *------------------------------------------------------------------------- + */ +#ifndef CHASH_H +#define CHASH_H + +/* + * A concurrent hash table stores a bounded number of fixed-size elements, + * each of which begins with a fixed-size key. This structure provides just + * enough information about a proposed concurrent hash table to estimate its + * size, or create it. + */ +typedef struct +{ + uint32 id; /* unique identifier for this hash table */ + uint32 capacity; /* maximum size of hash table */ + uint16 element_size; /* size of each element */ + uint16 key_size; /* size of each key */ +} CHashDescriptor; + +/* Opaque handle for a concurrent hash table. */ +struct CHashTableData; +typedef struct CHashTableData *CHashTable; + +/* Initialization functions. */ +extern Size CHashEstimateSize(CHashDescriptor *desc); +extern CHashTable CHashInitialize(CHashDescriptor *desc); +extern CHashTable CHashAttach(CHashDescriptor *desc); + +/* Accessor functions. */ +extern bool CHashInsert(CHashTable table, void *entry); +extern bool CHashDelete(CHashTable table, void *key); +extern bool CHashSearch(CHashTable table, void *entry); + +#endif /* CHASH_H */ -- 2.39.5