uint32 bucket_mask; /* # of buckets, minus one */
uint32 garbage_shift; /* log2(nbuckets/ngarbage) */
uint32 ngarbage; /* # of garbage lists, a power of two */
+ uint32 gc_next; /* next garbage list to reclaim */
+ int gc_pid; /* PID that set gc_next */
uint32 nfreelists; /* # of freelists */
uint32 arena_limit; /* # of arena elements */
uint32 arena_stride; /* bytes allocated per arena element */
(AssertMacro((offset) < (table)->arena_limit), \
(CHashNode *) ((table)->arena + (table)->arena_stride * (offset)))
+/*
+ * We need a memory barrier to make sure that our bucket advertisement is
+ * committed to memory before we begin scanning the bucket, and another one
+ * to make sure the scan is done before we cease advertising it.
+ *
+ * Advertises (table id, garbage-list number) in MyProc->chash_bucket so
+ * that CHashAllocate's reclaim loop will wait for this scan to finish.
+ */
+#define CHashTableSuppressGC(table, bno) \
+	do { \
+		Assert(MyProc->chash_bucket == 0); \
+		MyProc->chash_bucket = \
+			((uint64) (table)->desc.id)<<32 | ((bno)>>(table)->garbage_shift); \
+		pg_memory_barrier(); \
+	} while (0)
+/*
+ * Stop advertising a bucket scan, allowing garbage collection of that
+ * bucket's garbage list to resume.  The barrier ensures every read that is
+ * part of the bucket scan completes before the advertisement is cleared.
+ */
+#define CHashTableUnsuppressGC() \
+	do { \
+		Assert(MyProc->chash_bucket != 0); \
+		pg_memory_barrier(); \
+		MyProc->chash_bucket = 0; \
+	} while (0)
+
/*
* First stage of CHashTable initialization. We fill in all the constants
* here, but not the pointers.
*/
table->garbage_shift = Min(bucket_shift, 6);
table->ngarbage = table->nbuckets >> table->garbage_shift;
+ table->gc_next = 0;
+ table->gc_pid = 0;
/*
* The number of freelists must be large enough to avoid contention;
CHashNode *n;
bool found = false;
- /*
- * Suppress garbage collection for target bucket. We need a memory
- * barrier to make sure that our bucket advertisement is committed to
- * memory before we begin scanning the bucket.
- */
- MyProc->chash_bucket = ((uint64) table->desc.id) << 32 | bucket;
- pg_memory_barrier();
+ /* Suppress garbage collection for target bucket. */
+ CHashTableSuppressGC(table, bucket);
/* Scan bucket. */
c = table->bucket[bucket].head;
pg_read_barrier_depends();
/* Compare current node by hashcode, then by memcmp. */
- n = CHashTableGetNode(table, c);
+ n = CHashTableGetNode(table, CHashPtrGetOffset(c));
if (n->hashcode == hashcode)
cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
else if (n->hashcode > hashcode)
}
}
- /*
- * Once we're done scanning the bucket, we can permit garbage collection
- * of that bucket to resume. The memory barrier ensures that all the reads
- * that are part of the bucket scan finish before we allow garbage
- * collection.
- */
- pg_memory_barrier();
- MyProc->chash_bucket = 0;
+ /* Done scanning bucket. */
+ CHashTableUnsuppressGC();
/* Return result to caller. */
return found;
}
+
+/*
+ * Insert into a concurrent hash table. entry should be the complete entry
+ * to be inserted. If no duplicate entry is found in the table, this function
+ * will insert the entry and return true. Otherwise, the duplicate entry's
+ * value will be copied into the supplied entry and the function will return
+ * false.
+ */
+bool
+CHashInsert(CHashTable table, void *entry)
+{
+	/*
+	 * NOTE(review): unimplemented stub.  Falling off the end of a non-void
+	 * function yields an indeterminate return value (undefined behavior if
+	 * the caller uses it).  TODO: implement, or return false until then.
+	 */
+}
+
+/*
+ * Allocate an arena slot for a new item to be inserted into a hash table.
+ *
+ * We don't want to wait until every single free-list is completely empty
+ * before beginning to garbage collect, because that could have undesirable
+ * latency characteristics, and might possibly render free-lists thoroughly
+ * worthless from the point of view of contention avoidance.  Instead, we
+ * check free lists and garbage lists in alternation.  If we find a non-empty
+ * free list, we allocate from it; if we find a non-empty garbage list, we
+ * garbage collect it and put the contents on our free list.
+ *
+ * We always start with the same free list - which one is based on our
+ * backend ID - but we try to round-robin among all the garbage lists.
+ *
+ * Returns the CHashPtr of the allocated slot.  Does not return until an
+ * allocation succeeds; spins waiting for concurrent scanners when garbage
+ * must be reclaimed.
+ */
+static CHashPtr
+CHashAllocate(CHashTable table)
+{
+	uint32		f_home = ((uint32) MyBackendId) % table->nfreelists;
+	uint32		f_current = f_home;
+	CHashPtr	new;
+	CHashPtr	garbage;
+	volatile CHashTable vtable = table;
+
+	/*
+	 * If this process hasn't initialized gc_next yet, do that now.  gc_pid
+	 * and gc_next live in shared memory, so this is deliberately best-effort:
+	 * we only need a roughly per-process starting point for the round-robin,
+	 * not an exact one.
+	 */
+	if (table->gc_pid != MyProcPid)
+	{
+		table->gc_pid = MyProcPid;
+		table->gc_next = ((uint32) MyProcPid) % table->ngarbage;
+	}
+
+	/* Loop until we allocate a buffer. */
+	for (;;)
+	{
+		/*
+		 * Check one freelist for an available arena slot.  To minimize
+		 * spinlock traffic, we do an unlocked test first.  We must recheck
+		 * after acquiring the spinlock.
+		 */
+		if (vtable->freelist[f_current].head != InvalidCHashPtr)
+		{
+			SpinLockAcquire(&vtable->freelist[f_current].mutex);
+			new = vtable->freelist[f_current].head;
+			if (new != InvalidCHashPtr)
+			{
+				/*
+				 * Convert the CHashPtr to an arena offset before
+				 * dereferencing, as elsewhere (see CHashSearch).
+				 */
+				CHashNode *n = CHashTableGetNode(table,
+												 CHashPtrGetOffset(new));
+
+				vtable->freelist[f_current].head = n->next;
+				SpinLockRelease(&vtable->freelist[f_current].mutex);
+				return new;
+			}
+			SpinLockRelease(&vtable->freelist[f_current].mutex);
+		}
+
+		/*
+		 * Check the next garbage list for recyclable buffers.  If we
+		 * find any, try to garbage collect them.
+		 */
+		table->gc_next = (table->gc_next + 1) % table->ngarbage;
+		if (vtable->garbage[table->gc_next].head != InvalidCHashPtr)
+		{
+			/*
+			 * Pop from the garbage list we just found non-empty -- NOT from
+			 * our freelist, which we already know is empty.
+			 */
+			volatile CHashBucket *b = &vtable->garbage[table->gc_next];
+			uint32		i;
+			uint64		chash_bucket;
+
+			/* Pop garbage off list. */
+			SpinLockAcquire(&b->mutex);
+			garbage = b->head;
+			b->head = InvalidCHashPtr;
+			SpinLockRelease(&b->mutex);
+
+			/* Anything to recycle? */
+			if (garbage != InvalidCHashPtr)
+			{
+				/*
+				 * Be certain that the writes associated with popping the
+				 * garbage list are complete before we start checking whether
+				 * the garbage is recyclable.
+				 */
+				pg_memory_barrier();
+
+				/*
+				 * Spin until garbage is recyclable.  We could have a "soft"
+				 * version of this that merely requeues the garbage if it's not
+				 * immediately recyclable, but it's not clear that we need
+				 * such a thing.  On the flip side we might want to eventually
+				 * enter a longer sleep here, or PANIC, but it's not clear
+				 * exactly how to calibrate that, either.
+				 */
+				chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next;
+				for (i = 0; i < ProcGlobal->allProcCount; i++)
+				{
+					/*
+					 * volatile forces chash_bucket to be re-read on every
+					 * iteration of the spin, so we can observe the scanner
+					 * clearing its advertisement.
+					 */
+					volatile PGPROC *proc = &ProcGlobal->allProcs[i];
+
+					while (proc->chash_bucket == chash_bucket)
+						;
+				}
+
+				/*
+				 * Be certain that all prior reads are done before starting
+				 * the next batch of writes.
+				 */
+				pg_memory_barrier();
+
+				/* XXX. Recycle garbage here! */
+			}
+		}
+	}
+}