/* Function prototypes. */
static CHashPtr CHashAllocate(CHashTable table);
static void CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c);
-static void CHashImmediateFree(CHashTable table, CHashPtr c);
static void CHashBucketScan(CHashTable table,
CHashPtr *start,
uint32 hashcode,
memcpy(CHashNodeGetItem(nnew), entry, table->desc.element_size);
/* Prevent garbage collection for this bucket. */
- Assert(MyProc->hazard[0] == NULL);
MyProc->hazard[0] = b;
pg_memory_barrier();
pg_memory_barrier();
MyProc->hazard[0] = NULL;
- /* If the insert failed, free the entry we allocated. */
+ /*
+ * If the insert failed, add the entry we found to the appropriate
+ * garbage list. We can't simply put this back on the freelist,
+ * because that leads to an ABA problem: some other backend could
+ * read the head of the freelist, go into the tank, and then use
+ * CAS to pop an element. If at that point, it finds the same
+ * element at the head of the freelist but with a different successor,
+ * we're hosed. To prevent that, we ensure that elements are added
+ * to a given freelist only after verifying that any allocations in
+ * progress at the time we popped the freelist has completed. This
+ * guarantees that any allocation still in progress at the time this
+ * element makes it back to the freelist is trying to allocate some
+ * other node.
+ */
if (scan.found)
- CHashImmediateFree(table, new);
+ CHashAddToGarbage(table, bucket, new);
/* The insert succeeded if and only if no duplicate was found. */
return !scan.found;
{
CHashPtr *b;
- /* Try to pop a buffer from a freelist using compare-and-swap. */
+ /*
+ * Try to pop a buffer from a freelist using compare-and-swap.
+ *
+ * Note that this is only safe if it's impossible for the next pointer
+ * of the first element on the list to change between the time when
+ * we read it and the time we use CAS to pop it off the list. This
+ * means that we can't allow any element that is currently on this
+ * freelist to be allocated, marked as garbage, garbage collected,
+ * and returned back to this freelist before we finish the CAS.
+ */
b = &table->freelist[f_current];
+ MyProc->hazard[0] = b;
+ pg_memory_barrier();
new = *b;
if (!CHashPtrIsInvalid(new))
{
uint32 i;
CHashPtr fhead;
CHashNode *n;
+ CHashPtr *fh = &table->freelist[f_home];
/*
- * Spin until garbage is recyclable. We can't begin this operation
- * until the clearing of the garbage list has been committed to
- * memory, but since that was done using an atomic operation no
- * explicit barrier is needed here.
+ * Before removing elements removed from the garbage list to the
+ * freelist, we must wait until (1) all bucket scans that might
+ * still see elements on the freelist as part of the bucket chain
+ * have completed and (2) all allocations that might see an old
+ * version of the freelist containing one of the elements to be
+ * garbage collected have completed.
+ *
+ * Note: We can't begin this operation until the clearing of the
+ * garbage list has been committed to memory, but since that was
+ * done using an atomic operation no explicit barrier is needed
+ * here.
*
* Note: We could have a "soft" version of this that merely
* requeues the garbage if it's not immediately recycleable, but
* might want to eventually enter a longer sleep here, or PANIC,
* but it's not clear exactly how to calibrate that.
*/
+ MyProc->hazard[0] = NULL;
for (i = 0; i < ProcGlobal->allProcCount; i++)
{
- PGPROC *proc = &ProcGlobal->allProcs[i];
+ volatile PGPROC *proc = &ProcGlobal->allProcs[i];
+ void *hazard;
- while (proc->hazard[0] == b)
- ;
+ do
+ {
+ hazard = proc->hazard[0];
+ } while (hazard == b || hazard == fh);
}
/* Remove one item from list to satisfy current allocation. */
}
/* Push reclaimed elements onto home free list. */
- b = &table->freelist[f_home];
for (;;)
{
- oldhead = *b;
+ oldhead = *fh;
n->un.gcnext = oldhead;
- if (__sync_bool_compare_and_swap(b, oldhead, fhead))
+ if (__sync_bool_compare_and_swap(fh, oldhead, fhead))
break;
table->stats.s_gc_reclaim_retry++;
}
table->stats.s_garbage_retry++;
}
}
-
-/*
- * Free an arena slot immediately.
- *
- * When a slot that's actually in use is freed, we can't use this routine,
- * because a concurrent bucket scan might have a private pointer to the
- * object. However, if an insert fails due to a duplicate key, then we need
- * to put it back on the free list immediately.
- */
-static void
-CHashImmediateFree(CHashTable table, CHashPtr c)
-{
- CHashNode *n;
- CHashPtr *free;
- uint32 f_home;
- CHashPtr f;
-
- f_home = ((uint32) MyBackendId) % table->nfreelists;
- n = CHashTableGetNode(table, c);
- free = &table->freelist[f_home];
-
- for (;;)
- {
- f = *free;
- n->un.gcnext = f;
- if (__sync_bool_compare_and_swap(free, f, c))
- break;
- table->stats.s_free_retry++;
- }
-}