From: Robert Haas Date: Thu, 26 Jul 2012 18:45:10 +0000 (-0400) Subject: Adapt code to use compare-and-swap. X-Git-Url: http://git.postgresql.org/gitweb/-?a=commitdiff_plain;h=5b8a7b98c9c7b0a18c482f11853aefecdcc772c8;p=users%2Frhaas%2Fpostgres.git Adapt code to use compare-and-swap. --- diff --git a/src/backend/utils/hash/chash.c b/src/backend/utils/hash/chash.c index 0ed2721f51..9e0ed3a120 100644 --- a/src/backend/utils/hash/chash.c +++ b/src/backend/utils/hash/chash.c @@ -110,7 +110,6 @@ typedef uint32 CHashPtr; typedef struct { CHashPtr head; /* arena offset of bucket head */ - slock_t mutex; /* mutual exclusion for changes */ } CHashBucket; /* @@ -328,20 +327,11 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc) /* Initialize all three sets of lists to empty. */ for (i = 0; i < table->nbuckets; ++i) - { table->bucket[i].head = InvalidCHashPtr; - SpinLockInit(&table->bucket[i].mutex); - } for (i = 0; i < table->ngarbage; ++i) - { table->garbage[i].head = InvalidCHashPtr; - SpinLockInit(&table->garbage[i].mutex); - } for (i = 0; i < table->nfreelists; ++i) - { table->freelist[i].head = InvalidCHashPtr; - SpinLockInit(&table->freelist[i].mutex); - } /* Put all arena elements on the free lists. */ for (i = 0; i < table->arena_limit; ++i) @@ -507,24 +497,14 @@ retry: if (!found) { - bool fail = false; - - /* If we didn't find the key, try to insert. */ - nnew->next = c; - SpinLockAcquire(&table->bucket[bucket].mutex); - if (*p == c) - *p = new; - else - fail = true; - SpinLockRelease(&table->bucket[bucket].mutex); - /* - * If we failed, it means that somebody concurrently inserted or + * If we fail, it means that somebody concurrently inserted or * deleted an element. The correct insertion point might have changed, * or the key we're trying to insert might now be present when it * wasn't before, so we'll have to search the bucket chain anew. 
*/ - if (fail) + nnew->next = c; + if (!__sync_bool_compare_and_swap(p, c, new)) goto retry; } else @@ -610,27 +590,22 @@ retry: if (found) { CHashPtr cc; - bool removed = false; /* * Really do the deletion. Since we've held no lock up to this * point, it may well be that someone else has deleted the item out * from under us, so we recheck that after taking the lock. */ - SpinLockAcquire(&table->bucket[bucket].mutex); - cc = n->next; - if (CHashPtrIsMarked(cc)) - found = false; - else + do { - n->next = CHashPtrMark(cc); - if (*p == c) + cc = n->next; + if (CHashPtrIsMarked(cc)) { - *p = cc; - removed = true; + found = false; + break; } - } - SpinLockRelease(&table->bucket[bucket].mutex); + } while (!__sync_bool_compare_and_swap(&n->next, + cc, CHashPtrMark(cc))); /* * At this point the deletion is done. However, it's possible that @@ -641,7 +616,8 @@ retry: * case we need to rescan the list and remove any deleted items we * find. */ - if (removed) + Assert(!CHashPtrIsMarked(cc)); + if (__sync_bool_compare_and_swap(p, c, cc)) CHashAddToGarbage(table, bucket, c); else { @@ -675,7 +651,6 @@ CHashAllocate(CHashTable table) uint32 f_current = f_home; CHashPtr new; CHashPtr garbage; - volatile CHashTable vtable = table; /* If this process hasn't initialized gc_next yet, do that now. */ if (table->gc_pid != MyProcPid) @@ -687,23 +662,21 @@ CHashAllocate(CHashTable table) /* Loop until we allocate a buffer. */ for (;;) { + volatile CHashBucket *b; + /* - * Check one freelist for an available arena slot. To minimize - * spinlock traffic, we do an unlocked test first. We must recheck - * after acquiring the spinlock. + * Attempt to pop a buffer from a freelist using compare-and-swap. 
*/ - if (vtable->freelist[f_current].head != InvalidCHashPtr) + b = &table->freelist[f_current]; + new = b->head; + if (new != InvalidCHashPtr) { - SpinLockAcquire(&vtable->freelist[f_current].mutex); - new = vtable->freelist[f_current].head; - if (new != InvalidCHashPtr) - { - CHashNode *n = CHashTableGetNode(table, new); - vtable->freelist[f_current].head = n->un.gcnext; - SpinLockRelease(&vtable->freelist[f_current].mutex); + volatile CHashNode *n; + + n = CHashTableGetNode(table, new); + pg_read_barrier_depends(); + if (__sync_bool_compare_and_swap(&b->head, new, n->un.gcnext)) return new; - } - SpinLockRelease(&vtable->freelist[f_current].mutex); } /* @@ -711,86 +684,79 @@ CHashAllocate(CHashTable table) * find any, try to garbage collect them. */ table->gc_next = (table->gc_next + 1) % table->ngarbage; - if (vtable->garbage[table->gc_next].head != InvalidCHashPtr) + b = &table->garbage[table->gc_next]; + garbage = b->head; + if (garbage != InvalidCHashPtr && + __sync_bool_compare_and_swap(&b->head, garbage, InvalidCHashPtr)) { - volatile CHashBucket *b = &vtable->freelist[f_current]; - volatile CHashNode *n; - uint32 i; + CHashPtr fhead; + CHashPtr fcurrent; + CHashPtr fnext; + CHashPtr oldhead; uint64 chash_bucket; + uint32 i; + volatile CHashNode *n; + + /* + * Be certain that the writes associated with popping the + * garbage list are complete before we start checking whether + * the garbage is recycleable. + */ + pg_memory_barrier(); + + /* + * Spin until garbage is recyclable. We could have a "soft" + * version of this that merely requeues the garbage if it's not + * immediately recycleable, but it's not clear that we need + * such a thing. On the flip side we might want to eventually + * enter a longer sleep here, or PANIC, but it's not clear + * exactly how to calibrate that, either. 
+ */ + chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next; + for (i = 0; i < ProcGlobal->allProcCount; i++) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; - /* Pop garbage off list. */ - SpinLockAcquire(&b->mutex); - garbage = b->head; - b->head = InvalidCHashPtr; - SpinLockRelease(&b->mutex); + while (proc->chash_bucket == chash_bucket) + ; + } - /* Anything to recycle? */ - if (garbage != InvalidCHashPtr) - { - CHashPtr fhead; - CHashPtr fcurrent; - CHashPtr fnext; - - /* - * Be certain that the writes associated with popping the - * garbage list are complete before we start checking whether - * the garbage is recycleable. - */ - pg_memory_barrier(); - - /* - * Spin until garbage is recyclable. We could have a "soft" - * version of this that merely requeues the garbage if it's not - * immediately recycleable, but it's not clear that we need - * such a thing. On the flip side we might want to eventually - * enter a longer sleep here, or PANIC, but it's not clear - * exactly how to calibrate that, either. - */ - chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next; - for (i = 0; i < ProcGlobal->allProcCount; i++) - { - PGPROC *proc = &ProcGlobal->allProcs[i]; - - while (proc->chash_bucket == chash_bucket) - ; - } - - /* - * Be certain that all prior reads are done before starting - * the next batch of writes. - */ - pg_memory_barrier(); - - /* Remove one item from list to satisfy current allocation. */ - new = garbage; - n = CHashTableGetNode(table, new); - fhead = n->un.gcnext; - - /* If that's all there was, we're done. */ - if (fhead == InvalidCHashPtr) - return new; - - /* Walk list of reclaimed elements to end. */ - fcurrent = fhead; - for (;;) - { - n = CHashTableGetNode(table, fcurrent); - fnext = n->un.gcnext; - if (fnext == InvalidCHashPtr) - break; - fcurrent = fnext; - } - - /* Push reclaimed elements onto home free list. 
*/ - b = &vtable->freelist[f_current]; - SpinLockAcquire(&b->mutex); - n->next = b->head; - b->head = fhead; - SpinLockRelease(&b->mutex); - - /* Return the element we saved for ourselves. */ + /* + * Be certain that all prior reads are done before starting + * the next batch of writes. + */ + pg_memory_barrier(); + + /* Remove one item from list to satisfy current allocation. */ + new = garbage; + n = CHashTableGetNode(table, new); + fhead = n->un.gcnext; + + /* If that's all there was, we're done. */ + if (fhead == InvalidCHashPtr) return new; + + /* Walk list of reclaimed elements to end. */ + fcurrent = fhead; + for (;;) + { + n = CHashTableGetNode(table, fcurrent); + fnext = n->un.gcnext; + if (fnext == InvalidCHashPtr) + break; + fcurrent = fnext; } + + /* Push reclaimed elements onto home free list; retry until CAS succeeds. */ + b = &table->freelist[f_home]; + do + { + oldhead = b->head; + n->un.gcnext = oldhead; + } while (!__sync_bool_compare_and_swap(&b->head, oldhead, fhead)); + + /* Return the element we saved for ourselves. */ + return new; } /* Advance to next freelist. 
*/ @@ -812,6 +778,7 @@ static void CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c) { uint32 garbage_bucket; + CHashPtr g; volatile CHashNode *n; volatile CHashBucket *garbage; @@ -819,10 +786,11 @@ CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c) n = CHashTableGetNode(table, c); garbage = &table->garbage[garbage_bucket]; - SpinLockAcquire(&garbage->mutex); - n->un.gcnext = garbage->head; - garbage->head = c; - SpinLockRelease(&garbage->mutex); + do + { + g = garbage->head; + n->un.gcnext = g; + } while (!__sync_bool_compare_and_swap(&garbage->head, g, c)); } /* @@ -836,15 +804,20 @@ CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c) static void CHashImmediateFree(CHashTable table, CHashPtr c) { - volatile CHashTable vtable = table; volatile CHashNode *n; - uint32 f_home = ((uint32) MyBackendId) % table->nfreelists; + volatile CHashBucket *free; + uint32 f_home; + CHashPtr f; + f_home = ((uint32) MyBackendId) % table->nfreelists; n = CHashTableGetNode(table, c); - SpinLockAcquire(&vtable->freelist[f_home].mutex); - n->un.gcnext = vtable->freelist[f_home].head; - vtable->freelist[f_home].head = c; - SpinLockRelease(&vtable->freelist[f_home].mutex); + free = &table->freelist[f_home]; + + do + { + f = free->head; + n->un.gcnext = f; + } while (!__sync_bool_compare_and_swap(&free->head, f, c)); } /* @@ -866,7 +839,6 @@ CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp, { CHashPtr c = *cp; CHashPtr cc; - bool retry_needed = false; do { @@ -878,29 +850,16 @@ CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp, */ pg_read_barrier_depends(); - /* Read next-pointer of deleted node. */ - n = CHashTableGetNode(table, c); - cc = n->next; - /* - * Redirect next-pointer of prior node to next-pointer of deleted - * node, unless someone else has meanwhile modified the bucket - * chain. 
- */ - SpinLockAcquire(&table->bucket[bucket].mutex); - if (*p == c) - *p = cc; - else - retry_needed = true; - SpinLockRelease(&table->bucket[bucket].mutex); - - /* - * If we failed to update the logical pointer, caller must rescan + * Attempt to remove the deleted node from the linked list. + * If we fail to update the logical pointer, caller must rescan * the bucket. There's no intelligent way to continue the scan, * because for all we know the node that contains the pointer we're * try to update may itself be deleted by now. */ - if (retry_needed) + n = CHashTableGetNode(table, c); + cc = n->next; + if (!__sync_bool_compare_and_swap(p, c, cc)) return true; /* Add c to garbage list. */