Adapt code to use compare-and-swap.
author Robert Haas <[email protected]>
Thu, 26 Jul 2012 18:45:10 +0000 (14:45 -0400)
committer Robert Haas <[email protected]>
Thu, 26 Jul 2012 18:45:10 +0000 (14:45 -0400)
src/backend/utils/hash/chash.c

index 0ed2721f5115a7be81f2328b8f9f39116f3151cc..9e0ed3a120abc4ff2643fd0810dc0ee7fff6158c 100644 (file)
@@ -110,7 +110,6 @@ typedef uint32 CHashPtr;
 typedef struct
 {
        CHashPtr        head;                           /* arena offset of bucket head */
-       slock_t         mutex;                          /* mutual exclusion for changes */
 } CHashBucket;
 
 /*
@@ -328,20 +327,11 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc)
 
        /* Initialize all three sets of lists to empty. */
        for (i = 0; i < table->nbuckets; ++i)
-       {
                table->bucket[i].head = InvalidCHashPtr;
-               SpinLockInit(&table->bucket[i].mutex);
-       }
        for (i = 0; i < table->ngarbage; ++i)
-       {
                table->garbage[i].head = InvalidCHashPtr;
-               SpinLockInit(&table->garbage[i].mutex);
-       }
        for (i = 0; i < table->nfreelists; ++i)
-       {
                table->freelist[i].head = InvalidCHashPtr;
-               SpinLockInit(&table->freelist[i].mutex);
-       }
 
        /* Put all arena elements on the free lists. */
        for (i = 0; i < table->arena_limit; ++i)
@@ -507,24 +497,14 @@ retry:
 
        if (!found)
        {
-               bool    fail = false;
-
-               /* If we didn't find the key, try to insert. */
-               nnew->next = c;
-               SpinLockAcquire(&table->bucket[bucket].mutex);
-               if (*p == c)
-                       *p = new;
-               else
-                       fail = true;
-               SpinLockRelease(&table->bucket[bucket].mutex);
-
                /*
-                * If we failed, it means that somebody concurrently inserted or
+                * If we fail, it means that somebody concurrently inserted or
                 * deleted an element.  The correct insertion point might have changed,
                 * or the key we're trying to insert might now be present when it
                 * wasn't before, so we'll have to search the bucket chain anew.
                 */
-               if (fail)
+               nnew->next = c;
+               if (!__sync_bool_compare_and_swap(p, c, new))
                        goto retry;
        }
        else
@@ -610,27 +590,22 @@ retry:
        if (found)
        {
                CHashPtr        cc;
-               bool            removed = false;
 
                /*
                 * Really do the deletion.  Since we've held no lock up to this
                 * point, it may well be that someone else has deleted the item out
                 * from under us, so we recheck that after taking the lock.
                 */
-               SpinLockAcquire(&table->bucket[bucket].mutex);
-               cc = n->next;
-               if (CHashPtrIsMarked(cc))
-                       found = false;
-               else
+               do
                {
-                       n->next = CHashPtrMark(cc);
-                       if (*p == c)
+                       cc = n->next;
+                       if (CHashPtrIsMarked(cc))
                        {
-                               *p = cc;
-                               removed = true;
+                               found = false;
+                               break;
                        }
-               }
-               SpinLockRelease(&table->bucket[bucket].mutex);
+               } while (!__sync_bool_compare_and_swap(&n->next,
+                                                                                          cc, CHashPtrMark(cc)));
 
                /*
                 * At this point the deletion is done.  However, it's possible that
@@ -641,7 +616,8 @@ retry:
                 * case we need to rescan the list and remove any deleted items we
                 * find.
                 */
-               if (removed)
+               Assert(!found || !CHashPtrIsMarked(cc));   /* cc is marked only if !found */
+               if (found && __sync_bool_compare_and_swap(p, c, cc))
                        CHashAddToGarbage(table, bucket, c);
                else
                {
@@ -675,7 +651,6 @@ CHashAllocate(CHashTable table)
        uint32          f_current = f_home;
        CHashPtr        new;
        CHashPtr        garbage;
-       volatile CHashTable vtable = table;
 
        /* If this process hasn't initialized gc_next yet, do that now. */
        if (table->gc_pid != MyProcPid)
@@ -687,23 +662,21 @@ CHashAllocate(CHashTable table)
        /* Loop until we allocate a buffer. */
        for (;;)
        {
+               volatile CHashBucket *b;
+
                /*
-                * Check one freelist for an available arena slot.  To minimize
-                * spinlock traffic, we do an unlocked test first.  We must recheck
-                * after acquiring the spinlock.
+                * Attempt to pop a buffer from a freelist using compare-and-swap.
                 */
-               if (vtable->freelist[f_current].head != InvalidCHashPtr)
+               b = &table->freelist[f_current];
+               new = b->head;
+               if (new != InvalidCHashPtr)
                {
-                       SpinLockAcquire(&vtable->freelist[f_current].mutex);
-                       new = vtable->freelist[f_current].head;
-                       if (new != InvalidCHashPtr)
-                       {
-                               CHashNode          *n = CHashTableGetNode(table, new);
-                               vtable->freelist[f_current].head = n->un.gcnext;
-                               SpinLockRelease(&vtable->freelist[f_current].mutex);
+                       volatile CHashNode  *n;
+
+                       n = CHashTableGetNode(table, new);
+                       pg_read_barrier_depends();
+                       if (__sync_bool_compare_and_swap(&b->head, new, n->un.gcnext))
                                return new;
-                       }
-                       SpinLockRelease(&vtable->freelist[f_current].mutex);
                }
 
                /*
@@ -711,86 +684,79 @@ CHashAllocate(CHashTable table)
                 * find any, try to garbage collect them.
                 */
                table->gc_next = (table->gc_next + 1) % table->ngarbage;
-               if (vtable->garbage[table->gc_next].head != InvalidCHashPtr)
+               b = &table->garbage[table->gc_next];
+               garbage = b->head;
+               if (garbage != InvalidCHashPtr &&
+                       __sync_bool_compare_and_swap(&b->head, garbage, InvalidCHashPtr))
                {
-                       volatile CHashBucket *b = &vtable->freelist[f_current];
-                       volatile CHashNode  *n;
-                       uint32          i;
+                       CHashPtr        fhead;
+                       CHashPtr        fcurrent;
+                       CHashPtr        fnext;
+                       CHashPtr        oldhead;
                        uint64          chash_bucket;
+                       uint32          i;
+                       volatile CHashNode *n;
+
+                       /*
+                        * Be certain that the writes associated with popping the
+                        * garbage list are complete before we start checking whether
+                        * the garbage is recyclable.
+                        */
+                       pg_memory_barrier();
+
+                       /*
+                        * Spin until garbage is recyclable.  We could have a "soft"
+                        * version of this that merely requeues the garbage if it's not
+                        * immediately recyclable, but it's not clear that we need
+                        * such a thing.  On the flip side we might want to eventually
+                        * enter a longer sleep here, or PANIC, but it's not clear
+                        * exactly how to calibrate that, either.
+                        */
+                       chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next;
+                       for (i = 0; i < ProcGlobal->allProcCount; i++)
+                       {
+                               PGPROC     *proc = &ProcGlobal->allProcs[i];
 
-                       /* Pop garbage off list. */
-                       SpinLockAcquire(&b->mutex);
-                       garbage = b->head;
-                       b->head = InvalidCHashPtr;
-                       SpinLockRelease(&b->mutex);
+                               while (proc->chash_bucket == chash_bucket)
+                                       ;
+                       }
 
-                       /* Anything to recycle? */
-                       if (garbage != InvalidCHashPtr)
-                       {
-                               CHashPtr        fhead;
-                               CHashPtr        fcurrent;
-                               CHashPtr        fnext;
-
-                               /*
-                                * Be certain that the writes associated with popping the
-                                * garbage list are complete before we start checking whether
-                                * the garbage is recycleable.
-                                */
-                               pg_memory_barrier();
-
-                               /*
-                                * Spin until garbage is recyclable.  We could have a "soft"
-                                * version of this that merely requeues the garbage if it's not
-                                * immediately recycleable, but it's not clear that we need
-                                * such a thing.  On the flip side we might want to eventually
-                                * enter a longer sleep here, or PANIC, but it's not clear
-                                * exactly how to calibrate that, either.
-                                */
-                               chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next;
-                               for (i = 0; i < ProcGlobal->allProcCount; i++)
-                               {
-                                       PGPROC     *proc = &ProcGlobal->allProcs[i];
-
-                                       while (proc->chash_bucket == chash_bucket)
-                                               ;
-                               }
-
-                               /*
-                                * Be certain that all prior reads are done before starting
-                                * the next batch of writes.
-                                */
-                               pg_memory_barrier();
-
-                               /* Remove one item from list to satisfy current allocation. */
-                               new = garbage;
-                               n = CHashTableGetNode(table, new);
-                               fhead = n->un.gcnext;
-
-                               /* If that's all there was, we're done. */
-                               if (fhead == InvalidCHashPtr)
-                                       return new;
-
-                               /* Walk list of reclaimed elements to end. */
-                               fcurrent = fhead;
-                               for (;;)
-                               {
-                                       n = CHashTableGetNode(table, fcurrent);
-                                       fnext = n->un.gcnext;
-                                       if (fnext == InvalidCHashPtr)
-                                               break;
-                                       fcurrent = fnext;
-                               }
-
-                               /* Push reclaimed elements onto home free list. */
-                               b = &vtable->freelist[f_current];
-                               SpinLockAcquire(&b->mutex);
-                               n->next = b->head;
-                               b->head = fhead;
-                               SpinLockRelease(&b->mutex);
-
-                               /* Return the element we saved for ourselves. */
+                       /*
+                        * Be certain that all prior reads are done before starting
+                        * the next batch of writes.
+                        */
+                       pg_memory_barrier();
+
+                       /* Remove one item from list to satisfy current allocation. */
+                       new = garbage;
+                       n = CHashTableGetNode(table, new);
+                       fhead = n->un.gcnext;
+
+                       /* If that's all there was, we're done. */
+                       if (fhead == InvalidCHashPtr)
                                return new;
+
+                       /* Walk list of reclaimed elements to end. */
+                       fcurrent = fhead;
+                       for (;;)
+                       {
+                               n = CHashTableGetNode(table, fcurrent);
+                               fnext = n->un.gcnext;
+                               if (fnext == InvalidCHashPtr)
+                                       break;
+                               fcurrent = fnext;
                        }
+
+                       /* Push reclaimed elements onto home free list; retry if CAS fails. */
+                       b = &table->freelist[f_home];
+                       do
+                       {
+                               oldhead = b->head;
+                               n->un.gcnext = oldhead;
+                       } while (!__sync_bool_compare_and_swap(&b->head, oldhead, fhead));
+
+                       /* Return the element we saved for ourselves. */
+                       return new;
                }
 
                /* Advance to next freelist. */
@@ -812,6 +778,7 @@ static void
 CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c)
 {
        uint32          garbage_bucket;
+       CHashPtr        g;
        volatile CHashNode *n;
        volatile CHashBucket   *garbage;
 
@@ -819,10 +786,11 @@ CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c)
        n = CHashTableGetNode(table, c);
        garbage = &table->garbage[garbage_bucket];
 
-       SpinLockAcquire(&garbage->mutex);
-       n->un.gcnext = garbage->head;
-       garbage->head = c;
-       SpinLockRelease(&garbage->mutex);
+       do
+       {
+               g = garbage->head;
+               n->un.gcnext = g;
+       } while (!__sync_bool_compare_and_swap(&garbage->head, g, c));
 }
 
 /*
@@ -836,15 +804,20 @@ CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c)
 static void
 CHashImmediateFree(CHashTable table, CHashPtr c)
 {
-       volatile CHashTable vtable = table;
        volatile CHashNode *n;
-       uint32          f_home = ((uint32) MyBackendId) % table->nfreelists;
+       volatile CHashBucket   *free;
+       uint32          f_home;
+       CHashPtr        f;
 
+       f_home = ((uint32) MyBackendId) % table->nfreelists;
        n = CHashTableGetNode(table, c);
-       SpinLockAcquire(&vtable->freelist[f_home].mutex);
-       n->un.gcnext = vtable->freelist[f_home].head;
-       vtable->freelist[f_home].head = c;
-       SpinLockRelease(&vtable->freelist[f_home].mutex);
+       free = &table->freelist[f_home];
+
+       do
+       {
+               f = free->head;
+               n->un.gcnext = f;
+       } while (!__sync_bool_compare_and_swap(&free->head, f, c));
 }
 
 /*
@@ -866,7 +839,6 @@ CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp,
 {
        CHashPtr        c = *cp;
        CHashPtr        cc;
-       bool            retry_needed = false;
 
        do
        {
@@ -878,29 +850,16 @@ CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp,
                 */
                pg_read_barrier_depends();
 
-               /* Read next-pointer of deleted node. */
-               n = CHashTableGetNode(table, c);
-               cc = n->next;
-
                /*
-                * Redirect next-pointer of prior node to next-pointer of deleted
-                * node, unless someone else has meanwhile modified the bucket
-                * chain.
-                */
-               SpinLockAcquire(&table->bucket[bucket].mutex);
-               if (*p == c)
-                       *p = cc;
-               else
-                       retry_needed = true;
-               SpinLockRelease(&table->bucket[bucket].mutex);
-
-               /*
-                * If we failed to update the logical pointer, caller must rescan
+                * Attempt to remove the deleted node from the linked list.
+                * If we fail to update the logical pointer, caller must rescan
                 * the bucket.  There's no intelligent way to continue the scan,
                 * because for all we know the node that contains the pointer we're
                 * try to update may itself be deleted by now.
                 */
-               if (retry_needed)
+               n = CHashTableGetNode(table, c);
+               cc = n->next;
+               if (!__sync_bool_compare_and_swap(p, c, cc))
                        return true;
 
                /* Add c to garbage list. */