Create a routine PageIndexMultiDelete() that replaces a loop around
author Tom Lane <[email protected]>
Tue, 22 Mar 2005 06:17:03 +0000 (06:17 +0000)
committer Tom Lane <[email protected]>
Tue, 22 Mar 2005 06:17:03 +0000 (06:17 +0000)
PageIndexTupleDelete() with a single pass of compactification ---
logic mostly lifted from PageRepairFragmentation.  I noticed while
profiling that a VACUUM that's cleaning up a whole lot of deleted
tuples would spend as much as a third of its CPU time in
PageIndexTupleDelete; not too surprising considering the loop method
was roughly O(N^2) in the number of tuples involved.

src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtxlog.c
src/backend/storage/page/bufpage.c
src/include/storage/bufpage.h

index 3e2a9010a4417a6dccbee36d308499ad099a99da..2b82a87a1d4f4e614f6bd177d2c4e5f83ac9fb5f 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.81 2004/12/31 21:59:22 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.82 2005/03/22 06:17:03 tgl Exp $
  *
  *     NOTES
  *        Postgres btree pages look like ordinary relation pages.      The opaque
@@ -639,17 +639,12 @@ _bt_delitems(Relation rel, Buffer buf,
                         OffsetNumber *itemnos, int nitems)
 {
        Page            page = BufferGetPage(buf);
-       int                     i;
 
        /* No ereport(ERROR) until changes are logged */
        START_CRIT_SECTION();
 
-       /*
-        * Delete the items in reverse order so we don't have to think about
-        * adjusting item numbers for previous deletions.
-        */
-       for (i = nitems - 1; i >= 0; i--)
-               PageIndexTupleDelete(page, itemnos[i]);
+       /* Fix the page */
+       PageIndexMultiDelete(page, itemnos, nitems);
 
        /* XLOG stuff */
        if (!rel->rd_istemp)
index 0bd2c418f03c228eda7bb1c7fa833506b44a91d7..ade60619a3d0de564bfbf56900f6b2838db4f85b 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.19 2004/12/31 21:59:22 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -411,12 +411,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
                unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
                unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
 
-               /* be careful to delete from back to front */
-               while (unused < unend)
-               {
-                       unend--;
-                       PageIndexTupleDelete(page, *unend);
-               }
+               PageIndexMultiDelete(page, unused, unend - unused);
        }
 
        PageSetLSN(page, lsn);
index 6d6957e279c7cc433643c363d6fcbb9606a0f0dc..c33a0011e600aca23b9a0d6d997707aea2205772 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.62 2004/12/31 22:01:10 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -274,13 +274,14 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
 }
 
 /*
- * sorting support for PageRepairFragmentation
+ * sorting support for PageRepairFragmentation and PageIndexMultiDelete
  */
 typedef struct itemIdSortData
 {
        int                     offsetindex;    /* linp array index */
        int                     itemoff;                /* page offset of item data */
        Size            alignedlen;             /* MAXALIGN(item data len) */
+       ItemIdData      olditemid;              /* saved copy of the item's original line pointer, reinstalled after compaction; used only in PageIndexMultiDelete */
 } itemIdSortData;
 typedef itemIdSortData *itemIdSort;
 
@@ -297,7 +298,8 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
  *
  * Frees fragmented space on a page.
  * It doesn't remove unused line pointers! Please don't change this.
- * This routine is usable for heap pages only.
+ *
+ * This routine is usable for heap pages only, but see PageIndexMultiDelete.
  *
  * Returns number of unused line pointers on page.     If "unused" is not NULL
  * then the unused[] array is filled with indexes of unused line pointers.
@@ -543,3 +545,135 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
                }
        }
 }
+
+
+/*
+ * PageIndexMultiDelete
+ *
+ * Delete the nitems tuples listed in itemnos[] from an index page with a
+ * single compaction pass.  It is considerably faster than a loop around
+ * PageIndexTupleDelete ... however, the caller *must* supply the array
+ * of item numbers to be deleted in item number order!
+ */
+void
+PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
+{
+       PageHeader      phdr = (PageHeader) page;
+       Offset          pd_lower = phdr->pd_lower;
+       Offset          pd_upper = phdr->pd_upper;
+       Offset          pd_special = phdr->pd_special;
+       itemIdSort      itemidbase,     /* palloc'd array describing the items we keep */
+                               itemidptr;      /* cursor into itemidbase[] */
+       ItemId          lp;
+       int                     nline,  /* loop bound: item numbers run 1..nline */
+                               nused;  /* count of items being kept */
+       int                     i;
+       Size            totallen;       /* sum of MAXALIGNed lengths of kept items */
+       Offset          upper;  /* running start of packed data while compacting */
+       Size            size;
+       unsigned        offset;
+       int                     nextitm;        /* index of next itemnos[] entry to match */
+       OffsetNumber offnum;
+
+       /*
+        * If there aren't very many items to delete, then retail
+        * PageIndexTupleDelete is the best way.  Delete the items in reverse
+        * order so we don't have to think about adjusting item numbers for
+        * previous deletions.
+        *
+        * TODO: tune the magic number here (the threshold of 2 below)
+        */
+       if (nitems <= 2)
+       {
+               while (--nitems >= 0)   /* walks itemnos[] from last entry to first */
+                       PageIndexTupleDelete(page, itemnos[nitems]);
+               return;
+       }
+
+       /*
+        * As with PageRepairFragmentation, paranoia seems justified: verify the page header's pointers before using them.
+        */
+       if (pd_lower < SizeOfPageHeaderData ||
+               pd_lower > pd_upper ||
+               pd_upper > pd_special ||
+               pd_special > BLCKSZ ||
+               pd_special != MAXALIGN(pd_special))
+               ereport(ERROR,  /* NOTE(review): _bt_delitems calls this routine inside its critical section -- confirm ERROR (and the palloc below) are acceptable there */
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
+                                        pd_lower, pd_upper, pd_special)));
+
+       /*
+        * Scan the item pointer array and build a list of just the ones we
+        * are going to keep.  Notice we do not modify the page yet, since
+        * we are still validity-checking (every item is checked, even those being deleted).
+        */
+       nline = PageGetMaxOffsetNumber(page);
+       itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nline);       /* one slot per existing item: upper bound on kept items */
+       itemidptr = itemidbase;
+       totallen = 0;
+       nused = 0;
+       nextitm = 0;
+       for (offnum = 1; offnum <= nline; offnum++)
+       {
+               lp = PageGetItemId(page, offnum);
+               size = ItemIdGetLength(lp);
+               offset = ItemIdGetOffset(lp);
+               if (offset < pd_upper ||        /* item data must lie within [pd_upper, pd_special] and start MAXALIGNed */
+                       (offset + size) > pd_special ||
+                       offset != MAXALIGN(offset))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                        errmsg("corrupted item pointer: offset = %u, size = %u",
+                                                       offset, (unsigned int) size)));
+
+               if (nextitm < nitems && offnum == itemnos[nextitm])     /* single forward cursor: only matches if itemnos[] is ascending */
+               {
+                       /* skip item to be deleted */
+                       nextitm++;
+               }
+               else
+               {
+                       itemidptr->offsetindex = nused; /* where it will go (0-based slot among kept items) */
+                       itemidptr->itemoff = offset;
+                       itemidptr->olditemid = *lp;     /* save the whole line pointer for reinstallation below */
+                       itemidptr->alignedlen = MAXALIGN(size);
+                       totallen += itemidptr->alignedlen;
+                       itemidptr++;
+                       nused++;
+               }
+       }
+
+       /* this will catch invalid or out-of-order itemnos[] (some entry was never matched by the scan) */
+       if (nextitm != nitems)
+               elog(ERROR, "incorrect index offsets supplied");
+
+       if (totallen > (Size) (pd_special - pd_lower))  /* kept items must fit between pd_lower and pd_special */
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg("corrupted item lengths: total %u, available space %u",
+                                               (unsigned int) totallen, pd_special - pd_lower)));
+
+       /* sort itemIdSortData array into decreasing itemoff order (presumably so the highest-offset data is moved first and no unmoved item gets overwritten -- confirm) */
+       qsort((char *) itemidbase, nused, sizeof(itemIdSortData),
+                 itemoffcompare);
+
+       /* compactify page: pack kept items downward from pd_special and install new itemids */
+       upper = pd_special;
+
+       for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++)
+       {
+               lp = PageGetItemId(page, itemidptr->offsetindex + 1);   /* offsetindex is 0-based, item numbers start at 1 */
+               upper -= itemidptr->alignedlen;
+               memmove((char *) page + upper,
+                               (char *) page + itemidptr->itemoff,
+                               itemidptr->alignedlen);
+               *lp = itemidptr->olditemid;     /* reinstall the saved line pointer in its new slot ... */
+               lp->lp_off = upper;     /* ... and point it at the relocated data */
+       }
+
+       phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);     /* line pointer array now holds only the kept items */
+       phdr->pd_upper = upper; /* start of the compacted item data */
+
+       pfree(itemidbase);
+}
index 71d043cd7f91fb938092bfc8f0d210407f5488cf..8b195132cb9e94e8b6f3a538f7193ca171650009 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.63 2004/12/31 22:03:42 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.64 2005/03/22 06:17:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -323,5 +323,6 @@ extern void PageRestoreTempPage(Page tempPage, Page oldPage);
 extern int     PageRepairFragmentation(Page page, OffsetNumber *unused);
 extern Size PageGetFreeSpace(Page page);
 extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
+extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
 
 #endif   /* BUFPAGE_H */