autovacuum: Drop orphan temp tables more quickly but with more caution.

author Robert Haas <[email protected]>

Mon, 21 Nov 2016 17:54:19 +0000 (12:54 -0500)

committer Robert Haas <[email protected]>

Mon, 21 Nov 2016 18:01:50 +0000 (13:01 -0500)
author Robert Haas <[email protected]>
Mon, 21 Nov 2016 17:54:19 +0000 (12:54 -0500)
committer Robert Haas <[email protected]>
Mon, 21 Nov 2016 18:01:50 +0000 (13:01 -0500)
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c

index e3a6911fbadfab09e20d2bf4e993938ae665dedd..954c1a1052d0a1ff8b5a0487ea620e27a97eefdb 100644 (file)
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -130,6 +130,17 @@ int            Log_autovacuum_min_duration = -1;
  #define MIN_AUTOVAC_SLEEPTIME 100.0        /* milliseconds */
  #define MAX_AUTOVAC_SLEEPTIME 300      /* seconds */
  
+/*
+ * Maximum number of orphan temporary tables to drop in a single transaction.
+ * (If this is too high, we might run out of heavyweight locks.)
+ */
+#define MAX_ORPHAN_ITEMS       50
+
+/*
+ * After this many failures, stop trying to drop orphan temporary tables.
+ */
+#define MAX_ORPHAN_DROP_FAILURE    10
+
  /* Flags to tell if we are in an autovacuum process */
  static bool am_autovacuum_launcher = false;
  static bool am_autovacuum_worker = false;
@@ -1887,6 +1898,8 @@ do_autovacuum(void)
     HeapScanDesc relScan;
     Form_pg_database dbForm;
     List       *table_oids = NIL;
+   List       *orphan_oids = NIL;
+   List       *pending_oids = NIL;
     HASHCTL     ctl;
     HTAB       *table_toast_map;
     ListCell   *volatile cell;
@@ -1895,6 +1908,7 @@ do_autovacuum(void)
     BufferAccessStrategy bstrategy;
     ScanKeyData key;
     TupleDesc   pg_class_desc;
+   int         orphan_failures;
     int         effective_multixact_freeze_max_age;
  
     /*
@@ -2038,33 +2052,11 @@ do_autovacuum(void)
             if (backendID == MyBackendId || BackendIdGetProc(backendID) == NULL)
             {
                 /*
-                * We found an orphan temp table (which was probably left
-                * behind by a crashed backend).  If it's so old as to need
-                * vacuum for wraparound, forcibly drop it.  Otherwise just
-                * log a complaint.
+                * We found an orphan temp table which was probably left
+                * behind by a crashed backend.  Remember it, so we can attempt
+                * to drop it.
                  */
-               if (wraparound)
-               {
-                   ObjectAddress object;
-
-                   ereport(LOG,
-                           (errmsg("autovacuum: dropping orphan temp table \"%s\".\"%s\" in database \"%s\"",
-                                get_namespace_name(classForm->relnamespace),
-                                   NameStr(classForm->relname),
-                                   get_database_name(MyDatabaseId))));
-                   object.classId = RelationRelationId;
-                   object.objectId = relid;
-                   object.objectSubId = 0;
-                   performDeletion(&object, DROP_CASCADE, PERFORM_DELETION_INTERNAL);
-               }
-               else
-               {
-                   ereport(LOG,
-                           (errmsg("autovacuum: found orphan temp table \"%s\".\"%s\" in database \"%s\"",
-                                get_namespace_name(classForm->relnamespace),
-                                   NameStr(classForm->relname),
-                                   get_database_name(MyDatabaseId))));
-               }
+               orphan_oids = lappend_oid(orphan_oids, relid);
             }
         }
         else
@@ -2161,6 +2153,115 @@ do_autovacuum(void)
     heap_endscan(relScan);
     heap_close(classRel, AccessShareLock);
  
+   /*
+    * Loop through orphan temporary tables and drop them in batches.  If
+    * we're unable to drop one particular table, we'll retry to see if we
+    * can drop others, but if we fail too many times we'll give up and proceed
+    * with our regular work, so that this step hopefully can't wedge
+    * autovacuum for too long.
+    */
+   while (list_length(orphan_oids) > 0 &&
+          orphan_failures < MAX_ORPHAN_DROP_FAILURE)
+   {
+       Oid             relid = linitial_oid(orphan_oids);
+       ObjectAddress   object;
+       char           *namespace = get_namespace_name(get_rel_namespace(relid));
+       char           *relname = get_rel_name(relid);
+
+       orphan_oids = list_delete_first(orphan_oids);
+
+       PG_TRY();
+       {
+           ereport(LOG,
+                   (errmsg("autovacuum: dropping orphan temp table \"%s\".\"%s\" in database \"%s\"",
+                           namespace, relname,
+                           get_database_name(MyDatabaseId))));
+           object.classId = RelationRelationId;
+           object.objectId = relid;
+           object.objectSubId = 0;
+           performDeletion(&object, DROP_CASCADE, PERFORM_DELETION_INTERNAL);
+
+           /*
+            * This orphan table has been dropped correctly, add it to the
+            * list of tables whose drop should be attempted again if an
+            * error after in the same transaction.
+            */
+           pending_oids = lappend_oid(pending_oids, relid);
+       }
+       PG_CATCH();
+       {
+           /* Abort the current transaction. */
+           HOLD_INTERRUPTS();
+
+           errcontext("dropping of orphan temp table \"%s\".\"%s\" in database \"%s\"",
+                      namespace, relname,
+                      get_database_name(MyDatabaseId));
+
+           EmitErrorReport();
+
+           /* this resets the PGXACT flags too */
+           AbortOutOfAnyTransaction();
+           FlushErrorState();
+
+           /*
+            * Any tables were succesfully dropped before the failure now
+            * need to be dropped again.  Add them back into the list, but
+            * don't retry the table that failed.
+            */
+           orphan_oids = list_concat(orphan_oids, pending_oids);
+           orphan_failures++;
+
+           /* Start a new transaction. */
+           StartTransactionCommand();
+
+           /* StartTransactionCommand changed elsewhere the memory context */
+           MemoryContextSwitchTo(AutovacMemCxt);
+
+           RESUME_INTERRUPTS();
+       }
+       PG_END_TRY();
+
+       /*
+        * If we've successfully dropped quite a few tables, commit the
+        * transaction and begin a new one.  The main point of this is to
+        * avoid accumulating too many locks and blowing out the lock table,
+        * but it also minimizes the amount of work that will have to be rolled
+        * back if we fail to drop some table later in the list.
+        */
+       if (list_length(pending_oids) >= MAX_ORPHAN_ITEMS)
+       {
+           CommitTransactionCommand();
+           StartTransactionCommand();
+
+           /* StartTransactionCommand changed elsewhere */
+           MemoryContextSwitchTo(AutovacMemCxt);
+
+           list_free(pending_oids);
+           pending_oids = NIL;
+       }
+
+       pfree(relname);
+       pfree(namespace);
+   }
+
+   /*
+    * Commit current transaction to finish the cleanup done previously and
+    * restart a new one to not bloat the activity of the following steps.
+    * This needs to happen only if there are any items thought as previously
+    * pending, but are actually not as the last transaction doing the cleanup
+    * has been successful.
+    */
+   if (list_length(pending_oids) > 0)
+   {
+       CommitTransactionCommand();
+       StartTransactionCommand();
+
+       /* StartTransactionCommand changed elsewhere */
+       MemoryContextSwitchTo(AutovacMemCxt);
+
+       list_free(pending_oids);
+   }
+
     /*
      * Create a buffer access strategy object for VACUUM to use.  We want to
      * use the same one across all the vacuum operations we perform, since the
author	Robert Haas <[email protected]>
	Mon, 21 Nov 2016 17:54:19 +0000 (12:54 -0500)
committer	Robert Haas <[email protected]>
	Mon, 21 Nov 2016 18:01:50 +0000 (13:01 -0500)