Improve logging of auto-vacuum and auto-analyze
authorStephen Frost <[email protected]>
Tue, 16 Mar 2021 18:46:48 +0000 (14:46 -0400)
committerStephen Frost <[email protected]>
Tue, 16 Mar 2021 18:46:48 +0000 (14:46 -0400)
When logging auto-vacuum and auto-analyze activity, include the I/O
timing if track_io_timing is enabled.  Also, for auto-analyze, add the
read rate and the dirty rate, similar to how that information has
historically been logged for auto-vacuum.

Stephen Frost and Jakub Wartak

Reviewed-By: Heikki Linnakangas, Tomas Vondra
Discussion: https://www.postgresql.org/message-id/VI1PR0701MB69603A433348EDCF783C6ECBF6EF0%40VI1PR0701MB6960.eurprd07.prod.outlook.com

doc/src/sgml/config.sgml
src/backend/access/heap/vacuumlazy.c
src/backend/commands/analyze.c

index a218d78bef4ab5f3b577d45553829daa28d5ba54..5e551b9f6d64c3596f445a8b20e9f2f7dbfb0bf2 100644 (file)
@@ -7457,9 +7457,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
         I/O timing information is
         displayed in <link linkend="monitoring-pg-stat-database-view">
         <structname>pg_stat_database</structname></link>, in the output of
-        <xref linkend="sql-explain"/> when the <literal>BUFFERS</literal> option is
-        used, and by <xref linkend="pgstatstatements"/>.  Only superusers can
-        change this setting.
+        <xref linkend="sql-explain"/> when the <literal>BUFFERS</literal> option
+        is used, by autovacuum for auto-vacuums and auto-analyzes, when
+        <xref linkend="guc-log-autovacuum-min-duration"/> is set and by
+        <xref linkend="pgstatstatements"/>.  Only superusers can change this
+        setting.
        </para>
       </listitem>
      </varlistentry>
index 366c122bd1ee62d9a770a32c28e4d2133721fced..8341879d89be8e1529acb5631c1711dcc3c1c06e 100644 (file)
@@ -441,6 +441,8 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
    TransactionId new_frozen_xid;
    MultiXactId new_min_multi;
    ErrorContextCallback errcallback;
+   PgStat_Counter startreadtime = 0;
+   PgStat_Counter startwritetime = 0;
 
    Assert(params != NULL);
    Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT);
@@ -455,6 +457,11 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
    {
        pg_rusage_init(&ru0);
        starttime = GetCurrentTimestamp();
+       if (track_io_timing)
+       {
+           startreadtime = pgStatBlockReadTime;
+           startwritetime = pgStatBlockWriteTime;
+       }
    }
 
    if (params->options & VACOPT_VERBOSE)
@@ -675,6 +682,17 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
                             (long long) VacuumPageDirty);
            appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
                             read_rate, write_rate);
+           if (track_io_timing)
+           {
+               appendStringInfoString(&buf, _("I/O Timings:"));
+               if (pgStatBlockReadTime - startreadtime > 0)
+                   appendStringInfo(&buf, _(" read=%.3f"),
+                                    (double) (pgStatBlockReadTime - startreadtime) / 1000);
+               if (pgStatBlockWriteTime - startwritetime > 0)
+                   appendStringInfo(&buf, _(" write=%.3f"),
+                                    (double) (pgStatBlockWriteTime - startwritetime) / 1000);
+               appendStringInfoChar(&buf, '\n');
+           }
            appendStringInfo(&buf, _("system usage: %s\n"), pg_rusage_show(&ru0));
            appendStringInfo(&buf,
                             _("WAL usage: %ld records, %ld full page images, %llu bytes"),
index 3a9f358dd4f72f390e41fcac6bc1ed7e16ee8a96..596bab2f104431c65a8575ac10f04e7c4b354583 100644 (file)
@@ -312,6 +312,11 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
    Oid         save_userid;
    int         save_sec_context;
    int         save_nestlevel;
+   int64       AnalyzePageHit = VacuumPageHit;
+   int64       AnalyzePageMiss = VacuumPageMiss;
+   int64       AnalyzePageDirty = VacuumPageDirty;
+   PgStat_Counter startreadtime = 0;
+   PgStat_Counter startwritetime = 0;
 
    if (inh)
        ereport(elevel,
@@ -346,8 +351,14 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
    /* measure elapsed time iff autovacuum logging requires it */
    if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
    {
+       if (track_io_timing)
+       {
+           startreadtime = pgStatBlockReadTime;
+           startwritetime = pgStatBlockWriteTime;
+       }
+
        pg_rusage_init(&ru0);
-       if (params->log_min_duration > 0)
+       if (params->log_min_duration >= 0)
            starttime = GetCurrentTimestamp();
    }
 
@@ -692,15 +703,90 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
    /* Log the action if appropriate */
    if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
    {
+       TimestampTz endtime = GetCurrentTimestamp();
+
        if (params->log_min_duration == 0 ||
-           TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
+           TimestampDifferenceExceeds(starttime, endtime,
                                       params->log_min_duration))
+       {
+           long        delay_in_ms;
+           double      read_rate = 0;
+           double      write_rate = 0;
+           StringInfoData buf;
+
+           /*
+            * Calculate the difference in the Page Hit/Miss/Dirty that
+            * happened as part of the analyze by subtracting out the
+            * pre-analyze values which we saved above.
+            */
+           AnalyzePageHit = VacuumPageHit - AnalyzePageHit;
+           AnalyzePageMiss = VacuumPageMiss - AnalyzePageMiss;
+           AnalyzePageDirty = VacuumPageDirty - AnalyzePageDirty;
+
+           /*
+            * We do not expect an analyze to take > 25 days and it simplifies
+            * things a bit to use TimestampDifferenceMilliseconds.
+            */
+           delay_in_ms = TimestampDifferenceMilliseconds(starttime, endtime);
+
+           /*
+            * Note that we are reporting these read/write rates in the same
+            * manner as VACUUM does, which means that while the 'average read
+            * rate' here actually corresponds to page misses and resulting
+            * reads which are also picked up by track_io_timing, if enabled,
+            * the 'average write rate' is actually talking about the rate of
+            * pages being dirtied, not being written out, so it's typical to
+            * have a non-zero 'avg write rate' while I/O Timings only reports
+            * reads.
+            *
+            * It's not clear that an ANALYZE will ever result in
+            * FlushBuffer() being called, but we track and support reporting
+            * on I/O write time in case that changes as it's practically free
+            * to do so anyway.
+            */
+
+           if (delay_in_ms > 0)
+           {
+               read_rate = (double) BLCKSZ * AnalyzePageMiss / (1024 * 1024) /
+                   (delay_in_ms / 1000.0);
+               write_rate = (double) BLCKSZ * AnalyzePageDirty / (1024 * 1024) /
+                   (delay_in_ms / 1000.0);
+           }
+
+           /*
+            * We split this up so we don't emit empty I/O timing values when
+            * track_io_timing isn't enabled.
+            */
+
+           initStringInfo(&buf);
+           appendStringInfo(&buf, _("automatic analyze of table \"%s.%s.%s\"\n"),
+                            get_database_name(MyDatabaseId),
+                            get_namespace_name(RelationGetNamespace(onerel)),
+                            RelationGetRelationName(onerel));
+           appendStringInfo(&buf, _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
+                            (long long) AnalyzePageHit,
+                            (long long) AnalyzePageMiss,
+                            (long long) AnalyzePageDirty);
+           appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
+                            read_rate, write_rate);
+           if (track_io_timing)
+           {
+               appendStringInfoString(&buf, _("I/O Timings:"));
+               if (pgStatBlockReadTime - startreadtime > 0)
+                   appendStringInfo(&buf, _(" read=%.3f"),
+                                    (double) (pgStatBlockReadTime - startreadtime) / 1000);
+               if (pgStatBlockWriteTime - startwritetime > 0)
+                   appendStringInfo(&buf, _(" write=%.3f"),
+                                    (double) (pgStatBlockWriteTime - startwritetime) / 1000);
+               appendStringInfoChar(&buf, '\n');
+           }
+           appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
+
            ereport(LOG,
-                   (errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
-                           get_database_name(MyDatabaseId),
-                           get_namespace_name(RelationGetNamespace(onerel)),
-                           RelationGetRelationName(onerel),
-                           pg_rusage_show(&ru0))));
+                   (errmsg_internal("%s", buf.data)));
+
+           pfree(buf.data);
+       }
    }
 
    /* Roll back any GUC changes executed by index functions */