pg_amcheck: avoid unhelpful verification attempts.
authorPeter Geoghegan <[email protected]>
Wed, 13 Oct 2021 21:08:12 +0000 (14:08 -0700)
committerPeter Geoghegan <[email protected]>
Wed, 13 Oct 2021 21:08:12 +0000 (14:08 -0700)
Avoid calling contrib/amcheck functions with relations that are
unsuitable for checking.  Specifically, don't attempt verification of
temporary relations, or indexes whose pg_index entry indicates that the
index is invalid, or not ready.

These relations are not supported by any of the contrib/amcheck
functions, for reasons that are pretty fundamental.  For example, the
implementation of REINDEX CONCURRENTLY can add its own "transient"
pg_index entries, which has rather unclear implications for the B-Tree
verification functions, at least in the general case -- so they just
treat it as an error.  It falls to the amcheck caller (in this case
pg_amcheck) to deal with the situation at a higher level.

pg_amcheck now simply treats these conditions as additional "visibility
concerns" when it queries system catalogs.  This is a little arbitrary.
It seems to have the least problems among any of the available
alternatives.

Author: Mark Dilger <[email protected]>
Reported-By: Alexander Lakhin <[email protected]>
Reviewed-By: Peter Geoghegan <[email protected]>
Reviewed-By: Robert Haas <[email protected]>
Bug: #17212
Discussion: https://postgr.es/m/17212-34dd4a1d6bba98bf@postgresql.org
Backpatch: 14-, where pg_amcheck was introduced.

doc/src/sgml/ref/pg_amcheck.sgml
src/bin/pg_amcheck/pg_amcheck.c
src/bin/pg_amcheck/t/006_bad_targets.pl [new file with mode: 0644]

index 1fd0ecd9114b5cb7e7449d18c34b92f3cabeb205..cfef6c04655de48ea8089e0b495dbf12e97568c1 100644 (file)
@@ -435,6 +435,18 @@ PostgreSQL documentation
    </variablelist>
   </para>
 
+  <warning>
+   <para>
+    The extra checks performed against B-tree indexes when the
+    <option>--parent-check</option> option or the
+    <option>--rootdescend</option> option is specified require
+    relatively strong relation-level locks.  These checks are the only
+    checks that will block concurrent data modification from
+    <command>INSERT</command>, <command>UPDATE</command>, and
+    <command>DELETE</command> commands.
+   </para>
+  </warning>
+
   <para>
    The following command-line options control the connection to the server:
 
index ec04b977de0424fc970b2e7e63ab3676a4f4dab2..d4a53c8e6369f67a30ebbad5f5b245bf24bbd4b0 100644 (file)
@@ -816,6 +816,9 @@ main(int argc, char *argv[])
  * names matching the expectations of verify_heap_slot_handler, which will
  * receive and handle each row returned from the verify_heapam() function.
  *
+ * The constructed SQL command will silently skip temporary tables, as checking
+ * them would needlessly draw errors from the underlying amcheck function.
+ *
  * sql: buffer into which the heap table checking command will be written
  * rel: relation information for the heap table to be checked
  * conn: the connection to be used, for string escaping purposes
@@ -825,10 +828,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 {
        resetPQExpBuffer(sql);
        appendPQExpBuffer(sql,
-                                         "SELECT blkno, offnum, attnum, msg FROM %s.verify_heapam("
-                                         "\nrelation := %u, on_error_stop := %s, check_toast := %s, skip := '%s'",
+                                         "SELECT v.blkno, v.offnum, v.attnum, v.msg "
+                                         "FROM pg_catalog.pg_class c, %s.verify_heapam("
+                                         "\nrelation := c.oid, on_error_stop := %s, check_toast := %s, skip := '%s'",
                                          rel->datinfo->amcheck_schema,
-                                         rel->reloid,
                                          opts.on_error_stop ? "true" : "false",
                                          opts.reconcile_toast ? "true" : "false",
                                          opts.skip);
@@ -838,7 +841,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
        if (opts.endblock >= 0)
                appendPQExpBuffer(sql, ", endblock := " INT64_FORMAT, opts.endblock);
 
-       appendPQExpBufferChar(sql, ')');
+       appendPQExpBuffer(sql,
+                                         "\n) v WHERE c.oid = %u "
+                                         "AND c.relpersistence != 't'",
+                                         rel->reloid);
 }
 
 /*
@@ -849,6 +855,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
  * functions do not return any, but rather return corruption information by
  * raising errors, which verify_btree_slot_handler expects.
  *
+ * The constructed SQL command will silently skip temporary indexes, and
+ * indexes being reindexed concurrently, as checking them would needlessly draw
+ * errors from the underlying amcheck functions.
+ *
  * sql: buffer into which the heap table checking command will be written
  * rel: relation information for the index to be checked
  * conn: the connection to be used, for string escaping purposes
@@ -858,27 +868,31 @@ prepare_btree_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 {
        resetPQExpBuffer(sql);
 
-       /*
-        * Embed the database, schema, and relation name in the query, so if the
-        * check throws an error, the user knows which relation the error came
-        * from.
-        */
        if (opts.parent_check)
                appendPQExpBuffer(sql,
-                                                 "SELECT * FROM %s.bt_index_parent_check("
-                                                 "index := '%u'::regclass, heapallindexed := %s, "
-                                                 "rootdescend := %s)",
+                                                 "SELECT %s.bt_index_parent_check("
+                                                 "index := c.oid, heapallindexed := %s, rootdescend := %s)"
+                                                 "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+                                                 "WHERE c.oid = %u "
+                                                 "AND c.oid = i.indexrelid "
+                                                 "AND c.relpersistence != 't' "
+                                                 "AND i.indisready AND i.indisvalid AND i.indislive",
                                                  rel->datinfo->amcheck_schema,
-                                                 rel->reloid,
                                                  (opts.heapallindexed ? "true" : "false"),
-                                                 (opts.rootdescend ? "true" : "false"));
+                                                 (opts.rootdescend ? "true" : "false"),
+                                                 rel->reloid);
        else
                appendPQExpBuffer(sql,
-                                                 "SELECT * FROM %s.bt_index_check("
-                                                 "index := '%u'::regclass, heapallindexed := %s)",
+                                                 "SELECT %s.bt_index_check("
+                                                 "index := c.oid, heapallindexed := %s)"
+                                                 "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+                                                 "WHERE c.oid = %u "
+                                                 "AND c.oid = i.indexrelid "
+                                                 "AND c.relpersistence != 't' "
+                                                 "AND i.indisready AND i.indisvalid AND i.indislive",
                                                  rel->datinfo->amcheck_schema,
-                                                 rel->reloid,
-                                                 (opts.heapallindexed ? "true" : "false"));
+                                                 (opts.heapallindexed ? "true" : "false"),
+                                                 rel->reloid);
 }
 
 /*
@@ -1086,15 +1100,17 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
 
        if (PQresultStatus(res) == PGRES_TUPLES_OK)
        {
-               int                     ntups = PQntuples(res);
+               int                     ntups = PQntuples(res);
 
-               if (ntups != 1)
+               if (ntups > 1)
                {
                        /*
                         * We expect the btree checking functions to return one void row
-                        * each, so we should output some sort of warning if we get
-                        * anything else, not because it indicates corruption, but because
-                        * it suggests a mismatch between amcheck and pg_amcheck versions.
+                        * each, or zero rows if the check was skipped due to the object
+                        * being in the wrong state to be checked, so we should output some
+                        * sort of warning if we get anything more, not because it
+                        * indicates corruption, but because it suggests a mismatch between
+                        * amcheck and pg_amcheck versions.
                         *
                         * In conjunction with --progress, anything written to stderr at
                         * this time would present strangely to the user without an extra
@@ -1889,10 +1905,16 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
                                                  "\nAND (c.relam = %u OR NOT ep.btree_only OR ep.rel_regex IS NULL)",
                                                  HEAP_TABLE_AM_OID, BTREE_AM_OID);
 
+       /*
+        * Exclude temporary tables and indexes, which must necessarily belong to
+        * other sessions.  (We don't create any ourselves.)  We must ultimately
+        * exclude indexes marked invalid or not ready, but we delay that decision
+        * until firing off the amcheck command, as the state of an index may
+        * change by then.
+        */
+       appendPQExpBufferStr(&sql, "\nWHERE c.relpersistence != 't'");
        if (opts.excludetbl || opts.excludeidx || opts.excludensp)
-               appendPQExpBufferStr(&sql, "\nWHERE ep.pattern_id IS NULL");
-       else
-               appendPQExpBufferStr(&sql, "\nWHERE true");
+               appendPQExpBufferStr(&sql, "\nAND ep.pattern_id IS NULL");
 
        /*
         * We need to be careful not to break the --no-dependent-toast and
@@ -1944,7 +1966,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
                                                                 "\nON ('pg_toast' ~ ep.nsp_regex OR ep.nsp_regex IS NULL)"
                                                                 "\nAND (t.relname ~ ep.rel_regex OR ep.rel_regex IS NULL)"
                                                                 "\nAND ep.heap_only"
-                                                                "\nWHERE ep.pattern_id IS NULL");
+                                                                "\nWHERE ep.pattern_id IS NULL"
+                                                                "\nAND t.relpersistence != 't'");
                appendPQExpBufferStr(&sql,
                                                         "\n)");
        }
@@ -1962,7 +1985,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
                                                  "\nINNER JOIN pg_catalog.pg_index i "
                                                  "ON r.oid = i.indrelid "
                                                  "INNER JOIN pg_catalog.pg_class c "
-                                                 "ON i.indexrelid = c.oid");
+                                                 "ON i.indexrelid = c.oid "
+                                                 "AND c.relpersistence != 't'");
                if (opts.excludeidx || opts.excludensp)
                        appendPQExpBufferStr(&sql,
                                                                 "\nINNER JOIN pg_catalog.pg_namespace n "
@@ -2000,7 +2024,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
                                                  "INNER JOIN pg_catalog.pg_index i "
                                                  "ON t.oid = i.indrelid"
                                                  "\nINNER JOIN pg_catalog.pg_class c "
-                                                 "ON i.indexrelid = c.oid");
+                                                 "ON i.indexrelid = c.oid "
+                                                 "AND c.relpersistence != 't'");
                if (opts.excludeidx)
                        appendPQExpBufferStr(&sql,
                                                                 "\nLEFT OUTER JOIN exclude_pat ep "
diff --git a/src/bin/pg_amcheck/t/006_bad_targets.pl b/src/bin/pg_amcheck/t/006_bad_targets.pl
new file mode 100644 (file)
index 0000000..e89d7d1
--- /dev/null
@@ -0,0 +1,233 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# This regression test checks the behavior of pg_amcheck in the presence of
+# inappropriate target relations.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 52;
+
+# Establish a primary and standby server, with temporary and unlogged tables.
+# The temporary tables should not be checked on either system, as pg_amcheck's
+# sessions won't be able to see their contents.  The unlogged tables should not
+# be checked on the standby, as they won't have relation forks there.
+#
+my $node_primary = PostgresNode->new('primary');
+$node_primary->init(
+       allows_streaming => 1,
+       auth_extra       => [ '--create-role', 'repl_role' ]);
+$node_primary->start;
+$node_primary->safe_psql('postgres', qq(
+CREATE EXTENSION amcheck;
+CREATE UNLOGGED TABLE unlogged_heap (i INTEGER[], b BOX);
+INSERT INTO unlogged_heap (i, b) VALUES (ARRAY[1,2,3]::INTEGER[], '((1,2),(3,4))'::BOX);
+CREATE INDEX unlogged_btree ON unlogged_heap USING BTREE (i);
+CREATE INDEX unlogged_brin ON unlogged_heap USING BRIN (b);
+CREATE INDEX unlogged_gin ON unlogged_heap USING GIN (i);
+CREATE INDEX unlogged_gist ON unlogged_heap USING GIST (b);
+CREATE INDEX unlogged_hash ON unlogged_heap USING HASH (i);
+));
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Hold open a session with temporary tables and indexes defined
+#
+my $in = '';
+my $out = '';
+my $timer = IPC::Run::timeout(180);
+my $h = $node_primary->background_psql('postgres', \$in, \$out, $timer);
+$in = qq(
+BEGIN;
+CREATE TEMPORARY TABLE heap_temporary (i INTEGER[], b BOX) ON COMMIT PRESERVE ROWS;
+INSERT INTO heap_temporary (i, b) VALUES (ARRAY[1,2,3]::INTEGER[], '((1,2),(3,4))'::BOX);
+CREATE INDEX btree_temporary ON heap_temporary USING BTREE (i);
+CREATE INDEX brin_temporary ON heap_temporary USING BRIN (b);
+CREATE INDEX gin_temporary ON heap_temporary USING GIN (i);
+CREATE INDEX gist_temporary ON heap_temporary USING GIST (b);
+CREATE INDEX hash_temporary ON heap_temporary USING HASH (i);
+COMMIT;
+BEGIN;
+);
+$h->pump_nb;
+
+my $node_standby = PostgresNode->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+       has_streaming => 1);
+$node_standby->start;
+$node_primary->wait_for_catchup($node_standby, 'replay',
+       $node_primary->lsn('replay'));
+
+# Check that running amcheck functions from SQL against inappropriate targets
+# fails sensibly.  This portion of the test arguably belongs in contrib/amcheck
+# rather than here, but we have already set up the necessary test environment,
+# so check here rather than duplicating the environment there.
+#
+my ($stdout, $stderr);
+for my $test (
+       [ $node_standby => "SELECT * FROM verify_heapam('unlogged_heap')",
+         qr/^$/,
+        "checking unlogged table during recovery does not complain" ],
+
+       [ $node_standby => "SELECT * FROM bt_index_check('unlogged_btree')",
+         qr/^$/,
+        "checking unlogged btree index during recovery does not complain" ],
+
+       [ $node_standby => "SELECT * FROM bt_index_check('unlogged_brin')",
+         qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_brin" is not a B-Tree index/s,
+        "checking unlogged brin index during recovery fails appropriately" ],
+
+       [ $node_standby => "SELECT * FROM bt_index_check('unlogged_gin')",
+         qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_gin" is not a B-Tree index/s,
+        "checking unlogged gin index during recovery fails appropriately" ],
+
+       [ $node_standby => "SELECT * FROM bt_index_check('unlogged_gist')",
+         qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_gist" is not a B-Tree index/s,
+        "checking unlogged gist index during recovery fails appropriately" ],
+
+       [ $node_standby => "SELECT * FROM bt_index_check('unlogged_hash')",
+         qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_hash" is not a B-Tree index/s,
+        "checking unlogged hash index during recovery fails appropriately" ],
+
+       )
+{
+       $test->[0]->psql('postgres', $test->[1],
+                                        stdout => \$stdout, stderr => \$stderr);
+       like ($stderr, $test->[2], $test->[3]);
+}
+
+# Verify that --all excludes the temporary relations and handles unlogged
+# relations as appropriate, without raising any warnings or exiting with a
+# non-zero status.
+#
+$node_primary->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck all objects on primary');
+
+$node_standby->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck all objects on standby');
+
+# Verify that explicitly asking for unlogged relations to be checked does not
+# raise any warnings or exit with a non-zero exit status, even when they cannot
+# be checked due to recovery being in progress.
+#
+# These relations will have no relation fork during recovery, so even without
+# checking them, we can say (by definition) that they are not corrupt, because
+# it is meaningless to declare a non-existent relation fork corrupt.
+#
+$node_primary->command_checks_all(
+       ['pg_amcheck', '--relation', '*unlogged*'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck unlogged objects on primary');
+
+$node_standby->command_checks_all(
+       ['pg_amcheck', '--relation', '*unlogged*'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck unlogged objects on standby');
+
+# Verify that the --heapallindexed check works on both primary and standby.
+#
+$node_primary->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1', '--heapallindexed'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck --helpallindexed on primary');
+
+$node_standby->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1', '--heapallindexed'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck --helpallindexed on standby');
+
+# Verify that the --parent-check and --rootdescend options work on the primary.
+#
+$node_primary->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1', '--rootdescend'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck --rootdescend on primary');
+
+$node_primary->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1', '--parent-check'],
+       0,
+       [ qr/^$/ ],
+       [ qr/^$/ ],
+       'pg_amcheck --parent-check on primary');
+
+# Check that the failures on the standby from using --parent-check and
+# --rootdescend are the failures we expect
+#
+$node_standby->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1', '--rootdescend'],
+       2,
+       [ qr/ERROR:  cannot acquire lock mode ShareLock on database objects while recovery is in progress/,
+         qr/btree index "postgres\.pg_catalog\./,
+         qr/btree index "postgres\.pg_toast\./,
+       ],
+       [ qr/^$/ ],
+       'pg_amcheck --rootdescend on standby');
+
+$node_standby->command_checks_all(
+       ['pg_amcheck', '--all', '-D', 'template1', '--parent-check'],
+       2,
+       [ qr/ERROR:  cannot acquire lock mode ShareLock on database objects while recovery is in progress/,
+         qr/btree index "postgres\.pg_catalog\./,
+         qr/btree index "postgres\.pg_toast\./,
+       ],
+       [ qr/^$/ ],
+       'pg_amcheck --parent-check on standby');
+
+# Bug #17212
+#
+# Verify that explicitly asking for another session's temporary relations to be
+# checked fails, but only in the sense that nothing matched the parameter.  We
+# don't complain that they are uncheckable, only that you gave a --relation
+# option and we didn't find anything checkable matching the pattern.
+#
+$node_primary->command_checks_all(
+       ['pg_amcheck', '--relation', '*temporary*'],
+       1,
+       [ qr/^$/ ],
+       [ qr/error: no relations to check matching "\*temporary\*"/ ],
+       'pg_amcheck temporary objects on primary');
+
+$node_standby->command_checks_all(
+       ['pg_amcheck', '--relation', '*temporary*'],
+       1,
+       [ qr/^$/ ],
+       [ qr/error: no relations to check matching "\*temporary\*"/ ],
+       'pg_amcheck temporary objects on standby');
+
+# Verify that a relation pattern which only matches temporary relations draws
+# an error, even when other relation patterns are ok.  This differs from the
+# test above in that the set of all relations to check is not empty.
+#
+$node_primary->command_checks_all(
+       ['pg_amcheck', '--relation', '*temporary*', '--relation', '*unlogged*'],
+       1,
+       [ qr/^$/ ],
+       [ qr/error: no relations to check matching "\*temporary\*"/ ],
+       'pg_amcheck temporary objects on primary');
+
+$node_standby->command_checks_all(
+       ['pg_amcheck', '--relation', '*temporary*', '--relation', '*unlogged*'],
+       1,
+       [ qr/^$/ ],
+       [ qr/error: no relations to check matching "\*temporary\*"/ ],
+       'pg_amcheck temporary objects on standby');