From: Amit Kapila Date: Thu, 18 Mar 2021 01:55:27 +0000 (+0530) Subject: Add a new GUC and a reloption to enable inserts in parallel-mode. X-Git-Url: http://git.postgresql.org/gitweb/-?a=commitdiff_plain;h=c8f78b616167bf8e24bc5dc69112c37755ed3058;p=postgresql-pgindent.git Add a new GUC and a reloption to enable inserts in parallel-mode. Commit 05c8482f7f added the implementation of parallel SELECT for "INSERT INTO ... SELECT ..." which may incur non-negligible overhead in the additional parallel-safety checks that it performs, even when, in the end, those checks determine that parallelism can't be used. This is normally only ever a problem in the case of when the target table has a large number of partitions. A new GUC option "enable_parallel_insert" is added, to allow insert in parallel-mode. The default is on. In addition to the GUC option, the user may want a mechanism to allow inserts in parallel-mode with finer granularity at table level. The new table option "parallel_insert_enabled" allows this. The default is true. Author: "Hou, Zhijie" Reviewed-by: Greg Nancarrow, Amit Langote, Takayuki Tsunakawa, Amit Kapila Discussion: https://postgr.es/m/CAA4eK1K-cW7svLC2D7DHoGHxdAdg3P37BLgebqBOC2ZLc9a6QQ%40mail.gmail.com Discussion: https://postgr.es/m/CAJcOf-cXnB5cnMKqWEp2E2z7Mvcd04iLVmV=qpFJrR3AcrTS3g@mail.gmail.com --- diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 5e551b9f6d..8603cf3f94 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5072,6 +5072,29 @@ ANY num_sync ( + enable_parallel_insert (boolean) + + enable_parallel_insert configuration parameter + + + + + Enables or disables the query planner's use of parallel plans for + INSERT commands. The default is on. + When enabled, the planner performs additional parallel-safety checks + on the target table's attributes and indexes, in order to determine + if it's safe to use a parallel plan for INSERT. In + cases such as when the target table has a large number of partitions, + and particularly also when that table uses something parallel-unsafe + that prevents parallelism, the overhead of these checks may become + prohibitively high. To address this potential overhead in these cases, + this option can be used to disable the use of parallel plans for + INSERT. + + + + diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index c25ef5abd6..f82dce4a59 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -722,7 +722,8 @@ WITH ( MODULUS numeric_literal, REM SHARE UPDATE EXCLUSIVE lock will be taken for fillfactor, toast and autovacuum storage parameters, as well as the - planner parameter parallel_workers. + planner parameter parallel_workers and + parallel_insert_enabled. diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 71703da85a..ff1b642722 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1409,6 +1409,32 @@ WITH ( MODULUS numeric_literal, REM + + parallel_insert_enabled (boolean) + + parallel_insert_enabled storage parameter + + + + + Enables or disables the query planner's use of parallel insert for + this table. When enabled (and provided that + is also true), + the planner performs additional parallel-safety checks on the table's + attributes and indexes, in order to determine if it's safe to use a + parallel plan for INSERT. The default is + true. In cases such as when the table has a large + number of partitions, and particularly also when that table uses a + parallel-unsafe feature that prevents parallelism, the overhead of these + checks may become prohibitively high. To address this potential overhead + in these cases, this option can be used to disable the use of parallel + insert for this table. Note that if the target table of the parallel + insert is partitioned, the parallel_insert_enabled + option values of the partitions are ignored. + + + + autovacuum_enabled, toast.autovacuum_enabled (boolean) diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index d897bbec2b..5a0ae99750 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -168,6 +168,15 @@ static relopt_bool boolRelOpts[] = }, true }, + { + { + "parallel_insert_enabled", + "Enables \"parallel insert\" feature for this table", + RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED, + ShareUpdateExclusiveLock + }, + true + }, /* list terminator */ {{NULL}} }; @@ -1859,7 +1868,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) {"vacuum_index_cleanup", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, vacuum_index_cleanup)}, {"vacuum_truncate", RELOPT_TYPE_BOOL, - offsetof(StdRdOptions, vacuum_truncate)} + offsetof(StdRdOptions, vacuum_truncate)}, + {"parallel_insert_enabled", RELOPT_TYPE_BOOL, + offsetof(StdRdOptions, parallel_insert_enabled)} }; return (bytea *) build_reloptions(reloptions, validate, kind, @@ -1961,13 +1972,15 @@ build_local_reloptions(local_relopts *relopts, Datum options, bool validate) bytea * partitioned_table_reloptions(Datum reloptions, bool validate) { - /* - * There are no options for partitioned tables yet, but this is able to do - * some validation. - */ + static const relopt_parse_elt tab[] = { + {"parallel_insert_enabled", RELOPT_TYPE_BOOL, + offsetof(PartitionedTableRdOptions, parallel_insert_enabled)} + }; + return (bytea *) build_reloptions(reloptions, validate, RELOPT_KIND_PARTITIONED, - 0, NULL, 0); + sizeof(PartitionedTableRdOptions), + tab, lengthof(tab)); } /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index a25b674a19..c81e2cf244 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -129,6 +129,8 @@ Cost disable_cost = 1.0e10; int max_parallel_workers_per_gather = 2; +bool enable_parallel_insert = true; + bool enable_seqscan = true; bool enable_indexscan = true; bool enable_indexonlyscan = true; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index ade66a541c..c6be4f87c2 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -1265,8 +1265,10 @@ target_rel_chk_constr_max_parallel_hazard(Relation rel, * * It's not possible in the following cases: * - * 1) INSERT...ON CONFLICT...DO UPDATE - * 2) INSERT without SELECT + * 1) enable_parallel_insert is off + * 2) INSERT...ON CONFLICT...DO UPDATE + * 3) INSERT without SELECT + * 4) the reloption parallel_insert_enabled is set to off * * (Note: we don't do in-depth parallel-safety checks here, we do only the * cheaper tests that can quickly exclude obvious cases for which @@ -1277,12 +1279,17 @@ bool is_parallel_allowed_for_modify(Query *parse) { bool hasSubQuery; + bool parallel_enabled; RangeTblEntry *rte; ListCell *lc; + Relation rel; if (!IsModifySupportedInParallelMode(parse->commandType)) return false; + if (!enable_parallel_insert) + return false; + /* * UPDATE is not currently supported in parallel-mode, so prohibit * INSERT...ON CONFLICT...DO UPDATE... @@ -1313,7 +1320,28 @@ is_parallel_allowed_for_modify(Query *parse) } } - return hasSubQuery; + if (!hasSubQuery) + return false; + + /* + * Check if parallel_insert_enabled is enabled for the target table, if + * not, skip the safety checks. + * + * (Note: if the target table is partitioned, the parallel_insert_enabled + * option setting of the partitions are ignored). + */ + rte = rt_fetch(parse->resultRelation, parse->rtable); + + /* + * The target table is already locked by the caller (this is done in the + * parse/analyze phase), and remains locked until end-of-transaction. + */ + rel = table_open(rte->relid, NoLock); + + parallel_enabled = RelationGetParallelInsert(rel, true); + table_close(rel, NoLock); + + return parallel_enabled; } /***************************************************************************** diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 855076b1fd..b263e3493b 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1122,6 +1122,16 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_parallel_insert", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's use of parallel plans for INSERT commands."), + NULL, + GUC_EXPLAIN + }, + &enable_parallel_insert, + true, + NULL, NULL, NULL + }, { /* Not for general use --- used by SET SESSION AUTHORIZATION */ {"is_superuser", PGC_INTERNAL, UNGROUPED, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index f46c2dd7a8..6647f8fd6e 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -371,6 +371,7 @@ #enable_partitionwise_aggregate = off #enable_parallel_hash = on #enable_partition_pruning = on +#enable_parallel_insert = on # - Planner Cost Constants - diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index ecdb8d752b..d3fb734f05 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -1118,6 +1118,7 @@ static const char *const table_storage_parameters[] = { "autovacuum_vacuum_threshold", "fillfactor", "log_autovacuum_min_duration", + "parallel_insert_enabled", "parallel_workers", "toast.autovacuum_enabled", "toast.autovacuum_freeze_max_age", diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 1be93be098..22e6db96b6 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -47,6 +47,7 @@ typedef enum /* parameter variables and flags (see also optimizer.h) */ extern PGDLLIMPORT Cost disable_cost; extern PGDLLIMPORT int max_parallel_workers_per_gather; +extern PGDLLIMPORT bool enable_parallel_insert; extern PGDLLIMPORT bool enable_seqscan; extern PGDLLIMPORT bool enable_indexscan; extern PGDLLIMPORT bool enable_indexonlyscan; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 10b63982c0..5375a37dd1 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -306,6 +306,8 @@ typedef struct StdRdOptions int parallel_workers; /* max number of parallel workers */ bool vacuum_index_cleanup; /* enables index vacuuming and cleanup */ bool vacuum_truncate; /* enables vacuum to truncate a relation */ + bool parallel_insert_enabled; /* enables planner's use of + * parallel insert */ } StdRdOptions; #define HEAP_MIN_FILLFACTOR 10 @@ -423,6 +425,29 @@ typedef struct ViewOptions ((ViewOptions *) (relation)->rd_options)->check_option == \ VIEW_OPTION_CHECK_OPTION_CASCADED) +/* + * PartitionedTableRdOptions + * Contents of rd_options for partitioned tables + */ +typedef struct PartitionedTableRdOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + bool parallel_insert_enabled; /* enables planner's use of + * parallel insert */ +} PartitionedTableRdOptions; + +/* + * RelationGetParallelInsert + * Returns the relation's parallel_insert_enabled reloption setting. + * Note multiple eval of argument! + */ +#define RelationGetParallelInsert(relation, defaultpd) \ + ((relation)->rd_options ? \ + (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? \ + ((PartitionedTableRdOptions *) (relation)->rd_options)->parallel_insert_enabled : \ + ((StdRdOptions *) (relation)->rd_options)->parallel_insert_enabled) : \ + (defaultpd)) + /* * RelationIsValid * True iff relation descriptor is valid. diff --git a/src/test/regress/expected/insert_parallel.out b/src/test/regress/expected/insert_parallel.out index 3599c21eba..164668e319 100644 --- a/src/test/regress/expected/insert_parallel.out +++ b/src/test/regress/expected/insert_parallel.out @@ -61,12 +61,45 @@ set max_parallel_workers_per_gather=4; create table para_insert_p1 ( unique1 int4 PRIMARY KEY, stringu1 name -); +) with (parallel_insert_enabled = off); create table para_insert_f1 ( unique1 int4 REFERENCES para_insert_p1(unique1), stringu1 name ); -- +-- Disable guc option enable_parallel_insert +-- +set enable_parallel_insert = off; +-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + QUERY PLAN +-------------------------- + Insert on para_insert_p1 + -> Seq Scan on tenk1 +(2 rows) + +-- +-- Reset guc option enable_parallel_insert +-- +reset enable_parallel_insert; +-- +-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + QUERY PLAN +-------------------------- + Insert on para_insert_p1 + -> Seq Scan on tenk1 +(2 rows) + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table para_insert_p1 set (parallel_insert_enabled = on); +-- -- Test INSERT with underlying query. -- (should create plan with parallel SELECT, Gather parent node) -- @@ -362,9 +395,28 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data; -- -- Test INSERT into partition with underlying query. -- -create table parttable1 (a int, b name) partition by range (a); +create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off); create table parttable1_1 partition of parttable1 for values from (0) to (5000); create table parttable1_2 partition of parttable1 for values from (5000) to (10000); +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=off +-- (should not create a parallel plan) +-- +explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; + QUERY PLAN +------------------------- + Insert on parttable1 + -> Seq Scan on tenk1 +(2 rows) + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table parttable1 set (parallel_insert_enabled = on); +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=on +-- (should create a parallel plan) +-- explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; QUERY PLAN ---------------------------------------- diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 6d048e309c..a62bf5dc92 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -107,13 +107,14 @@ select name, setting from pg_settings where name like 'enable%'; enable_nestloop | on enable_parallel_append | on enable_parallel_hash | on + enable_parallel_insert | on enable_partition_pruning | on enable_partitionwise_aggregate | off enable_partitionwise_join | off enable_seqscan | on enable_sort | on enable_tidscan | on -(18 rows) +(19 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/sql/insert_parallel.sql b/src/test/regress/sql/insert_parallel.sql index 8eb4e9564e..171d8e5b84 100644 --- a/src/test/regress/sql/insert_parallel.sql +++ b/src/test/regress/sql/insert_parallel.sql @@ -77,13 +77,38 @@ set max_parallel_workers_per_gather=4; create table para_insert_p1 ( unique1 int4 PRIMARY KEY, stringu1 name -); +) with (parallel_insert_enabled = off); create table para_insert_f1 ( unique1 int4 REFERENCES para_insert_p1(unique1), stringu1 name ); +-- +-- Disable guc option enable_parallel_insert +-- +set enable_parallel_insert = off; + +-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + +-- +-- Reset guc option enable_parallel_insert +-- +reset enable_parallel_insert; + +-- +-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table para_insert_p1 set (parallel_insert_enabled = on); -- -- Test INSERT with underlying query. @@ -208,10 +233,25 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data; -- -- Test INSERT into partition with underlying query. -- -create table parttable1 (a int, b name) partition by range (a); +create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off); create table parttable1_1 partition of parttable1 for values from (0) to (5000); create table parttable1_2 partition of parttable1 for values from (5000) to (10000); +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=off +-- (should not create a parallel plan) +-- +explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table parttable1 set (parallel_insert_enabled = on); + +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=on +-- (should create a parallel plan) +-- explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; insert into parttable1 select unique1,stringu1 from tenk1; select count(*) from parttable1_1; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9e6777e9d0..1d1d5d2f0e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1797,6 +1797,7 @@ PartitionSpec PartitionTupleRouting PartitionedRelPruneInfo PartitionedRelPruningData +PartitionedTableRdOptions PartitionwiseAggregateType PasswordType Path