Add enable_presorted_aggregate GUC
authorDavid Rowley <[email protected]>
Tue, 20 Dec 2022 09:28:58 +0000 (22:28 +1300)
committerDavid Rowley <[email protected]>
Tue, 20 Dec 2022 09:28:58 +0000 (22:28 +1300)
1349d279 added query planner support to allow more efficient execution of
aggregate functions which have an ORDER BY or a DISTINCT clause.  Prior to
that commit, the planner would only request that the lower planner produce
a plan with the order required for the GROUP BY clause and it would be
left up to nodeAgg.c to perform the final sort of records within each
group so that the aggregate transition functions were called in the
correct order.  Now that the planner requests the lower planner produce a
plan with the GROUP BY and the ORDER BY / DISTINCT aggregates in mind,
there is the possibility that the planner chooses a plan which could be
less efficient than what would have been produced before 1349d279.

While developing 1349d279, I had in mind that Incremental Sort would help
us in cases where an index exists only on the GROUP BY column(s).
Incremental Sort would just replace the implicit tuplesorts which are
being performed in nodeAgg.c.  However, because the planner has the
flexibility to instead choose a plan which just performs a full sort on
both the GROUP BY and ORDER BY / DISTINCT aggregate columns, there is
potential for the planner to make a bad choice.  The costing for
Incremental Sort is not perfect as it assumes an even distribution of rows
to sort within each sort group.

Here we add an escape hatch in the form of the enable_presorted_aggregate
GUC.  This will allow users to get the pre-PG16 behavior in cases where
they have no other means to convince the query planner to produce a plan
which only sorts on the GROUP BY column(s).

Discussion: https://postgr.es/m/CAApHDvr1Sm+g9hbv4REOVuvQKeDWXcKUAhmbK5K+dfun0s9CvA@mail.gmail.com

doc/src/sgml/config.sgml
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/planner.c
src/backend/utils/misc/guc_tables.c
src/backend/utils/misc/postgresql.conf.sample
src/include/optimizer/cost.h
src/test/regress/expected/aggregates.out
src/test/regress/expected/sysviews.out
src/test/regress/sql/aggregates.sql

index 8e4145979dc4bcfaeb75870b1317b82e6d511464..9eedab652df64ec7a3d6e5af9a842a93ff5f249d 100644 (file)
@@ -5311,6 +5311,29 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-enable-presorted-aggregate" xreflabel="enable_presorted_aggregate">
+      <term><varname>enable_presorted_aggregate</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_presorted_aggregate</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Controls if the query planner will produce a plan which will provide
+        rows which are presorted in the order required for the query's
+        <literal>ORDER BY</literal> / <literal>DISTINCT</literal> aggregate
+        functions.  When disabled, the query planner will produce a plan which
+        will always require the executor to perform a sort before performing
+        aggregation of each aggregate function containing an
+        <literal>ORDER BY</literal> or <literal>DISTINCT</literal> clause.
+        When enabled, the planner will try to produce a more efficient plan
+        which provides input to the aggregate functions which is presorted in
+        the order they require for aggregation.  The default value is
+        <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan">
       <term><varname>enable_seqscan</varname> (<type>boolean</type>)
       <indexterm>
index 0f0bcfb7e50c557096617a5bcfadf627661c8583..89d3c4352ce973fa21afdc16d69461dbfab36b79 100644 (file)
@@ -151,6 +151,7 @@ bool                enable_partitionwise_aggregate = false;
 bool           enable_parallel_append = true;
 bool           enable_parallel_hash = true;
 bool           enable_partition_pruning = true;
+bool           enable_presorted_aggregate = true;
 bool           enable_async_append = true;
 
 typedef struct
index dfda251d956f6a26135289525d098560e900d7c1..e21e72eb870670a76b9ef8a8a4a0d64537b1369d 100644 (file)
@@ -3191,7 +3191,8 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
         * sets.  All handling specific to ordered aggregates must be done by the
         * executor in that case.
         */
-       if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
+       if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL ||
+               !enable_presorted_aggregate)
                return grouppathkeys;
 
        /*
index 1bf14eec661418e079c42392f1743c615d290bd8..436afe1d2150c72c6e2a9708fb955c65ca6a7505 100644 (file)
@@ -971,6 +971,21 @@ struct config_bool ConfigureNamesBool[] =
                true,
                NULL, NULL, NULL
        },
+       {
+               {"enable_presorted_aggregate", PGC_USERSET, QUERY_TUNING_METHOD,
+                       gettext_noop("Enables the planner's ability to produce plans which "
+                                                "provide presorted input for ORDER BY / DISTINCT aggregate "
+                                                "functions."),
+                       gettext_noop("Allows the query planner to build plans which provide "
+                                                "presorted input for aggregate functions with an ORDER BY / "
+                                                "DISTINCT clause.  When disabled, implicit sorts are always "
+                                                "performed during execution."),
+                       GUC_EXPLAIN
+               },
+               &enable_presorted_aggregate,
+               true,
+               NULL, NULL, NULL
+       },
        {
                {"enable_async_append", PGC_USERSET, QUERY_TUNING_METHOD,
                        gettext_noop("Enables the planner's use of async append plans."),
index 043864597f86240bc86172e0714ab8340732b1e9..5afdeb04de8d5a31e67a1ef11fe1254326a6f932 100644 (file)
 #enable_partition_pruning = on
 #enable_partitionwise_join = off
 #enable_partitionwise_aggregate = off
+#enable_presorted_aggregate = on
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
index 204e94b6d1098315ae5677870d8b71313e54031c..b6cc2c9cd6019ee37ced7bb6f68b5a714dcb6531 100644 (file)
@@ -68,6 +68,7 @@ extern PGDLLIMPORT bool enable_partitionwise_aggregate;
 extern PGDLLIMPORT bool enable_parallel_append;
 extern PGDLLIMPORT bool enable_parallel_hash;
 extern PGDLLIMPORT bool enable_partition_pruning;
+extern PGDLLIMPORT bool enable_presorted_aggregate;
 extern PGDLLIMPORT bool enable_async_append;
 extern PGDLLIMPORT int constraint_exclusion;
 
index fc2bd40be2d8cd22ab08052f718a6b7770ccfa90..309c2dc8654f5130ae7dcfdb911f199a4463faa2 100644 (file)
@@ -1471,6 +1471,17 @@ group by ten;
          ->  Seq Scan on tenk1
 (5 rows)
 
+-- Ensure no ordering is requested when enable_presorted_aggregate is off
+set enable_presorted_aggregate to off;
+explain (costs off)
+select sum(two order by two) from tenk1;
+       QUERY PLAN        
+-------------------------
+ Aggregate
+   ->  Seq Scan on tenk1
+(2 rows)
+
+reset enable_presorted_aggregate;
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --
index 579b861d84f31386724ed3b41216054aa0b3bab8..001c6e7eb9dee0866d4b4d7315079749421fc409 100644 (file)
@@ -128,10 +128,11 @@ select name, setting from pg_settings where name like 'enable%';
  enable_partition_pruning       | on
  enable_partitionwise_aggregate | off
  enable_partitionwise_join      | off
+ enable_presorted_aggregate     | on
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(20 rows)
+(21 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
index a4c00ff7a9da0833f7eff4f5f48131ef874faa40..15f6be8e15ac2730d515d4225c82d9105e21972e 100644 (file)
@@ -546,6 +546,12 @@ select
 from tenk1
 group by ten;
 
+-- Ensure no ordering is requested when enable_presorted_aggregate is off
+set enable_presorted_aggregate to off;
+explain (costs off)
+select sum(two order by two) from tenk1;
+reset enable_presorted_aggregate;
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --