Remove redundant PARTITION BY columns from WindowClauses
authorDavid Rowley <[email protected]>
Mon, 3 Jul 2023 00:49:43 +0000 (12:49 +1200)
committerDavid Rowley <[email protected]>
Mon, 3 Jul 2023 00:49:43 +0000 (12:49 +1200)
Here we adjust the query planner to have it remove items from a window
clause's PARTITION BY clause in cases where the pathkey for a column in
the PARTITION BY clause is redundant.

Doing this allows the optimization added in 9d9c02ccd to stop window
aggregation early rather than going into "pass-through" mode to find
tuples belonging to the next partition.  Also, when we manage to remove
all PARTITION BY columns, we now no longer needlessly check that the
current tuple belongs to the same partition as the last tuple in
nodeWindowAgg.c.  If the pathkey was redundant then all tuples must
contain the same value for the given redundant column, so there's no point
in checking that during execution.

Author: David Rowley
Reviewed-by: Richard Guo
Discussion: https://postgr.es/m/CAApHDvo2ji+hdxrxfXtRtsfSVw3to2o1nCO20qimw0dUGK8hcQ@mail.gmail.com

src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/planner.c
src/include/nodes/parsenodes.h

index 4bb38160b338a61c18d91edc749a68cebc5f8a38..ec73789bc2107ad1b87b646ac8b051eaeba6263e 100644 (file)
@@ -2623,12 +2623,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
 
        /*
         * Convert SortGroupClause lists into arrays of attr indexes and equality
-        * operators, as wanted by executor.  (Note: in principle, it's possible
-        * to drop some of the sort columns, if they were proved redundant by
-        * pathkey logic.  However, it doesn't seem worth going out of our way to
-        * optimize such cases.  In any case, we must *not* remove the ordering
-        * column for RANGE OFFSET cases, as the executor needs that for in_range
-        * tests even if it's known to be equal to some partitioning column.)
+        * operators, as wanted by executor.
         */
        partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart);
        partOperators = (Oid *) palloc(sizeof(Oid) * numPart);
index 1e4dd27dbafbe07e92a91c1125eb65798c16828c..eaee9eda77142bbf04abfc622441cef6c18ed59d 100644 (file)
@@ -5999,6 +5999,9 @@ make_window_input_target(PlannerInfo *root,
  *             Create a pathkeys list describing the required input ordering
  *             for the given WindowClause.
  *
+ * Modifies wc's partitionClause to remove any clauses which are deemed
+ * redundant by the pathkey logic.
+ *
  * The required ordering is first the PARTITION keys, then the ORDER keys.
  * In the future we might try to implement windowing using hashing, in which
  * case the ordering could be relaxed, but for now we always sort.
@@ -6007,8 +6010,7 @@ static List *
 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
                                                 List *tlist)
 {
-       List       *window_pathkeys;
-       List       *window_sortclauses;
+       List       *window_pathkeys = NIL;
 
        /* Throw error if can't sort */
        if (!grouping_is_sortable(wc->partitionClause))
@@ -6022,12 +6024,45 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
                                 errmsg("could not implement window ORDER BY"),
                                 errdetail("Window ordering columns must be of sortable datatypes.")));
 
-       /* Okay, make the combined pathkeys */
-       window_sortclauses = list_concat_copy(wc->partitionClause, wc->orderClause);
-       window_pathkeys = make_pathkeys_for_sortclauses(root,
-                                                                                                       window_sortclauses,
-                                                                                                       tlist);
-       list_free(window_sortclauses);
+       /*
+        * First fetch the pathkeys for the PARTITION BY clause.  We can safely
+        * remove any clauses from the wc->partitionClause for redundant pathkeys.
+        */
+       if (wc->partitionClause != NIL)
+       {
+               bool            sortable;
+
+               window_pathkeys = make_pathkeys_for_sortclauses_extended(root,
+                                                                                                                                &wc->partitionClause,
+                                                                                                                                tlist,
+                                                                                                                                true,
+                                                                                                                                &sortable);
+
+               Assert(sortable);
+       }
+
+       /*
+        * In principle, we could also consider removing redundant ORDER BY items
+        * too as doing so does not alter the result of peer row checks done by
+        * the executor.  However, we must *not* remove the ordering column for
+        * RANGE OFFSET cases, as the executor needs that for in_range tests even
+        * if it's known to be equal to some partitioning column.
+        */
+       if (wc->orderClause != NIL)
+       {
+               List       *orderby_pathkeys;
+
+               orderby_pathkeys = make_pathkeys_for_sortclauses(root,
+                                                                                                                wc->orderClause,
+                                                                                                                tlist);
+
+               /* Okay, make the combined pathkeys */
+               if (window_pathkeys != NIL)
+                       window_pathkeys = append_pathkeys(window_pathkeys, orderby_pathkeys);
+               else
+                       window_pathkeys = orderby_pathkeys;
+       }
+
        return window_pathkeys;
 }
 
index b3bec90e526a5c0956c74ab36d7b4e71eb9e960b..88b03cc4725934fa85e1033ea4811da1bf4060e0 100644 (file)
@@ -1473,6 +1473,8 @@ typedef struct GroupingSet
  * if the clause originally came from WINDOW, and is NULL if it originally
  * was an OVER clause (but note that we collapse out duplicate OVERs).
  * partitionClause and orderClause are lists of SortGroupClause structs.
+ * partitionClause is sanitized by the query planner to remove any columns or
+ * expressions belonging to redundant PathKeys.
  * If we have RANGE with offset PRECEDING/FOLLOWING, the semantics of that are
  * specified by startInRangeFunc/inRangeColl/inRangeAsc/inRangeNullsFirst
  * for the start offset, or endInRangeFunc/inRange* for the end offset.