#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
+#include "nodes/supportnodes.h"
#include "optimizer/appendinfo.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
PathTarget *grouping_target,
Node *havingQual);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
+static void optimize_window_clauses(PlannerInfo *root,
+ WindowFuncLists *wflists);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static PathTarget *make_window_input_target(PlannerInfo *root,
PathTarget *final_target,
wflists = find_window_functions((Node *) root->processed_tlist,
list_length(parse->windowClause));
if (wflists->numWindowFuncs > 0)
+ {
+ /*
+ * See if any modifications can be made to each WindowClause
+ * to allow the executor to execute the WindowFuncs more
+ * quickly.
+ */
+ optimize_window_clauses(root, wflists);
+
activeWindows = select_active_windows(root, wflists);
+ }
else
parse->hasWindowFuncs = false;
}
return new_tlist;
}
+/*
+ * optimize_window_clauses
+ *		Ask the prosupport function of every WindowFunc whether the
+ *		WindowClause it is attached to can be altered so that the executor
+ *		has less work to do when evaluating the window functions.
+ *
+ * At present frameOptions is the only WindowClause property that may be
+ * altered.  Further kinds of adjustment could be supported here later.
+ */
+static void
+optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
+{
+	List	   *clauses = root->parse->windowClause;
+	ListCell   *clause_cell;
+
+	foreach(clause_cell, clauses)
+	{
+		WindowClause *wc = lfirst_node(WindowClause, clause_cell);
+		List	   *wfuncs;
+		ListCell   *func_cell;
+		ListCell   *other_cell;
+		int			agreedFrameOptions = 0;
+		bool		unanimous = true;
+
+		Assert(wc->winref <= wflists->maxWinRef);
+
+		/* nothing to do for a WindowClause that no WindowFunc references */
+		wfuncs = wflists->windowFuncs[wc->winref];
+		if (wfuncs == NIL)
+			continue;
+
+		/*
+		 * Poll every WindowFunc using this WindowClause.  The clause is only
+		 * eligible when each of them has a support function, each support
+		 * function answers the request, and all answers are the same.
+		 */
+		foreach(func_cell, wfuncs)
+		{
+			WindowFunc *wfunc = lfirst_node(WindowFunc, func_cell);
+			Oid			supportfn = get_func_support(wfunc->winfnoid);
+			SupportRequestOptimizeWindowClause req;
+			SupportRequestOptimizeWindowClause *res;
+
+			/* no support function at all: leave this WindowClause alone */
+			if (!OidIsValid(supportfn))
+			{
+				unanimous = false;
+				break;
+			}
+
+			req.type = T_SupportRequestOptimizeWindowClause;
+			req.window_func = wfunc;
+			req.window_clause = wc;
+			req.frameOptions = wc->frameOptions;
+
+			res = (SupportRequestOptimizeWindowClause *)
+				DatumGetPointer(OidFunctionCall1(supportfn,
+												 PointerGetDatum(&req)));
+
+			/* the support function doesn't handle this request type */
+			if (res == NULL)
+			{
+				unanimous = false;
+				break;
+			}
+
+			if (foreach_current_index(func_cell) == 0)
+			{
+				/* the first WindowFunc's answer is what the rest must match */
+				agreedFrameOptions = res->frameOptions;
+			}
+			else if (res->frameOptions != agreedFrameOptions)
+			{
+				/* disagreement: this WindowClause can't be optimized */
+				unanimous = false;
+				break;
+			}
+		}
+
+		/* move on unless everyone agreed on something new */
+		if (!unanimous || agreedFrameOptions == wc->frameOptions)
+			continue;
+
+		/* install the frame options all WindowFuncs agreed on */
+		wc->frameOptions = agreedFrameOptions;
+
+		/*
+		 * The modified WindowClause might now be identical to another one.
+		 * That needs at least two WindowClauses, so there is no point in
+		 * searching when this is the only one.
+		 */
+		if (list_length(clauses) == 1)
+			continue;
+
+		/*
+		 * Search for a duplicate and, if there is one, hand this clause's
+		 * WindowFuncs over to it.
+		 */
+		foreach(other_cell, clauses)
+		{
+			WindowClause *other = lfirst_node(WindowClause, other_cell);
+			ListCell   *moved_cell;
+
+			/* never compare the WindowClause with itself */
+			if (other == wc)
+				continue;
+
+			/*
+			 * This is the same duplicate test that transformWindowFuncCall
+			 * performs.
+			 */
+			if (!equal(wc->partitionClause, other->partitionClause) ||
+				!equal(wc->orderClause, other->orderClause) ||
+				wc->frameOptions != other->frameOptions ||
+				!equal(wc->startOffset, other->startOffset) ||
+				!equal(wc->endOffset, other->endOffset))
+				continue;
+
+			/* re-point each of our WindowFuncs at the surviving clause ... */
+			foreach(moved_cell, wflists->windowFuncs[wc->winref])
+			{
+				WindowFunc *moved = lfirst_node(WindowFunc, moved_cell);
+
+				moved->winref = other->winref;
+			}
+
+			/* ... and transfer them to that clause's WindowFunc list */
+			wflists->windowFuncs[other->winref] =
+				list_concat(wflists->windowFuncs[other->winref],
+							wflists->windowFuncs[wc->winref]);
+			wflists->windowFuncs[wc->winref] = NIL;
+
+			/*
+			 * transformWindowFuncCall() should already have ensured that no
+			 * further duplicates exist, so the search can stop here.
+			 */
+			break;
+		}
+	}
+}
+
/*
* select_active_windows
* Create a list of the "active" window clauses (ie, those referenced
/* matched, no refname */ ;
else
continue;
+
+ /*
+ * Also see similar de-duplication code in optimize_window_clauses
+ */
if (equal(refwin->partitionClause, windef->partitionClause) &&
equal(refwin->orderClause, windef->orderClause) &&
refwin->frameOptions == windef->frameOptions &&
PG_RETURN_POINTER(req);
}
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * The frame options can always become "ROWS BETWEEN UNBOUNDED
+ * PRECEDING AND CURRENT ROW". row_number() always just increments by
+ * 1 with each row in the partition. Using ROWS instead of RANGE
+ * saves effort checking peer rows during execution.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
PG_RETURN_POINTER(NULL);
}
PG_RETURN_POINTER(req);
}
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * rank() is coded in such a way that it returns "(COUNT (*) OVER
+ * (<opt> RANGE UNBOUNDED PRECEDING) - COUNT (*) OVER (<opt> RANGE
+ * CURRENT ROW) + 1)" regardless of the frame options. We'll set the
+ * frame options to "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW"
+ * so they agree with what window_row_number_support() optimizes the
+ * frame options to be. Using ROWS instead of RANGE saves the executor
+ * from doing peer row checks during execution.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
PG_RETURN_POINTER(NULL);
}
PG_RETURN_POINTER(req);
}
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * dense_rank() is unaffected by the frame options. Here we set the
+ * frame options to match what's done in row_number's support
+ * function. Using ROWS instead of RANGE (the default) saves the
+ * executor from having to check for peer rows.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
PG_RETURN_POINTER(NULL);
}
PG_RETURN_FLOAT8((float8) (context->rank - 1) / (float8) (totalrows - 1));
}
+/*
+ * window_percent_rank_support
+ *		prosupport function for window_percent_rank()
+ *
+ * Answers SupportRequestOptimizeWindowClause requests; every other request
+ * type is declined by returning NULL.
+ */
+Datum
+window_percent_rank_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+	SupportRequestOptimizeWindowClause *req;
+
+	/* not a request type we know how to handle */
+	if (!IsA(rawreq, SupportRequestOptimizeWindowClause))
+	{
+		PG_RETURN_POINTER(NULL);
+	}
+
+	req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+	/*
+	 * The frame options have no effect on percent_rank().  Ask for the same
+	 * frame options that row_number's support function asks for.  ROWS
+	 * spares the executor the peer-row checks that RANGE (the default)
+	 * requires.
+	 */
+	req->frameOptions = FRAMEOPTION_NONDEFAULT
+		| FRAMEOPTION_ROWS
+		| FRAMEOPTION_START_UNBOUNDED_PRECEDING
+		| FRAMEOPTION_END_CURRENT_ROW;
+
+	PG_RETURN_POINTER(req);
+}
+
+
/*
* cume_dist
* return fraction between 0 and 1 inclusive,
PG_RETURN_FLOAT8((float8) context->rank / (float8) totalrows);
}
+/*
+ * window_cume_dist_support
+ *		prosupport function for window_cume_dist()
+ *
+ * Answers SupportRequestOptimizeWindowClause requests; every other request
+ * type is declined by returning NULL.
+ */
+Datum
+window_cume_dist_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+	SupportRequestOptimizeWindowClause *req;
+
+	/* not a request type we know how to handle */
+	if (!IsA(rawreq, SupportRequestOptimizeWindowClause))
+	{
+		PG_RETURN_POINTER(NULL);
+	}
+
+	req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+	/*
+	 * The frame options have no effect on cume_dist().  Ask for the same
+	 * frame options that row_number's support function asks for.  ROWS
+	 * spares the executor the peer-row checks that RANGE (the default)
+	 * requires.
+	 */
+	req->frameOptions = FRAMEOPTION_NONDEFAULT
+		| FRAMEOPTION_ROWS
+		| FRAMEOPTION_START_UNBOUNDED_PRECEDING
+		| FRAMEOPTION_END_CURRENT_ROW;
+
+	PG_RETURN_POINTER(req);
+}
+
/*
* ntile
* compute an exact numeric value with scale 0 (zero),
PG_RETURN_INT32(context->ntile);
}
+/*
+ * window_ntile_support
+ *		prosupport function for window_ntile()
+ *
+ * Answers SupportRequestOptimizeWindowClause requests; every other request
+ * type is declined by returning NULL.
+ */
+Datum
+window_ntile_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+	SupportRequestOptimizeWindowClause *req;
+
+	/* not a request type we know how to handle */
+	if (!IsA(rawreq, SupportRequestOptimizeWindowClause))
+	{
+		PG_RETURN_POINTER(NULL);
+	}
+
+	req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+	/*
+	 * The frame options have no effect on ntile().  Ask for the same frame
+	 * options that row_number's support function asks for.  ROWS spares the
+	 * executor the peer-row checks that RANGE (the default) requires.
+	 */
+	req->frameOptions = FRAMEOPTION_NONDEFAULT
+		| FRAMEOPTION_ROWS
+		| FRAMEOPTION_START_UNBOUNDED_PRECEDING
+		| FRAMEOPTION_END_CURRENT_ROW;
+
+	PG_RETURN_POINTER(req);
+}
+
/*
* leadlag_common
* common operation of lead() and lag()
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202212201
+#define CATALOG_VERSION_NO 202212231
#endif
proname => 'row_number', prosupport => 'window_row_number_support',
prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '',
prosrc => 'window_row_number' },
-{ oid => '6233', descr => 'planner support for row_number run condition',
+{ oid => '6233', descr => 'planner support for row_number',
proname => 'window_row_number_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_row_number_support' },
{ oid => '3101', descr => 'integer rank with gaps',
proname => 'rank', prosupport => 'window_rank_support', prokind => 'w',
proisstrict => 'f', prorettype => 'int8', proargtypes => '',
prosrc => 'window_rank' },
-{ oid => '6234', descr => 'planner support for rank run condition',
+{ oid => '6234', descr => 'planner support for rank',
proname => 'window_rank_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_rank_support' },
{ oid => '3102', descr => 'integer rank without gaps',
proname => 'dense_rank', prosupport => 'window_dense_rank_support',
prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '',
prosrc => 'window_dense_rank' },
-{ oid => '6235', descr => 'planner support for dense rank run condition',
+{ oid => '6235', descr => 'planner support for dense_rank',
proname => 'window_dense_rank_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_dense_rank_support' },
{ oid => '3103', descr => 'fractional rank within partition',
- proname => 'percent_rank', prokind => 'w', proisstrict => 'f',
- prorettype => 'float8', proargtypes => '', prosrc => 'window_percent_rank' },
+ proname => 'percent_rank', prosupport => 'window_percent_rank_support',
+ prokind => 'w', proisstrict => 'f', prorettype => 'float8',
+ proargtypes => '', prosrc => 'window_percent_rank' },
+{ oid => '9773', descr => 'planner support for percent_rank',
+ proname => 'window_percent_rank_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'window_percent_rank_support' },
{ oid => '3104', descr => 'fractional row number within partition',
- proname => 'cume_dist', prokind => 'w', proisstrict => 'f',
- prorettype => 'float8', proargtypes => '', prosrc => 'window_cume_dist' },
+ proname => 'cume_dist', prosupport => 'window_cume_dist_support',
+ prokind => 'w', proisstrict => 'f', prorettype => 'float8',
+ proargtypes => '', prosrc => 'window_cume_dist' },
+{ oid => '9774', descr => 'planner support for cume_dist',
+ proname => 'window_cume_dist_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'window_cume_dist_support' },
{ oid => '3105', descr => 'split rows into N groups',
- proname => 'ntile', prokind => 'w', prorettype => 'int4',
- proargtypes => 'int4', prosrc => 'window_ntile' },
+ proname => 'ntile', prosupport => 'window_ntile_support', prokind => 'w',
+ prorettype => 'int4', proargtypes => 'int4', prosrc => 'window_ntile' },
+{ oid => '9775', descr => 'planner support for ntile',
+ proname => 'window_ntile_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'window_ntile_support' },
{ oid => '3106', descr => 'fetch the preceding row value',
proname => 'lag', prokind => 'w', prorettype => 'anyelement',
proargtypes => 'anyelement', prosrc => 'window_lag' },
MonotonicFunction monotonic;
} SupportRequestWFuncMonotonic;
+/*
+ * Some WindowFunc behavior might not be affected by certain variations in
+ * the WindowClause's frameOptions. For example, row_number() is coded in
+ * such a way that the frame options don't change the returned row number.
+ * nodeWindowAgg.c will have less work to do if the ROWS option is used
+ * instead of the RANGE option as no check needs to be done for peer rows.
+ * Since RANGE is included in the default frame options, window functions
+ * such as row_number() might want to change that to ROWS.
+ *
+ * Here we allow a WindowFunc's support function to determine which, if
+ * anything, can be changed about the WindowClause which the WindowFunc
+ * belongs to. Currently only the frameOptions can be modified. However,
+ * we may want to allow more optimizations in the future.
+ *
+ * The support function is responsible for ensuring the optimized version of
+ * the frameOptions doesn't affect the result of the window function. The
+ * planner is responsible for only changing the frame options when all
+ * WindowFuncs using this particular WindowClause agree on what the optimized
+ * version of the frameOptions is. If a particular WindowFunc being used
+ * does not have a support function then the planner will not make any changes
+ * to the WindowClause's frameOptions.
+ *
+ * 'window_func' and 'window_clause' are set by the planner before calling the
+ * support function so that the support function has these fields available.
+ * These may be required in order to determine which optimizations are
+ * possible.
+ *
+ * 'frameOptions' is set by the planner to WindowClause.frameOptions. The
+ * support function must only adjust this if optimizations are possible for
+ * the given WindowFunc.
+ *
+ * A support function that handles this request returns a pointer to the
+ * request node; one that does not handle it returns NULL.
+ */
+typedef struct SupportRequestOptimizeWindowClause
+{
+ NodeTag type;
+
+ /* Input fields (filled in by the planner before the call): */
+ WindowFunc *window_func; /* The window function being asked about */
+ struct WindowClause *window_clause; /* The window clause it belongs to */
+
+ /* Input/Output fields: */
+ int frameOptions; /* On input, the WindowClause's current
+ * frameOptions. On output, the new frameOptions, or left
+ * untouched if no optimizations are possible. */
+} SupportRequestOptimizeWindowClause;
+
#endif /* SUPPORTNODES_H */
14600 | 3 | | sales
(3 rows)
+--
+-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate
+-- WindowClauses
+--
+-- Ensure WindowClause frameOptions are changed so that only a single
+-- WindowAgg exists in the plan.
+EXPLAIN (COSTS OFF)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) drnk
+FROM empsalary;
+ QUERY PLAN
+----------------------------------------
+ WindowAgg
+ -> Sort
+ Sort Key: depname, enroll_date
+ -> Seq Scan on empsalary
+(4 rows)
+
+-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions
+-- being changed are untouched
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------
+ WindowAgg
+ Output: empno, depname, (row_number() OVER (?)), (rank() OVER (?)), count(*) OVER (?), enroll_date
+ -> WindowAgg
+ Output: depname, enroll_date, empno, row_number() OVER (?), rank() OVER (?)
+ -> Sort
+ Output: depname, enroll_date, empno
+ Sort Key: empsalary.depname, empsalary.enroll_date
+ -> Seq Scan on pg_temp.empsalary
+ Output: depname, enroll_date, empno
+(9 rows)
+
+-- Ensure the above query gives us the expected results
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+ empno | depname | rn | rnk | cnt
+-------+-----------+----+-----+-----
+ 8 | develop | 1 | 1 | 1
+ 10 | develop | 2 | 2 | 1
+ 11 | develop | 3 | 3 | 1
+ 9 | develop | 4 | 4 | 2
+ 7 | develop | 5 | 4 | 2
+ 2 | personnel | 1 | 1 | 1
+ 5 | personnel | 2 | 2 | 1
+ 1 | sales | 1 | 1 | 1
+ 3 | sales | 2 | 2 | 1
+ 4 | sales | 3 | 3 | 1
+(10 rows)
+
-- Test pushdown of quals into a subquery containing window functions
-- pushdown is safe because all PARTITION BY clauses include depname:
EXPLAIN (COSTS OFF)
depname
FROM empsalary GROUP BY depname;
+--
+-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate
+-- WindowClauses
+--
+
+-- Ensure WindowClause frameOptions are changed so that only a single
+-- WindowAgg exists in the plan.
+EXPLAIN (COSTS OFF)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) drnk
+FROM empsalary;
+
+-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions
+-- being changed are untouched
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+
+-- Ensure the above query gives us the expected results
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+
-- Test pushdown of quals into a subquery containing window functions
-- pushdown is safe because all PARTITION BY clauses include depname: