Correctly identify which EC members are computable at a plan node.
authorTom Lane <[email protected]>
Sat, 12 Oct 2024 18:56:08 +0000 (14:56 -0400)
committerTom Lane <[email protected]>
Sat, 12 Oct 2024 18:56:08 +0000 (14:56 -0400)
find_computable_ec_member() had the wrong mental model of what
its primary caller prepare_sort_from_pathkeys() would do with
the selected EquivalenceClass member expression.  We will not
compute the EC expression in a plan node atop the one returning
the passed-in targetlist; rather, the EC expression will be
computed as an additional column of that targetlist.  So any
Var or quasi-Var used in the given tlist is also available to the
EC expression.  In simple cases this makes no difference because
the given tlist is just a list of Vars or quasi-Vars --- but if
we are considering an appendrel member produced by flattening
a UNION ALL, the tlist may contain expressions, resulting in
failure to match and a "could not find pathkey item to sort"
error.

To fix, we can flatten both the tlist and the EC members with
pull_var_clause(), and then just check for subset-ness, so
that the code is actually shorter than before.

While this bug is quite old, the present patch only works back to
v13.  We could possibly make it work in v12 by back-patching parts
of 375398244.  On the whole though I don't like the risk/reward
ratio of that idea.  v12's final release is next month, meaning
there would be no chance to correct matters if the patch causes a
regression.  Since this failure has escaped notice for 14 years,
it's likely nobody will hit it in the field with v12.

Per bug #18652 from Alexander Lakhin.

Andrei Lepikhov and Tom Lane

Discussion: https://postgr.es/m/18652-deaa782ebcca85d1@postgresql.org

src/backend/optimizer/path/equivclass.c
src/test/regress/expected/inherit.out
src/test/regress/sql/inherit.sql

index 8f6f005ecb9fcd0d05421ce6ba6ffea1e851946a..fae137dd825e833bdcc154e70b906df6e1dff927 100644 (file)
@@ -38,7 +38,6 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
                                                                                JoinDomain *jdomain,
                                                                                EquivalenceMember *parent,
                                                                                Oid datatype);
-static bool is_exprlist_member(Expr *node, List *exprs);
 static void generate_base_implied_equalities_const(PlannerInfo *root,
                                                                                                   EquivalenceClass *ec);
 static void generate_base_implied_equalities_no_const(PlannerInfo *root,
@@ -810,9 +809,18 @@ find_ec_member_matching_expr(EquivalenceClass *ec,
  *             expressions appearing in "exprs"; return NULL if no match.
  *
  * "exprs" can be either a list of bare expression trees, or a list of
- * TargetEntry nodes.  Either way, it should contain Vars and possibly
- * Aggrefs and WindowFuncs, which are matched to the corresponding elements
- * of the EquivalenceClass's expressions.
+ * TargetEntry nodes.  Typically it will contain Vars and possibly Aggrefs
+ * and WindowFuncs; however, when considering an appendrel member the list
+ * could contain arbitrary expressions.  We consider an EC member to be
+ * computable if all the Vars, PlaceHolderVars, Aggrefs, and WindowFuncs
+ * it needs are present in "exprs".
+ *
+ * There is some subtlety in that definition: for example, if an EC member is
+ * Var_A + 1 while what is in "exprs" is Var_A + 2, it's still computable.
+ * This works because in the final plan tree, the EC member's expression will
+ * be computed as part of the same plan node targetlist that is currently
+ * represented by "exprs".  So if we have Var_A available for the existing
+ * tlist member, it must be OK to use it in the EC expression too.
  *
  * Unlike find_ec_member_matching_expr, there's no special provision here
  * for binary-compatible relabeling.  This is intentional: if we have to
@@ -832,12 +840,24 @@ find_computable_ec_member(PlannerInfo *root,
                                                  Relids relids,
                                                  bool require_parallel_safe)
 {
+       List       *exprvars;
        ListCell   *lc;
 
+       /*
+        * Pull out the Vars and quasi-Vars present in "exprs".  In the typical
+        * non-appendrel case, this is just another representation of the same
+        * list.  However, it does remove the distinction between the case of a
+        * list of plain expressions and a list of TargetEntrys.
+        */
+       exprvars = pull_var_clause((Node *) exprs,
+                                                          PVC_INCLUDE_AGGREGATES |
+                                                          PVC_INCLUDE_WINDOWFUNCS |
+                                                          PVC_INCLUDE_PLACEHOLDERS);
+
        foreach(lc, ec->ec_members)
        {
                EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
-               List       *exprvars;
+               List       *emvars;
                ListCell   *lc2;
 
                /*
@@ -855,18 +875,18 @@ find_computable_ec_member(PlannerInfo *root,
                        continue;
 
                /*
-                * Match if all Vars and quasi-Vars are available in "exprs".
+                * Match if all Vars and quasi-Vars are present in "exprs".
                 */
-               exprvars = pull_var_clause((Node *) em->em_expr,
-                                                                  PVC_INCLUDE_AGGREGATES |
-                                                                  PVC_INCLUDE_WINDOWFUNCS |
-                                                                  PVC_INCLUDE_PLACEHOLDERS);
-               foreach(lc2, exprvars)
+               emvars = pull_var_clause((Node *) em->em_expr,
+                                                                PVC_INCLUDE_AGGREGATES |
+                                                                PVC_INCLUDE_WINDOWFUNCS |
+                                                                PVC_INCLUDE_PLACEHOLDERS);
+               foreach(lc2, emvars)
                {
-                       if (!is_exprlist_member(lfirst(lc2), exprs))
+                       if (!list_member(exprvars, lfirst(lc2)))
                                break;
                }
-               list_free(exprvars);
+               list_free(emvars);
                if (lc2)
                        continue;                       /* we hit a non-available Var */
 
@@ -884,31 +904,6 @@ find_computable_ec_member(PlannerInfo *root,
        return NULL;
 }
 
-/*
- * is_exprlist_member
- *       Subroutine for find_computable_ec_member: is "node" in "exprs"?
- *
- * Per the requirements of that function, "exprs" might or might not have
- * TargetEntry superstructure.
- */
-static bool
-is_exprlist_member(Expr *node, List *exprs)
-{
-       ListCell   *lc;
-
-       foreach(lc, exprs)
-       {
-               Expr       *expr = (Expr *) lfirst(lc);
-
-               if (expr && IsA(expr, TargetEntry))
-                       expr = ((TargetEntry *) expr)->expr;
-
-               if (equal(node, expr))
-                       return true;
-       }
-       return false;
-}
-
 /*
  * relation_can_be_sorted_early
  *             Can this relation be sorted on this EC before the final output step?
index c9defd7e9db1b60c2e6ab195695197112327613d..482fc47933cbdf7fce2daf7bdda6e36b7230a44f 100644 (file)
@@ -1678,6 +1678,36 @@ select min(1-id) from matest0;
 
 reset enable_seqscan;
 reset enable_parallel_append;
+explain (verbose, costs off)  -- bug #18652
+select 1 - id as c from
+(select id from matest3 t1 union all select id * 2 from matest3 t2) ss
+order by c;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Result
+   Output: ((1 - t1.id))
+   ->  Merge Append
+         Sort Key: ((1 - t1.id))
+         ->  Index Scan using matest3i on public.matest3 t1
+               Output: t1.id, (1 - t1.id)
+         ->  Sort
+               Output: ((t2.id * 2)), ((1 - (t2.id * 2)))
+               Sort Key: ((1 - (t2.id * 2)))
+               ->  Seq Scan on public.matest3 t2
+                     Output: (t2.id * 2), (1 - (t2.id * 2))
+(11 rows)
+
+select 1 - id as c from
+(select id from matest3 t1 union all select id * 2 from matest3 t2) ss
+order by c;
+  c  
+-----
+ -11
+  -9
+  -5
+  -4
+(4 rows)
+
 drop table matest0 cascade;
 NOTICE:  drop cascades to 3 other objects
 DETAIL:  drop cascades to table matest1
index e3bcfdb181e6ca1c8e67051925f45c2957091ecb..51251b0e5116e1687d510d46b475381555f19028 100644 (file)
@@ -601,6 +601,14 @@ select min(1-id) from matest0;
 reset enable_seqscan;
 reset enable_parallel_append;
 
+explain (verbose, costs off)  -- bug #18652
+select 1 - id as c from
+(select id from matest3 t1 union all select id * 2 from matest3 t2) ss
+order by c;
+select 1 - id as c from
+(select id from matest3 t1 union all select id * 2 from matest3 t2) ss
+order by c;
+
 drop table matest0 cascade;
 
 --