Generate EquivalenceClass members for partitionwise child join rels.
authorTom Lane <[email protected]>
Tue, 5 Nov 2019 16:42:24 +0000 (11:42 -0500)
committerTom Lane <[email protected]>
Tue, 5 Nov 2019 16:42:24 +0000 (11:42 -0500)
Commit d25ea0127 got rid of what I thought were entirely unnecessary
derived child expressions in EquivalenceClasses for EC members that
mention multiple baserels.  But it turns out that some of the child
expressions that code created are necessary for partitionwise joins,
else we fail to find matching pathkeys for Sort nodes.  (This happens
only for certain shapes of the resulting plan; it may be that
partitionwise aggregation is also necessary to show the failure,
though I'm not sure of that.)

Reverting that commit entirely would be quite painful performance-wise
for large partition sets.  So instead, add code that explicitly
generates child expressions that match only partitionwise child join
rels we have actually generated.

Per report from Justin Pryzby.  (Amit Langote noticed the problem
earlier, though it's not clear if he recognized then that it could
result in a planner error, not merely failure to exploit partitionwise
join, in the code as-committed.)  Back-patch to v12 where commit
d25ea0127 came in.

Amit Langote, with lots of kibitzing from me

Discussion: https://postgr.es/m/CA+HiwqG2WVUGmLJqtR0tPFhniO=H=9qQ+Z3L_ZC+Y3-EVQHFGg@mail.gmail.com
Discussion: https://postgr.es/m/20191011143703[email protected]

src/backend/optimizer/path/equivclass.c
src/backend/optimizer/util/relnode.c
src/include/optimizer/paths.h
src/test/regress/expected/partition_join.out
src/test/regress/sql/partition_join.sql

index ccc07ba9f0de752821f823a7641b41796081a6b6..082776f7fe5d6d68890d343c951030a91bb80f95 100644 (file)
@@ -1132,7 +1132,7 @@ generate_join_implied_equalities(PlannerInfo *root,
        Relids          inner_relids = inner_rel->relids;
        Relids          nominal_inner_relids;
        Relids          nominal_join_relids;
-       Bitmapset matching_ecs;
+       Bitmapset  *matching_ecs;
        int                     i;
 
        /* If inner rel is a child, extra setup work is needed */
@@ -2209,7 +2209,7 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
 
 /*
  * add_child_rel_equivalences
- *       Search for EC members that reference the parent_rel, and
+ *       Search for EC members that reference the root parent of child_rel, and
  *       add transformed members referencing the child_rel.
  *
  * Note that this function won't be called at all unless we have at least some
@@ -2217,6 +2217,12 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
  *
  * parent_rel and child_rel could be derived from appinfo, but since the
  * caller has already computed them, we might as well just pass them in.
+ *
+ * The passed-in AppendRelInfo is not used when the parent_rel is not a
+ * top-level baserel, since it shows the mapping from the parent_rel but
+ * we need to translate EC expressions that refer to the top-level parent.
+ * Using it is faster than using adjust_appendrel_attrs_multilevel(), though,
+ * so we prefer it when we can.
  */
 void
 add_child_rel_equivalences(PlannerInfo *root,
@@ -2224,6 +2230,8 @@ add_child_rel_equivalences(PlannerInfo *root,
                                                   RelOptInfo *parent_rel,
                                                   RelOptInfo *child_rel)
 {
+       Relids          top_parent_relids = child_rel->top_parent_relids;
+       Relids          child_relids = child_rel->relids;
        int                     i;
 
        /*
@@ -2248,7 +2256,7 @@ add_child_rel_equivalences(PlannerInfo *root,
                        continue;
 
                /* Sanity check eclass_indexes only contain ECs for parent_rel */
-               Assert(bms_is_subset(child_rel->top_parent_relids, cur_ec->ec_relids));
+               Assert(bms_is_subset(top_parent_relids, cur_ec->ec_relids));
 
                /*
                 * We don't use foreach() here because there's no point in scanning
@@ -2268,13 +2276,14 @@ add_child_rel_equivalences(PlannerInfo *root,
                         * already-transformed child members.  Otherwise, if some original
                         * member expression references more than one appendrel, we'd get
                         * an O(N^2) explosion of useless derived expressions for
-                        * combinations of children.
+                        * combinations of children.  (But add_child_join_rel_equivalences
+                        * may add targeted combinations for partitionwise-join purposes.)
                         */
                        if (cur_em->em_is_child)
                                continue;               /* ignore children here */
 
                        /* Does this member reference child's topmost parent rel? */
-                       if (bms_overlap(cur_em->em_relids, child_rel->top_parent_relids))
+                       if (bms_overlap(cur_em->em_relids, top_parent_relids))
                        {
                                /* Yes, generate transformed child version */
                                Expr       *child_expr;
@@ -2295,8 +2304,8 @@ add_child_rel_equivalences(PlannerInfo *root,
                                        child_expr = (Expr *)
                                                adjust_appendrel_attrs_multilevel(root,
                                                                                                                  (Node *) cur_em->em_expr,
-                                                                                                                 child_rel->relids,
-                                                                                                                 child_rel->top_parent_relids);
+                                                                                                                 child_relids,
+                                                                                                                 top_parent_relids);
                                }
 
                                /*
@@ -2306,21 +2315,20 @@ add_child_rel_equivalences(PlannerInfo *root,
                                 * don't want the child member to be marked as constant.
                                 */
                                new_relids = bms_difference(cur_em->em_relids,
-                                                                                       child_rel->top_parent_relids);
-                               new_relids = bms_add_members(new_relids, child_rel->relids);
+                                                                                       top_parent_relids);
+                               new_relids = bms_add_members(new_relids, child_relids);
 
                                /*
                                 * And likewise for nullable_relids.  Note this code assumes
                                 * parent and child relids are singletons.
                                 */
                                new_nullable_relids = cur_em->em_nullable_relids;
-                               if (bms_overlap(new_nullable_relids,
-                                                               child_rel->top_parent_relids))
+                               if (bms_overlap(new_nullable_relids, top_parent_relids))
                                {
                                        new_nullable_relids = bms_difference(new_nullable_relids,
-                                                                                                                child_rel->top_parent_relids);
+                                                                                                                top_parent_relids);
                                        new_nullable_relids = bms_add_members(new_nullable_relids,
-                                                                                                                 child_rel->relids);
+                                                                                                                 child_relids);
                                }
 
                                (void) add_eq_member(cur_ec, child_expr,
@@ -2334,6 +2342,133 @@ add_child_rel_equivalences(PlannerInfo *root,
        }
 }
 
+/*
+ * add_child_join_rel_equivalences
+ *       Like add_child_rel_equivalences(), but for joinrels
+ *
+ * Here we find the ECs relevant to the top parent joinrel and add transformed
+ * member expressions that refer to this child joinrel.
+ *
+ * Note that this function won't be called at all unless we have at least some
+ * reason to believe that the EC members it generates will be useful.
+ */
+void
+add_child_join_rel_equivalences(PlannerInfo *root,
+                                                               int nappinfos, AppendRelInfo **appinfos,
+                                                               RelOptInfo *parent_joinrel,
+                                                               RelOptInfo *child_joinrel)
+{
+       Relids          top_parent_relids = child_joinrel->top_parent_relids;
+       Relids          child_relids = child_joinrel->relids;
+       Bitmapset  *matching_ecs;
+       int                     i;
+
+       Assert(IS_JOIN_REL(child_joinrel) && IS_JOIN_REL(parent_joinrel));
+
+       /* We need consider only ECs that mention the parent joinrel */
+       matching_ecs = get_eclass_indexes_for_relids(root, top_parent_relids);
+
+       i = -1;
+       while ((i = bms_next_member(matching_ecs, i)) >= 0)
+       {
+               EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+               int                     num_members;
+
+               /*
+                * If this EC contains a volatile expression, then generating child
+                * EMs would be downright dangerous, so skip it.  We rely on a
+                * volatile EC having only one EM.
+                */
+               if (cur_ec->ec_has_volatile)
+                       continue;
+
+               /* Sanity check on get_eclass_indexes_for_relids result */
+               Assert(bms_overlap(top_parent_relids, cur_ec->ec_relids));
+
+               /*
+                * We don't use foreach() here because there's no point in scanning
+                * newly-added child members, so we can stop after the last
+                * pre-existing EC member.
+                */
+               num_members = list_length(cur_ec->ec_members);
+               for (int pos = 0; pos < num_members; pos++)
+               {
+                       EquivalenceMember *cur_em = (EquivalenceMember *) list_nth(cur_ec->ec_members, pos);
+
+                       if (cur_em->em_is_const)
+                               continue;               /* ignore consts here */
+
+                       /*
+                        * We consider only original EC members here, not
+                        * already-transformed child members.
+                        */
+                       if (cur_em->em_is_child)
+                               continue;               /* ignore children here */
+
+                       /*
+                        * We may ignore expressions that reference a single baserel,
+                        * because add_child_rel_equivalences should have handled them.
+                        */
+                       if (bms_membership(cur_em->em_relids) != BMS_MULTIPLE)
+                               continue;
+
+                       /* Does this member reference child's topmost parent rel? */
+                       if (bms_overlap(cur_em->em_relids, top_parent_relids))
+                       {
+                               /* Yes, generate transformed child version */
+                               Expr       *child_expr;
+                               Relids          new_relids;
+                               Relids          new_nullable_relids;
+
+                               if (parent_joinrel->reloptkind == RELOPT_JOINREL)
+                               {
+                                       /* Simple single-level transformation */
+                                       child_expr = (Expr *)
+                                               adjust_appendrel_attrs(root,
+                                                                                          (Node *) cur_em->em_expr,
+                                                                                          nappinfos, appinfos);
+                               }
+                               else
+                               {
+                                       /* Must do multi-level transformation */
+                                       Assert(parent_joinrel->reloptkind == RELOPT_OTHER_JOINREL);
+                                       child_expr = (Expr *)
+                                               adjust_appendrel_attrs_multilevel(root,
+                                                                                                                 (Node *) cur_em->em_expr,
+                                                                                                                 child_relids,
+                                                                                                                 top_parent_relids);
+                               }
+
+                               /*
+                                * Transform em_relids to match.  Note we do *not* do
+                                * pull_varnos(child_expr) here, as for example the
+                                * transformation might have substituted a constant, but we
+                                * don't want the child member to be marked as constant.
+                                */
+                               new_relids = bms_difference(cur_em->em_relids,
+                                                                                       top_parent_relids);
+                               new_relids = bms_add_members(new_relids, child_relids);
+
+                               /*
+                                * For nullable_relids, we must selectively replace parent
+                                * nullable relids with child ones.
+                                */
+                               new_nullable_relids = cur_em->em_nullable_relids;
+                               if (bms_overlap(new_nullable_relids, top_parent_relids))
+                                       new_nullable_relids =
+                                               adjust_child_relids_multilevel(root,
+                                                                                                          new_nullable_relids,
+                                                                                                          child_relids,
+                                                                                                          top_parent_relids);
+
+                               (void) add_eq_member(cur_ec, child_expr,
+                                                                        new_relids, new_nullable_relids,
+                                                                        true, cur_em->em_datatype);
+                       }
+               }
+       }
+}
+
 
 /*
  * generate_implied_equalities_for_column
index 85415381fb1730995d366693bea42531947c155b..03e02423b2ed4a68e5efe419889819a611b37576 100644 (file)
@@ -843,6 +843,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
        /* Compute information relevant to foreign relations. */
        set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
 
+       /* Compute information needed for mapping Vars to the child rel */
        appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
 
        /* Set up reltarget struct */
@@ -854,7 +855,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
                                                                                                                (Node *) parent_joinrel->joininfo,
                                                                                                                nappinfos,
                                                                                                                appinfos);
-       pfree(appinfos);
 
        /*
         * Lateral relids referred in child join will be same as that referred in
@@ -886,6 +886,19 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
        /* Add the relation to the PlannerInfo. */
        add_join_rel(root, joinrel);
 
+       /*
+        * We might need EquivalenceClass members corresponding to the child join,
+        * so that we can represent sort pathkeys for it.  As with children of
+        * baserels, we shouldn't need this unless there are relevant eclass joins
+        * (implying that a merge join might be possible) or pathkeys to sort by.
+        */
+       if (joinrel->has_eclass_joins || has_useful_pathkeys(root, parent_joinrel))
+               add_child_join_rel_equivalences(root,
+                                                                               nappinfos, appinfos,
+                                                                               parent_joinrel, joinrel);
+
+       pfree(appinfos);
+
        return joinrel;
 }
 
index 7345137d1da4e0f4df71ae9e101c99d11e742edc..c6c34630c2828563a4dd1666e3e35861f47580af 100644 (file)
@@ -153,6 +153,11 @@ extern void add_child_rel_equivalences(PlannerInfo *root,
                                                                           AppendRelInfo *appinfo,
                                                                           RelOptInfo *parent_rel,
                                                                           RelOptInfo *child_rel);
+extern void add_child_join_rel_equivalences(PlannerInfo *root,
+                                                                                       int nappinfos,
+                                                                                       AppendRelInfo **appinfos,
+                                                                                       RelOptInfo *parent_rel,
+                                                                                       RelOptInfo *child_rel);
 extern List *generate_implied_equalities_for_column(PlannerInfo *root,
                                                                                                        RelOptInfo *rel,
                                                                                                        ec_matches_callback_type callback,
index cad8dd591aa6b2d3b5c50ee49ef2a51d7d0b3849..975bf6765ca5f79a227dc83936846ea91c69969c 100644 (file)
@@ -459,6 +459,83 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
  550 |     | 
 (12 rows)
 
+-- bug with inadequate sort key representation
+SET enable_partitionwise_aggregate TO true;
+SET enable_hashjoin TO false;
+EXPLAIN (COSTS OFF)
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+                                                    QUERY PLAN                                                     
+-------------------------------------------------------------------------------------------------------------------
+ Group
+   Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+   ->  Merge Append
+         Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+         ->  Group
+               Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_p1.a = p2.a) AND (prt1_p1.b = p2.b))
+                           Filter: ((COALESCE(prt1_p1.a, p2.a) >= 490) AND (COALESCE(prt1_p1.a, p2.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_p1.a, prt1_p1.b
+                                 ->  Seq Scan on prt1_p1
+                           ->  Sort
+                                 Sort Key: p2.a, p2.b
+                                 ->  Seq Scan on prt2_p1 p2
+         ->  Group
+               Group Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_p2.a = p2_1.a) AND (prt1_p2.b = p2_1.b))
+                           Filter: ((COALESCE(prt1_p2.a, p2_1.a) >= 490) AND (COALESCE(prt1_p2.a, p2_1.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_p2.a, prt1_p2.b
+                                 ->  Seq Scan on prt1_p2
+                           ->  Sort
+                                 Sort Key: p2_1.a, p2_1.b
+                                 ->  Seq Scan on prt2_p2 p2_1
+         ->  Group
+               Group Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_p3.a = p2_2.a) AND (prt1_p3.b = p2_2.b))
+                           Filter: ((COALESCE(prt1_p3.a, p2_2.a) >= 490) AND (COALESCE(prt1_p3.a, p2_2.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_p3.a, prt1_p3.b
+                                 ->  Seq Scan on prt1_p3
+                           ->  Sort
+                                 Sort Key: p2_2.a, p2_2.b
+                                 ->  Seq Scan on prt2_p3 p2_2
+(43 rows)
+
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+  a  | b  
+-----+----
+ 490 | 15
+ 492 | 17
+ 494 | 19
+ 495 | 20
+ 496 | 21
+ 498 | 23
+ 500 |  0
+ 501 |  1
+ 502 |  2
+ 504 |  4
+ 506 |  6
+ 507 |  7
+ 508 |  8
+ 510 | 10
+(14 rows)
+
+RESET enable_partitionwise_aggregate;
+RESET enable_hashjoin;
 --
 -- partitioned by expression
 --
index fb3ba18a26ff617cd13443c5073e429f1307ee76..92994b479bba982f58bc72b1b08e9d7362cdadc7 100644 (file)
@@ -91,6 +91,21 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
                          (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
                          ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
 
+-- bug with inadequate sort key representation
+SET enable_partitionwise_aggregate TO true;
+SET enable_hashjoin TO false;
+
+EXPLAIN (COSTS OFF)
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+
+RESET enable_partitionwise_aggregate;
+RESET enable_hashjoin;
+
 --
 -- partitioned by expression
 --