Fix joinclause removal logic to cope with cloned clauses.
authorTom Lane <[email protected]>
Fri, 26 May 2023 16:13:19 +0000 (12:13 -0400)
committerTom Lane <[email protected]>
Fri, 26 May 2023 16:13:19 +0000 (12:13 -0400)
When we're deleting a no-op LEFT JOIN from the query, we must remove
the join's joinclauses from surviving relations' joininfo lists.
The invention of "cloned" clauses in 2489d76c4 broke the logic for
that; it'd fail to remove clones that include OJ relids outside the
doomed join's min relid sets, which could happen if that join was
previously discovered to commute with some other join.

This accidentally failed to cause problems in the majority of cases,
because we'd never decide that such a cloned clause was evaluatable at
any surviving join.  However, Richard Guo discovered a case where that
did happen, leading to "no relation entry for relid" errors later.
Also, adding assertions that a non-removed clause contains no Vars from
the doomed join exposes that there are quite a few existing regression
test cases where the problem happens but is accidentally not exposed.

The fix for this is just to include the target join's commute_above_r
and commute_below_l sets in the relid set we test against when
deciding whether a join clause is "pushed down" and thus not
removable.

While at it, do a little refactoring: the join's relid set can be
computed inside remove_rel_from_query rather than in the caller.

Patch by me; thanks to Richard Guo for review.

Discussion: https://postgr.es/m/CAMbWs4_PHrRqTKDNnTRsxxQy6BtYCVKsgXm1_gdN2yQ=kmcO5g@mail.gmail.com

src/backend/optimizer/plan/analyzejoins.c
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index a61e35f92d068582cfa146aabbe12968553ab2ae..6f7e657f05610667719476544520ee70c15f5764 100644 (file)
@@ -35,8 +35,8 @@
 
 /* local functions */
 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
-static void remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
-                                                                 Relids joinrelids);
+static void remove_rel_from_query(PlannerInfo *root, int relid,
+                                                                 SpecialJoinInfo *sjinfo);
 static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
                                                                                 int relid, int ojrelid);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@@ -73,7 +73,6 @@ restart:
        foreach(lc, root->join_info_list)
        {
                SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
-               Relids          joinrelids;
                int                     innerrelid;
                int                     nremoved;
 
@@ -88,12 +87,7 @@ restart:
                 */
                innerrelid = bms_singleton_member(sjinfo->min_righthand);
 
-               /* Compute the relid set for the join we are considering */
-               joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
-               if (sjinfo->ojrelid != 0)
-                       joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
-
-               remove_rel_from_query(root, innerrelid, sjinfo->ojrelid, joinrelids);
+               remove_rel_from_query(root, innerrelid, sjinfo);
 
                /* We verify that exactly one reference gets removed from joinlist */
                nremoved = 0;
@@ -324,21 +318,29 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 
 
 /*
- * Remove the target relid from the planner's data structures, having
- * determined that there is no need to include it in the query.
+ * Remove the target relid and references to the target join from the
+ * planner's data structures, having determined that there is no need
+ * to include them in the query.
  *
  * We are not terribly thorough here.  We only bother to update parts of
  * the planner's data structures that will actually be consulted later.
  */
 static void
-remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
-                                         Relids joinrelids)
+remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
 {
        RelOptInfo *rel = find_base_rel(root, relid);
+       int                     ojrelid = sjinfo->ojrelid;
+       Relids          joinrelids;
+       Relids          join_plus_commute;
        List       *joininfos;
        Index           rti;
        ListCell   *l;
 
+       /* Compute the relid set for the join we are considering */
+       joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+       Assert(ojrelid != 0);
+       joinrelids = bms_add_member(joinrelids, ojrelid);
+
        /*
         * Remove references to the rel from other baserels' attr_needed arrays.
         */
@@ -386,21 +388,21 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
         */
        foreach(l, root->join_info_list)
        {
-               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
-
-               sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
-               sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
-               sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
-               sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
-               sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, ojrelid);
-               sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, ojrelid);
-               sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, ojrelid);
-               sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, ojrelid);
+               SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
+
+               sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid);
+               sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid);
+               sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid);
+               sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid);
+               sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
+               sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
+               sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
+               sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
                /* relid cannot appear in these fields, but ojrelid can: */
-               sjinfo->commute_above_l = bms_del_member(sjinfo->commute_above_l, ojrelid);
-               sjinfo->commute_above_r = bms_del_member(sjinfo->commute_above_r, ojrelid);
-               sjinfo->commute_below_l = bms_del_member(sjinfo->commute_below_l, ojrelid);
-               sjinfo->commute_below_r = bms_del_member(sjinfo->commute_below_r, ojrelid);
+               sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
+               sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
+               sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
+               sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
        }
 
        /*
@@ -456,6 +458,18 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
         * just discard them, though.  Only quals that logically belonged to the
         * outer join being discarded should be removed from the query.
         *
+        * We might encounter a qual that is a clone of a deletable qual with some
+        * outer-join relids added (see deconstruct_distribute_oj_quals).  To
+        * ensure we get rid of such clones as well, add the relids of all OJs
+        * commutable with this one to the set we test against for
+        * pushed-down-ness.
+        */
+       join_plus_commute = bms_union(joinrelids,
+                                                                 sjinfo->commute_above_r);
+       join_plus_commute = bms_add_members(join_plus_commute,
+                                                                               sjinfo->commute_below_l);
+
+       /*
         * We must make a copy of the rel's old joininfo list before starting the
         * loop, because otherwise remove_join_clause_from_rels would destroy the
         * list while we're scanning it.
@@ -467,15 +481,30 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
 
                remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
 
-               if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids))
+               if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
                {
                        /*
                         * There might be references to relid or ojrelid in the
-                        * RestrictInfo, as a consequence of PHVs having ph_eval_at sets
-                        * that include those.  We already checked above that any such PHV
-                        * is safe, so we can just drop those references.
+                        * RestrictInfo's relid sets, as a consequence of PHVs having had
+                        * ph_eval_at sets that include those.  We already checked above
+                        * that any such PHV is safe (and updated its ph_eval_at), so we
+                        * can just drop those references.
                         */
                        remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
+
+                       /*
+                        * Cross-check that the clause itself does not reference the
+                        * target rel or join.
+                        */
+#ifdef USE_ASSERT_CHECKING
+                       {
+                               Relids          clause_varnos = pull_varnos(root,
+                                                                                                               (Node *) rinfo->clause);
+
+                               Assert(!bms_is_member(relid, clause_varnos));
+                               Assert(!bms_is_member(ojrelid, clause_varnos));
+                       }
+#endif
                        /* Now throw it back into the joininfo lists */
                        distribute_restrictinfo_to_rels(root, rinfo);
                }
index cd3db330d742e50017ae116b36c0d488875828c6..4c278b2fa3953a19f5e4a5664b84536c2723c079 100644 (file)
@@ -5320,6 +5320,22 @@ select a1.id from
  Seq Scan on a a1
 (1 row)
 
+explain (costs off)
+select 1 from a t1
+    left join a t2 on true
+   inner join a t3 on true
+    left join a t4 on t2.id = t4.id and t2.id = t3.id;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop
+   ->  Nested Loop Left Join
+         ->  Seq Scan on a t1
+         ->  Materialize
+               ->  Seq Scan on a t2
+   ->  Materialize
+         ->  Seq Scan on a t3
+(7 rows)
+
 -- another example (bug #17781)
 explain (costs off)
 select ss1.f1
index ec7f81481c73edbd447f688b5d46f6238e201b81..4baf5ebc138f7946ef89f260478e7c0f3be01087 100644 (file)
@@ -1891,6 +1891,12 @@ select a1.id from
   (a a3 left join a a4 on a3.id = a4.id)
   on a2.id = a3.id;
 
+explain (costs off)
+select 1 from a t1
+    left join a t2 on true
+   inner join a t3 on true
+    left join a t4 on t2.id = t4.id and t2.id = t3.id;
+
 -- another example (bug #17781)
 explain (costs off)
 select ss1.f1