Repair bug #4926 "too few pathkeys for mergeclauses". This example shows

author Tom Lane <[email protected]>

Fri, 17 Jul 2009 23:19:34 +0000 (23:19 +0000)

committer Tom Lane <[email protected]>

Fri, 17 Jul 2009 23:19:34 +0000 (23:19 +0000)
author Tom Lane <[email protected]>
Fri, 17 Jul 2009 23:19:34 +0000 (23:19 +0000)
committer Tom Lane <[email protected]>
Fri, 17 Jul 2009 23:19:34 +0000 (23:19 +0000)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 469ff82db39a86c018e1e8701c9fcdb984d6dbf1..2693cc154d534a20091b3da98c68c6e7eac9738d 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -11,7 +11,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.97 2009/02/28 03:51:05 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.98 2009/07/17 23:19:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -988,12 +988,21 @@ find_mergeclauses_for_pathkeys(PlannerInfo *root,
          * no two members have the same EC, so it's not possible for this
          * code to enter the same mergeclause into the result list twice.
          *
-        * XXX it's possible that multiple matching clauses might have
-        * different ECs on the other side, in which case the order we put
-        * them into our result makes a difference in the pathkeys required
-        * for the other input path.  However this routine hasn't got any info
-        * about which order would be best, so for now we disregard that case
-        * (which is probably a corner case anyway).
+        * It's possible that multiple matching clauses might have different
+        * ECs on the other side, in which case the order we put them into our
+        * result makes a difference in the pathkeys required for the other
+        * input path.  However this routine hasn't got any info about which
+        * order would be best, so we don't worry about that.
+        *
+        * It's also possible that the selected mergejoin clauses produce
+        * a noncanonical ordering of pathkeys for the other side, ie, we
+        * might select clauses that reference b.v1, b.v2, b.v1 in that
+        * order.  This is not harmful in itself, though it suggests that
+        * the clauses are partially redundant.  Since it happens only with
+        * redundant query conditions, we don't bother to eliminate it.
+        * make_inner_pathkeys_for_merge() has to delete duplicates when
+        * it constructs the canonical pathkeys list, and we also have to
+        * deal with the case in create_mergejoin_plan().
          *----------
          */
         foreach(j, restrictinfos)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index ab07a0dbeae6780249578505b43cbed72f2c7460..3f211508a1fe096e68efe0c782510539927e28d3 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.260 2009/06/11 14:48:59 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.261 2009/07/17 23:19:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1620,10 +1620,6 @@ create_mergejoin_plan(PlannerInfo *root,
     bool       *mergenullsfirst;
     MergeJoin  *join_plan;
     int         i;
-   EquivalenceClass *lastoeclass;
-   EquivalenceClass *lastieclass;
-   PathKey    *opathkey;
-   PathKey    *ipathkey;
     ListCell   *lc;
     ListCell   *lop;
     ListCell   *lip;
@@ -1729,10 +1725,6 @@ create_mergejoin_plan(PlannerInfo *root,
     mergestrategies = (int *) palloc(nClauses * sizeof(int));
     mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool));
  
-   lastoeclass = NULL;
-   lastieclass = NULL;
-   opathkey = NULL;
-   ipathkey = NULL;
     lop = list_head(outerpathkeys);
     lip = list_head(innerpathkeys);
     i = 0;
@@ -1741,6 +1733,11 @@ create_mergejoin_plan(PlannerInfo *root,
         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
         EquivalenceClass *oeclass;
         EquivalenceClass *ieclass;
+       PathKey    *opathkey;
+       PathKey    *ipathkey;
+       EquivalenceClass *opeclass;
+       EquivalenceClass *ipeclass;
+       ListCell   *l2;
  
         /* fetch outer/inner eclass from mergeclause */
         Assert(IsA(rinfo, RestrictInfo));
@@ -1757,28 +1754,100 @@ create_mergejoin_plan(PlannerInfo *root,
         Assert(oeclass != NULL);
         Assert(ieclass != NULL);
  
-       /* should match current or next pathkeys */
-       /* we check this carefully for debugging reasons */
-       if (oeclass != lastoeclass)
+       /*
+        * For debugging purposes, we check that the eclasses match the
+        * paths' pathkeys.  In typical cases the merge clauses are one-to-one
+        * with the pathkeys, but when dealing with partially redundant query
+        * conditions, we might have clauses that re-reference earlier path
+        * keys.  The case that we need to reject is where a pathkey is
+        * entirely skipped over.
+        *
+        * lop and lip reference the first as-yet-unused pathkey elements;
+        * it's okay to match them, or any element before them.  If they're
+        * NULL then we have found all pathkey elements to be used.
+        */
+       if (lop)
         {
-           if (!lop)
-               elog(ERROR, "too few pathkeys for mergeclauses");
             opathkey = (PathKey *) lfirst(lop);
-           lop = lnext(lop);
-           lastoeclass = opathkey->pk_eclass;
-           if (oeclass != lastoeclass)
-               elog(ERROR, "outer pathkeys do not match mergeclause");
+           opeclass = opathkey->pk_eclass;
+           if (oeclass == opeclass)
+           {
+               /* fast path for typical case */
+               lop = lnext(lop);
+           }
+           else
+           {
+               /* redundant clauses ... must match something before lop */
+               foreach(l2, outerpathkeys)
+               {
+                   if (l2 == lop)
+                       break;
+                   opathkey = (PathKey *) lfirst(l2);
+                   opeclass = opathkey->pk_eclass;
+                   if (oeclass == opeclass)
+                       break;
+               }
+               if (oeclass != opeclass)
+                   elog(ERROR, "outer pathkeys do not match mergeclauses");
+           }
         }
-       if (ieclass != lastieclass)
+       else
+       {
+           /* redundant clauses ... must match some already-used pathkey */
+           opathkey = NULL;
+           opeclass = NULL;
+           foreach(l2, outerpathkeys)
+           {
+               opathkey = (PathKey *) lfirst(l2);
+               opeclass = opathkey->pk_eclass;
+               if (oeclass == opeclass)
+                   break;
+           }
+           if (l2 == NULL)
+               elog(ERROR, "outer pathkeys do not match mergeclauses");
+       }
+
+       if (lip)
         {
-           if (!lip)
-               elog(ERROR, "too few pathkeys for mergeclauses");
             ipathkey = (PathKey *) lfirst(lip);
-           lip = lnext(lip);
-           lastieclass = ipathkey->pk_eclass;
-           if (ieclass != lastieclass)
-               elog(ERROR, "inner pathkeys do not match mergeclause");
+           ipeclass = ipathkey->pk_eclass;
+           if (ieclass == ipeclass)
+           {
+               /* fast path for typical case */
+               lip = lnext(lip);
+           }
+           else
+           {
+               /* redundant clauses ... must match something before lip */
+               foreach(l2, innerpathkeys)
+               {
+                   if (l2 == lip)
+                       break;
+                   ipathkey = (PathKey *) lfirst(l2);
+                   ipeclass = ipathkey->pk_eclass;
+                   if (ieclass == ipeclass)
+                       break;
+               }
+               if (ieclass != ipeclass)
+                   elog(ERROR, "inner pathkeys do not match mergeclauses");
+           }
+       }
+       else
+       {
+           /* redundant clauses ... must match some already-used pathkey */
+           ipathkey = NULL;
+           ipeclass = NULL;
+           foreach(l2, innerpathkeys)
+           {
+               ipathkey = (PathKey *) lfirst(l2);
+               ipeclass = ipathkey->pk_eclass;
+               if (ieclass == ipeclass)
+                   break;
+           }
+           if (l2 == NULL)
+               elog(ERROR, "inner pathkeys do not match mergeclauses");
         }
+
         /* pathkeys should match each other too (more debugging) */
         if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
             opathkey->pk_strategy != ipathkey->pk_strategy ||
@@ -1792,6 +1861,11 @@ create_mergejoin_plan(PlannerInfo *root,
         i++;
     }
  
+   /*
+    * Note: it is not an error if we have additional pathkey elements
+    * (i.e., lop or lip isn't NULL here).  The input paths might be
+    * better-sorted than we need for the current mergejoin.
+    */
  
     /*
      * Now we can build the mergejoin node.
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 8e6fc13bb38fd8e9fb8ef4e766c6737f6d0205e3..a7b7b73ad6f13095653a9b2104690b0d808b2406 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2352,3 +2352,18 @@ execute foo(false);
   10000
  (1 row)
  
+--
+-- test for sane behavior with noncanonical merge clauses, per bug #4926
+--
+begin;
+set enable_mergejoin = 1;
+set enable_hashjoin = 0;
+set enable_nestloop = 0;
+create temp table a (i integer);
+create temp table b (x integer, y integer);
+select * from a left join b on i = x and i = y and x = i;
+ i | x | y 
+---+---+---
+(0 rows)
+
+rollback;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 149530e14db25d50e38d3d69f01c15c4043fee79..29992ced0c230bf7d7b81bac0f22919b14096791 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -505,3 +505,20 @@ prepare foo(bool) as
          (select 1 from tenk1 c where c.thousand = b.unique2 and $1));
  execute foo(true);
  execute foo(false);
+
+--
+-- test for sane behavior with noncanonical merge clauses, per bug #4926
+--
+
+begin;
+
+set enable_mergejoin = 1;
+set enable_hashjoin = 0;
+set enable_nestloop = 0;
+
+create temp table a (i integer);
+create temp table b (x integer, y integer);
+
+select * from a left join b on i = x and i = y and x = i;
+
+rollback;
author	Tom Lane <[email protected]>
	Fri, 17 Jul 2009 23:19:34 +0000 (23:19 +0000)
committer	Tom Lane <[email protected]>
	Fri, 17 Jul 2009 23:19:34 +0000 (23:19 +0000)
src/backend/optimizer/path/pathkeys.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/sql/join.sql		patch \| blob \| blame \| history