Fix planning of parameterized appendrel paths with expensive join quals.
authorTom Lane <[email protected]>
Mon, 8 Jul 2013 02:37:24 +0000 (22:37 -0400)
committerTom Lane <[email protected]>
Mon, 8 Jul 2013 02:37:24 +0000 (22:37 -0400)
The code in set_append_rel_pathlist() for building parameterized paths
for append relations (inheritance and UNION ALL combinations) supposed
that the cheapest regular path for a child relation would still be cheapest
when reparameterized.  Which might not be the case, particularly if the
added join conditions are expensive to compute, as in a recent example from
Jeff Janes.  Fix it to compare child path costs *after* reparameterizing.
We can short-circuit that if the cheapest pre-existing path is already
parameterized correctly, which seems likely to be true often enough to be
worth checking for.

Back-patch to 9.2 where parameterized paths were introduced.

src/backend/optimizer/path/allpaths.c
src/test/regress/expected/union.out
src/test/regress/sql/union.sql

index b84bbba68c9e51a3eed5b0913d8156bd93168712..4b8a73d60f42a414b49a481e9911a810e7caeed1 100644 (file)
@@ -68,6 +68,9 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
                           List *live_childrels,
                           List *all_child_pathkeys);
+static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
+                                     RelOptInfo *rel,
+                                     Relids required_outer);
 static List *accumulate_append_subpath(List *subpaths, Path *path);
 static void set_dummy_rel_pathlist(RelOptInfo *rel);
 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -831,28 +834,18 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
        foreach(lcr, live_childrels)
        {
            RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
-           Path       *cheapest_total;
+           Path       *subpath;
 
-           cheapest_total =
-               get_cheapest_path_for_pathkeys(childrel->pathlist,
-                                              NIL,
-                                              required_outer,
-                                              TOTAL_COST);
-           Assert(cheapest_total != NULL);
-
-           /* Children must have exactly the desired parameterization */
-           if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer))
+           subpath = get_cheapest_parameterized_child_path(root,
+                                                           childrel,
+                                                           required_outer);
+           if (subpath == NULL)
            {
-               cheapest_total = reparameterize_path(root, cheapest_total,
-                                                    required_outer, 1.0);
-               if (cheapest_total == NULL)
-               {
-                   subpaths_valid = false;
-                   break;
-               }
+               /* failed to make a suitable path for this child */
+               subpaths_valid = false;
+               break;
            }
-
-           subpaths = accumulate_append_subpath(subpaths, cheapest_total);
+           subpaths = accumulate_append_subpath(subpaths, subpath);
        }
 
        if (subpaths_valid)
@@ -962,6 +955,79 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
    }
 }
 
+/*
+ * get_cheapest_parameterized_child_path
+ *     Get cheapest path for this relation that has exactly the requested
+ *     parameterization.
+ *
+ * Returns NULL if unable to create such a path.
+ */
+static Path *
+get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
+                                     Relids required_outer)
+{
+   Path       *cheapest;
+   ListCell   *lc;
+
+   /*
+    * Look up the cheapest existing path with no more than the needed
+    * parameterization.  If it has exactly the needed parameterization, we're
+    * done.
+    */
+   cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
+                                             NIL,
+                                             required_outer,
+                                             TOTAL_COST);
+   Assert(cheapest != NULL);
+   if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
+       return cheapest;
+
+   /*
+    * Otherwise, we can "reparameterize" an existing path to match the given
+    * parameterization, which effectively means pushing down additional
+    * joinquals to be checked within the path's scan.  However, some existing
+    * paths might check the available joinquals already while others don't;
+    * therefore, it's not clear which existing path will be cheapest after
+    * reparameterization.  We have to go through them all and find out.
+    */
+   cheapest = NULL;
+   foreach(lc, rel->pathlist)
+   {
+       Path       *path = (Path *) lfirst(lc);
+
+       /* Can't use it if it needs more than requested parameterization */
+       if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+           continue;
+
+       /*
+        * Reparameterization can only increase the path's cost, so if it's
+        * already more expensive than the current cheapest, forget it.
+        */
+       if (cheapest != NULL &&
+           compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+           continue;
+
+       /* Reparameterize if needed, then recheck cost */
+       if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
+       {
+           path = reparameterize_path(root, path, required_outer, 1.0);
+           if (path == NULL)
+               continue;       /* failed to reparameterize this one */
+           Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
+
+           if (cheapest != NULL &&
+               compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+               continue;
+       }
+
+       /* We have a new best path */
+       cheapest = path;
+   }
+
+   /* Return the best path, or NULL if we found no suitable candidate */
+   return cheapest;
+}
+
 /*
  * accumulate_append_subpath
  *     Add a subpath to the list being built for an Append or MergeAppend
index 435f50d050ddd072ec50e2112e44157f2bd55bc0..bf8f1bcaf77f7751a45e72693d66d6857ecb995d 100644 (file)
@@ -603,3 +603,37 @@ WHERE x > 3;
  2 | 4
 (1 row)
 
+-- Test proper handling of parameterized appendrel paths when the
+-- potential join qual is expensive
+create function expensivefunc(int) returns int
+language plpgsql immutable strict cost 10000
+as $$begin return $1; end$$;
+create temp table t3 as select generate_series(-1000,1000) as x;
+create index t3i on t3 (expensivefunc(x));
+analyze t3;
+explain (costs off)
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on int4_tbl
+   ->  Append
+         ->  Index Scan using t3i on t3 a
+               Index Cond: (expensivefunc(x) = int4_tbl.f1)
+         ->  Index Scan using t3i on t3 b
+               Index Cond: (expensivefunc(x) = int4_tbl.f1)
+(7 rows)
+
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+ x | f1 
+---+----
+ 0 |  0
+ 0 |  0
+(2 rows)
+
+drop table t3;
+drop function expensivefunc(int);
index f3c9d113827eb240d055bfc32a31a60d89038dbc..6194ce475110944abb69951b70c66ef368a8834a 100644 (file)
@@ -249,3 +249,24 @@ SELECT * FROM
    UNION
    SELECT 2 AS t, 4 AS x) ss
 WHERE x > 3;
+
+-- Test proper handling of parameterized appendrel paths when the
+-- potential join qual is expensive
+create function expensivefunc(int) returns int
+language plpgsql immutable strict cost 10000
+as $$begin return $1; end$$;
+
+create temp table t3 as select generate_series(-1000,1000) as x;
+create index t3i on t3 (expensivefunc(x));
+analyze t3;
+
+explain (costs off)
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+
+drop table t3;
+drop function expensivefunc(int);