Let Parallel Append over simple UNION ALL have partial subpaths.
author: Robert Haas <[email protected]>
Tue, 13 Mar 2018 20:34:08 +0000 (16:34 -0400)
committer: Robert Haas <[email protected]>
Tue, 13 Mar 2018 20:34:08 +0000 (16:34 -0400)
A simple UNION ALL gets flattened into an appendrel of subquery
RTEs, but up until now it's been impossible for the appendrel to use
the partial paths for the subqueries, so we can implement the
appendrel as a Parallel Append but only one with non-partial paths
as children.

There are three separate obstacles to removing that limitation, and
this patch addresses each of them in turn.
First, when planning a subquery, propagate any partial paths to the
final_rel so that they are potentially visible to outer query levels
(but not if they have initPlans attached, because that wouldn't be
safe).  Second, after planning a subquery, propagate any partial paths
for the final_rel to the subquery RTE in the outer query level in the
same way we do for non-partial paths.  Third, teach finalize_plan() to
account for the possibility that the fake parameter we use for rescan
signalling when the plan contains a Gather (Merge) node may be
propagated from an outer query level.

Patch by me, reviewed and tested by Amit Khandekar, Rajkumar
Raghuwanshi, and Ashutosh Bapat.  Test cases based on examples by
Rajkumar Raghuwanshi.

Discussion: http://postgr.es/m/CA+Tgmoa6L9A1nNCk3aTDVZLZ4KkHDn1+tm7mFyFvP+uQPS7bAg@mail.gmail.com

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/subselect.c
src/test/regress/expected/select_parallel.out
src/test/regress/sql/select_parallel.sql

index 1c792a00eb2bf57004f6d0361732d7ca44970465..ea4e683abb060789fc813a58bdfd9005391c2b36 100644 (file)
@@ -2179,6 +2179,28 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                 create_subqueryscan_path(root, rel, subpath,
                                                                                  pathkeys, required_outer));
        }
+
+       /* If consider_parallel is false, there should be no partial paths. */
+       Assert(sub_final_rel->consider_parallel ||
+                  sub_final_rel->partial_pathlist == NIL);
+
+       /* Same for partial paths. */
+       foreach(lc, sub_final_rel->partial_pathlist)
+       {
+               Path       *subpath = (Path *) lfirst(lc);
+               List       *pathkeys;
+
+               /* Convert subpath's pathkeys to outer representation */
+               pathkeys = convert_subquery_pathkeys(root,
+                                                                                        rel,
+                                                                                        subpath->pathkeys,
+                                                                                        make_tlist_from_pathtarget(subpath->pathtarget));
+
+               /* Generate outer path using this subpath */
+               add_partial_path(rel, (Path *)
+                                                create_subqueryscan_path(root, rel, subpath,
+                                                                                                 pathkeys, required_outer));
+       }
 }
 
 /*
index 24e6c463961e73db2fa744c08b3c4f072303bc09..66e7e7badcf2bd75a8f21d7137d1bc4db8aabff1 100644 (file)
@@ -2194,6 +2194,22 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
                add_path(final_rel, path);
        }
 
+       /*
+        * Generate partial paths for final_rel, too, if outer query levels might
+        * be able to make use of them.
+        */
+       if (final_rel->consider_parallel && root->query_level > 1 &&
+               !limit_needed(parse))
+       {
+               Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
+               foreach(lc, current_rel->partial_pathlist)
+               {
+                       Path       *partial_path = (Path *) lfirst(lc);
+
+                       add_partial_path(final_rel, partial_path);
+               }
+       }
+
        /*
         * If there is an FDW that's responsible for all baserels of the query,
         * let it consider adding ForeignPaths.
index dc86dd5a0b687975770f120fbc4b0261eb713750..83008d76619e52d4fad5a2f211e622ce30b4b026 100644 (file)
@@ -2202,6 +2202,13 @@ SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel)
                path->parallel_safe = false;
        }
 
+       /*
+        * Forget about any partial paths and clear consider_parallel, too;
+        * they're not usable if we attached an initPlan.
+        */
+       final_rel->partial_pathlist = NIL;
+       final_rel->consider_parallel = false;
+
        /* We needn't do set_cheapest() here, caller will do it */
 }
 
@@ -2407,10 +2414,16 @@ finalize_plan(PlannerInfo *root, Plan *plan,
                        {
                                SubqueryScan *sscan = (SubqueryScan *) plan;
                                RelOptInfo *rel;
+                               Bitmapset  *subquery_params;
 
-                               /* We must run SS_finalize_plan on the subquery */
+                               /* We must run finalize_plan on the subquery */
                                rel = find_base_rel(root, sscan->scan.scanrelid);
-                               SS_finalize_plan(rel->subroot, sscan->subplan);
+                               subquery_params = rel->subroot->outer_params;
+                               if (gather_param >= 0)
+                                       subquery_params = bms_add_member(bms_copy(subquery_params),
+                                                                                                        gather_param);
+                               finalize_plan(rel->subroot, sscan->subplan, gather_param,
+                                                         subquery_params, NULL);
 
                                /* Now we can add its extParams to the parent's params */
                                context.paramids = bms_add_members(context.paramids,
index 0a782616385a0fb868d3776fdc1f6867607e3347..2fb16d1a154200b5d554777e7404aa6b53c2d0d5 100644 (file)
@@ -890,4 +890,69 @@ select stringu1::int2 from tenk1 where unique1 = 1;
 ERROR:  invalid input syntax for integer: "BAAAAA"
 CONTEXT:  parallel worker
 ROLLBACK TO SAVEPOINT settings;
+-- test interaction with set-returning functions
+SAVEPOINT settings;
+-- multiple subqueries under a single Gather node
+-- must set parallel_setup_cost > 0 to discourage multiple Gather nodes
+SET LOCAL parallel_setup_cost = 10;
+EXPLAIN (COSTS OFF)
+SELECT unique1 FROM tenk1 WHERE fivethous = tenthous + 1
+UNION ALL
+SELECT unique1 FROM tenk1 WHERE fivethous = tenthous + 1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Gather
+   Workers Planned: 4
+   ->  Parallel Append
+         ->  Parallel Seq Scan on tenk1
+               Filter: (fivethous = (tenthous + 1))
+         ->  Parallel Seq Scan on tenk1 tenk1_1
+               Filter: (fivethous = (tenthous + 1))
+(7 rows)
+
+ROLLBACK TO SAVEPOINT settings;
+-- can't use multiple subqueries under a single Gather node due to initPlans
+EXPLAIN (COSTS OFF)
+SELECT unique1 FROM tenk1 WHERE fivethous =
+       (SELECT unique1 FROM tenk1 WHERE fivethous = 1 LIMIT 1)
+UNION ALL
+SELECT unique1 FROM tenk1 WHERE fivethous =
+       (SELECT unique2 FROM tenk1 WHERE fivethous = 1 LIMIT 1)
+ORDER BY 1;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: tenk1.unique1
+   ->  Append
+         ->  Gather
+               Workers Planned: 4
+               Params Evaluated: $1
+               InitPlan 1 (returns $1)
+                 ->  Limit
+                       ->  Gather
+                             Workers Planned: 4
+                             ->  Parallel Seq Scan on tenk1 tenk1_2
+                                   Filter: (fivethous = 1)
+               ->  Parallel Seq Scan on tenk1
+                     Filter: (fivethous = $1)
+         ->  Gather
+               Workers Planned: 4
+               Params Evaluated: $3
+               InitPlan 2 (returns $3)
+                 ->  Limit
+                       ->  Gather
+                             Workers Planned: 4
+                             ->  Parallel Seq Scan on tenk1 tenk1_3
+                                   Filter: (fivethous = 1)
+               ->  Parallel Seq Scan on tenk1 tenk1_1
+                     Filter: (fivethous = $3)
+(25 rows)
+
+-- test interaction with SRFs
+SELECT * FROM information_schema.foreign_data_wrapper_options
+ORDER BY 1, 2, 3;
+ foreign_data_wrapper_catalog | foreign_data_wrapper_name | option_name | option_value 
+------------------------------+---------------------------+-------------+--------------
+(0 rows)
+
 rollback;
index fa03aae0c03a1e105b874f0ae74e9c5538f64348..ec817f2a4c0b0e9c06c6a7c23ee85b8cb1b55d5d 100644 (file)
@@ -358,4 +358,29 @@ SET LOCAL force_parallel_mode = 1;
 select stringu1::int2 from tenk1 where unique1 = 1;
 ROLLBACK TO SAVEPOINT settings;
 
+-- test interaction with set-returning functions
+SAVEPOINT settings;
+
+-- multiple subqueries under a single Gather node
+-- must set parallel_setup_cost > 0 to discourage multiple Gather nodes
+SET LOCAL parallel_setup_cost = 10;
+EXPLAIN (COSTS OFF)
+SELECT unique1 FROM tenk1 WHERE fivethous = tenthous + 1
+UNION ALL
+SELECT unique1 FROM tenk1 WHERE fivethous = tenthous + 1;
+ROLLBACK TO SAVEPOINT settings;
+
+-- can't use multiple subqueries under a single Gather node due to initPlans
+EXPLAIN (COSTS OFF)
+SELECT unique1 FROM tenk1 WHERE fivethous =
+       (SELECT unique1 FROM tenk1 WHERE fivethous = 1 LIMIT 1)
+UNION ALL
+SELECT unique1 FROM tenk1 WHERE fivethous =
+       (SELECT unique2 FROM tenk1 WHERE fivethous = 1 LIMIT 1)
+ORDER BY 1;
+
+-- test interaction with SRFs
+SELECT * FROM information_schema.foreign_data_wrapper_options
+ORDER BY 1, 2, 3;
+
 rollback;