if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
{
- add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * distinct_pathkeys may have become empty if all of the
+ * pathkeys were determined to be redundant. If all of the
+ * pathkeys are redundant then each DISTINCT target must only
+ * allow a single value, therefore all resulting tuples must
+ * be identical (or at least indistinguishable by an equality
+ * check). We can uniquify these tuples simply by just taking
+ * the first tuple. All we do here is add a path to do "LIMIT
+ * 1" atop of 'path'. When doing a DISTINCT ON we may still
+ * have a non-NIL sort_pathkeys list, so we must still only do
+ * this with paths which are correctly sorted by
+ * sort_pathkeys.
+ */
+ if (root->distinct_pathkeys == NIL)
+ {
+ Node *limitCount;
+
+ limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+ sizeof(int64),
+ Int64GetDatum(1), false,
+ FLOAT8PASSBYVAL);
+
+ /*
+ * If the query already has a LIMIT clause, then we could
+ * end up with a duplicate LimitPath in the final plan.
+ * That does not seem worth troubling over too much.
+ */
+ add_path(distinct_rel, (Path *)
+ create_limit_path(root, distinct_rel, path, NULL,
+ limitCount, LIMIT_OPTION_COUNT,
+ 0, 1));
+ }
+ else
+ {
+ add_path(distinct_rel, (Path *)
+ create_upper_unique_path(root, distinct_rel,
+ path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
+ }
}
}
path = (Path *) create_sort_path(root, distinct_rel,
path,
needed_pathkeys,
- -1.0);
+ root->distinct_pathkeys == NIL ?
+ 1.0 : -1.0);
- add_path(distinct_rel, (Path *)
- create_upper_unique_path(root, distinct_rel,
- path,
- list_length(root->distinct_pathkeys),
- numDistinctRows));
+ /*
+ * As above, use a LimitPath instead of a UniquePath when all of the
+ * distinct_pathkeys are redundant and we're only going to get a
+ * series of tuples all with the same values anyway.
+ */
+ if (root->distinct_pathkeys == NIL)
+ {
+ Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+ sizeof(int64),
+ Int64GetDatum(1), false,
+ FLOAT8PASSBYVAL);
+
+ add_path(distinct_rel, (Path *)
+ create_limit_path(root, distinct_rel, path, NULL,
+ limitCount, LIMIT_OPTION_COUNT, 0, 1));
+ }
+ else
+ {
+ add_path(distinct_rel, (Path *)
+ create_upper_unique_path(root, distinct_rel,
+ path,
+ list_length(root->distinct_pathkeys),
+ numDistinctRows));
+ }
}
/*
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
RESET parallel_tuple_cost;
+--
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+ QUERY PLAN
+----------------------------
+ Limit
+ -> Seq Scan on tenk1
+ Filter: (four = 0)
+(3 rows)
+
+-- Ensure the above gives us the correct result
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+ four
+------
+ 0
+(1 row)
+
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+ QUERY PLAN
+---------------------------------------------
+ Limit
+ -> Seq Scan on tenk1
+ Filter: ((two <> 0) AND (four = 0))
+(3 rows)
+
+-- Ensure no rows are returned
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+ four
+------
+(0 rows)
+
+-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+ QUERY PLAN
+----------------------------
+ Limit
+ -> Seq Scan on tenk1
+ Filter: (four = 0)
+(3 rows)
+
+-- Ensure we only get 1 row
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+ four | ?column? | ?column? | ?column?
+------+----------+----------+----------
+ 0 | 1 | 2 | 3
+(1 row)
+
--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.
0 | -2147483647
(1 row)
+--
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+-- Ensure we also get a LIMIT plan with DISTINCT ON
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+ FROM tenk1 WHERE four = 0 ORDER BY 1;
+ QUERY PLAN
+----------------------------------
+ Result
+ -> Limit
+ -> Seq Scan on tenk1
+ Filter: (four = 0)
+(4 rows)
+
+-- and check the result of the above query is correct
+SELECT DISTINCT ON (four) four,two
+ FROM tenk1 WHERE four = 0 ORDER BY 1;
+ four | two
+------+-----
+ 0 | 0
+(1 row)
+
+-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+ FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+ QUERY PLAN
+----------------------------------
+ Limit
+ -> Sort
+ Sort Key: two
+ -> Seq Scan on tenk1
+ Filter: (four = 0)
+(5 rows)
+
+-- Same again but use a column that is indexed so that we get an index scan
+-- then a limit
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,hundred
+ FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+ QUERY PLAN
+-----------------------------------------------------
+ Result
+ -> Limit
+ -> Index Scan using tenk1_hundred on tenk1
+ Filter: (four = 0)
+(4 rows)
+
RESET parallel_setup_cost;
RESET parallel_tuple_cost;
+--
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+
+-- Ensure the above gives us the correct result
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+
+-- Ensure no rows are returned
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+
+-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+
+-- Ensure we only get 1 row
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+
--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.
-- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses
select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
+
+--
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+
+-- Ensure we also get a LIMIT plan with DISTINCT ON
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+ FROM tenk1 WHERE four = 0 ORDER BY 1;
+
+-- and check the result of the above query is correct
+SELECT DISTINCT ON (four) four,two
+ FROM tenk1 WHERE four = 0 ORDER BY 1;
+
+-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+ FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+
+-- Same again but use a column that is indexed so that we get an index scan
+-- then a limit
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,hundred
+ FROM tenk1 WHERE four = 0 ORDER BY 1,2;