</programlisting>
</para>
+ <para>
+ <indexterm zone="queries-grouping-sets">
+ <primary>ALL</primary>
+ <secondary>GROUP BY ALL</secondary>
+ </indexterm>
+ <indexterm zone="queries-grouping-sets">
+ <primary>DISTINCT</primary>
+ <secondary>GROUP BY DISTINCT</secondary>
+ </indexterm>
+ When specifying multiple grouping items together, the final set of grouping
+ sets might contain duplicates. For example:
+<programlisting>
+GROUP BY ROLLUP (a, b), ROLLUP (a, c)
+</programlisting>
+ is equivalent to
+<programlisting>
+GROUP BY GROUPING SETS (
+ (a, b, c),
+ (a, b),
+ (a, b),
+ (a, c),
+ (a),
+ (a),
+ (a, c),
+ (a),
+ ()
+)
+</programlisting>
+ If these duplicates are undesirable, they can be removed using the
+ <literal>DISTINCT</literal> clause directly on the <literal>GROUP BY</literal>.
+ Therefore:
+<programlisting>
+GROUP BY <emphasis>DISTINCT</emphasis> ROLLUP (a, b), ROLLUP (a, c)
+</programlisting>
+ is equivalent to
+<programlisting>
+GROUP BY GROUPING SETS (
+ (a, b, c),
+ (a, b),
+ (a, c),
+ (a),
+ ()
+)
+</programlisting>
+ This is not the same as using <literal>SELECT DISTINCT</literal> because the output
+ rows may still contain duplicates. If any of the ungrouped columns contains NULL,
+ it will be indistinguishable from the NULL used when that same column is grouped.
+ </para>
+
<note>
<para>
The construct <literal>(a, b)</literal> is normally recognized in expressions as
<sect2 id="queries-distinct">
<title><literal>DISTINCT</literal></title>
+ <indexterm zone="queries-distinct">
+ <primary>ALL</primary>
+ <secondary>SELECT ALL</secondary>
+ </indexterm>
<indexterm zone="queries-distinct">
<primary>DISTINCT</primary>
+ <secondary>SELECT DISTINCT</secondary>
</indexterm>
<indexterm zone="queries-distinct">
[ * | <replaceable class="parameter">expression</replaceable> [ [ AS ] <replaceable class="parameter">output_name</replaceable> ] [, ...] ]
[ FROM <replaceable class="parameter">from_item</replaceable> [, ...] ]
[ WHERE <replaceable class="parameter">condition</replaceable> ]
- [ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
+ [ GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
[ HAVING <replaceable class="parameter">condition</replaceable> ]
[ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ]
[ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] <replaceable class="parameter">select</replaceable> ]
<para>
The optional <literal>GROUP BY</literal> clause has the general form
<synopsis>
-GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
+GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...]
</synopsis>
</para>
independent <replaceable>grouping sets</replaceable>. The effect of this is
equivalent to constructing a <literal>UNION ALL</literal> between
subqueries with the individual grouping sets as their
- <literal>GROUP BY</literal> clauses. For further details on the handling
+ <literal>GROUP BY</literal> clauses. The optional <literal>DISTINCT</literal>
+ clause removes duplicate sets before processing; it does <emphasis>not</emphasis>
+ transform the <literal>UNION ALL</literal> into a <literal>UNION DISTINCT</literal>.
+ For further details on the handling
of grouping sets see <xref linkend="queries-grouping-sets"/>.
</para>
T431 Extended grouping capabilities YES
T432 Nested and concatenated GROUPING SETS YES
T433 Multiargument GROUPING function YES
-T434 GROUP BY DISTINCT NO
+T434 GROUP BY DISTINCT YES
T441 ABS and MOD functions YES
T461 Symmetric BETWEEN predicate YES
T471 Result sets return value NO
COPY_NODE_FIELD(onConflict);
COPY_NODE_FIELD(returningList);
COPY_NODE_FIELD(groupClause);
+ COPY_SCALAR_FIELD(groupDistinct);
COPY_NODE_FIELD(groupingSets);
COPY_NODE_FIELD(havingQual);
COPY_NODE_FIELD(windowClause);
COPY_NODE_FIELD(fromClause);
COPY_NODE_FIELD(whereClause);
COPY_NODE_FIELD(groupClause);
+ COPY_SCALAR_FIELD(groupDistinct);
COPY_NODE_FIELD(havingClause);
COPY_NODE_FIELD(windowClause);
COPY_NODE_FIELD(valuesLists);
COMPARE_NODE_FIELD(onConflict);
COMPARE_NODE_FIELD(returningList);
COMPARE_NODE_FIELD(groupClause);
+ COMPARE_SCALAR_FIELD(groupDistinct);
COMPARE_NODE_FIELD(groupingSets);
COMPARE_NODE_FIELD(havingQual);
COMPARE_NODE_FIELD(windowClause);
COMPARE_NODE_FIELD(fromClause);
COMPARE_NODE_FIELD(whereClause);
COMPARE_NODE_FIELD(groupClause);
+ COMPARE_SCALAR_FIELD(groupDistinct);
COMPARE_NODE_FIELD(havingClause);
COMPARE_NODE_FIELD(windowClause);
COMPARE_NODE_FIELD(valuesLists);
qsort(list->elements, len, sizeof(ListCell), (qsort_comparator) cmp);
}
+/*
+ * list_sort comparator for sorting a list into ascending int order.
+ */
+int
+list_int_cmp(const ListCell *p1, const ListCell *p2)
+{
+ int v1 = lfirst_int(p1);
+ int v2 = lfirst_int(p2);
+
+ if (v1 < v2)
+ return -1;
+ if (v1 > v2)
+ return 1;
+ return 0;
+}
+
/*
* list_sort comparator for sorting a list into ascending OID order.
*/
WRITE_NODE_FIELD(fromClause);
WRITE_NODE_FIELD(whereClause);
WRITE_NODE_FIELD(groupClause);
+ WRITE_BOOL_FIELD(groupDistinct);
WRITE_NODE_FIELD(havingClause);
WRITE_NODE_FIELD(windowClause);
WRITE_NODE_FIELD(valuesLists);
WRITE_NODE_FIELD(onConflict);
WRITE_NODE_FIELD(returningList);
WRITE_NODE_FIELD(groupClause);
+ WRITE_BOOL_FIELD(groupDistinct);
WRITE_NODE_FIELD(groupingSets);
WRITE_NODE_FIELD(havingQual);
WRITE_NODE_FIELD(windowClause);
READ_NODE_FIELD(onConflict);
READ_NODE_FIELD(returningList);
READ_NODE_FIELD(groupClause);
+ READ_BOOL_FIELD(groupDistinct);
READ_NODE_FIELD(groupingSets);
READ_NODE_FIELD(havingQual);
READ_NODE_FIELD(windowClause);
ListCell *lc_set;
grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
gd->any_hashable = false;
gd->unhashable_refs = NULL;
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
+ qry->groupDistinct = stmt->groupDistinct;
if (stmt->distinctClause == NIL)
{
LimitOption limitOption;
} SelectLimit;
+/* Private struct for the result of group_clause production */
+typedef struct GroupClause
+{
+ bool distinct;
+ List *list;
+} GroupClause;
+
/* ConstraintAttributeSpec yields an integer bitmask of these flags: */
#define CAS_NOT_DEFERRABLE 0x01
#define CAS_DEFERRABLE 0x02
PartitionBoundSpec *partboundspec;
RoleSpec *rolespec;
struct SelectLimit *selectlimit;
+ SetQuantifier setquantifier;
+ struct GroupClause *groupclause;
}
%type <node> stmt schema_stmt
target_list opt_target_list insert_column_list set_target_list
set_clause_list set_clause
def_list operator_def_list indirection opt_indirection
- reloption_list group_clause TriggerFuncArgs opclass_item_list opclass_drop_list
+ reloption_list TriggerFuncArgs opclass_item_list opclass_drop_list
opclass_purpose opt_opfamily transaction_mode_list_or_empty
OptTableFuncElementList TableFuncElementList opt_type_modifiers
prep_type_clause
vacuum_relation_list opt_vacuum_relation_list
drop_option_list
+%type <groupclause> group_clause
%type <list> group_by_list
%type <node> group_by_item empty_grouping_set rollup_clause cube_clause
%type <node> grouping_sets_clause
%type <node> for_locking_item
%type <list> for_locking_clause opt_for_locking_clause for_locking_items
%type <list> locked_rels_list
-%type <boolean> all_or_distinct
+%type <setquantifier> set_quantifier
%type <node> join_qual
%type <jtype> join_type
n->intoClause = $4;
n->fromClause = $5;
n->whereClause = $6;
- n->groupClause = $7;
+ n->groupClause = ($7)->list;
+ n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *)n;
n->intoClause = $4;
n->fromClause = $5;
n->whereClause = $6;
- n->groupClause = $7;
+ n->groupClause = ($7)->list;
+ n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *)n;
n->fromClause = list_make1($2);
$$ = (Node *)n;
}
- | select_clause UNION all_or_distinct select_clause
+ | select_clause UNION set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_UNION, $3, $1, $4);
+ $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
- | select_clause INTERSECT all_or_distinct select_clause
+ | select_clause INTERSECT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4);
+ $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
- | select_clause EXCEPT all_or_distinct select_clause
+ | select_clause EXCEPT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4);
+ $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
;
| /*EMPTY*/
;
-all_or_distinct:
- ALL { $$ = true; }
- | DISTINCT { $$ = false; }
- | /*EMPTY*/ { $$ = false; }
+set_quantifier:
+ ALL { $$ = SET_QUANTIFIER_ALL; }
+ | DISTINCT { $$ = SET_QUANTIFIER_DISTINCT; }
+ | /*EMPTY*/ { $$ = SET_QUANTIFIER_DEFAULT; }
;
/* We use (NIL) as a placeholder to indicate that all target expressions
* GroupingSet node of some type.
*/
group_clause:
- GROUP_P BY group_by_list { $$ = $3; }
- | /*EMPTY*/ { $$ = NIL; }
+ GROUP_P BY set_quantifier group_by_list
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+ n->list = $4;
+ $$ = n;
+ }
+ | /*EMPTY*/
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->list = NIL;
+ $$ = n;
+ }
;
group_by_list:
n->targetList = $2;
n->fromClause = $3;
n->whereClause = $4;
- n->groupClause = $5;
+ n->groupClause = ($5)->list;
+ n->groupDistinct = ($5)->distinct;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
* The limit of 4096 is arbitrary and exists simply to avoid resource
* issues from pathological constructs.
*/
- List *gsets = expand_grouping_sets(qry->groupingSets, 4096);
+ List *gsets = expand_grouping_sets(qry->groupingSets, qry->groupDistinct, 4096);
if (!gsets)
ereport(ERROR,
return (la > lb) ? 1 : (la == lb) ? 0 : -1;
}
+/* list_sort comparator to sort sub-lists by length and contents */
+static int
+cmp_list_len_contents_asc(const ListCell *a, const ListCell *b)
+{
+ int res = cmp_list_len_asc(a, b);
+
+ if (res == 0)
+ {
+ List *la = (List *) lfirst(a);
+ List *lb = (List *) lfirst(b);
+ ListCell *lca;
+ ListCell *lcb;
+
+ forboth(lca, la, lcb, lb)
+ {
+ int va = intVal(lca);
+ int vb = intVal(lcb);
+ if (va > vb)
+ return 1;
+ if (va < vb)
+ return -1;
+ }
+ }
+
+ return res;
+}
+
/*
* Expand a groupingSets clause to a flat list of grouping sets.
* The returned list is sorted by length, shortest sets first.
* some consistency checks.
*/
List *
-expand_grouping_sets(List *groupingSets, int limit)
+expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit)
{
List *expanded_groups = NIL;
List *result = NIL;
result = new_result;
}
- /* Now sort the lists by length */
- list_sort(result, cmp_list_len_asc);
+ /* Now sort the lists by length and deduplicate if necessary */
+ if (!groupDistinct || list_length(result) < 2)
+ list_sort(result, cmp_list_len_asc);
+ else
+ {
+ ListCell *cell;
+ List *prev;
+
+ /* Sort each groupset individually */
+ foreach(cell, result)
+ list_sort(lfirst(cell), list_int_cmp);
+
+ /* Now sort the list of groupsets by length and contents */
+ list_sort(result, cmp_list_len_contents_asc);
+
+ /* Finally, remove duplicates */
+ prev = list_nth_node(List, result, 0);
+ for_each_from(cell, result, 1)
+ {
+ if (equal(lfirst(cell), prev))
+ foreach_delete_current(result, cell);
+ else
+ prev = lfirst(cell);
+ }
+ }
return result;
}
appendContextKeyword(context, " GROUP BY ",
-PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ if (query->groupDistinct)
+ appendStringInfoString(buf, "DISTINCT ");
save_exprkind = context->special_exprkind;
context->special_exprkind = EXPR_KIND_GROUP_BY;
SORTBY_NULLS_LAST
} SortByNulls;
+/* Options for [ ALL | DISTINCT ] */
+typedef enum SetQuantifier
+{
+ SET_QUANTIFIER_DEFAULT,
+ SET_QUANTIFIER_ALL,
+ SET_QUANTIFIER_DISTINCT
+} SetQuantifier;
+
/*
* Grantable rights are encoded so that we can OR them together in a bitmask.
* The present representation of AclItem limits us to 16 distinct rights,
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
+ bool groupDistinct; /* is the group by clause distinct? */
List *groupingSets; /* a list of GroupingSet's if present */
List *fromClause; /* the FROM clause */
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
+ bool groupDistinct; /* Is this GROUP BY DISTINCT? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
typedef int (*list_sort_comparator) (const ListCell *a, const ListCell *b);
extern void list_sort(List *list, list_sort_comparator cmp);
+extern int list_int_cmp(const ListCell *p1, const ListCell *p2);
extern int list_oid_cmp(const ListCell *p1, const ListCell *p2);
#endif /* PG_LIST_H */
extern void parseCheckAggregates(ParseState *pstate, Query *qry);
-extern List *expand_grouping_sets(List *groupingSets, int limit);
+extern List *expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit);
extern int get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes);
drop table gs_group_1;
drop table gs_hash_1;
+-- GROUP BY DISTINCT
+-- "normal" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by all rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |
+ 1 | 2 |
+ 1 | | 3
+ 1 | | 3
+ 1 | |
+ 1 | |
+ 1 | |
+ 4 | | 6
+ 4 | | 6
+ 4 | | 6
+ 4 | |
+ 4 | |
+ 4 | |
+ 4 | |
+ 4 | |
+ 7 | 8 | 9
+ 7 | 8 |
+ 7 | 8 |
+ 7 | | 9
+ 7 | | 9
+ 7 | |
+ 7 | |
+ 7 | |
+ | |
+(25 rows)
+
+-- ...which is also the default
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |
+ 1 | 2 |
+ 1 | | 3
+ 1 | | 3
+ 1 | |
+ 1 | |
+ 1 | |
+ 4 | | 6
+ 4 | | 6
+ 4 | | 6
+ 4 | |
+ 4 | |
+ 4 | |
+ 4 | |
+ 4 | |
+ 7 | 8 | 9
+ 7 | 8 |
+ 7 | 8 |
+ 7 | | 9
+ 7 | | 9
+ 7 | |
+ 7 | |
+ 7 | |
+ | |
+(25 rows)
+
+-- "group by distinct" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by distinct rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |
+ 1 | | 3
+ 1 | |
+ 4 | | 6
+ 4 | | 6
+ 4 | |
+ 4 | |
+ 7 | 8 | 9
+ 7 | 8 |
+ 7 | | 9
+ 7 | |
+ | |
+(13 rows)
+
+-- ...which is not the same as "select distinct"
+select distinct a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |
+ 1 | | 3
+ 1 | |
+ 4 | | 6
+ 4 | |
+ 7 | 8 | 9
+ 7 | 8 |
+ 7 | | 9
+ 7 | |
+ | |
+(11 rows)
+
-- end
drop table gs_group_1;
drop table gs_hash_1;
+-- GROUP BY DISTINCT
+
+-- "normal" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by all rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- ...which is also the default
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- "group by distinct" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by distinct rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- ...which is not the same as "select distinct"
+select distinct a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+
-- end