Change "long" numGroups fields to be Cardinality (i.e., double).
authorTom Lane <[email protected]>
Sun, 2 Nov 2025 21:57:43 +0000 (16:57 -0500)
committerTom Lane <[email protected]>
Sun, 2 Nov 2025 21:57:43 +0000 (16:57 -0500)
We've been nibbling away at removing uses of "long" for a long time,
since its width is platform-dependent.  Here's one more: change the
remaining "long" fields in Plan nodes to Cardinality, since the three
surviving examples all represent group-count estimates.  The upstream
planner code was converted to Cardinality some time ago; for example
the corresponding fields in Path nodes are type Cardinality, as are
the arguments of the make_foo_path functions.  Downstream in the
executor, it turns out that these all feed to the table-size argument
of BuildTupleHashTable.  Change that to "double" as well, and fix it
so that it safely clamps out-of-range values to the uint32 limit of
simplehash.h, as was not being done before.

Essentially, this is removing all the artificial datatype-dependent
limitations on these values from upstream processing, and applying
just one clamp at the moment where we're forced to do so by the
datatype choices of simplehash.h.

Also, remove BuildTupleHashTable's misguided attempt to enforce
work_mem/hash_mem_limit.  It doesn't have enough information
(particularly not the expected tuple width) to do that accurately,
and it has no real business second-guessing the caller's choice.
For all these plan types, it's really the planner's responsibility
to not choose a hashed implementation if the hashtable is expected
to exceed hash_mem_limit.  The previous patch improved the
accuracy of those estimates, and even if BuildTupleHashTable had
more information it should arrive at the same conclusions.

Reported-by: Jeff Janes <[email protected]>
Author: Tom Lane <[email protected]>
Reviewed-by: David Rowley <[email protected]>
Discussion: https://postgr.es/m/CAMkU=1zia0JfW_QR8L5xA2vpa0oqVuiapm78h=WpNsHH13_9uw@mail.gmail.com

src/backend/executor/execGrouping.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeRecursiveunion.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeSubplan.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/createplan.c
src/include/executor/executor.h
src/include/nodes/plannodes.h
src/include/optimizer/optimizer.h
src/include/optimizer/planmain.h

index e1a3a813dd9b86c554e08e2900d3b9ef23071242..8b64a625ca5533490d033ec76521798e865b74ab 100644 (file)
@@ -14,6 +14,8 @@
  */
 #include "postgres.h"
 
+#include <math.h>
+
 #include "access/htup_details.h"
 #include "access/parallel.h"
 #include "common/hashfn.h"
@@ -144,7 +146,7 @@ execTuplesHashPrepare(int numCols,
  * eqfuncoids: OIDs of equality comparison functions to use
  * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
  * collations: collations to use in comparisons
- * nbuckets: initial estimate of hashtable size
+ * nelements: initial estimate of hashtable size
  * additionalsize: size of data that may be stored along with the hash entry
  * metacxt: memory context for long-lived data and the simplehash table
  * tuplescxt: memory context in which to store the hashed tuples themselves
@@ -187,7 +189,7 @@ BuildTupleHashTable(PlanState *parent,
                    const Oid *eqfuncoids,
                    FmgrInfo *hashfunctions,
                    Oid *collations,
-                   long nbuckets,
+                   double nelements,
                    Size additionalsize,
                    MemoryContext metacxt,
                    MemoryContext tuplescxt,
@@ -195,12 +197,24 @@ BuildTupleHashTable(PlanState *parent,
                    bool use_variable_hash_iv)
 {
    TupleHashTable hashtable;
-   Size        entrysize;
-   Size        hash_mem_limit;
+   uint32      nbuckets;
    MemoryContext oldcontext;
    uint32      hash_iv = 0;
 
-   Assert(nbuckets > 0);
+   /*
+    * tuplehash_create requires a uint32 element count, so we had better
+    * clamp the given nelements to fit in that.  As long as we have to do
+    * that, we might as well protect against completely insane input like
+    * zero or NaN.  But it is not our job here to enforce issues like staying
+    * within hash_mem: the caller should have done that, and we don't have
+    * enough info to second-guess.
+    */
+   if (isnan(nelements) || nelements <= 0)
+       nbuckets = 1;
+   else if (nelements >= PG_UINT32_MAX)
+       nbuckets = PG_UINT32_MAX;
+   else
+       nbuckets = (uint32) nelements;
 
    /* tuplescxt must be separate, else ResetTupleHashTable breaks things */
    Assert(metacxt != tuplescxt);
@@ -208,18 +222,6 @@ BuildTupleHashTable(PlanState *parent,
    /* ensure additionalsize is maxalign'ed */
    additionalsize = MAXALIGN(additionalsize);
 
-   /*
-    * Limit initial table size request to not more than hash_mem.
-    *
-    * XXX this calculation seems pretty misguided, as it counts only overhead
-    * and not the tuples themselves.  But we have no knowledge of the
-    * expected tuple width here.
-    */
-   entrysize = sizeof(TupleHashEntryData) + additionalsize;
-   hash_mem_limit = get_hash_memory_limit() / entrysize;
-   if (nbuckets > hash_mem_limit)
-       nbuckets = hash_mem_limit;
-
    oldcontext = MemoryContextSwitchTo(metacxt);
 
    hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
index 759ffeed2e6debb7f2b6cb97eed43bfb2ba88aeb..0b02fd321075cd9af28270c4712c8cd0664f5ae6 100644 (file)
@@ -402,12 +402,12 @@ static void find_cols(AggState *aggstate, Bitmapset **aggregated,
                      Bitmapset **unaggregated);
 static bool find_cols_walker(Node *node, FindColsContext *context);
 static void build_hash_tables(AggState *aggstate);
-static void build_hash_table(AggState *aggstate, int setno, long nbuckets);
+static void build_hash_table(AggState *aggstate, int setno, double nbuckets);
 static void hashagg_recompile_expressions(AggState *aggstate, bool minslot,
                                          bool nullcheck);
 static void hash_create_memory(AggState *aggstate);
-static long hash_choose_num_buckets(double hashentrysize,
-                                   long ngroups, Size memory);
+static double hash_choose_num_buckets(double hashentrysize,
+                                     double ngroups, Size memory);
 static int hash_choose_num_partitions(double input_groups,
                                       double hashentrysize,
                                       int used_bits,
@@ -1469,7 +1469,7 @@ build_hash_tables(AggState *aggstate)
    for (setno = 0; setno < aggstate->num_hashes; ++setno)
    {
        AggStatePerHash perhash = &aggstate->perhash[setno];
-       long        nbuckets;
+       double      nbuckets;
        Size        memory;
 
        if (perhash->hashtable != NULL)
@@ -1478,8 +1478,6 @@ build_hash_tables(AggState *aggstate)
            continue;
        }
 
-       Assert(perhash->aggnode->numGroups > 0);
-
        memory = aggstate->hash_mem_limit / aggstate->num_hashes;
 
        /* choose reasonable number of buckets per hashtable */
@@ -1505,7 +1503,7 @@ build_hash_tables(AggState *aggstate)
  * Build a single hashtable for this grouping set.
  */
 static void
-build_hash_table(AggState *aggstate, int setno, long nbuckets)
+build_hash_table(AggState *aggstate, int setno, double nbuckets)
 {
    AggStatePerHash perhash = &aggstate->perhash[setno];
    MemoryContext metacxt = aggstate->hash_metacxt;
@@ -2053,11 +2051,11 @@ hash_create_memory(AggState *aggstate)
 /*
  * Choose a reasonable number of buckets for the initial hash table size.
  */
-static long
-hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
+static double
+hash_choose_num_buckets(double hashentrysize, double ngroups, Size memory)
 {
-   long        max_nbuckets;
-   long        nbuckets = ngroups;
+   double      max_nbuckets;
+   double      nbuckets = ngroups;
 
    max_nbuckets = memory / hashentrysize;
 
@@ -2065,12 +2063,16 @@ hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
     * Underestimating is better than overestimating. Too many buckets crowd
     * out space for group keys and transition state values.
     */
-   max_nbuckets >>= 1;
+   max_nbuckets /= 2;
 
    if (nbuckets > max_nbuckets)
        nbuckets = max_nbuckets;
 
-   return Max(nbuckets, 1);
+   /*
+    * BuildTupleHashTable will clamp any obviously-insane result, so we don't
+    * need to be too careful here.
+    */
+   return nbuckets;
 }
 
 /*
@@ -3686,7 +3688,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
    if (use_hashing)
    {
        Plan       *outerplan = outerPlan(node);
-       uint64      totalGroups = 0;
+       double      totalGroups = 0;
 
        aggstate->hash_spill_rslot = ExecInitExtraTupleSlot(estate, scanDesc,
                                                            &TTSOpsMinimalTuple);
index ebb7919b49b5f4292afce304bd28c931527a0813..cd0ad51dcd29765b30f63b51758bfb738b23119a 100644 (file)
@@ -35,7 +35,6 @@ build_hash_table(RecursiveUnionState *rustate)
    TupleDesc   desc = ExecGetResultType(outerPlanState(rustate));
 
    Assert(node->numCols > 0);
-   Assert(node->numGroups > 0);
 
    /*
     * If both child plans deliver the same fixed tuple slot type, we can tell
index 5aabed18a09e80c672f44f78c659ed248150688b..9e0f9274fb190ba6c0418934619ccbdc3c6eb27c 100644 (file)
@@ -88,7 +88,6 @@ build_hash_table(SetOpState *setopstate)
    TupleDesc   desc = ExecGetResultType(outerPlanState(setopstate));
 
    Assert(node->strategy == SETOP_HASHED);
-   Assert(node->numGroups > 0);
 
    /*
     * If both child plans deliver the same fixed tuple slot type, we can tell
index 1cd0988bb496572beb1015fd474dfdc43a7fc9f1..c8b7bd9eb661074fdfd8fca4594c8feeb6cb9311 100644 (file)
@@ -34,7 +34,6 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
-#include "optimizer/optimizer.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -481,7 +480,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
    int         ncols = node->numCols;
    ExprContext *innerecontext = node->innerecontext;
    MemoryContext oldcontext;
-   long        nbuckets;
+   double      nentries;
    TupleTableSlot *slot;
 
    Assert(subplan->subLinkType == ANY_SUBLINK);
@@ -509,9 +508,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
    node->havehashrows = false;
    node->havenullrows = false;
 
-   nbuckets = clamp_cardinality_to_long(planstate->plan->plan_rows);
-   if (nbuckets < 1)
-       nbuckets = 1;
+   nentries = planstate->plan->plan_rows;
 
    if (node->hashtable)
        ResetTupleHashTable(node->hashtable);
@@ -524,7 +521,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                              node->tab_eq_funcoids,
                                              node->tab_hash_funcs,
                                              node->tab_collations,
-                                             nbuckets,
+                                             nentries,
                                              0,    /* no additional data */
                                              node->planstate->state->es_query_cxt,
                                              node->tuplesContext,
@@ -534,12 +531,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
    if (!subplan->unknownEqFalse)
    {
        if (ncols == 1)
-           nbuckets = 1;       /* there can only be one entry */
+           nentries = 1;       /* there can only be one entry */
        else
        {
-           nbuckets /= 16;
-           if (nbuckets < 1)
-               nbuckets = 1;
+           nentries /= 16;
+           if (nentries < 1)
+               nentries = 1;
        }
 
        if (node->hashnulls)
@@ -553,7 +550,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                                  node->tab_eq_funcoids,
                                                  node->tab_hash_funcs,
                                                  node->tab_collations,
-                                                 nbuckets,
+                                                 nentries,
                                                  0,    /* no additional data */
                                                  node->planstate->state->es_query_cxt,
                                                  node->tuplesContext,
index 94077e6a006d54c2c731b9741ccf70a428e3dbd5..8335cf5b5c5aeee69ae2616039109b2c0a2991fc 100644 (file)
@@ -257,32 +257,6 @@ clamp_width_est(int64 tuple_width)
    return (int32) tuple_width;
 }
 
-/*
- * clamp_cardinality_to_long
- *     Cast a Cardinality value to a sane long value.
- */
-long
-clamp_cardinality_to_long(Cardinality x)
-{
-   /*
-    * Just for paranoia's sake, ensure we do something sane with negative or
-    * NaN values.
-    */
-   if (isnan(x))
-       return LONG_MAX;
-   if (x <= 0)
-       return 0;
-
-   /*
-    * If "long" is 64 bits, then LONG_MAX cannot be represented exactly as a
-    * double.  Casting it to double and back may well result in overflow due
-    * to rounding, so avoid doing that.  We trust that any double value that
-    * compares strictly less than "(double) LONG_MAX" will cast to a
-    * representable "long" value.
-    */
-   return (x < (double) LONG_MAX) ? (long) x : LONG_MAX;
-}
-
 
 /*
  * cost_seqscan
index 63fe663715565b471b145e6f5425f5bd221b4b91..8af091ba6471b8709aefffe01f321c79ce4726de 100644 (file)
@@ -226,7 +226,7 @@ static RecursiveUnion *make_recursive_union(List *tlist,
                                            Plan *righttree,
                                            int wtParam,
                                            List *distinctList,
-                                           long numGroups);
+                                           Cardinality numGroups);
 static BitmapAnd *make_bitmap_and(List *bitmapplans);
 static BitmapOr *make_bitmap_or(List *bitmapplans);
 static NestLoop *make_nestloop(List *tlist,
@@ -301,7 +301,7 @@ static Gather *make_gather(List *qptlist, List *qpqual,
                           int nworkers, int rescan_param, bool single_copy, Plan *subplan);
 static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
                         List *tlist, Plan *lefttree, Plan *righttree,
-                        List *groupList, long numGroups);
+                        List *groupList, Cardinality numGroups);
 static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
 static Result *make_gating_result(List *tlist, Node *resconstantqual,
                                  Plan *subplan);
@@ -2564,7 +2564,6 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
    List       *tlist = build_path_tlist(root, &best_path->path);
    Plan       *leftplan;
    Plan       *rightplan;
-   long        numGroups;
 
    /*
     * SetOp doesn't project, so tlist requirements pass through; moreover we
@@ -2575,16 +2574,13 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
    rightplan = create_plan_recurse(root, best_path->rightpath,
                                    flags | CP_LABEL_TLIST);
 
-   /* Convert numGroups to long int --- but 'ware overflow! */
-   numGroups = clamp_cardinality_to_long(best_path->numGroups);
-
    plan = make_setop(best_path->cmd,
                      best_path->strategy,
                      tlist,
                      leftplan,
                      rightplan,
                      best_path->groupList,
-                     numGroups);
+                     best_path->numGroups);
 
    copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -2604,7 +2600,6 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path)
    Plan       *leftplan;
    Plan       *rightplan;
    List       *tlist;
-   long        numGroups;
 
    /* Need both children to produce same tlist, so force it */
    leftplan = create_plan_recurse(root, best_path->leftpath, CP_EXACT_TLIST);
@@ -2612,15 +2607,12 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path)
 
    tlist = build_path_tlist(root, &best_path->path);
 
-   /* Convert numGroups to long int --- but 'ware overflow! */
-   numGroups = clamp_cardinality_to_long(best_path->numGroups);
-
    plan = make_recursive_union(tlist,
                                leftplan,
                                rightplan,
                                best_path->wtParam,
                                best_path->distinctList,
-                               numGroups);
+                               best_path->numGroups);
 
    copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -5845,7 +5837,7 @@ make_recursive_union(List *tlist,
                     Plan *righttree,
                     int wtParam,
                     List *distinctList,
-                    long numGroups)
+                    Cardinality numGroups)
 {
    RecursiveUnion *node = makeNode(RecursiveUnion);
    Plan       *plan = &node->plan;
@@ -6582,15 +6574,11 @@ Agg *
 make_agg(List *tlist, List *qual,
         AggStrategy aggstrategy, AggSplit aggsplit,
         int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
-        List *groupingSets, List *chain, double dNumGroups,
+        List *groupingSets, List *chain, Cardinality numGroups,
         Size transitionSpace, Plan *lefttree)
 {
    Agg        *node = makeNode(Agg);
    Plan       *plan = &node->plan;
-   long        numGroups;
-
-   /* Reduce to long, but 'ware overflow! */
-   numGroups = clamp_cardinality_to_long(dNumGroups);
 
    node->aggstrategy = aggstrategy;
    node->aggsplit = aggsplit;
@@ -6822,7 +6810,7 @@ make_gather(List *qptlist,
 static SetOp *
 make_setop(SetOpCmd cmd, SetOpStrategy strategy,
           List *tlist, Plan *lefttree, Plan *righttree,
-          List *groupList, long numGroups)
+          List *groupList, Cardinality numGroups)
 {
    SetOp      *node = makeNode(SetOp);
    Plan       *plan = &node->plan;
index 086f52cff3d8685cd70e3f2a44425feeaf785765..fa2b657fb2ffb69f8b80815e35b236d7c87f0f9e 100644 (file)
@@ -138,7 +138,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent,
                                          const Oid *eqfuncoids,
                                          FmgrInfo *hashfunctions,
                                          Oid *collations,
-                                         long nbuckets,
+                                         double nelements,
                                          Size additionalsize,
                                          MemoryContext metacxt,
                                          MemoryContext tuplescxt,
index 7cdd2b51c94e16d8101f670f78fce5368a84c4e2..c4393a9432116e72ee8ebb098cc0f85752cc9319 100644 (file)
@@ -475,7 +475,7 @@ typedef struct RecursiveUnion
    Oid        *dupCollations pg_node_attr(array_size(numCols));
 
    /* estimated number of groups in input */
-   long        numGroups;
+   Cardinality numGroups;
 } RecursiveUnion;
 
 /* ----------------
@@ -1206,7 +1206,7 @@ typedef struct Agg
    Oid        *grpCollations pg_node_attr(array_size(numCols));
 
    /* estimated number of groups in input */
-   long        numGroups;
+   Cardinality numGroups;
 
    /* for pass-by-ref transition data */
    uint64      transitionSpace;
@@ -1446,7 +1446,7 @@ typedef struct SetOp
    bool       *cmpNullsFirst pg_node_attr(array_size(numCols));
 
    /* estimated number of groups in left input */
-   long        numGroups;
+   Cardinality numGroups;
 } SetOp;
 
 /* ----------------
index a34113903c0e0f0a60a47e81fe8045727d7eff00..d0aa8ab0c1c0509339be20ca758c7afb41ef9f43 100644 (file)
@@ -82,7 +82,6 @@ extern PGDLLIMPORT int effective_cache_size;
 
 extern double clamp_row_est(double nrows);
 extern int32 clamp_width_est(int64 tuple_width);
-extern long clamp_cardinality_to_long(Cardinality x);
 
 /* in path/indxpath.c: */
 
index 09b48b26f8f4804cf183a20899d02f64a775b999..00addf1599250822001b039eca44e4e24d084474 100644 (file)
@@ -55,7 +55,7 @@ extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
 extern Agg *make_agg(List *tlist, List *qual,
                     AggStrategy aggstrategy, AggSplit aggsplit,
                     int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
-                    List *groupingSets, List *chain, double dNumGroups,
+                    List *groupingSets, List *chain, Cardinality numGroups,
                     Size transitionSpace, Plan *lefttree);
 extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
                         LimitOption limitOption, int uniqNumCols,