From: Tom Lane Date: Wed, 2 Sep 2009 17:52:24 +0000 (+0000) Subject: Fix subquery pullup to wrap a PlaceHolderVar around the entire RowExpr X-Git-Tag: REL8_5_ALPHA2~137 X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=57c9dff9d1a6473042caa0f1065f85c57890d2a0;p=postgresql.git Fix subquery pullup to wrap a PlaceHolderVar around the entire RowExpr that's generated for a whole-row Var referencing the subquery, when the subquery is in the nullable side of an outer join. The previous coding instead put PlaceHolderVars around the elements of the RowExpr. The effect was that when the outer join made the subquery outputs go to null, the whole-row Var produced ROW(NULL,NULL,...) rather than just NULL. There are arguments afoot about whether those things ought to be semantically indistinguishable, but for the moment they are not entirely so, and the planner needs to take care that its machinations preserve the difference. Per bug #5025. Making this feasible required refactoring ResolveNew() to allow more caller control over what is substituted for a Var. I chose to make ResolveNew() a wrapper around a new general-purpose function replace_rte_variables(). I also fixed the ancient bogosity that ResolveNew might fail to set a query's hasSubLinks field after inserting a SubLink in it. Although all current callers make sure that happens anyway, we've had bugs of that sort before, and it seemed like a good time to install a proper solution. Back-patch to 8.4. The problem can be demonstrated clear back to 8.0, but the fix would be too invasive in earlier branches; not to mention that people may be depending on the subtly-incorrect behavior. The 8.4 series is new enough that fixing this probably won't cause complaints, but it might in older branches. Also, 8.4 shows the incorrect behavior in more cases than older branches do, because it is able to flatten subqueries in more cases. --- diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index e8e5b2ecb2d..ada9140552e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.184 2009/07/06 18:26:30 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.185 2009/09/02 17:52:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1254,7 +1254,8 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) */ qual = ResolveNew(qual, rti, 0, rte, subquery->targetList, - CMD_SELECT, 0); + CMD_SELECT, 0, + &subquery->hasSubLinks); /* * Now attach the qual to the proper place: normally WHERE, but if the diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index fd451b338bf..d7676efbcb6 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -16,12 +16,13 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.66 2009/06/11 14:48:59 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.67 2009/09/02 17:52:24 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" @@ -30,10 +31,23 @@ #include "optimizer/subselect.h" #include "optimizer/tlist.h" #include "optimizer/var.h" +#include "parser/parse_relation.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +typedef struct pullup_replace_vars_context +{ + PlannerInfo *root; + List *targetlist; /* tlist of subquery being pulled up */ + RangeTblEntry *target_rte; /* RTE of subquery */ + bool *outer_hasSubLinks; /* -> outer query's hasSubLinks */ + int varno; /* varno of subquery */ + bool need_phvs; /* do we need PlaceHolderVars? */ + bool wrap_non_vars; /* do we need 'em on *all* non-Vars? */ + Node **rv_cache; /* cache for results with PHVs */ +} pullup_replace_vars_context; + typedef struct reduce_outer_joins_state { Relids relids; /* base relids within this subtree */ @@ -60,12 +74,14 @@ static bool is_simple_subquery(Query *subquery); static bool is_simple_union_all(Query *subquery); static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes); -static List *insert_targetlist_placeholders(PlannerInfo *root, List *tlist, - int varno, bool wrap_non_vars); static bool is_safe_append_member(Query *subquery); -static void resolvenew_in_jointree(Node *jtnode, int varno, RangeTblEntry *rte, - List *subtlist, List *subtlist_with_phvs, - JoinExpr *lowest_outer_join); +static void replace_vars_in_jointree(Node *jtnode, + pullup_replace_vars_context *context, + JoinExpr *lowest_outer_join); +static Node *pullup_replace_vars(Node *expr, + pullup_replace_vars_context *context); +static Node *pullup_replace_vars_callback(Var *var, + replace_rte_variables_context *context); static reduce_outer_joins_state *reduce_outer_joins_pass1(Node *jtnode); static void reduce_outer_joins_pass2(Node *jtnode, reduce_outer_joins_state *state, @@ -457,8 +473,8 @@ inline_set_returning_functions(PlannerInfo *root) * we are currently processing! We handle this by being careful not to * change the jointree structure while recursing: no nodes other than * subquery RangeTblRef entries will be replaced. Also, we can't turn - * ResolveNew loose on the whole jointree, because it'll return a mutated - * copy of the tree; we have to invoke it just on the quals, instead. + * pullup_replace_vars loose on the whole jointree, because it'll return a + * mutated copy of the tree; we have to invoke it just on the quals, instead. * This behavior is what makes it reasonable to pass lowest_outer_join as a * pointer rather than some more-indirect way of identifying the lowest OJ. * Likewise, we don't replace append_rel_list members but only their @@ -584,8 +600,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, Query *subquery; PlannerInfo *subroot; int rtoffset; - List *subtlist; - List *subtlist_with_phvs; + pullup_replace_vars_context rvcontext; ListCell *lc; /* @@ -692,16 +707,20 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, * insert into the top query, but if we are under an outer join then * non-nullable items may have to be turned into PlaceHolderVars. If we * are dealing with an appendrel member then anything that's not a simple - * Var has to be turned into a PlaceHolderVar. + * Var has to be turned into a PlaceHolderVar. Set up appropriate context + * data for pullup_replace_vars. */ - subtlist = subquery->targetList; - if (lowest_outer_join != NULL || containing_appendrel != NULL) - subtlist_with_phvs = insert_targetlist_placeholders(root, - subtlist, - varno, - containing_appendrel != NULL); - else - subtlist_with_phvs = subtlist; + rvcontext.root = root; + rvcontext.targetlist = subquery->targetList; + rvcontext.target_rte = rte; + rvcontext.outer_hasSubLinks = &parse->hasSubLinks; + rvcontext.varno = varno; + rvcontext.need_phvs = (lowest_outer_join != NULL || + containing_appendrel != NULL); + rvcontext.wrap_non_vars = (containing_appendrel != NULL); + /* initialize cache array with indexes 0 .. length(tlist) */ + rvcontext.rv_cache = palloc0((list_length(subquery->targetList) + 1) * + sizeof(Node *)); /* * Replace all of the top query's references to the subquery's outputs @@ -709,25 +728,17 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, * replace any of the jointree structure. (This'd be a lot cleaner if we * could use query_tree_mutator.) We have to use PHVs in the targetList, * returningList, and havingQual, since those are certainly above any - * outer join. resolvenew_in_jointree tracks its location in the jointree - * and uses PHVs or not appropriately. + * outer join. replace_vars_in_jointree tracks its location in the + * jointree and uses PHVs or not appropriately. */ parse->targetList = (List *) - ResolveNew((Node *) parse->targetList, - varno, 0, rte, - subtlist_with_phvs, CMD_SELECT, 0); + pullup_replace_vars((Node *) parse->targetList, &rvcontext); parse->returningList = (List *) - ResolveNew((Node *) parse->returningList, - varno, 0, rte, - subtlist_with_phvs, CMD_SELECT, 0); - resolvenew_in_jointree((Node *) parse->jointree, varno, rte, - subtlist, subtlist_with_phvs, - lowest_outer_join); + pullup_replace_vars((Node *) parse->returningList, &rvcontext); + replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, + lowest_outer_join); Assert(parse->setOperations == NULL); - parse->havingQual = - ResolveNew(parse->havingQual, - varno, 0, rte, - subtlist_with_phvs, CMD_SELECT, 0); + parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext); /* * Replace references in the translated_vars lists of appendrels. When @@ -739,13 +750,13 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, foreach(lc, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc); + bool save_need_phvs = rvcontext.need_phvs; + if (appinfo == containing_appendrel) + rvcontext.need_phvs = false; appinfo->translated_vars = (List *) - ResolveNew((Node *) appinfo->translated_vars, - varno, 0, rte, - (appinfo == containing_appendrel) ? - subtlist : subtlist_with_phvs, - CMD_SELECT, 0); + pullup_replace_vars((Node *) appinfo->translated_vars, &rvcontext); + rvcontext.need_phvs = save_need_phvs; } /* @@ -763,9 +774,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, if (otherrte->rtekind == RTE_JOIN) otherrte->joinaliasvars = (List *) - ResolveNew((Node *) otherrte->joinaliasvars, - varno, 0, rte, - subtlist_with_phvs, CMD_SELECT, 0); + pullup_replace_vars((Node *) otherrte->joinaliasvars, + &rvcontext); } /* @@ -783,9 +793,9 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, /* * We also have to fix the relid sets of any PlaceHolderVar nodes in the - * parent query. (This could perhaps be done by ResolveNew, but it would - * clutter that routine's API unreasonably.) Note in particular that any - * PlaceHolderVar nodes just created by insert_targetlist_placeholders() + * parent query. (This could perhaps be done by pullup_replace_vars(), + * but it seems cleaner to use two passes.) Note in particular that any + * PlaceHolderVar nodes just created by pullup_replace_vars() * will be adjusted, so having created them with the subquery's varno is * correct. * @@ -820,6 +830,12 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, /* * Miscellaneous housekeeping. + * + * Although replace_rte_variables() faithfully updated parse->hasSubLinks + * if it copied any SubLinks out of the subquery's targetlist, we still + * could have SubLinks added to the query in the expressions of FUNCTION + * and VALUES RTEs copied up from the subquery. So it's necessary to copy + * subquery->hasSubLinks anyway. Perhaps this can be improved someday. */ parse->hasSubLinks |= subquery->hasSubLinks; @@ -1136,77 +1152,6 @@ is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes) } } -/* - * insert_targetlist_placeholders - * Insert PlaceHolderVar nodes into any non-junk targetlist items that are - * not simple variables or strict functions of simple variables (and hence - * might not correctly go to NULL when examined above the point of an outer - * join). - * - * varno is the upper-query relid of the subquery; this is used as the - * syntactic location of the PlaceHolderVars. - * If wrap_non_vars is true then *only* simple Var references escape being - * wrapped with PlaceHolderVars. - */ -static List * -insert_targetlist_placeholders(PlannerInfo *root, List *tlist, - int varno, bool wrap_non_vars) -{ - List *result = NIL; - ListCell *lc; - - foreach(lc, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - TargetEntry *newtle; - - /* resjunk columns need not be changed */ - if (tle->resjunk) - { - result = lappend(result, tle); - continue; - } - - /* - * Simple Vars always escape being wrapped. This is common enough to - * deserve a fast path even if we aren't doing wrap_non_vars. - */ - if (tle->expr && IsA(tle->expr, Var) && - ((Var *) tle->expr)->varlevelsup == 0) - { - result = lappend(result, tle); - continue; - } - - if (!wrap_non_vars) - { - /* - * If it contains a Var of current level, and does not contain any - * non-strict constructs, then it's certainly nullable and we - * don't need to insert a PlaceHolderVar. (Note: in future maybe - * we should insert PlaceHolderVars anyway, when a tlist item is - * expensive to evaluate? - */ - if (contain_vars_of_level((Node *) tle->expr, 0) && - !contain_nonstrict_functions((Node *) tle->expr)) - { - result = lappend(result, tle); - continue; - } - } - - /* Else wrap it in a PlaceHolderVar */ - newtle = makeNode(TargetEntry); - memcpy(newtle, tle, sizeof(TargetEntry)); - newtle->expr = (Expr *) - make_placeholder_expr(root, - tle->expr, - bms_make_singleton(varno)); - result = lappend(result, newtle); - } - return result; -} - /* * is_safe_append_member * Check a subquery that is a leaf of a UNION ALL appendrel to see if it's @@ -1244,18 +1189,17 @@ is_safe_append_member(Query *subquery) } /* - * Helper routine for pull_up_subqueries: do ResolveNew on every expression - * in the jointree, without changing the jointree structure itself. Ugly, - * but there's no other way... + * Helper routine for pull_up_subqueries: do pullup_replace_vars on every + * expression in the jointree, without changing the jointree structure itself. + * Ugly, but there's no other way... * - * If we are above lowest_outer_join then use subtlist_with_phvs; at or - * below it, use subtlist. (When no outer joins are in the picture, - * these will be the same list.) + * If we are at or below lowest_outer_join, we can suppress use of + * PlaceHolderVars wrapped around the replacement expressions. */ static void -resolvenew_in_jointree(Node *jtnode, int varno, RangeTblEntry *rte, - List *subtlist, List *subtlist_with_phvs, - JoinExpr *lowest_outer_join) +replace_vars_in_jointree(Node *jtnode, + pullup_replace_vars_context *context, + JoinExpr *lowest_outer_join) { if (jtnode == NULL) return; @@ -1269,43 +1213,204 @@ resolvenew_in_jointree(Node *jtnode, int varno, RangeTblEntry *rte, ListCell *l; foreach(l, f->fromlist) - resolvenew_in_jointree(lfirst(l), varno, rte, - subtlist, subtlist_with_phvs, - lowest_outer_join); - f->quals = ResolveNew(f->quals, - varno, 0, rte, - subtlist_with_phvs, CMD_SELECT, 0); + replace_vars_in_jointree(lfirst(l), context, lowest_outer_join); + f->quals = pullup_replace_vars(f->quals, context); } else if (IsA(jtnode, JoinExpr)) { JoinExpr *j = (JoinExpr *) jtnode; + bool save_need_phvs = context->need_phvs; if (j == lowest_outer_join) { /* no more PHVs in or below this join */ - subtlist_with_phvs = subtlist; + context->need_phvs = false; lowest_outer_join = NULL; } - resolvenew_in_jointree(j->larg, varno, rte, - subtlist, subtlist_with_phvs, - lowest_outer_join); - resolvenew_in_jointree(j->rarg, varno, rte, - subtlist, subtlist_with_phvs, - lowest_outer_join); - j->quals = ResolveNew(j->quals, - varno, 0, rte, - subtlist_with_phvs, CMD_SELECT, 0); + replace_vars_in_jointree(j->larg, context, lowest_outer_join); + replace_vars_in_jointree(j->rarg, context, lowest_outer_join); + j->quals = pullup_replace_vars(j->quals, context); /* * We don't bother to update the colvars list, since it won't be used * again ... */ + context->need_phvs = save_need_phvs; } else elog(ERROR, "unrecognized node type: %d", (int) nodeTag(jtnode)); } +/* + * Apply pullup variable replacement throughout an expression tree + * + * Returns a modified copy of the tree, so this can't be used where we + * need to do in-place replacement. + */ +static Node * +pullup_replace_vars(Node *expr, pullup_replace_vars_context *context) +{ + return replace_rte_variables(expr, + context->varno, 0, + pullup_replace_vars_callback, + (void *) context, + context->outer_hasSubLinks); +} + +static Node * +pullup_replace_vars_callback(Var *var, + replace_rte_variables_context *context) +{ + pullup_replace_vars_context *rcon = (pullup_replace_vars_context *) context->callback_arg; + int varattno = var->varattno; + Node *newnode; + + /* + * If PlaceHolderVars are needed, we cache the modified expressions in + * rcon->rv_cache[]. This is not in hopes of any material speed gain + * within this function, but to avoid generating identical PHVs with + * different IDs. That would result in duplicate evaluations at runtime, + * and possibly prevent optimizations that rely on recognizing different + * references to the same subquery output as being equal(). So it's worth + * a bit of extra effort to avoid it. + */ + if (rcon->need_phvs && + varattno >= InvalidAttrNumber && + varattno <= list_length(rcon->targetlist) && + rcon->rv_cache[varattno] != NULL) + { + /* Just copy the entry and fall through to adjust its varlevelsup */ + newnode = copyObject(rcon->rv_cache[varattno]); + } + else if (varattno == InvalidAttrNumber) + { + /* Must expand whole-tuple reference into RowExpr */ + RowExpr *rowexpr; + List *colnames; + List *fields; + bool save_need_phvs = rcon->need_phvs; + + /* + * If generating an expansion for a var of a named rowtype (ie, this + * is a plain relation RTE), then we must include dummy items for + * dropped columns. If the var is RECORD (ie, this is a JOIN), then + * omit dropped columns. Either way, attach column names to the + * RowExpr for use of ruleutils.c. + * + * In order to be able to cache the results, we always generate the + * expansion with varlevelsup = 0, and then adjust if needed. + */ + expandRTE(rcon->target_rte, + var->varno, 0 /* not varlevelsup */, var->location, + (var->vartype != RECORDOID), + &colnames, &fields); + /* Adjust the generated per-field Vars, but don't insert PHVs */ + rcon->need_phvs = false; + fields = (List *) replace_rte_variables_mutator((Node *) fields, + context); + rcon->need_phvs = save_need_phvs; + rowexpr = makeNode(RowExpr); + rowexpr->args = fields; + rowexpr->row_typeid = var->vartype; + rowexpr->row_format = COERCE_IMPLICIT_CAST; + rowexpr->colnames = colnames; + rowexpr->location = var->location; + newnode = (Node *) rowexpr; + + /* + * Insert PlaceHolderVar if needed. Notice that we are wrapping + * one PlaceHolderVar around the whole RowExpr, rather than putting + * one around each element of the row. This is because we need + * the expression to yield NULL, not ROW(NULL,NULL,...) when it + * is forced to null by an outer join. + */ + if (rcon->need_phvs) + { + /* RowExpr is certainly not strict, so always need PHV */ + newnode = (Node *) + make_placeholder_expr(rcon->root, + (Expr *) newnode, + bms_make_singleton(rcon->varno)); + /* cache it with the PHV, and with varlevelsup still zero */ + rcon->rv_cache[InvalidAttrNumber] = copyObject(newnode); + } + } + else + { + /* Normal case referencing one targetlist element */ + TargetEntry *tle = get_tle_by_resno(rcon->targetlist, varattno); + + if (tle == NULL) /* shouldn't happen */ + elog(ERROR, "could not find attribute %d in subquery targetlist", + varattno); + + /* Make a copy of the tlist item to return */ + newnode = copyObject(tle->expr); + + /* Insert PlaceHolderVar if needed */ + if (rcon->need_phvs) + { + bool wrap; + + if (newnode && IsA(newnode, Var) && + ((Var *) newnode)->varlevelsup == 0) + { + /* Simple Vars always escape being wrapped */ + wrap = false; + } + else if (rcon->wrap_non_vars) + { + /* Wrap all non-Vars in a PlaceHolderVar */ + wrap = true; + } + else + { + /* + * If it contains a Var of current level, and does not contain + * any non-strict constructs, then it's certainly nullable and + * we don't need to insert a PlaceHolderVar. (Note: in future + * maybe we should insert PlaceHolderVars anyway, when a tlist + * item is expensive to evaluate? + */ + if (contain_vars_of_level((Node *) newnode, 0) && + !contain_nonstrict_functions((Node *) newnode)) + { + /* No wrap needed */ + wrap = false; + } + else + { + /* Else wrap it in a PlaceHolderVar */ + wrap = true; + } + } + + if (wrap) + newnode = (Node *) + make_placeholder_expr(rcon->root, + (Expr *) newnode, + bms_make_singleton(rcon->varno)); + + /* + * Cache it if possible (ie, if the attno is in range, which it + * probably always should be). We can cache the value even if + * we decided we didn't need a PHV, since this result will be + * suitable for any request that has need_phvs. + */ + if (varattno > InvalidAttrNumber && + varattno <= list_length(rcon->targetlist)) + rcon->rv_cache[varattno] = copyObject(newnode); + } + } + + /* Must adjust varlevelsup if tlist item is from higher query */ + if (var->varlevelsup > 0) + IncrementVarSublevelsUp(newnode, var->varlevelsup, 0); + + return newnode; +} + /* * reduce_outer_joins * Attempt to reduce outer joins to plain inner joins. @@ -1724,9 +1829,9 @@ reduce_outer_joins_pass2(Node *jtnode, * top query could (yet) contain such a reference. * * NOTE: although this has the form of a walker, we cheat and modify the - * nodes in-place. This should be OK since the tree was copied by ResolveNew - * earlier. Avoid scribbling on the original values of the bitmapsets, though, - * because expression_tree_mutator doesn't copy those. + * nodes in-place. This should be OK since the tree was copied by + * pullup_replace_vars earlier. Avoid scribbling on the original values of + * the bitmapsets, though, because expression_tree_mutator doesn't copy those. */ typedef struct diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 63bac959a74..a0098836b95 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.173 2009/08/13 16:53:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.174 2009/09/02 17:52:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1477,8 +1477,8 @@ translate_col_privs(const Bitmapset *parent_privs, * Note: this is only applied after conversion of sublinks to subplans, * so we don't need to cope with recursion into sub-queries. * - * Note: this is not hugely different from what ResolveNew() does; maybe - * we should try to fold the two routines together. + * Note: this is not hugely different from what pullup_replace_vars() does; + * maybe we should try to fold the two routines together. */ Node * adjust_appendrel_attrs(Node *node, AppendRelInfo *appinfo) diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index c1233b63570..8a178c9d3c6 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.186 2009/06/11 14:49:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.187 2009/09/02 17:52:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -463,7 +463,8 @@ rewriteRuleAction(Query *parsetree, sub_action->rtable), parsetree->targetList, event, - current_varno); + current_varno, + NULL); if (sub_action_ptr) *sub_action_ptr = sub_action; else @@ -493,7 +494,8 @@ rewriteRuleAction(Query *parsetree, parsetree->rtable), rule_action->returningList, CMD_SELECT, - 0); + 0, + &rule_action->hasSubLinks); /* * There could have been some SubLinks in parsetree's returningList, @@ -1510,7 +1512,8 @@ CopyAndAddInvertedQual(Query *parsetree, rt_fetch(rt_index, parsetree->rtable), parsetree->targetList, event, - rt_index); + rt_index, + &parsetree->hasSubLinks); /* And attach the fixed qual */ AddInvertedQual(parsetree, new_qual); diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 570434ed83e..d211a88ba45 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.122 2009/06/11 14:49:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.123 2009/09/02 17:52:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1067,16 +1067,14 @@ AddInvertedQual(Query *parsetree, Node *qual) /* - * ResolveNew - replace Vars with corresponding items from a targetlist - * - * Vars matching target_varno and sublevels_up are replaced by the - * entry with matching resno from targetlist, if there is one. - * If not, we either change the unmatched Var's varno to update_varno - * (when event == CMD_UPDATE) or replace it with a constant NULL. + * replace_rte_variables() finds all Vars in an expression tree + * that reference a particular RTE, and replaces them with substitute + * expressions obtained from a caller-supplied callback function. * - * The caller must also provide target_rte, the RTE describing the target - * relation. This is needed to handle whole-row Vars referencing the target. - * We expand such Vars into RowExpr constructs. + * When invoking replace_rte_variables on a portion of a Query, pass the + * address of the containing Query's hasSubLinks field as outer_hasSubLinks. + * Otherwise, pass NULL, but inserting a SubLink into a non-Query expression + * will then cause an error. * * Note: the business with inserted_sublink is needed to update hasSubLinks * in subqueries when the replacement adds a subquery inside a subquery. @@ -1084,122 +1082,88 @@ AddInvertedQual(Query *parsetree, Node *qual) * because it isn't possible for this transformation to insert a level-zero * aggregate reference into a subquery --- it could only insert outer aggs. * Likewise for hasWindowFuncs. + * + * Note: usually, we'd not expose the mutator function or context struct + * for a function like this. We do so because callbacks often find it + * convenient to recurse directly to the mutator on sub-expressions of + * what they will return. */ - -typedef struct +Node * +replace_rte_variables(Node *node, int target_varno, int sublevels_up, + replace_rte_variables_callback callback, + void *callback_arg, + bool *outer_hasSubLinks) { - int target_varno; - int sublevels_up; - RangeTblEntry *target_rte; - List *targetlist; - int event; - int update_varno; - bool inserted_sublink; -} ResolveNew_context; + Node *result; + replace_rte_variables_context context; -static Node * -resolve_one_var(Var *var, ResolveNew_context *context) -{ - TargetEntry *tle; + context.callback = callback; + context.callback_arg = callback_arg; + context.target_varno = target_varno; + context.sublevels_up = sublevels_up; - tle = get_tle_by_resno(context->targetlist, var->varattno); + /* + * We try to initialize inserted_sublink to true if there is no need to + * detect new sublinks because the query already has some. + */ + if (node && IsA(node, Query)) + context.inserted_sublink = ((Query *) node)->hasSubLinks; + else if (outer_hasSubLinks) + context.inserted_sublink = *outer_hasSubLinks; + else + context.inserted_sublink = false; - if (tle == NULL) + /* + * Must be prepared to start with a Query or a bare expression tree; if + * it's a Query, we don't want to increment sublevels_up. + */ + result = query_or_expression_tree_mutator(node, + replace_rte_variables_mutator, + (void *) &context, + 0); + + if (context.inserted_sublink) { - /* Failed to find column in insert/update tlist */ - if (context->event == CMD_UPDATE) - { - /* For update, just change unmatched var's varno */ - var = (Var *) copyObject(var); - var->varno = context->update_varno; - var->varnoold = context->update_varno; - return (Node *) var; - } + if (result && IsA(result, Query)) + ((Query *) result)->hasSubLinks = true; + else if (outer_hasSubLinks) + *outer_hasSubLinks = true; else - { - /* Otherwise replace unmatched var with a null */ - /* need coerce_to_domain in case of NOT NULL domain constraint */ - return coerce_to_domain((Node *) makeNullConst(var->vartype, - var->vartypmod), - InvalidOid, -1, - var->vartype, - COERCE_IMPLICIT_CAST, - -1, - false, - false); - } + elog(ERROR, "replace_rte_variables inserted a SubLink, but has noplace to record it"); } - else - { - /* Make a copy of the tlist item to return */ - Node *n = copyObject(tle->expr); - /* Adjust varlevelsup if tlist item is from higher query */ - if (var->varlevelsup > 0) - IncrementVarSublevelsUp(n, var->varlevelsup, 0); - /* Report it if we are adding a sublink to query */ - if (!context->inserted_sublink) - context->inserted_sublink = checkExprHasSubLink(n); - return n; - } + return result; } -static Node * -ResolveNew_mutator(Node *node, ResolveNew_context *context) +Node * +replace_rte_variables_mutator(Node *node, + replace_rte_variables_context *context) { if (node == NULL) return NULL; if (IsA(node, Var)) { Var *var = (Var *) node; - int this_varno = (int) var->varno; - int this_varlevelsup = (int) var->varlevelsup; - if (this_varno == context->target_varno && - this_varlevelsup == context->sublevels_up) + if (var->varno == context->target_varno && + var->varlevelsup == context->sublevels_up) { - if (var->varattno == InvalidAttrNumber) - { - /* Must expand whole-tuple reference into RowExpr */ - RowExpr *rowexpr; - List *colnames; - List *fields; - - /* - * If generating an expansion for a var of a named rowtype - * (ie, this is a plain relation RTE), then we must include - * dummy items for dropped columns. If the var is RECORD (ie, - * this is a JOIN), then omit dropped columns. Either way, - * attach column names to the RowExpr for use of ruleutils.c. - */ - expandRTE(context->target_rte, - this_varno, this_varlevelsup, var->location, - (var->vartype != RECORDOID), - &colnames, &fields); - /* Adjust the generated per-field Vars... */ - fields = (List *) ResolveNew_mutator((Node *) fields, - context); - rowexpr = makeNode(RowExpr); - rowexpr->args = fields; - rowexpr->row_typeid = var->vartype; - rowexpr->row_format = COERCE_IMPLICIT_CAST; - rowexpr->colnames = colnames; - rowexpr->location = -1; - - return (Node *) rowexpr; - } - - /* Normal case for scalar variable */ - return resolve_one_var(var, context); + /* Found a matching variable, make the substitution */ + Node *newnode; + + newnode = (*context->callback) (var, context); + /* Detect if we are adding a sublink to query */ + if (!context->inserted_sublink) + context->inserted_sublink = checkExprHasSubLink(newnode); + return newnode; } /* otherwise fall through to copy the var normally */ } else if (IsA(node, CurrentOfExpr)) { CurrentOfExpr *cexpr = (CurrentOfExpr *) node; - int this_varno = (int) cexpr->cvarno; - if (this_varno == context->target_varno && + if (cexpr->cvarno == context->target_varno && context->sublevels_up == 0) { /* @@ -1222,9 +1186,9 @@ ResolveNew_mutator(Node *node, ResolveNew_context *context) context->sublevels_up++; save_inserted_sublink = context->inserted_sublink; - context->inserted_sublink = false; + context->inserted_sublink = ((Query *) node)->hasSubLinks; newnode = query_tree_mutator((Query *) node, - ResolveNew_mutator, + replace_rte_variables_mutator, (void *) context, 0); newnode->hasSubLinks |= context->inserted_sublink; @@ -1232,46 +1196,128 @@ ResolveNew_mutator(Node *node, ResolveNew_context *context) context->sublevels_up--; return (Node *) newnode; } - return expression_tree_mutator(node, ResolveNew_mutator, + return expression_tree_mutator(node, replace_rte_variables_mutator, (void *) context); } + +/* + * ResolveNew - replace Vars with corresponding items from a targetlist + * + * Vars matching target_varno and sublevels_up are replaced by the + * entry with matching resno from targetlist, if there is one. + * If not, we either change the unmatched Var's varno to update_varno + * (when event == CMD_UPDATE) or replace it with a constant NULL. + * + * The caller must also provide target_rte, the RTE describing the target + * relation. This is needed to handle whole-row Vars referencing the target. + * We expand such Vars into RowExpr constructs. + * + * outer_hasSubLinks works the same as for replace_rte_variables(). + */ + +typedef struct +{ + RangeTblEntry *target_rte; + List *targetlist; + int event; + int update_varno; +} ResolveNew_context; + +static Node * +ResolveNew_callback(Var *var, + replace_rte_variables_context *context) +{ + ResolveNew_context *rcon = (ResolveNew_context *) context->callback_arg; + TargetEntry *tle; + + if (var->varattno == InvalidAttrNumber) + { + /* Must expand whole-tuple reference into RowExpr */ + RowExpr *rowexpr; + List *colnames; + List *fields; + + /* + * If generating an expansion for a var of a named rowtype + * (ie, this is a plain relation RTE), then we must include + * dummy items for dropped columns. If the var is RECORD (ie, + * this is a JOIN), then omit dropped columns. Either way, + * attach column names to the RowExpr for use of ruleutils.c. + */ + expandRTE(rcon->target_rte, + var->varno, var->varlevelsup, var->location, + (var->vartype != RECORDOID), + &colnames, &fields); + /* Adjust the generated per-field Vars... */ + fields = (List *) replace_rte_variables_mutator((Node *) fields, + context); + rowexpr = makeNode(RowExpr); + rowexpr->args = fields; + rowexpr->row_typeid = var->vartype; + rowexpr->row_format = COERCE_IMPLICIT_CAST; + rowexpr->colnames = colnames; + rowexpr->location = var->location; + + return (Node *) rowexpr; + } + + /* Normal case referencing one targetlist element */ + tle = get_tle_by_resno(rcon->targetlist, var->varattno); + + if (tle == NULL) + { + /* Failed to find column in insert/update tlist */ + if (rcon->event == CMD_UPDATE) + { + /* For update, just change unmatched var's varno */ + var = (Var *) copyObject(var); + var->varno = rcon->update_varno; + var->varnoold = rcon->update_varno; + return (Node *) var; + } + else + { + /* Otherwise replace unmatched var with a null */ + /* need coerce_to_domain in case of NOT NULL domain constraint */ + return coerce_to_domain((Node *) makeNullConst(var->vartype, + var->vartypmod), + InvalidOid, -1, + var->vartype, + COERCE_IMPLICIT_CAST, + -1, + false, + false); + } + } + else + { + /* Make a copy of the tlist item to return */ + Node *newnode = copyObject(tle->expr); + + /* Must adjust varlevelsup if tlist item is from higher query */ + if (var->varlevelsup > 0) + IncrementVarSublevelsUp(newnode, var->varlevelsup, 0); + + return newnode; + } +} + Node * ResolveNew(Node *node, int target_varno, int sublevels_up, RangeTblEntry *target_rte, - List *targetlist, int event, int update_varno) + List *targetlist, int event, int update_varno, + bool *outer_hasSubLinks) { - Node *result; ResolveNew_context context; - context.target_varno = target_varno; - context.sublevels_up = sublevels_up; context.target_rte = target_rte; context.targetlist = targetlist; context.event = event; context.update_varno = update_varno; - context.inserted_sublink = false; - - /* - * Must be prepared to start with a Query or a bare expression tree; if - * it's a Query, we don't want to increment sublevels_up. - */ - result = query_or_expression_tree_mutator(node, - ResolveNew_mutator, - (void *) &context, - 0); - if (context.inserted_sublink) - { - if (IsA(result, Query)) - ((Query *) result)->hasSubLinks = true; - - /* - * Note: if we're called on a non-Query node then it's the caller's - * responsibility to update hasSubLinks in the ancestor Query. This is - * pretty fragile and perhaps should be rethought ... - */ - } - - return result; + return replace_rte_variables(node, target_varno, sublevels_up, + ResolveNew_callback, + (void *) &context, + outer_hasSubLinks); } diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index 51993f3a78b..15150728d9b 100644 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.50 2009/06/11 14:49:12 momjian Exp $ + * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.51 2009/09/02 17:52:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,21 @@ #include "nodes/parsenodes.h" +typedef struct replace_rte_variables_context replace_rte_variables_context; + +typedef Node * (*replace_rte_variables_callback) (Var *var, + replace_rte_variables_context *context); + +struct replace_rte_variables_context +{ + replace_rte_variables_callback callback; /* callback function */ + void *callback_arg; /* context data for callback function */ + int target_varno; /* RTE index to search for */ + int sublevels_up; /* (current) nesting depth */ + bool inserted_sublink; /* have we inserted a SubLink? */ +}; + + extern void OffsetVarNodes(Node *node, int offset, int sublevels_up); extern void ChangeVarNodes(Node *node, int old_varno, int new_varno, int sublevels_up); @@ -42,8 +57,17 @@ extern bool checkExprHasAggs(Node *node); extern bool checkExprHasWindowFuncs(Node *node); extern bool checkExprHasSubLink(Node *node); +extern Node *replace_rte_variables(Node *node, + int target_varno, int sublevels_up, + replace_rte_variables_callback callback, + void *callback_arg, + bool *outer_hasSubLinks); +extern Node *replace_rte_variables_mutator(Node *node, + replace_rte_variables_context *context); + extern Node *ResolveNew(Node *node, int target_varno, int sublevels_up, RangeTblEntry *target_rte, - List *targetlist, int event, int update_varno); + List *targetlist, int event, int update_varno, + bool *outer_hasSubLinks); #endif /* REWRITEMANIP_H */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index c8c9b960b5b..dd9bf9c43f0 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2394,3 +2394,52 @@ select * from a left join b on i = x and i = y and x = i; (0 rows) rollback; +-- +-- test NULL behavior of whole-row Vars, per bug #5025 +-- +select t1.q2, count(t2.*) +from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1) +group by t1.q2 order by 1; + q2 | count +-------------------+------- + -4567890123456789 | 0 + 123 | 2 + 456 | 0 + 4567890123456789 | 6 +(4 rows) + +select t1.q2, count(t2.*) +from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1) +group by t1.q2 order by 1; + q2 | count +-------------------+------- + -4567890123456789 | 0 + 123 | 2 + 456 | 0 + 4567890123456789 | 6 +(4 rows) + +select t1.q2, count(t2.*) +from int8_tbl t1 left join (select * from int8_tbl offset 0) t2 on (t1.q2 = t2.q1) +group by t1.q2 order by 1; + q2 | count +-------------------+------- + -4567890123456789 | 0 + 123 | 2 + 456 | 0 + 4567890123456789 | 6 +(4 rows) + +select t1.q2, count(t2.*) +from int8_tbl t1 left join + (select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2 + on (t1.q2 = t2.q1) +group by t1.q2 order by 1; + q2 | count +-------------------+------- + -4567890123456789 | 0 + 123 | 2 + 456 | 0 + 4567890123456789 | 6 +(4 rows) + diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 1516e09b56c..46ff66c6909 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -540,3 +540,24 @@ create temp table b (x integer, y integer); select * from a left join b on i = x and i = y and x = i; rollback; + +-- +-- test NULL behavior of whole-row Vars, per bug #5025 +-- +select t1.q2, count(t2.*) +from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1) +group by t1.q2 order by 1; + +select t1.q2, count(t2.*) +from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1) +group by t1.q2 order by 1; + +select t1.q2, count(t2.*) +from int8_tbl t1 left join (select * from int8_tbl offset 0) t2 on (t1.q2 = t2.q1) +group by t1.q2 order by 1; + +select t1.q2, count(t2.*) +from int8_tbl t1 left join + (select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2 + on (t1.q2 = t2.q1) +group by t1.q2 order by 1;