Don't generate EEOP_*_FETCHSOME operations for slots know to be virtual.
authorAndres Freund <[email protected]>
Mon, 30 Sep 2019 23:06:16 +0000 (16:06 -0700)
committerAndres Freund <[email protected]>
Mon, 30 Sep 2019 23:06:16 +0000 (16:06 -0700)
That avoids unnecessary work during both interpreted execution, and
JIT compiled expression evaluation. Both benefit from fewer expression
steps needing be processed, and for interpreted execution there now is
a fastpath dedicated to just fetching a value from a virtual
slot. That's e.g. beneficial for hashjoins over nodes that perform
projections, as the hashed columns are currently fetched individually.

Author: Soumyadeep Chakraborty, Andres Freund
Discussion: https://postgr.es/m/CAE-ML+9OKSN71+mHtfMD-L24oDp8dGTfaVjDU6U+j+FNAW5kRQ@mail.gmail.com

src/backend/executor/execExpr.c
src/backend/executor/execExprInterp.c
src/backend/jit/llvm/llvmjit_expr.c

index a608ff67b27eed65971f73fb1be6edaedcdbb552..7e486449eca303bbf70985d350a7f0489a5893fa 100644 (file)
@@ -65,7 +65,7 @@ static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args,
 static void ExecInitExprSlots(ExprState *state, Node *node);
 static void ExecPushExprSlots(ExprState *state, LastAttnumInfo *info);
 static bool get_last_attnums_walker(Node *node, LastAttnumInfo *info);
-static void ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op);
+static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op);
 static void ExecInitWholeRowVar(ExprEvalStep *scratch, Var *variable,
                                                                ExprState *state);
 static void ExecInitSubscriptingRef(ExprEvalStep *scratch,
@@ -2285,8 +2285,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
                scratch.d.fetch.fixed = false;
                scratch.d.fetch.kind = NULL;
                scratch.d.fetch.known_desc = NULL;
-               ExecComputeSlotInfo(state, &scratch);
-               ExprEvalPushStep(state, &scratch);
+               if (ExecComputeSlotInfo(state, &scratch))
+                       ExprEvalPushStep(state, &scratch);
        }
        if (info->last_outer > 0)
        {
@@ -2295,8 +2295,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
                scratch.d.fetch.fixed = false;
                scratch.d.fetch.kind = NULL;
                scratch.d.fetch.known_desc = NULL;
-               ExecComputeSlotInfo(state, &scratch);
-               ExprEvalPushStep(state, &scratch);
+               if (ExecComputeSlotInfo(state, &scratch))
+                       ExprEvalPushStep(state, &scratch);
        }
        if (info->last_scan > 0)
        {
@@ -2305,8 +2305,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
                scratch.d.fetch.fixed = false;
                scratch.d.fetch.kind = NULL;
                scratch.d.fetch.known_desc = NULL;
-               ExecComputeSlotInfo(state, &scratch);
-               ExprEvalPushStep(state, &scratch);
+               if (ExecComputeSlotInfo(state, &scratch))
+                       ExprEvalPushStep(state, &scratch);
        }
 }
 
@@ -2364,14 +2364,21 @@ get_last_attnums_walker(Node *node, LastAttnumInfo *info)
  * The goal is to determine whether a slot is 'fixed', that is, every
  * evaluation of the expression will have the same type of slot, with an
  * equivalent descriptor.
+ *
+ * Returns true if the the deforming step is required, false otherwise.
  */
-static void
+static bool
 ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
 {
        PlanState  *parent = state->parent;
        TupleDesc       desc = NULL;
        const TupleTableSlotOps *tts_ops = NULL;
        bool            isfixed = false;
+       ExprEvalOp      opcode = op->opcode;
+
+       Assert(opcode == EEOP_INNER_FETCHSOME ||
+                  opcode == EEOP_OUTER_FETCHSOME ||
+                  opcode == EEOP_SCAN_FETCHSOME);
 
        if (op->d.fetch.known_desc != NULL)
        {
@@ -2383,7 +2390,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
        {
                isfixed = false;
        }
-       else if (op->opcode == EEOP_INNER_FETCHSOME)
+       else if (opcode == EEOP_INNER_FETCHSOME)
        {
                PlanState  *is = innerPlanState(parent);
 
@@ -2403,7 +2410,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
                        desc = ExecGetResultType(is);
                }
        }
-       else if (op->opcode == EEOP_OUTER_FETCHSOME)
+       else if (opcode == EEOP_OUTER_FETCHSOME)
        {
                PlanState  *os = outerPlanState(parent);
 
@@ -2423,7 +2430,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
                        desc = ExecGetResultType(os);
                }
        }
-       else if (op->opcode == EEOP_SCAN_FETCHSOME)
+       else if (opcode == EEOP_SCAN_FETCHSOME)
        {
                desc = parent->scandesc;
 
@@ -2446,6 +2453,12 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
                op->d.fetch.kind = NULL;
                op->d.fetch.known_desc = NULL;
        }
+
+       /* if the slot is known to always virtual we never need to deform */
+       if (op->d.fetch.fixed && op->d.fetch.kind == &TTSOpsVirtual)
+               return false;
+
+       return true;
 }
 
 /*
@@ -3360,16 +3373,16 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
        scratch.d.fetch.fixed = false;
        scratch.d.fetch.known_desc = ldesc;
        scratch.d.fetch.kind = lops;
-       ExecComputeSlotInfo(state, &scratch);
-       ExprEvalPushStep(state, &scratch);
+       if (ExecComputeSlotInfo(state, &scratch))
+               ExprEvalPushStep(state, &scratch);
 
        scratch.opcode = EEOP_OUTER_FETCHSOME;
        scratch.d.fetch.last_var = maxatt;
        scratch.d.fetch.fixed = false;
        scratch.d.fetch.known_desc = rdesc;
        scratch.d.fetch.kind = rops;
-       ExecComputeSlotInfo(state, &scratch);
-       ExprEvalPushStep(state, &scratch);
+       if (ExecComputeSlotInfo(state, &scratch))
+               ExprEvalPushStep(state, &scratch);
 
        /*
         * Start comparing at the last field (least significant sort key). That's
index e876160a0e71a16e3f02c88b47eb80e35b0a3caf..0cd6f65bc749a581630cef7888f770d15cb4cde7 100644 (file)
@@ -160,6 +160,12 @@ static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, boo
 static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull);
 static Datum ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull);
 static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
+static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
 
 
 /*
@@ -255,11 +261,45 @@ ExecReadyInterpretedExpr(ExprState *state)
                        return;
                }
        }
-       else if (state->steps_len == 2 &&
-                        state->steps[0].opcode == EEOP_CONST)
+       else if (state->steps_len == 2)
        {
-               state->evalfunc_private = (void *) ExecJustConst;
-               return;
+               ExprEvalOp      step0 = state->steps[0].opcode;
+
+               if (step0 == EEOP_CONST)
+               {
+                       state->evalfunc_private = (void *) ExecJustConst;
+                       return;
+               }
+               else if (step0 == EEOP_INNER_VAR)
+               {
+                       state->evalfunc_private = (void *) ExecJustInnerVarVirt;
+                       return;
+               }
+               else if (step0 == EEOP_OUTER_VAR)
+               {
+                       state->evalfunc_private = (void *) ExecJustOuterVarVirt;
+                       return;
+               }
+               else if (step0 == EEOP_SCAN_VAR)
+               {
+                       state->evalfunc_private = (void *) ExecJustScanVarVirt;
+                       return;
+               }
+               else if (step0 == EEOP_ASSIGN_INNER_VAR)
+               {
+                       state->evalfunc_private = (void *) ExecJustAssignInnerVarVirt;
+                       return;
+               }
+               else if (step0 == EEOP_ASSIGN_OUTER_VAR)
+               {
+                       state->evalfunc_private = (void *) ExecJustAssignOuterVarVirt;
+                       return;
+               }
+               else if (step0 == EEOP_ASSIGN_SCAN_VAR)
+               {
+                       state->evalfunc_private = (void *) ExecJustAssignScanVarVirt;
+                       return;
+               }
        }
 
 #if defined(EEO_USE_COMPUTED_GOTO)
@@ -2096,6 +2136,91 @@ ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull)
        return op->d.constval.value;
 }
 
+/* implementation of ExecJust(Inner|Outer|Scan)VarVirt */
+static pg_attribute_always_inline Datum
+ExecJustVarVirtImpl(ExprState *state, TupleTableSlot *slot, bool *isnull)
+{
+       ExprEvalStep *op = &state->steps[0];
+       int                     attnum = op->d.var.attnum;
+
+       /*
+        * As it is guaranteed that a virtual slot is used, there never is a need
+        * to perform tuple deforming (nor would it be possible). Therefore
+        * execExpr.c has not emitted an EEOP_*_FETCHSOME step. Verify, as much as
+        * possible, that that determination was accurate.
+        */
+       Assert(TTS_IS_VIRTUAL(slot));
+       Assert(TTS_FIXED(slot));
+       Assert(attnum >= 0 && attnum < slot->tts_nvalid);
+
+       *isnull = slot->tts_isnull[attnum];
+
+       return slot->tts_values[attnum];
+}
+
+/* Like ExecJustInnerVar, optimized for virtual slots */
+static Datum
+ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+       return ExecJustVarVirtImpl(state, econtext->ecxt_innertuple, isnull);
+}
+
+/* Like ExecJustOuterVar, optimized for virtual slots */
+static Datum
+ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+       return ExecJustVarVirtImpl(state, econtext->ecxt_outertuple, isnull);
+}
+
+/* Like ExecJustScanVar, optimized for virtual slots */
+static Datum
+ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+       return ExecJustVarVirtImpl(state, econtext->ecxt_scantuple, isnull);
+}
+
+/* implementation of ExecJustAssign(Inner|Outer|Scan)VarVirt */
+static pg_attribute_always_inline Datum
+ExecJustAssignVarVirtImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull)
+{
+       ExprEvalStep *op = &state->steps[0];
+       int                     attnum = op->d.assign_var.attnum;
+       int                     resultnum = op->d.assign_var.resultnum;
+       TupleTableSlot *outslot = state->resultslot;
+
+       /* see ExecJustVarVirtImpl for comments */
+
+       Assert(TTS_IS_VIRTUAL(inslot));
+       Assert(TTS_FIXED(inslot));
+       Assert(attnum >= 0 && attnum < inslot->tts_nvalid);
+
+       outslot->tts_values[resultnum] = inslot->tts_values[attnum];
+       outslot->tts_isnull[resultnum] = inslot->tts_isnull[attnum];
+
+       return 0;
+}
+
+/* Like ExecJustAssignInnerVar, optimized for virtual slots */
+static Datum
+ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+       return ExecJustAssignVarVirtImpl(state, econtext->ecxt_innertuple, isnull);
+}
+
+/* Like ExecJustAssignOuterVar, optimized for virtual slots */
+static Datum
+ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+       return ExecJustAssignVarVirtImpl(state, econtext->ecxt_outertuple, isnull);
+}
+
+/* Like ExecJustAssignScanVar, optimized for virtual slots */
+static Datum
+ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
+{
+       return ExecJustAssignVarVirtImpl(state, econtext->ecxt_scantuple, isnull);
+}
+
 #if defined(EEO_USE_COMPUTED_GOTO)
 /*
  * Comparator used when building address->opcode lookup table for
index 30133634c70b246b29d9156ef64c44ad3a98edd2..d09324637b98a3963232cca5bc410aae1710d3cc 100644 (file)
@@ -287,6 +287,9 @@ llvm_compile_expr(ExprState *state)
                                        if (op->d.fetch.fixed)
                                                tts_ops = op->d.fetch.kind;
 
+                                       /* step should not have been generated */
+                                       Assert(tts_ops != &TTSOpsVirtual);
+
                                        if (opcode == EEOP_INNER_FETCHSOME)
                                                v_slot = v_innerslot;
                                        else if (opcode == EEOP_OUTER_FETCHSOME)
@@ -297,9 +300,6 @@ llvm_compile_expr(ExprState *state)
                                        /*
                                         * Check if all required attributes are available, or
                                         * whether deforming is required.
-                                        *
-                                        * TODO: skip nvalid check if slot is fixed and known to
-                                        * be a virtual slot.
                                         */
                                        v_nvalid =
                                                l_load_struct_gep(b, v_slot,