#include "utils/memutils.h"
#include "utils/rel.h"
+#define LOOK_AHEAD_REQUIRED_RECHECKS 3
+#define LOOK_AHEAD_DEFAULT_DISTANCE 5
typedef struct BTSortArrayContext
{
- FmgrInfo flinfo;
+ FmgrInfo *sortproc; /* ORDER proc for element comparisons; supplied by caller, not looked up here */
Oid collation;
bool reverse;
} BTSortArrayContext;
+typedef struct BTScanKeyPreproc
+{
+ ScanKey skey; /* NOTE(review): presumably the scan key being tracked by preprocessing -- no use visible here, confirm */
+ int ikey; /* NOTE(review): presumably skey's scan key offset -- confirm against users of this struct */
+ int arrayidx; /* NOTE(review): presumably an offset into so->arrayKeys[] -- confirm against users of this struct */
+} BTScanKeyPreproc;
+
+static void _bt_setup_array_cmp(IndexScanDesc scan, ScanKey skey, Oid elemtype,
+ FmgrInfo *orderproc, FmgrInfo **sortprocp);
static Datum _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
- StrategyNumber strat,
+ Oid elemtype, StrategyNumber strat,
Datum *elems, int nelems);
-static int _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
- bool reverse,
- Datum *elems, int nelems);
+static int _bt_sort_array_elements(ScanKey skey, FmgrInfo *sortproc,
+ bool reverse, Datum *elems, int nelems);
+static bool _bt_merge_arrays(IndexScanDesc scan, ScanKey skey,
+ FmgrInfo *sortproc, bool reverse,
+ Oid origelemtype, Oid nextelemtype,
+ Datum *elems_orig, int *nelems_orig,
+ Datum *elems_next, int nelems_next);
+static bool _bt_compare_array_scankey_args(IndexScanDesc scan,
+ ScanKey arraysk, ScanKey skey,
+ FmgrInfo *orderproc, BTArrayKeyInfo *array,
+ bool *qual_ok);
+static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan);
+static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
+static inline int32 _bt_compare_array_skey(FmgrInfo *orderproc,
+ Datum tupdatum, bool tupnull,
+ Datum arrdatum, ScanKey cur);
+static int _bt_binsrch_array_skey(FmgrInfo *orderproc,
+ bool cur_elem_trig, ScanDirection dir,
+ Datum tupdatum, bool tupnull,
+ BTArrayKeyInfo *array, ScanKey cur,
+ int32 *set_elem_result);
+static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir);
+static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir);
+static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
+ IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
+ bool readpagetup, int sktrig, bool *scanBehind);
+static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
+ IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
+ int sktrig, bool sktrig_required);
+#ifdef USE_ASSERT_CHECKING
+static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir);
+static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan);
+#endif
static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
ScanKey leftarg, ScanKey rightarg,
+ BTArrayKeyInfo *array, FmgrInfo *orderproc,
bool *result);
static bool _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption);
static void _bt_mark_scankey_required(ScanKey skey);
+static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
+ IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
+ bool advancenonrequired, bool prechecked, bool firstmatch,
+ bool *continuescan, int *ikey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
ScanDirection dir, bool *continuescan);
+static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
+ int tupnatts, TupleDesc tupdesc);
static int _bt_keep_natts(Relation rel, IndexTuple lastleft,
IndexTuple firstright, BTScanInsert itup_key);
*
* If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and
* set up BTArrayKeyInfo info for each one that is an equality-type key.
- * Prepare modified scan keys in so->arrayKeyData, which will hold the current
- * array elements during each primitive indexscan operation. For inequality
- * array keys, it's sufficient to find the extreme element value and replace
- * the whole array with that scalar value.
- *
- * Note: the reason we need so->arrayKeyData, rather than just scribbling
- * on scan->keyData, is that callers are permitted to call btrescan without
- * supplying a new set of scankey data.
+ * Returns modified scan keys as input for further, standard preprocessing.
+ *
+ * Currently we perform two kinds of preprocessing to deal with redundancies.
+ * For inequality array keys, it's sufficient to find the extreme element
+ * value and replace the whole array with that scalar value. This eliminates
+ * all but one array element as redundant. Similarly, we are capable of
+ * "merging together" multiple equality array keys (from two or more input
+ * scan keys) into a single output scan key containing only the intersecting
+ * array elements. This can eliminate many redundant array elements, as well
+ * as eliminating whole array scan keys as redundant. It can also allow us to
+ * detect contradictory quals.
+ *
+ * It is convenient for _bt_preprocess_keys caller to have to deal with no
+ * more than one equality strategy array scan key per index attribute. We'll
+ * always be able to set things up that way when complete opfamilies are used.
+ * Eliminated array scan keys can be recognized as those that have had their
+ * sk_strategy field set to InvalidStrategy here by us. Caller should avoid
+ * including these in the scan's so->keyData[] output array.
+ *
+ * We set the scan key references from the scan's BTArrayKeyInfo info array to
+ * offsets into the temp modified input array returned to caller. Scans that
+ * have array keys should call _bt_preprocess_array_keys_final when standard
+ * preprocessing steps are complete. This will convert the scan key offset
+ * references into references to the scan's so->keyData[] output scan keys.
+ *
+ * Note: the reason we need to return a temp scan key array, rather than just
+ * scribbling on scan->keyData, is that callers are permitted to call btrescan
+ * without supplying a new set of scankey data.
*/
-void
+static ScanKey
_bt_preprocess_array_keys(IndexScanDesc scan)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ Relation rel = scan->indexRelation;
int numberOfKeys = scan->numberOfKeys;
- int16 *indoption = scan->indexRelation->rd_indoption;
+ int16 *indoption = rel->rd_indoption;
int numArrayKeys;
+ int origarrayatt = InvalidAttrNumber,
+ origarraykey = -1;
+ Oid origelemtype = InvalidOid;
ScanKey cur;
- int i;
MemoryContext oldContext;
+ ScanKey arrayKeyData; /* modified copy of scan->keyData */
+
+ Assert(numberOfKeys);
/* Quick check to see if there are any array keys */
numArrayKeys = 0;
- for (i = 0; i < numberOfKeys; i++)
+ for (int i = 0; i < numberOfKeys; i++)
{
cur = &scan->keyData[i];
if (cur->sk_flags & SK_SEARCHARRAY)
/* If any arrays are null as a whole, we can quit right now. */
if (cur->sk_flags & SK_ISNULL)
{
- so->numArrayKeys = -1;
- so->arrayKeyData = NULL;
- return;
+ so->qual_ok = false;
+ return NULL;
}
}
}
/* Quit if nothing to do. */
if (numArrayKeys == 0)
- {
- so->numArrayKeys = 0;
- so->arrayKeyData = NULL;
- return;
- }
+ return NULL;
/*
* Make a scan-lifespan context to hold array-associated data, or reset it
oldContext = MemoryContextSwitchTo(so->arrayContext);
/* Create modifiable copy of scan->keyData in the workspace context */
- so->arrayKeyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
- memcpy(so->arrayKeyData,
- scan->keyData,
- scan->numberOfKeys * sizeof(ScanKeyData));
+ arrayKeyData = (ScanKey) palloc(numberOfKeys * sizeof(ScanKeyData));
+ memcpy(arrayKeyData, scan->keyData, numberOfKeys * sizeof(ScanKeyData));
/* Allocate space for per-array data in the workspace context */
- so->arrayKeys = (BTArrayKeyInfo *) palloc0(numArrayKeys * sizeof(BTArrayKeyInfo));
+ so->arrayKeys = (BTArrayKeyInfo *) palloc(numArrayKeys * sizeof(BTArrayKeyInfo));
+
+ /* Allocate space for ORDER procs used to help _bt_checkkeys */
+ so->orderProcs = (FmgrInfo *) palloc(numberOfKeys * sizeof(FmgrInfo));
/* Now process each array key */
numArrayKeys = 0;
- for (i = 0; i < numberOfKeys; i++)
+ for (int i = 0; i < numberOfKeys; i++)
{
+ FmgrInfo sortproc;
+ FmgrInfo *sortprocp = &sortproc;
+ Oid elemtype;
+ bool reverse;
ArrayType *arrayval;
int16 elmlen;
bool elmbyval;
int num_nonnulls;
int j;
- cur = &so->arrayKeyData[i];
+ cur = &arrayKeyData[i];
if (!(cur->sk_flags & SK_SEARCHARRAY))
continue;
/* If there's no non-nulls, the scan qual is unsatisfiable */
if (num_nonnulls == 0)
{
- numArrayKeys = -1;
+ so->qual_ok = false;
break;
}
+ /*
+ * Determine the nominal datatype of the array elements. We have to
+ * support the convention that sk_subtype == InvalidOid means the
+ * opclass input type; this is a hack to simplify life for
+ * ScanKeyInit().
+ */
+ elemtype = cur->sk_subtype;
+ if (elemtype == InvalidOid)
+ elemtype = rel->rd_opcintype[cur->sk_attno - 1];
+ Assert(elemtype == ARR_ELEMTYPE(arrayval));
+
/*
* If the comparison operator is not equality, then the array qual
* degenerates to a simple comparison against the smallest or largest
case BTLessStrategyNumber:
case BTLessEqualStrategyNumber:
cur->sk_argument =
- _bt_find_extreme_element(scan, cur,
+ _bt_find_extreme_element(scan, cur, elemtype,
BTGreaterStrategyNumber,
elem_values, num_nonnulls);
continue;
case BTGreaterEqualStrategyNumber:
case BTGreaterStrategyNumber:
cur->sk_argument =
- _bt_find_extreme_element(scan, cur,
+ _bt_find_extreme_element(scan, cur, elemtype,
BTLessStrategyNumber,
elem_values, num_nonnulls);
continue;
break;
}
+ /*
+ * We'll need a 3-way ORDER proc to perform binary searches for the
+ * next matching array element. Set that up now.
+ *
+ * Array scan keys with cross-type equality operators will require a
+ * separate same-type ORDER proc for sorting their array. Otherwise,
+ * sortproc just points to the same proc used during binary searches.
+ */
+ _bt_setup_array_cmp(scan, cur, elemtype,
+ &so->orderProcs[i], &sortprocp);
+
/*
* Sort the non-null elements and eliminate any duplicates. We must
* sort in the same ordering used by the index column, so that the
- * successive primitive indexscans produce data in index order.
+ * arrays can be advanced in lockstep with the scan's progress through
+ * the index's key space.
*/
- num_elems = _bt_sort_array_elements(scan, cur,
- (indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0,
+ reverse = (indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0;
+ num_elems = _bt_sort_array_elements(cur, sortprocp, reverse,
elem_values, num_nonnulls);
+ if (origarrayatt == cur->sk_attno)
+ {
+ BTArrayKeyInfo *orig = &so->arrayKeys[origarraykey];
+
+ /*
+ * This array scan key is redundant with a previous equality
+ * operator array scan key. Merge the two arrays together to
+ * eliminate contradictory non-intersecting elements (or try to).
+ *
+ * We merge this next array back into attribute's original array.
+ */
+ Assert(arrayKeyData[orig->scan_key].sk_attno == cur->sk_attno);
+ Assert(arrayKeyData[orig->scan_key].sk_collation ==
+ cur->sk_collation);
+ if (_bt_merge_arrays(scan, cur, sortprocp, reverse,
+ origelemtype, elemtype,
+ orig->elem_values, &orig->num_elems,
+ elem_values, num_elems))
+ {
+ /* Successfully eliminated this array */
+ pfree(elem_values);
+
+ /*
+ * If no intersecting elements remain in the original array,
+ * the scan qual is unsatisfiable
+ */
+ if (orig->num_elems == 0)
+ {
+ so->qual_ok = false;
+ break;
+ }
+
+ /*
+ * Indicate to _bt_preprocess_keys caller that it must ignore
+ * this scan key
+ */
+ cur->sk_strategy = InvalidStrategy;
+ continue;
+ }
+
+ /*
+ * Unable to merge this array with previous array due to a lack of
+ * suitable cross-type opfamily support. Will need to keep both
+ * scan keys/arrays.
+ */
+ }
+ else
+ {
+ /*
+ * This array is the first for current index attribute.
+ *
+ * If it turns out to not be the last array (that is, if the next
+ * array is redundantly applied to this same index attribute),
+ * we'll then treat this array as the attribute's "original" array
+ * when merging.
+ */
+ origarrayatt = cur->sk_attno;
+ origarraykey = numArrayKeys;
+ origelemtype = elemtype;
+ }
+
/*
* And set up the BTArrayKeyInfo data.
+ *
+ * Note: _bt_preprocess_array_keys_final will fix-up each array's
+ * scan_key field later on, after so->keyData[] has been finalized.
*/
so->arrayKeys[numArrayKeys].scan_key = i;
so->arrayKeys[numArrayKeys].num_elems = num_elems;
so->numArrayKeys = numArrayKeys;
MemoryContextSwitchTo(oldContext);
+
+ return arrayKeyData;
+}
+
+/*
+ * _bt_preprocess_array_keys_final() -- fix up array scan key references
+ *
+ * When _bt_preprocess_array_keys performed initial array preprocessing, it
+ * set each array's array->scan_key to the offset of the array's scan key in
+ * its temp modified input array (offsets that also work as references into
+ * the original scan->keyData[] array).
+ * This function handles translation of the scan key references from the
+ * BTArrayKeyInfo info array, from input scan key references (to the keys in
+ * scan->keyData[]), into output references (to the keys in so->keyData[]).
+ * Caller's keyDataMap[] array tells us how to perform this remapping.
+ *
+ * Also finalizes so->orderProcs[] for the scan. Arrays already have an ORDER
+ * proc, which might need to be repositioned to its so->keyData[]-wise offset
+ * (very much like the remapping that we apply to array->scan_key references).
+ * Non-array equality strategy scan keys (that survived preprocessing) don't
+ * yet have an so->orderProcs[] entry, so we set one for them here.
+ *
+ * Also converts single-element array scan keys into equivalent non-array
+ * equality scan keys, which decrements so->numArrayKeys. It's possible that
+ * this will leave this new btrescan without any arrays at all. This isn't
+ * necessary for correctness; it's just an optimization. Non-array equality
+ * scan keys are slightly faster than equivalent array scan keys at runtime.
+ *
+ * keyDataMap[] is indexed by output scan key offset (an offset into
+ * so->keyData[]), and yields the offset of the corresponding input scan key.
+ *
+ * Raises an ERROR when a parallel scan is left with more than INDEX_MAX_KEYS
+ * arrays once all of the above processing is complete.
+ */
+static void
+_bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ Relation rel = scan->indexRelation;
+ int arrayidx = 0; /* next so->arrayKeys[] entry that still needs to be matched */
+ int last_equal_output_ikey PG_USED_FOR_ASSERTS_ONLY = -1;
+
+ Assert(so->qual_ok);
+ Assert(so->numArrayKeys);
+
+ for (int output_ikey = 0; output_ikey < so->numberOfKeys; output_ikey++)
+ {
+ ScanKey outkey = so->keyData + output_ikey;
+ int input_ikey;
+ bool found PG_USED_FOR_ASSERTS_ONLY = false;
+
+ Assert(outkey->sk_strategy != InvalidStrategy);
+
+ if (outkey->sk_strategy != BTEqualStrategyNumber)
+ continue;
+
+ input_ikey = keyDataMap[output_ikey];
+
+ Assert(last_equal_output_ikey < output_ikey);
+ Assert(last_equal_output_ikey < input_ikey);
+ last_equal_output_ikey = output_ikey;
+
+ /*
+ * We're lazy about looking up ORDER procs for non-array keys, since
+ * not all input keys become output keys. Take care of it now.
+ */
+ if (!(outkey->sk_flags & SK_SEARCHARRAY))
+ {
+ Oid elemtype;
+
+ /* No need for an ORDER proc given an IS NULL scan key */
+ if (outkey->sk_flags & SK_SEARCHNULL)
+ continue;
+
+ /*
+ * A non-required scan key doesn't need an ORDER proc, either
+ * (unless it's associated with an array, which this one isn't)
+ */
+ if (!(outkey->sk_flags & SK_BT_REQFWD))
+ continue;
+
+ elemtype = outkey->sk_subtype;
+ if (elemtype == InvalidOid)
+ elemtype = rel->rd_opcintype[outkey->sk_attno - 1];
+
+ _bt_setup_array_cmp(scan, outkey, elemtype,
+ &so->orderProcs[output_ikey], NULL); /* NULL sortprocp: no array to sort */
+ continue;
+ }
+
+ /*
+ * Reorder existing array scan key so->orderProcs[] entries.
+ *
+ * Doing this in-place is safe because preprocessing is required to
+ * output all equality strategy scan keys in original input order
+ * (among each group of entries against the same index attribute).
+ * This is also the order that the arrays themselves appear in.
+ */
+ so->orderProcs[output_ikey] = so->orderProcs[input_ikey];
+
+ /* Fix-up array->scan_key references for arrays */
+ for (; arrayidx < so->numArrayKeys; arrayidx++)
+ {
+ BTArrayKeyInfo *array = &so->arrayKeys[arrayidx];
+
+ Assert(array->num_elems > 0);
+
+ if (array->scan_key == input_ikey)
+ {
+ /* found it */
+ array->scan_key = output_ikey;
+ found = true;
+
+ /*
+ * Transform array scan keys that have exactly 1 element
+ * remaining (following all prior preprocessing) into
+ * equivalent non-array scan keys.
+ */
+ if (array->num_elems == 1)
+ {
+ outkey->sk_flags &= ~SK_SEARCHARRAY;
+ outkey->sk_argument = array->elem_values[0];
+ so->numArrayKeys--;
+
+ /* If we're out of array keys, we can quit right away */
+ if (so->numArrayKeys == 0)
+ return;
+
+ /* Shift other arrays forward */
+ memmove(array, array + 1,
+ sizeof(BTArrayKeyInfo) *
+ (so->numArrayKeys - arrayidx));
+
+ /*
+ * Don't increment arrayidx (there was an entry that was
+ * just shifted forward to the offset at arrayidx, which
+ * will still need to be matched)
+ */
+ }
+ else
+ {
+ /* Match found, so done with this array */
+ arrayidx++;
+ }
+
+ break;
+ }
+ }
+
+ Assert(found);
+ }
+
+ /*
+ * Parallel index scans require space in shared memory to store the
+ * current array elements (for arrays kept by preprocessing) to schedule
+ * the next primitive index scan. The underlying structure is protected
+ * using a spinlock, so defensively limit its size. In practice this can
+ * only affect parallel scans that use an incomplete opfamily.
+ */
+ if (scan->parallel_scan && so->numArrayKeys > INDEX_MAX_KEYS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg_internal("number of array scan keys left by preprocessing (%d) exceeds the maximum allowed by parallel btree index scans (%d)",
+ so->numArrayKeys, INDEX_MAX_KEYS)));
+}
+
+/*
+ * _bt_setup_array_cmp() -- Set up array comparison functions
+ *
+ * Sets ORDER proc in caller's orderproc argument, which is used during binary
+ * searches of arrays during the index scan. Also sets a same-type ORDER proc
+ * in caller's *sortprocp argument, which is used when sorting the array.
+ *
+ * Preprocessing calls here with all equality strategy scan keys (when scan
+ * uses equality array keys), including those not associated with any array.
+ * See _bt_advance_array_keys for an explanation of why it'll need to treat
+ * simple scalar equality scan keys as degenerate single element arrays.
+ *
+ * Caller should pass an orderproc pointing to space that'll store the ORDER
+ * proc for the scan, and a *sortprocp pointing to its own separate space.
+ * When calling here for a non-array scan key, sortprocp arg should be NULL.
+ *
+ * In the common case where we don't need to deal with cross-type operators,
+ * only one ORDER proc is actually required by caller. We'll set *sortprocp
+ * to point to the same memory that caller's orderproc continues to point to.
+ * Otherwise, *sortprocp will continue to point to caller's own space. Either
+ * way, *sortprocp will point to a same-type ORDER proc (since that's the only
+ * safe way to sort/deduplicate the array associated with caller's scan key).
+ *
+ * elemtype is the scan key's argument/element type; it is compared against
+ * the attribute's opclass input type to decide whether the cross-type path
+ * is needed.
+ *
+ * Raises an ERROR (via elog) when the opfamily lacks a BTORDER_PROC support
+ * function that we need: either the cross-type (opcintype, elemtype) proc,
+ * or the same-type (elemtype, elemtype) proc needed for sorting.
+ */
+static void
+_bt_setup_array_cmp(IndexScanDesc scan, ScanKey skey, Oid elemtype,
+ FmgrInfo *orderproc, FmgrInfo **sortprocp)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ Relation rel = scan->indexRelation;
+ RegProcedure cmp_proc;
+ Oid opcintype = rel->rd_opcintype[skey->sk_attno - 1];
+
+ Assert(skey->sk_strategy == BTEqualStrategyNumber);
+ Assert(OidIsValid(elemtype));
+
+ /*
+ * If scankey operator is not a cross-type comparison, we can use the
+ * cached comparison function; otherwise gotta look it up in the catalogs
+ */
+ if (elemtype == opcintype)
+ {
+ /* Set same-type ORDER procs for caller */
+ *orderproc = *index_getprocinfo(rel, skey->sk_attno, BTORDER_PROC); /* FmgrInfo copied by value */
+ if (sortprocp)
+ *sortprocp = orderproc;
+
+ return;
+ }
+
+ /*
+ * Look up the appropriate cross-type comparison function in the opfamily.
+ *
+ * Use the opclass input type as the left hand arg type, and the array
+ * element type as the right hand arg type (since binary searches use an
+ * index tuple's attribute value to search for a matching array element).
+ *
+ * Note: it's possible that this would fail, if the opfamily is
+ * incomplete, but only in cases where it's quite likely that _bt_first
+ * would fail in just the same way (had we not failed before it could).
+ */
+ cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
+ opcintype, elemtype, BTORDER_PROC);
+ if (!RegProcedureIsValid(cmp_proc))
+ elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
+ BTORDER_PROC, opcintype, elemtype, skey->sk_attno,
+ RelationGetRelationName(rel));
+
+ /* Set cross-type ORDER proc for caller */
+ fmgr_info_cxt(cmp_proc, orderproc, so->arrayContext);
+
+ /* Done if caller doesn't actually have an array they'll need to sort */
+ if (!sortprocp)
+ return;
+
+ /*
+ * Look up the appropriate same-type comparison function in the opfamily.
+ *
+ * Note: it's possible that this would fail, if the opfamily is
+ * incomplete, but it seems quite unlikely that an opfamily would omit
+ * non-cross-type comparison procs for any datatype that it supports at
+ * all.
+ */
+ cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
+ elemtype, elemtype, BTORDER_PROC);
+ if (!RegProcedureIsValid(cmp_proc))
+ elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
+ BTORDER_PROC, elemtype, elemtype,
+ skey->sk_attno, RelationGetRelationName(rel));
+
+ /* Set same-type ORDER proc for caller */
+ fmgr_info_cxt(cmp_proc, *sortprocp, so->arrayContext); /* written into caller's own space, left untouched above */
}
/*
* least element, or BTGreaterStrategyNumber to get the greatest.
*/
static Datum
-_bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
+_bt_find_extreme_element(IndexScanDesc scan, ScanKey skey, Oid elemtype,
StrategyNumber strat,
Datum *elems, int nelems)
{
Relation rel = scan->indexRelation;
- Oid elemtype,
- cmp_op;
+ Oid cmp_op;
RegProcedure cmp_proc;
FmgrInfo flinfo;
Datum result;
int i;
- /*
- * Determine the nominal datatype of the array elements. We have to
- * support the convention that sk_subtype == InvalidOid means the opclass
- * input type; this is a hack to simplify life for ScanKeyInit().
- */
- elemtype = skey->sk_subtype;
- if (elemtype == InvalidOid)
- elemtype = rel->rd_opcintype[skey->sk_attno - 1];
-
/*
* Look up the appropriate comparison operator in the opfamily.
*
* non-cross-type comparison operators for any datatype that it supports
* at all.
*/
+ Assert(skey->sk_strategy != BTEqualStrategyNumber);
+ Assert(OidIsValid(elemtype));
cmp_op = get_opfamily_member(rel->rd_opfamily[skey->sk_attno - 1],
elemtype,
elemtype,
* The array elements are sorted in-place, and the new number of elements
* after duplicate removal is returned.
*
- * scan and skey identify the index column, whose opfamily determines the
- * comparison semantics. If reverse is true, we sort in descending order.
+ * skey identifies the index column whose opfamily determines the comparison
+ * semantics, and sortproc is a corresponding ORDER proc. If reverse is true,
+ * we sort in descending order.
*/
static int
-_bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
- bool reverse,
+_bt_sort_array_elements(ScanKey skey, FmgrInfo *sortproc, bool reverse,
Datum *elems, int nelems)
{
- Relation rel = scan->indexRelation;
- Oid elemtype;
- RegProcedure cmp_proc;
BTSortArrayContext cxt;
if (nelems <= 1)
return nelems; /* no work to do */
- /*
- * Determine the nominal datatype of the array elements. We have to
- * support the convention that sk_subtype == InvalidOid means the opclass
- * input type; this is a hack to simplify life for ScanKeyInit().
- */
- elemtype = skey->sk_subtype;
- if (elemtype == InvalidOid)
- elemtype = rel->rd_opcintype[skey->sk_attno - 1];
-
- /*
- * Look up the appropriate comparison function in the opfamily.
- *
- * Note: it's possible that this would fail, if the opfamily is
- * incomplete, but it seems quite unlikely that an opfamily would omit
- * non-cross-type support functions for any datatype that it supports at
- * all.
- */
- cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
- elemtype,
- elemtype,
- BTORDER_PROC);
- if (!RegProcedureIsValid(cmp_proc))
- elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
- BTORDER_PROC, elemtype, elemtype,
- rel->rd_opfamily[skey->sk_attno - 1]);
-
/* Sort the array elements */
- fmgr_info(cmp_proc, &cxt.flinfo);
+ cxt.sortproc = sortproc;
cxt.collation = skey->sk_collation;
cxt.reverse = reverse;
qsort_arg(elems, nelems, sizeof(Datum),
}
/*
- * qsort_arg comparator for sorting array elements
- */
-static int
-_bt_compare_array_elements(const void *a, const void *b, void *arg)
-{
- Datum da = *((const Datum *) a);
- Datum db = *((const Datum *) b);
- BTSortArrayContext *cxt = (BTSortArrayContext *) arg;
- int32 compare;
-
- compare = DatumGetInt32(FunctionCall2Coll(&cxt->flinfo,
- cxt->collation,
- da, db));
- if (cxt->reverse)
- INVERT_COMPARE_RESULT(compare);
- return compare;
-}
-
-/*
- * _bt_start_array_keys() -- Initialize array keys at start of a scan
+ * _bt_merge_arrays() -- merge next array's elements into an original array
*
- * Set up the cur_elem counters and fill in the first sk_argument value for
- * each array scankey. We can't do this until we know the scan direction.
+ * Called when preprocessing encounters a pair of array equality scan keys,
+ * both against the same index attribute (during initial array preprocessing).
+ * Merging reorganizes caller's original array (the left hand arg) in-place,
+ * without ever copying elements from one array into the other. (Mixing the
+ * elements together like this would be wrong, since they don't necessarily
+ * use the same underlying element type, despite all the other similarities.)
+ *
+ * Both arrays must have already been sorted and deduplicated by calling
+ * _bt_sort_array_elements. sortproc is the same-type ORDER proc that was
+ * just used to sort and deduplicate caller's "next" array. We'll usually be
+ * able to reuse that ORDER proc to merge the arrays together now. If not,
+ * then we'll perform a separate ORDER proc lookup.
+ *
+ * If the opfamily doesn't supply a complete set of cross-type ORDER procs we
+ * may not be able to determine which elements are contradictory. If we have
+ * the required ORDER proc then we return true (and validly set *nelems_orig),
+ * guaranteeing that at least the next array can be considered redundant. We
+ * return false if the required comparisons cannot be made (caller must
+ * keep both arrays when this happens).
*/
-void
-_bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
+static bool
+_bt_merge_arrays(IndexScanDesc scan, ScanKey skey, FmgrInfo *sortproc,
+ bool reverse, Oid origelemtype, Oid nextelemtype,
+ Datum *elems_orig, int *nelems_orig,
+ Datum *elems_next, int nelems_next)
{
+ Relation rel = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
- int i;
+ BTSortArrayContext cxt;
+ int nelems_orig_start = *nelems_orig,
+ nelems_orig_merged = 0; /* number of elements kept in elems_orig so far */
+ FmgrInfo *mergeproc = sortproc;
+ FmgrInfo crosstypeproc; /* only set up when the element types differ */
- for (i = 0; i < so->numArrayKeys; i++)
+ Assert(skey->sk_strategy == BTEqualStrategyNumber);
+ Assert(OidIsValid(origelemtype) && OidIsValid(nextelemtype));
+
+ if (origelemtype != nextelemtype)
{
- BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
- ScanKey skey = &so->arrayKeyData[curArrayKey->scan_key];
+ RegProcedure cmp_proc;
- Assert(curArrayKey->num_elems > 0);
- if (ScanDirectionIsBackward(dir))
- curArrayKey->cur_elem = curArrayKey->num_elems - 1;
- else
- curArrayKey->cur_elem = 0;
- skey->sk_argument = curArrayKey->elem_values[curArrayKey->cur_elem];
+ /*
+ * Cross-array-element-type merging is required, so can't just reuse
+ * sortproc when merging
+ *
+ * The proc is looked up with the original array's element type as
+ * its left hand arg type and the next array's element type as its
+ * right hand arg type, matching the argument order used by the
+ * comparisons in the merge loop below.
+ */
+ cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
+ origelemtype, nextelemtype, BTORDER_PROC);
+ if (!RegProcedureIsValid(cmp_proc))
+ {
+ /* Can't make the required comparisons */
+ return false;
+ }
+
+ /* We have all we need to determine redundancy/contradictoriness */
+ mergeproc = &crosstypeproc;
+ fmgr_info_cxt(cmp_proc, mergeproc, so->arrayContext);
+ }
+
+ cxt.sortproc = mergeproc;
+ cxt.collation = skey->sk_collation;
+ cxt.reverse = reverse;
+
+ for (int i = 0, j = 0; i < nelems_orig_start && j < nelems_next;)
+ {
+ Datum *oelem = elems_orig + i,
+ *nelem = elems_next + j;
+ int res = _bt_compare_array_elements(oelem, nelem, &cxt);
+
+ if (res == 0)
+ {
+ elems_orig[nelems_orig_merged++] = *oelem; /* keep match, compacting elems_orig in place */
+ i++;
+ j++;
+ }
+ else if (res < 0)
+ i++; /* orig element isn't copied forward, so it is dropped */
+ else /* res > 0 */
+ j++;
}
- so->arraysStarted = true;
+ *nelems_orig = nelems_orig_merged; /* may be 0; caller checks for this */
+
+ return true;
}
/*
- * _bt_advance_array_keys() -- Advance to next set of array elements
+ * Compare an array scan key to a scalar scan key, eliminating contradictory
+ * array elements such that the scalar scan key becomes redundant.
*
- * Returns true if there is another set of values to consider, false if not.
- * On true result, the scankeys are initialized with the next set of values.
+ * Array elements can be eliminated as contradictory when excluded by some
+ * other operator on the same attribute. For example, with an index scan qual
+ * "WHERE a IN (1, 2, 3) AND a < 2", all array elements except the value "1"
+ * are eliminated, and the < scan key is eliminated as redundant. Cases where
+ * every array element is eliminated by a redundant scalar scan key have an
+ * unsatisfiable qual, which we handle by setting *qual_ok=false for caller.
+ *
+ * If the opfamily doesn't supply a complete set of cross-type ORDER procs we
+ * may not be able to determine which elements are contradictory. If we have
+ * the required ORDER proc then we return true (and validly set *qual_ok),
+ * guaranteeing that at least the scalar scan key can be considered redundant.
+ * We return false if the comparison could not be made (caller must keep both
+ * scan keys when this happens).
*/
-bool
-_bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
+static bool
+_bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey,
+ FmgrInfo *orderproc, BTArrayKeyInfo *array,
+ bool *qual_ok)
{
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- bool found = false;
- int i;
+ Relation rel = scan->indexRelation;
+ Oid opcintype = rel->rd_opcintype[arraysk->sk_attno - 1];
+ int cmpresult = 0,
+ cmpexact = 0,
+ matchelem,
+ new_nelems = 0;
+ FmgrInfo crosstypeproc;
+ FmgrInfo *orderprocp = orderproc;
+
+ Assert(arraysk->sk_attno == skey->sk_attno);
+ Assert(array->num_elems > 0);
+ Assert(!(arraysk->sk_flags & (SK_ISNULL | SK_ROW_HEADER | SK_ROW_MEMBER)));
+ Assert((arraysk->sk_flags & SK_SEARCHARRAY) &&
+ arraysk->sk_strategy == BTEqualStrategyNumber);
+ Assert(!(skey->sk_flags & (SK_ISNULL | SK_ROW_HEADER | SK_ROW_MEMBER)));
+ Assert(!(skey->sk_flags & SK_SEARCHARRAY) ||
+ skey->sk_strategy != BTEqualStrategyNumber);
/*
- * We must advance the last array key most quickly, since it will
- * correspond to the lowest-order index column among the available
- * qualifications. This is necessary to ensure correct ordering of output
- * when there are multiple array keys.
+ * _bt_binsrch_array_skey searches an array for the entry best matching a
+ * datum of opclass input type for the index's attribute (on-disk type).
+ * We can reuse the array's ORDER proc whenever the non-array scan key's
+ * type is a match for the corresponding attribute's input opclass type.
+ * Otherwise, we have to do another ORDER proc lookup so that our call to
+ * _bt_binsrch_array_skey applies the correct comparator.
+ *
+ * Note: we have to support the convention that sk_subtype == InvalidOid
+ * means the opclass input type; this is a hack to simplify life for
+ * ScanKeyInit().
*/
- for (i = so->numArrayKeys - 1; i >= 0; i--)
+ if (skey->sk_subtype != opcintype && skey->sk_subtype != InvalidOid)
{
- BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
- ScanKey skey = &so->arrayKeyData[curArrayKey->scan_key];
- int cur_elem = curArrayKey->cur_elem;
- int num_elems = curArrayKey->num_elems;
+ RegProcedure cmp_proc;
+ Oid arraysk_elemtype;
- if (ScanDirectionIsBackward(dir))
+ /*
+ * Need an ORDER proc lookup to detect redundancy/contradictoriness
+ * with this pair of scankeys.
+ *
+ * Scalar scan key's argument will be passed to _bt_compare_array_skey
+ * as its tupdatum/lefthand argument (rhs arg is for array elements).
+ */
+ arraysk_elemtype = arraysk->sk_subtype;
+ if (arraysk_elemtype == InvalidOid)
+ arraysk_elemtype = rel->rd_opcintype[arraysk->sk_attno - 1];
+ cmp_proc = get_opfamily_proc(rel->rd_opfamily[arraysk->sk_attno - 1],
+ skey->sk_subtype, arraysk_elemtype,
+ BTORDER_PROC);
+ if (!RegProcedureIsValid(cmp_proc))
{
- if (--cur_elem < 0)
+ /* Can't make the comparison */
+ *qual_ok = false; /* suppress compiler warnings */
+ return false;
+ }
+
+ /* We have all we need to determine redundancy/contradictoriness */
+ orderprocp = &crosstypeproc;
+ fmgr_info(cmp_proc, orderprocp);
+ }
+
+ matchelem = _bt_binsrch_array_skey(orderprocp, false,
+ NoMovementScanDirection,
+ skey->sk_argument, false, array,
+ arraysk, &cmpresult);
+
+ switch (skey->sk_strategy)
+ {
+ case BTLessStrategyNumber:
+ cmpexact = 1; /* exclude exact match, if any */
+ /* FALL THRU */
+ case BTLessEqualStrategyNumber:
+ if (cmpresult >= cmpexact)
+ matchelem++;
+ /* Resize, keeping elements from the start of the array */
+ new_nelems = matchelem;
+ break;
+ case BTEqualStrategyNumber:
+ if (cmpresult != 0)
{
- cur_elem = num_elems - 1;
- found = false; /* need to advance next array key */
+ /* qual is unsatisfiable */
+ new_nelems = 0;
}
else
- found = true;
+ {
+ /* Shift matching element to the start of the array, resize */
+ array->elem_values[0] = array->elem_values[matchelem];
+ new_nelems = 1;
+ }
+ break;
+ case BTGreaterEqualStrategyNumber:
+ cmpexact = 1; /* include exact match, if any */
+ /* FALL THRU */
+ case BTGreaterStrategyNumber:
+ if (cmpresult >= cmpexact)
+ matchelem++;
+ /* Shift matching elements to the start of the array, resize */
+ new_nelems = array->num_elems - matchelem;
+ memmove(array->elem_values, array->elem_values + matchelem,
+ sizeof(Datum) * new_nelems);
+ break;
+ default:
+ elog(ERROR, "unrecognized StrategyNumber: %d",
+ (int) skey->sk_strategy);
+ break;
+ }
+
+ Assert(new_nelems >= 0);
+ Assert(new_nelems <= array->num_elems);
+
+ array->num_elems = new_nelems;
+ *qual_ok = new_nelems > 0;
+
+ return true;
+}
+
+/*
+ * qsort_arg comparator for sorting array elements
+ *
+ * a and b point to the two Datums being compared. arg is the caller's
+ * BTSortArrayContext: its sortproc is invoked using its collation, and the
+ * comparison result is inverted when its reverse flag is set.
+ */
+static int
+_bt_compare_array_elements(const void *a, const void *b, void *arg)
+{
+ Datum da = *((const Datum *) a);
+ Datum db = *((const Datum *) b);
+ BTSortArrayContext *cxt = (BTSortArrayContext *) arg;
+ int32 compare;
+
+ compare = DatumGetInt32(FunctionCall2Coll(cxt->sortproc,
+ cxt->collation,
+ da, db));
+ if (cxt->reverse)
+ INVERT_COMPARE_RESULT(compare);
+ return compare;
+}
+
+/*
+ * _bt_compare_array_skey() -- apply array comparison function
+ *
+ * Compares caller's tuple attribute value to a scan key/array element.
+ * Helper function used during binary searches of SK_SEARCHARRAY arrays.
+ *
+ * This routine returns:
+ * <0 if tupdatum < arrdatum;
+ * 0 if tupdatum == arrdatum;
+ * >0 if tupdatum > arrdatum.
+ *
+ * This is essentially the same interface as _bt_compare: both functions
+ * compare the value that they're searching for to a binary search pivot.
+ * However, unlike _bt_compare, this function's "tuple argument" comes first,
+ * while its "array/scankey argument" comes second.
+ *
+ * orderproc is only called when neither tupdatum nor the scan key is NULL
+ * (it is passed cur's sk_collation). The NULL cases are decided using only
+ * cur's SK_ISNULL and SK_BT_NULLS_FIRST flags. The SK_BT_DESC sign flip is
+ * applied to orderproc's result only, never to the NULL-case results.
+ */
+static inline int32
+_bt_compare_array_skey(FmgrInfo *orderproc,
+ Datum tupdatum, bool tupnull,
+ Datum arrdatum, ScanKey cur)
+{
+ int32 result = 0;
+
+ Assert(cur->sk_strategy == BTEqualStrategyNumber);
+
+ if (tupnull) /* NULL tupdatum */
+ {
+ if (cur->sk_flags & SK_ISNULL)
+ result = 0; /* NULL "=" NULL */
+ else if (cur->sk_flags & SK_BT_NULLS_FIRST)
+ result = -1; /* NULL "<" NOT_NULL */
+ else
+ result = 1; /* NULL ">" NOT_NULL */
+ }
+ else if (cur->sk_flags & SK_ISNULL) /* NOT_NULL tupdatum, NULL arrdatum */
+ {
+ if (cur->sk_flags & SK_BT_NULLS_FIRST)
+ result = 1; /* NOT_NULL ">" NULL */
+ else
+ result = -1; /* NOT_NULL "<" NULL */
+ }
+ else
+ {
+ /*
+ * Like _bt_compare, we need to be careful of cross-type comparisons,
+ * so the left value has to be the value that came from an index tuple
+ */
+ result = DatumGetInt32(FunctionCall2Coll(orderproc, cur->sk_collation,
+ tupdatum, arrdatum));
+
+ /*
+ * We flip the sign by following the obvious rule: flip whenever the
+ * column is a DESC column.
+ *
+ * _bt_compare does it the wrong way around (flip when *ASC*) in order
+ * to compensate for passing its orderproc arguments backwards. We
+ * don't need to play these games because we find it natural to pass
+ * tupdatum as the left value (and arrdatum as the right value).
+ */
+ if (cur->sk_flags & SK_BT_DESC)
+ INVERT_COMPARE_RESULT(result);
+ }
+
+ return result;
+}
+
+/*
+ * _bt_binsrch_array_skey() -- Binary search for next matching array key
+ *
+ * Returns an index to the first array element >= caller's tupdatum argument.
+ * This convention is more natural for forwards scan callers, but that can't
+ * really matter to backwards scan callers. Both callers require handling for
+ * the case where the match we return is < tupdatum, and symmetric handling
+ * for the case where our best match is > tupdatum.
+ *
+ * Also sets *set_elem_result to the result _bt_compare_array_skey returned
+ * when we used it to compare the matching array element to tupdatum/tupnull.
+ *
+ * cur_elem_trig indicates if array advancement was triggered by this array's
+ * scan key, and that the array is for a required scan key. We can apply this
+ * information to find the next matching array element in the current scan
+ * direction using far fewer comparisons (fewer on average, compared to naive
+ * binary search). This scheme takes advantage of an important property of
+ * required arrays: required arrays always advance in lockstep with the index
+ * scan's progress through the index's key space.
+ *
+ * When cur_elem_trig is set and no candidate array element remains in the
+ * current scan direction, we return the index of the array's final element
+ * for that direction (the last element for forward scans, element 0 for
+ * backward scans), and set *set_elem_result to 1 (forward) or -1 (backward)
+ * so that caller performs "beyond end" array advancement.
+ */
+static int
+_bt_binsrch_array_skey(FmgrInfo *orderproc,
+ bool cur_elem_trig, ScanDirection dir,
+ Datum tupdatum, bool tupnull,
+ BTArrayKeyInfo *array, ScanKey cur,
+ int32 *set_elem_result)
+{
+ int low_elem = 0,
+ mid_elem = -1,
+ high_elem = array->num_elems - 1,
+ result = 0;
+ Datum arrdatum;
+
+ Assert(cur->sk_flags & SK_SEARCHARRAY);
+ Assert(cur->sk_strategy == BTEqualStrategyNumber);
+
+ if (cur_elem_trig)
+ {
+ Assert(!ScanDirectionIsNoMovement(dir));
+ Assert(cur->sk_flags & SK_BT_REQFWD);
+
+ /*
+ * When the scan key that triggered array advancement is a required
+ * array scan key, it is now certain that the current array element
+ * (plus all prior elements relative to the current scan direction)
+ * cannot possibly be at or ahead of the corresponding tuple value.
+ * (_bt_checkkeys must have called _bt_tuple_before_array_skeys, which
+ * makes sure this is true as a condition of advancing the arrays.)
+ *
+ * This makes it safe to exclude array elements up to and including
+ * the former-current array element from our search.
+ *
+ * Separately, when array advancement was triggered by a required scan
+ * key, the array element immediately after the former-current element
+ * is often either an exact tupdatum match, or a "close by" near-match
+ * (a near-match tupdatum is one whose key space falls _between_ the
+ * former-current and new-current array elements). We'll detect both
+ * cases via an optimistic comparison of the new search lower bound
+ * (or new search upper bound in the case of backwards scans).
+ */
+ if (ScanDirectionIsForward(dir))
+ {
+ low_elem = array->cur_elem + 1; /* old cur_elem exhausted */
+
+ /* Compare prospective new cur_elem (also the new lower bound) */
+ if (high_elem >= low_elem)
+ {
+ arrdatum = array->elem_values[low_elem];
+ result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
+ arrdatum, cur);
+
+ if (result <= 0)
+ {
+ /* Optimistic comparison optimization worked out */
+ *set_elem_result = result;
+ return low_elem;
+ }
+ mid_elem = low_elem;
+ low_elem++; /* this cur_elem exhausted, too */
+ }
+
+ if (high_elem < low_elem)
+ {
+ /* Caller needs to perform "beyond end" array advancement */
+ *set_elem_result = 1;
+ return high_elem;
+ }
}
else
{
- if (++cur_elem >= num_elems)
+ high_elem = array->cur_elem - 1; /* old cur_elem exhausted */
+
+ /* Compare prospective new cur_elem (also the new upper bound) */
+ if (high_elem >= low_elem)
+ {
+ arrdatum = array->elem_values[high_elem];
+ result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
+ arrdatum, cur);
+
+ if (result >= 0)
+ {
+ /* Optimistic comparison optimization worked out */
+ *set_elem_result = result;
+ return high_elem;
+ }
+ mid_elem = high_elem;
+ high_elem--; /* this cur_elem exhausted, too */
+ }
+
+ if (high_elem < low_elem)
{
- cur_elem = 0;
- found = false; /* need to advance next array key */
+ /* Caller needs to perform "beyond end" array advancement */
+ *set_elem_result = -1;
+ return low_elem;
}
- else
- found = true;
}
+ }
- curArrayKey->cur_elem = cur_elem;
- skey->sk_argument = curArrayKey->elem_values[cur_elem];
- if (found)
+ while (high_elem > low_elem)
+ {
+ mid_elem = low_elem + ((high_elem - low_elem) / 2);
+ arrdatum = array->elem_values[mid_elem];
+
+ result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
+ arrdatum, cur);
+
+ if (result == 0)
+ {
+ /*
+ * It's safe to quit as soon as we see an equal array element.
+ * This often saves an extra comparison or two...
+ */
+ low_elem = mid_elem;
break;
- }
+ }
- /* advance parallel scan */
- if (scan->parallel_scan != NULL)
- _bt_parallel_advance_array_keys(scan);
+ if (result > 0)
+ low_elem = mid_elem + 1;
+ else
+ high_elem = mid_elem;
+ }
/*
- * When no new array keys were found, the scan is "past the end" of the
- * array keys. _bt_start_array_keys can still "restart" the array keys if
- * a rescan is required.
+ * ...but our caller also cares about how its searched-for tuple datum
+ * compares to the low_elem datum. Must always set *set_elem_result with
+ * the result of that comparison specifically.
+ *
+ * mid_elem is always the index of the element that was most recently
+ * compared (or -1 when no comparison has taken place yet). It follows
+ * that result is already the low_elem comparison's result whenever
+ * low_elem == mid_elem, so the extra comparison is only needed otherwise.
*/
- if (!found)
- so->arraysStarted = false;
+ if (low_elem != mid_elem)
+ result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
+ array->elem_values[low_elem], cur);
+
+ *set_elem_result = result;
- return found;
+ return low_elem;
}
/*
- * _bt_mark_array_keys() -- Handle array keys during btmarkpos
+ * _bt_start_array_keys() -- Initialize array keys at start of a scan
*
- * Save the current state of the array keys as the "mark" position.
+ * Set up the cur_elem counters and fill in the first sk_argument value for
+ * each array scankey.
+ *
+ * Each array is positioned at its first element for the given scan direction
+ * (that is, its last element when dir is backward, element 0 otherwise).
+ * Also resets so->scanBehind.
*/
void
-_bt_mark_array_keys(IndexScanDesc scan)
+_bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
int i;
+ Assert(so->numArrayKeys);
+ Assert(so->qual_ok);
+
for (i = 0; i < so->numArrayKeys; i++)
{
BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
+ ScanKey skey = &so->keyData[curArrayKey->scan_key];
+
+ Assert(curArrayKey->num_elems > 0);
+ Assert(skey->sk_flags & SK_SEARCHARRAY);
- curArrayKey->mark_elem = curArrayKey->cur_elem;
+ if (ScanDirectionIsBackward(dir))
+ curArrayKey->cur_elem = curArrayKey->num_elems - 1;
+ else
+ curArrayKey->cur_elem = 0;
+ skey->sk_argument = curArrayKey->elem_values[curArrayKey->cur_elem];
}
+ so->scanBehind = false;
}
/*
- * _bt_restore_array_keys() -- Handle array keys during btrestrpos
+ * _bt_advance_array_keys_increment() -- Advance to next set of array elements
+ *
+ * Advances the array keys by a single increment in the current scan
+ * direction. When there are multiple array keys this can roll over from the
+ * lowest order array to higher order arrays.
*
- * Restore the array keys to where they were when the mark was set.
+ * Returns true if there is another set of values to consider, false if not.
+ * On true result, the scankeys are initialized with the next set of values.
+ * On false result, the scankeys stay the same, and the array keys are not
+ * advanced (every array remains at its final element for scan direction).
*/
-void
-_bt_restore_array_keys(IndexScanDesc scan)
+static bool
+_bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
- bool changed = false;
- int i;
- /* Restore each array key to its position when the mark was set */
- for (i = 0; i < so->numArrayKeys; i++)
- {
- BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
- ScanKey skey = &so->arrayKeyData[curArrayKey->scan_key];
- int mark_elem = curArrayKey->mark_elem;
+ /*
+ * We must advance the last array key most quickly, since it will
+ * correspond to the lowest-order index column among the available
+ * qualifications.
+ *
+ * An array that runs out of elements rolls over to its first element for
+ * the scan direction, at which point we go on to advance the array that
+ * comes before it. We're done as soon as some array advances without
+ * rolling over.
+ */
+ for (int i = so->numArrayKeys - 1; i >= 0; i--)
+ {
+ BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
+ ScanKey skey = &so->keyData[curArrayKey->scan_key];
+ int cur_elem = curArrayKey->cur_elem;
+ int num_elems = curArrayKey->num_elems;
+ bool rolled = false;
+
+ if (ScanDirectionIsForward(dir) && ++cur_elem >= num_elems)
+ {
+ cur_elem = 0;
+ rolled = true;
+ }
+ else if (ScanDirectionIsBackward(dir) && --cur_elem < 0)
+ {
+ cur_elem = num_elems - 1;
+ rolled = true;
+ }
+
+ curArrayKey->cur_elem = cur_elem;
+ skey->sk_argument = curArrayKey->elem_values[cur_elem];
+ if (!rolled)
+ return true;
+
+ /* Need to advance next array key, if any */
+ }
+
+ /*
+ * The array keys are now exhausted. (There isn't actually a distinct
+ * state that represents array exhaustion, since index scans don't always
+ * end after btgettuple returns "false".)
+ *
+ * Restore the array keys to the state they were in immediately before we
+ * were called. This ensures that the arrays only ever ratchet in the
+ * current scan direction. Without this, scans would overlook matching
+ * tuples if and when the scan's direction was subsequently reversed.
+ *
+ * (Every array rolled over just now, so starting the arrays in the
+ * opposite direction puts each one back at its final element for dir.)
+ */
+ _bt_start_array_keys(scan, -dir);
+
+ return false;
+}
+
+/*
+ * _bt_rewind_nonrequired_arrays() -- Rewind non-required arrays
+ *
+ * Called when _bt_advance_array_keys decides to start a new primitive index
+ * scan on the basis of the current scan position being before the position
+ * that _bt_first is capable of repositioning the scan to by applying an
+ * inequality operator required in the opposite-to-scan direction only.
+ *
+ * Although equality strategy scan keys (for both arrays and non-arrays alike)
+ * are either marked required in both directions or in neither direction,
+ * there is a sense in which non-required arrays behave like required arrays.
+ * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)",
+ * the scan key on "c" is non-required, but nevertheless enables positioning
+ * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the
+ * first descent of the tree by _bt_first. Later on, there could also be a
+ * second descent, that places the scan right before tuples >= "(200, 3, 5)".
+ * _bt_first must never be allowed to build an insertion scan key whose "c"
+ * entry is set to a value other than 5, the "c" array's first element/value.
+ * (Actually, it's the first in the current scan direction. This example uses
+ * a forward scan.)
+ *
+ * Calling here resets the array scan key elements for the scan's non-required
+ * arrays. This is strictly necessary for correctness in a subset of cases
+ * involving "required in opposite direction"-triggered primitive index scans.
+ * Not all callers are at risk of _bt_first using a non-required array like
+ * this, but advancement always resets the arrays when another primitive scan
+ * is scheduled, just to keep things simple. Array advancement even makes
+ * sure to reset non-required arrays during scans that have no inequalities.
+ * (Advancement still won't call here when there are no inequalities, though
+ * that's just because it's all handled indirectly instead.)
+ *
+ * Arrays whose scan key is marked SK_BT_REQFWD and/or SK_BT_REQBKWD are left
+ * untouched here, as are non-required arrays that are already positioned at
+ * their first element for the scan direction.
+ *
+ * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that
+ * everybody got this right.
+ */
+static void
+_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ int arrayidx = 0;
+
+ for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
+ {
+ ScanKey cur = so->keyData + ikey;
+ BTArrayKeyInfo *array = NULL;
+ int first_elem_dir;
+
+ if (!(cur->sk_flags & SK_SEARCHARRAY) ||
+ cur->sk_strategy != BTEqualStrategyNumber)
+ continue;
+
+ array = &so->arrayKeys[arrayidx++];
+ Assert(array->scan_key == ikey);
+
+ if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
+ continue;
+
+ if (ScanDirectionIsForward(dir))
+ first_elem_dir = 0;
+ else
+ first_elem_dir = array->num_elems - 1;
+
+ if (array->cur_elem != first_elem_dir)
+ {
+ array->cur_elem = first_elem_dir;
+ cur->sk_argument = array->elem_values[first_elem_dir];
+ }
+ }
+}
+
+/*
+ * _bt_tuple_before_array_skeys() -- too early to advance required arrays?
+ *
+ * We always compare the tuple using the current array keys (which we assume
+ * are already set in so->keyData[]). readpagetup indicates if tuple is the
+ * scan's current _bt_readpage-wise tuple.
+ *
+ * readpagetup callers must only call here when _bt_check_compare already set
+ * continuescan=false. We help these callers deal with _bt_check_compare's
+ * inability to distinguish between the < and > cases (it uses equality
+ * operator scan keys, whereas we use 3-way ORDER procs). These callers pass
+ * a _bt_check_compare-set sktrig value that indicates which scan key
+ * triggered the call (!readpagetup callers just pass us sktrig=0 instead).
+ * This information allows us to avoid wastefully checking earlier scan keys
+ * that were already deemed to have been satisfied inside _bt_check_compare.
+ *
+ * Returns false when caller's tuple is >= the current required equality scan
+ * keys (or <=, in the case of backwards scans). This happens to readpagetup
+ * callers when the scan has reached the point of needing its array keys
+ * advanced; caller will need to advance required and non-required arrays at
+ * scan key offsets >= sktrig, plus scan keys < sktrig iff sktrig rolls over.
+ * (When we return false to readpagetup callers, tuple can only be == current
+ * required equality scan keys when caller's sktrig indicates that the arrays
+ * need to be advanced due to an unsatisfied required inequality key trigger.)
+ *
+ * Returns true when caller passes a tuple that is < the current set of
+ * equality keys for the most significant non-equal required scan key/column
+ * (or > the keys, during backwards scans). This happens to readpagetup
+ * callers when tuple is still before the start of matches for the scan's
+ * required equality strategy scan keys. (sktrig can't have indicated that an
+ * inequality strategy scan key wasn't satisfied in _bt_check_compare when we
+ * return true. In fact, we automatically return false when passed such an
+ * inequality sktrig by readpagetup callers -- _bt_check_compare's initial
+ * continuescan=false doesn't really need to be confirmed here by us.)
+ *
+ * !readpagetup callers optionally pass us *scanBehind, which tracks whether
+ * any missing truncated attributes might have affected array advancement
+ * (compared to what would happen if it was shown the first non-pivot tuple on
+ * the page to the right of caller's finaltup/high key tuple instead). It's
+ * only possible that we'll set *scanBehind to true when caller passes us a
+ * pivot tuple (with truncated -inf attributes) that we return false for.
+ */
+static bool
+_bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
+ IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
+ bool readpagetup, int sktrig, bool *scanBehind)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
+ Assert(so->numArrayKeys);
+ Assert(so->numberOfKeys);
+ Assert(sktrig == 0 || readpagetup);
+ Assert(!readpagetup || scanBehind == NULL);
+
+ if (scanBehind)
+ *scanBehind = false;
+
+ for (int ikey = sktrig; ikey < so->numberOfKeys; ikey++)
+ {
+ ScanKey cur = so->keyData + ikey;
+ Datum tupdatum;
+ bool tupnull;
+ int32 result;
+
+ /* readpagetup calls require one ORDER proc comparison (at most) */
+ Assert(!readpagetup || ikey == sktrig);
+
+ /*
+ * Once we reach a non-required scan key, we're completely done.
+ *
+ * Note: we deliberately don't consider the scan direction here.
+ * _bt_advance_array_keys caller requires that we track *scanBehind
+ * without concern for scan direction.
+ */
+ if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) == 0)
+ {
+ Assert(!readpagetup);
+ Assert(ikey > sktrig || ikey == 0);
+ return false;
+ }
+
+ if (cur->sk_attno > tupnatts)
+ {
+ Assert(!readpagetup);
+
+ /*
+ * When we reach a high key's truncated attribute, assume that the
+ * tuple attribute's value is >= the scan's equality constraint
+ * scan keys (but set *scanBehind to let interested callers know
+ * that a truncated attribute might have affected our answer).
+ */
+ if (scanBehind)
+ *scanBehind = true;
+
+ return false;
+ }
+
+ /*
+ * Deal with inequality strategy scan keys that _bt_check_compare set
+ * continuescan=false for
+ */
+ if (cur->sk_strategy != BTEqualStrategyNumber)
+ {
+ /*
+ * When _bt_check_compare indicated that a required inequality
+ * scan key wasn't satisfied, there's no need to verify anything;
+ * caller always calls _bt_advance_array_keys with this sktrig.
+ */
+ if (readpagetup)
+ return false;
+
+ /*
+ * Otherwise we can't give up, since we must check all required
+ * scan keys (required in either direction) in order to correctly
+ * track *scanBehind for caller
+ */
+ continue;
+ }
+
+ tupdatum = index_getattr(tuple, cur->sk_attno, tupdesc, &tupnull);
+
+ result = _bt_compare_array_skey(&so->orderProcs[ikey],
+ tupdatum, tupnull,
+ cur->sk_argument, cur);
+
+ /*
+ * Does this comparison indicate that caller must _not_ advance the
+ * scan's arrays just yet?
+ */
+ if ((ScanDirectionIsForward(dir) && result < 0) ||
+ (ScanDirectionIsBackward(dir) && result > 0))
+ return true;
+
+ /*
+ * Does this comparison indicate that caller should now advance the
+ * scan's arrays? (Must be if we get here during a readpagetup call.)
+ */
+ if (readpagetup || result != 0)
+ {
+ Assert(result != 0);
+ return false;
+ }
+
+ /*
+ * Inconclusive -- need to check later scan keys, too.
+ *
+ * This must be a finaltup precheck, or a call made from an assertion.
+ */
+ Assert(result == 0);
+ }
+
+ Assert(!readpagetup);
+
+ return false;
+}
+
+/*
+ * _bt_start_prim_scan() -- start scheduled primitive index scan?
+ *
+ * Returns true if _bt_checkkeys scheduled another primitive index scan, just
+ * as the last one ended. Otherwise returns false, indicating that the array
+ * keys are now fully exhausted.
+ *
+ * Only call here during scans with one or more equality type array scan keys,
+ * after _bt_first or _bt_next return false.
+ *
+ * Always resets so->scanBehind. When we return false during a parallel scan
+ * (scan->parallel_scan is set), we also call _bt_parallel_done first.
+ */
+bool
+_bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
+ Assert(so->numArrayKeys);
+
+ /* scanBehind flag doesn't persist across primitive index scans - reset */
+ so->scanBehind = false;
+
+ /*
+ * Array keys are advanced within _bt_checkkeys when the scan reaches the
+ * leaf level (more precisely, they're advanced when the scan reaches the
+ * end of each distinct set of array elements). This process avoids
+ * repeat access to leaf pages (across multiple primitive index scans) by
+ * advancing the scan's array keys when it allows the primitive index scan
+ * to find nearby matching tuples (or when it eliminates ranges of array
+ * key space that can't possibly be satisfied by any index tuple).
+ *
+ * _bt_checkkeys sets a simple flag variable to schedule another primitive
+ * index scan. The flag tells us what to do.
+ *
+ * We cannot rely on _bt_first always reaching _bt_checkkeys. There are
+ * various cases where that won't happen. For example, if the index is
+ * completely empty, then _bt_first won't call _bt_readpage/_bt_checkkeys.
+ * We also don't expect a call to _bt_checkkeys during searches for a
+ * non-existent value that happens to be lower/higher than any existing
+ * value in the index.
+ *
+ * We don't require special handling for these cases -- we don't need to
+ * be explicitly instructed to _not_ perform another primitive index scan.
+ * It's up to code under the control of _bt_first to always set the flag
+ * when another primitive index scan will be required.
+ *
+ * This works correctly, even with the tricky cases listed above, which
+ * all involve access to leaf pages "near the boundaries of the key space"
+ * (whether it's from a leftmost/rightmost page, or an imaginary empty
+ * leaf root page). If _bt_checkkeys cannot be reached by a primitive
+ * index scan for one set of array keys, then it also won't be reached for
+ * any later set ("later" in terms of the direction that we scan the index
+ * and advance the arrays). The array keys won't have advanced in these
+ * cases, but that's the correct behavior (even _bt_advance_array_keys
+ * won't always advance the arrays at the point they become "exhausted").
+ */
+ if (so->needPrimScan)
+ {
+ Assert(_bt_verify_arrays_bt_first(scan, dir));
+
+ /*
+ * Flag was set -- must call _bt_first again, which will reset the
+ * scan's needPrimScan flag
+ */
+ return true;
+ }
+
+ /* The top-level index scan ran out of tuples in this scan direction */
+ if (scan->parallel_scan != NULL)
+ _bt_parallel_done(scan);
+
+ return false;
+}
+
+/*
+ * _bt_advance_array_keys() -- Advance array elements using a tuple
+ *
+ * The scan always gets a new qual as a consequence of calling here (except
+ * when we determine that the top-level scan has run out of matching tuples).
+ * All later _bt_check_compare calls also use the same new qual that was first
+ * used here (at least until the next call here advances the keys once again).
+ * It's convenient to structure _bt_check_compare rechecks of caller's tuple
+ * (using the new qual) as one of the steps of advancing the scan's array keys,
+ * so this function works as a wrapper around _bt_check_compare.
+ *
+ * Like _bt_check_compare, we'll set pstate.continuescan on behalf of the
+ * caller, and return a boolean indicating if caller's tuple satisfies the
+ * scan's new qual. But unlike _bt_check_compare, we set so->needPrimScan
+ * when we set continuescan=false, indicating if a new primitive index scan
+ * has been scheduled (otherwise, the top-level scan has run out of tuples in
+ * the current scan direction).
+ *
+ * Caller must use _bt_tuple_before_array_skeys to determine if the current
+ * place in the scan is >= the current array keys _before_ calling here.
+ * We're responsible for ensuring that caller's tuple is <= the newly advanced
+ * required array keys once we return. We try to find an exact match, but
+ * failing that we'll advance the array keys to whatever set of array elements
+ * comes next in the key space for the current scan direction. Required array
+ * keys "ratchet forwards" (or backwards). They can only advance as the scan
+ * itself advances through the index/key space.
+ *
+ * (The rules are the same for backwards scans, except that the operators are
+ * flipped: just replace the precondition's >= operator with a <=, and the
+ * postcondition's <= operator with a >=. In other words, just swap the
+ * precondition with the postcondition.)
+ *
+ * We also deal with "advancing" non-required arrays here. Callers whose
+ * sktrig scan key is non-required specify sktrig_required=false. These calls
+ * are the only exception to the general rule about always advancing the
+ * required array keys (the scan may not even have a required array). These
+ * callers should just pass a NULL pstate (since there is never any question
+ * of stopping the scan). No call to _bt_tuple_before_array_skeys is required
+ * ahead of these calls (it's already clear that any required scan keys must
+ * be satisfied by caller's tuple).
+ *
+ * Note that we deal with non-array required equality strategy scan keys as
+ * degenerate single element arrays here. Obviously, they can never really
+ * advance in the way that real arrays can, but they must still affect how we
+ * advance real array scan keys (exactly like true array equality scan keys).
+ * We have to keep around a 3-way ORDER proc for these (using the "=" operator
+ * won't do), since in general whether the tuple is < or > _any_ unsatisfied
+ * required equality key influences how the scan's real arrays must advance.
+ *
+ * Note also that we may sometimes need to advance the array keys when the
+ * existing required array keys (and other required equality keys) are already
+ * an exact match for every corresponding value from caller's tuple. We must
+ * do this for inequalities that _bt_check_compare set continuescan=false for.
+ * They'll advance the array keys here, just like any other scan key that
+ * _bt_check_compare stops on. (This can even happen _after_ we advance the
+ * array keys, in which case we'll advance the array keys a second time. That
+ * way _bt_checkkeys caller always has its required arrays advance to the
+ * maximum possible extent that its tuple will allow.)
+ */
+static bool
+_bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
+ IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
+ int sktrig, bool sktrig_required)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ Relation rel = scan->indexRelation;
+ ScanDirection dir = pstate ? pstate->dir : ForwardScanDirection;
+ int arrayidx = 0;
+ bool beyond_end_advance = false,
+ has_required_opposite_direction_only = false,
+ oppodir_inequality_sktrig = false,
+ all_required_satisfied = true,
+ all_satisfied = true;
+
+ if (sktrig_required)
+ {
+ /*
+ * Precondition array state assertion
+ */
+ Assert(!_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc,
+ tupnatts, false, 0, NULL));
+
+ so->scanBehind = false; /* reset */
+
+ /*
+ * Required scan key wasn't satisfied, so required arrays will have to
+ * advance. Invalidate page-level state that tracks whether the
+ * scan's required-in-opposite-direction-only keys are known to be
+ * satisfied by page's remaining tuples.
+ */
+ pstate->firstmatch = false;
+
+ /* Shouldn't have to invalidate 'prechecked', though */
+ Assert(!pstate->prechecked);
+
+ /*
+ * Once we return we'll have a new set of required array keys, so
+ * reset state used by "look ahead" optimization
+ */
+ pstate->rechecks = 0;
+ pstate->targetdistance = 0;
+ }
+
+ Assert(_bt_verify_keys_with_arraykeys(scan));
+
+ for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
+ {
+ ScanKey cur = so->keyData + ikey;
+ BTArrayKeyInfo *array = NULL;
+ Datum tupdatum;
+ bool required = false,
+ required_opposite_direction_only = false,
+ tupnull;
+ int32 result;
+ int set_elem = 0;
+
+ if (cur->sk_strategy == BTEqualStrategyNumber)
+ {
+ /* Manage array state */
+ if (cur->sk_flags & SK_SEARCHARRAY)
+ {
+ array = &so->arrayKeys[arrayidx++];
+ Assert(array->scan_key == ikey);
+ }
+ }
+ else
+ {
+ /*
+ * Are any inequalities required in the opposite direction only
+ * present here?
+ */
+ if (((ScanDirectionIsForward(dir) &&
+ (cur->sk_flags & (SK_BT_REQBKWD))) ||
+ (ScanDirectionIsBackward(dir) &&
+ (cur->sk_flags & (SK_BT_REQFWD)))))
+ has_required_opposite_direction_only =
+ required_opposite_direction_only = true;
+ }
+
+ /* Optimization: skip over known-satisfied scan keys */
+ if (ikey < sktrig)
+ continue;
+
+ if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
+ {
+ Assert(sktrig_required);
+
+ required = true;
+
+ if (cur->sk_attno > tupnatts)
+ {
+ /* Set this just like _bt_tuple_before_array_skeys */
+ Assert(sktrig < ikey);
+ so->scanBehind = true;
+ }
+ }
+
+ /*
+ * Handle a required non-array scan key that the initial call to
+ * _bt_check_compare indicated triggered array advancement, if any.
+ *
+ * The non-array scan key's strategy will be <, <=, or = during a
+ * forwards scan (or any one of =, >=, or > during a backwards scan).
+ * It follows that the corresponding tuple attribute's value must now
+ * be either > or >= the scan key value (for backwards scans it must
+ * be either < or <= that value).
+ *
+ * If this is a required equality strategy scan key, this is just an
+ * optimization; _bt_tuple_before_array_skeys already confirmed that
+ * this scan key places us ahead of caller's tuple. There's no need
+ * to repeat that work now. (The same underlying principle also gets
+ * applied by the cur_elem_trig optimization used to speed up searches
+ * for the next array element.)
+ *
+ * If this is a required inequality strategy scan key, we _must_ rely
+ * on _bt_check_compare like this; we aren't capable of directly
+ * evaluating required inequality strategy scan keys here, on our own.
+ */
+ if (ikey == sktrig && !array)
+ {
+ Assert(sktrig_required && required && all_required_satisfied);
+
+ /* Use "beyond end" advancement. See below for an explanation. */
+ beyond_end_advance = true;
+ all_satisfied = all_required_satisfied = false;
+
+ /*
+ * Set a flag that remembers that this was an inequality required
+ * in the opposite scan direction only, that nevertheless
+ * triggered the call here.
+ *
+ * This only happens when an inequality operator (which must be
+ * strict) encounters a group of NULLs that indicate the end of
+ * non-NULL values for tuples in the current scan direction.
+ */
+ if (unlikely(required_opposite_direction_only))
+ oppodir_inequality_sktrig = true;
+
+ continue;
+ }
+
+ /*
+ * Nothing more for us to do with an inequality strategy scan key that
+ * wasn't the one that _bt_check_compare stopped on, though.
+ *
+ * Note: if our later call to _bt_check_compare (to recheck caller's
+ * tuple) sets continuescan=false due to finding this same inequality
+ * unsatisfied (possible when it's required in the scan direction),
+ * we'll deal with it via a recursive "second pass" call.
+ */
+ else if (cur->sk_strategy != BTEqualStrategyNumber)
+ continue;
+
+ /*
+ * Nothing for us to do with an equality strategy scan key that isn't
+ * marked required, either -- unless it's a non-required array
+ */
+ else if (!required && !array)
+ continue;
+
+ /*
+ * Here we perform steps for all array scan keys after a required
+ * array scan key whose binary search triggered "beyond end of array
+ * element" array advancement due to encountering a tuple attribute
+ * value > the closest matching array key (or < for backwards scans).
+ */
+ if (beyond_end_advance)
+ {
+ int final_elem_dir;
+
+ if (ScanDirectionIsBackward(dir) || !array)
+ final_elem_dir = 0;
+ else
+ final_elem_dir = array->num_elems - 1;
+
+ if (array && array->cur_elem != final_elem_dir)
+ {
+ array->cur_elem = final_elem_dir;
+ cur->sk_argument = array->elem_values[final_elem_dir];
+ }
+
+ continue;
+ }
+
+ /*
+ * Here we perform steps for all array scan keys after a required
+ * array scan key whose tuple attribute was < the closest matching
+ * array key when we dealt with it (or > for backwards scans).
+ *
+ * This earlier required array key already puts us ahead of caller's
+ * tuple in the key space (for the current scan direction). We must
+ * make sure that subsequent lower-order array keys do not put us too
+ * far ahead (ahead of tuples that have yet to be seen by our caller).
+ * For example, when a tuple "(a, b) = (42, 5)" advances the array
+ * keys on "a" from 40 to 45, we must also set "b" to whatever the
+ * first array element for "b" is. It would be wrong to allow "b" to
+ * be set based on the tuple value.
+ *
+ * Perform the same steps with truncated high key attributes. You can
+ * think of this as a "binary search" for the element closest to the
+ * value -inf. Again, the arrays must never get ahead of the scan.
+ */
+ if (!all_required_satisfied || cur->sk_attno > tupnatts)
+ {
+ int first_elem_dir;
+
+ if (ScanDirectionIsForward(dir) || !array)
+ first_elem_dir = 0;
+ else
+ first_elem_dir = array->num_elems - 1;
+
+ if (array && array->cur_elem != first_elem_dir)
+ {
+ array->cur_elem = first_elem_dir;
+ cur->sk_argument = array->elem_values[first_elem_dir];
+ }
+
+ continue;
+ }
+
+ /*
+ * Search in scankey's array for the corresponding tuple attribute
+ * value from caller's tuple
+ */
+ tupdatum = index_getattr(tuple, cur->sk_attno, tupdesc, &tupnull);
+
+ if (array)
+ {
+ bool cur_elem_trig = (sktrig_required && ikey == sktrig);
+
+ /*
+ * Binary search for closest match that's available from the array
+ */
+ set_elem = _bt_binsrch_array_skey(&so->orderProcs[ikey],
+ cur_elem_trig, dir,
+ tupdatum, tupnull, array, cur,
+ &result);
+
+ Assert(set_elem >= 0 && set_elem < array->num_elems);
+ }
+ else
+ {
+ Assert(sktrig_required && required);
+
+ /*
+ * This is a required non-array equality strategy scan key, which
+ * we'll treat as a degenerate single element array.
+ *
+ * This scan key's imaginary "array" can't really advance, but it
+ * can still roll over like any other array. (Actually, this is
+ * no different to real single value arrays, which never advance
+ * without rolling over -- they can never truly advance, either.)
+ */
+ result = _bt_compare_array_skey(&so->orderProcs[ikey],
+ tupdatum, tupnull,
+ cur->sk_argument, cur);
+ }
+
+ /*
+ * Consider "beyond end of array element" array advancement.
+ *
+ * When the tuple attribute value is > the closest matching array key
+ * (or < in the backwards scan case), we need to ratchet this array
+ * forward (backward) by one increment, so that caller's tuple ends up
+ * being < final array value instead (or > final array value instead).
+ * This process has to work for all of the arrays, not just this one:
+ * it must "carry" to higher-order arrays when the set_elem that we
+ * just found happens to be the final one for the scan's direction.
+ * Incrementing (decrementing) set_elem itself isn't good enough.
+ *
+ * Our approach is to provisionally use set_elem as if it was an exact
+ * match now, then set each later/less significant array to whatever
+ * its final element is. Once outside the loop we'll then "increment
+ * this array's set_elem" by calling _bt_advance_array_keys_increment.
+ * That way the process rolls over to higher order arrays as needed.
+ *
+ * Under this scheme any required arrays only ever ratchet forwards
+ * (or backwards), and always do so to the maximum possible extent
+ * that we can know will be safe without seeing the scan's next tuple.
+ * We don't need any special handling for required scan keys that lack
+ * a real array to advance, nor for redundant scan keys that couldn't
+ * be eliminated by _bt_preprocess_keys. It won't matter if some of
+ * our "true" array scan keys (or even all of them) are non-required.
+ */
+ if (required &&
+ ((ScanDirectionIsForward(dir) && result > 0) ||
+ (ScanDirectionIsBackward(dir) && result < 0)))
+ beyond_end_advance = true;
+
+ Assert(all_required_satisfied && all_satisfied);
+ if (result != 0)
+ {
+ /*
+ * Track whether caller's tuple satisfies our new post-advancement
+ * qual, for required scan keys, as well as for the entire set of
+ * interesting scan keys (all required scan keys plus non-required
+ * array scan keys are considered interesting.)
+ */
+ all_satisfied = false;
+ if (required)
+ all_required_satisfied = false;
+ else
+ {
+ /*
+ * There's no need to advance the arrays using the best
+ * available match for a non-required array. Give up now.
+ * (Though note that sktrig_required calls still have to do
+ * all the usual post-advancement steps, including the recheck
+ * call to _bt_check_compare.)
+ */
+ break;
+ }
+ }
+
+ /* Advance array keys, even when set_elem isn't an exact match */
+ if (array && array->cur_elem != set_elem)
+ {
+ array->cur_elem = set_elem;
+ cur->sk_argument = array->elem_values[set_elem];
+ }
+ }
+
+ /*
+ * Advance the array keys incrementally whenever "beyond end of array
+ * element" array advancement happens, so that advancement will carry to
+ * higher-order arrays (might exhaust all the scan's arrays instead, which
+ * ends the top-level scan).
+ */
+ if (beyond_end_advance && !_bt_advance_array_keys_increment(scan, dir))
+ goto end_toplevel_scan;
+
+ Assert(_bt_verify_keys_with_arraykeys(scan));
+
+ /*
+ * Does tuple now satisfy our new qual? Recheck with _bt_check_compare.
+ *
+ * Calls triggered by an unsatisfied required scan key, whose tuple now
+ * satisfies all required scan keys, but not all nonrequired array keys,
+ * will still require a recheck call to _bt_check_compare. They'll still
+ * need its "second pass" handling of required inequality scan keys.
+ * (Might have missed a still-unsatisfied required inequality scan key
+ * that caller didn't detect as the sktrig scan key during its initial
+ * _bt_check_compare call that used the old/original qual.)
+ *
+ * Calls triggered by an unsatisfied nonrequired array scan key never need
+ * "second pass" handling of required inequalities (nor any other handling
+ * of any required scan key). All that matters is whether caller's tuple
+ * satisfies the new qual, so it's safe to just skip the _bt_check_compare
+ * recheck when we've already determined that it can only return 'false'.
+ */
+ if ((sktrig_required && all_required_satisfied) ||
+ (!sktrig_required && all_satisfied))
+ {
+ int nsktrig = sktrig + 1;
+ bool continuescan;
+
+ Assert(all_required_satisfied);
+
+ /* Recheck _bt_check_compare on behalf of caller */
+ if (_bt_check_compare(scan, dir, tuple, tupnatts, tupdesc,
+ false, false, false,
+ &continuescan, &nsktrig) &&
+ !so->scanBehind)
+ {
+ /* This tuple satisfies the new qual */
+ Assert(all_satisfied && continuescan);
+
+ if (pstate)
+ pstate->continuescan = true;
+
+ return true;
+ }
+
+ /*
+ * Consider "second pass" handling of required inequalities.
+ *
+ * It's possible that our _bt_check_compare call indicated that the
+ * scan should end due to some unsatisfied inequality that wasn't
+ * initially recognized as such by us. Handle this by calling
+ * ourselves recursively, this time indicating that the trigger is the
+ * inequality that we missed first time around (and using a set of
+ * required array/equality keys that are now exact matches for tuple).
+ *
+ * We make a strong, general guarantee that every _bt_checkkeys call
+ * here will advance the array keys to the maximum possible extent
+ * that we can know to be safe based on caller's tuple alone. If we
+ * didn't perform this step, then that guarantee wouldn't quite hold.
+ */
+ if (unlikely(!continuescan))
+ {
+ bool satisfied PG_USED_FOR_ASSERTS_ONLY;
+
+ Assert(sktrig_required);
+ Assert(so->keyData[nsktrig].sk_strategy != BTEqualStrategyNumber);
+
+ /*
+ * The tuple must use "beyond end" advancement during the
+ * recursive call, so we cannot possibly end up back here when
+ * recursing. We'll consume a small, fixed amount of stack space.
+ */
+ Assert(!beyond_end_advance);
+
+ /* Advance the array keys a second time using same tuple */
+ satisfied = _bt_advance_array_keys(scan, pstate, tuple, tupnatts,
+ tupdesc, nsktrig, true);
+
+ /* This tuple doesn't satisfy the inequality */
+ Assert(!satisfied);
+ return false;
+ }
+
+ /*
+ * Some non-required scan key (from new qual) still not satisfied.
+ *
+ * All scan keys required in the current scan direction must still be
+ * satisfied, though, so we can trust all_required_satisfied below.
+ */
+ }
+
+ /*
+ * When we were called just to deal with "advancing" non-required arrays,
+ * this is as far as we can go (cannot stop the scan for these callers)
+ */
+ if (!sktrig_required)
+ {
+ /* Caller's tuple doesn't match any qual */
+ return false;
+ }
+
+ /*
+ * Postcondition array state assertion (for still-unsatisfied tuples).
+ *
+ * By here we have established that the scan's required arrays (scan must
+ * have at least one required array) advanced, without becoming exhausted.
+ *
+ * Caller's tuple is now < the newly advanced array keys (or > when this
+ * is a backwards scan), except in the case where we only got this far due
+ * to an unsatisfied non-required scan key. Verify that with an assert.
+ *
+ * Note: we don't just quit at this point when all required scan keys were
+ * found to be satisfied because we need to consider edge-cases involving
+ * scan keys required in the opposite direction only; those aren't tracked
+ * by all_required_satisfied. (Actually, oppodir_inequality_sktrig trigger
+ * scan keys are tracked by all_required_satisfied, since it's convenient
+ * for _bt_check_compare to behave as if they are required in the current
+ * scan direction to deal with NULLs. We'll account for that separately.)
+ */
+ Assert(_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc, tupnatts,
+ false, 0, NULL) ==
+ !all_required_satisfied);
+
+ /*
+ * We generally permit primitive index scans to continue onto the next
+ * sibling page when the page's finaltup satisfies all required scan keys
+ * at the point where we're between pages.
+ *
+ * If caller's tuple is also the page's finaltup, and we see that required
+ * scan keys still aren't satisfied, start a new primitive index scan.
+ */
+ if (!all_required_satisfied && pstate->finaltup == tuple)
+ goto new_prim_scan;
+
+ /*
+ * Proactively check finaltup (don't wait until finaltup is reached by the
+ * scan) when it might well turn out to not be satisfied later on.
+ *
+ * Note: if so->scanBehind hasn't already been set for finaltup by us,
+ * it'll be set during this call to _bt_tuple_before_array_skeys. Either
+ * way, it'll be set correctly (for the whole page) after this point.
+ */
+ if (!all_required_satisfied && pstate->finaltup &&
+ _bt_tuple_before_array_skeys(scan, dir, pstate->finaltup, tupdesc,
+ BTreeTupleGetNAtts(pstate->finaltup, rel),
+ false, 0, &so->scanBehind))
+ goto new_prim_scan;
+
+ /*
+ * When we encounter a truncated finaltup high key attribute, we're
+ * optimistic about the chances of its corresponding required scan key
+ * being satisfied when we go on to check it against tuples from this
+ * page's right sibling leaf page. We consider truncated attributes to be
+ * satisfied by required scan keys, which allows the primitive index scan
+ * to continue to the next leaf page. We must set so->scanBehind to true
+ * to remember that the last page's finaltup had "satisfied" required scan
+ * keys for one or more truncated attribute values (scan keys required in
+ * _either_ scan direction).
+ *
+ * There is a chance that _bt_checkkeys (which checks so->scanBehind) will
+ * find that even the sibling leaf page's finaltup is < the new array
+ * keys. When that happens, our optimistic policy will have incurred a
+ * single extra leaf page access that could have been avoided.
+ *
+ * A pessimistic policy would give backward scans a gratuitous advantage
+ * over forward scans. We'd punish forward scans for applying more
+ * accurate information from the high key, rather than just using the
+ * final non-pivot tuple as finaltup, in the style of backward scans.
+ * Being pessimistic would also give some scans with non-required arrays a
+ * perverse advantage over similar scans that use required arrays instead.
+ *
+ * You can think of this as a speculative bet on what the scan is likely
+ * to find on the next page. It's not much of a gamble, though, since the
+ * untruncated prefix of attributes must strictly satisfy the new qual
+ * (though it's okay if any non-required scan keys fail to be satisfied).
+ */
+ if (so->scanBehind && has_required_opposite_direction_only)
+ {
+ /*
+ * However, we avoid this behavior whenever the scan involves a scan
+ * key required in the opposite direction to the scan only, along with
+ * a finaltup with at least one truncated attribute that's associated
+ * with a scan key marked required (required in either direction).
+ *
+ * _bt_check_compare simply won't stop the scan for a scan key that's
+ * marked required in the opposite scan direction only. That leaves
+ * us without any reliable way of reconsidering any opposite-direction
+ * inequalities if it turns out that starting a new primitive index
+ * scan will allow _bt_first to skip ahead by a great many leaf pages
+ * (see next section for details of how that works).
+ */
+ goto new_prim_scan;
+ }
+
+ /*
+ * Handle inequalities marked required in the opposite scan direction.
+ * They can also signal that we should start a new primitive index scan.
+ *
+ * It's possible that the scan is now positioned where "matching" tuples
+ * begin, and that caller's tuple satisfies all scan keys required in the
+ * current scan direction. But if caller's tuple still doesn't satisfy
+ * other scan keys that are required in the opposite scan direction only
+ * (e.g., a required >= strategy scan key when scan direction is forward),
+ * it's still possible that there are many leaf pages before the page that
+ * _bt_first could skip straight to. Groveling through all those pages
+ * will always give correct answers, but it can be very inefficient. We
+ * must avoid needlessly scanning extra pages.
+ *
+ * Separately, it's possible that _bt_check_compare set continuescan=false
+ * for a scan key that's required in the opposite direction only. This is
+ * a special case, that happens only when _bt_check_compare sees that the
+ * inequality encountered a NULL value. This signals the end of non-NULL
+ * values in the current scan direction, which is reason enough to end the
+ * (primitive) scan. If this happens at the start of a large group of
+ * NULL values, then we shouldn't expect to be called again until after
+ * the scan has already read indefinitely-many leaf pages full of tuples
+ * with NULL suffix values. We need a separate test for this case so that
+ * we don't miss our only opportunity to skip over such a group of pages.
+ * (_bt_first is expected to skip over the group of NULLs by applying a
+ * similar "deduce NOT NULL" rule, where it finishes its insertion scan
+ * key by consing up an explicit SK_SEARCHNOTNULL key.)
+ *
+ * Apply a test against finaltup to detect and recover from these problems:
+ * if even finaltup doesn't satisfy such an inequality, we just skip by
+ * starting a new primitive index scan. When we skip, we know for sure
+ * that all of the tuples on the current page following caller's tuple are
+ * also before the _bt_first-wise start of tuples for our new qual. That
+ * at least suggests many more skippable pages beyond the current page.
+ */
+ if (has_required_opposite_direction_only && pstate->finaltup &&
+ (all_required_satisfied || oppodir_inequality_sktrig))
+ {
+ int nfinaltupatts = BTreeTupleGetNAtts(pstate->finaltup, rel);
+ ScanDirection flipped;
+ bool continuescanflip;
+ int opsktrig;
+
+ /*
+ * We're checking finaltup (which is usually not caller's tuple), so
+ * cannot reuse work from caller's earlier _bt_check_compare call.
+ *
+ * Flip the scan direction when calling _bt_check_compare this time,
+ * so that it will set continuescanflip=false when it encounters an
+ * inequality required in the opposite scan direction.
+ */
+ Assert(!so->scanBehind);
+ opsktrig = 0;
+ flipped = -dir;
+ _bt_check_compare(scan, flipped,
+ pstate->finaltup, nfinaltupatts, tupdesc,
+ false, false, false,
+ &continuescanflip, &opsktrig);
+
+ /*
+ * If we ended up here due to the all_required_satisfied criteria,
+ * test opsktrig in a way that ensures that finaltup contains the same
+ * prefix of key columns as caller's tuple (a prefix that satisfies
+ * earlier required-in-current-direction scan keys).
+ *
+ * If we ended up here due to the oppodir_inequality_sktrig criteria,
+ * test opsktrig in a way that ensures that the same scan key that our
+ * caller found to be unsatisfied (by the scan's tuple) was also the
+ * one unsatisfied just now (by finaltup). That way we'll only start
+ * a new primitive scan when we're sure that both tuples _don't_ share
+ * the same prefix of satisfied equality-constrained attribute values,
+ * and that finaltup has a non-NULL attribute value indicated by the
+ * unsatisfied scan key at offset opsktrig/sktrig. (This depends on
+ * _bt_check_compare not caring about the direction that inequalities
+ * are required in whenever NULL attribute values are unsatisfied. It
+ * only cares about the scan direction, and its relationship to
+ * whether NULLs are stored first or last relative to non-NULLs.)
+ */
+ Assert(all_required_satisfied != oppodir_inequality_sktrig);
+ if (unlikely(!continuescanflip &&
+ ((all_required_satisfied && opsktrig > sktrig) ||
+ (oppodir_inequality_sktrig && opsktrig >= sktrig))))
+ {
+ Assert(so->keyData[opsktrig].sk_strategy != BTEqualStrategyNumber);
+
+ /*
+ * Make sure that any non-required arrays are set to the first
+ * array element for the current scan direction
+ */
+ _bt_rewind_nonrequired_arrays(scan, dir);
+
+ goto new_prim_scan;
+ }
+ }
+
+ /*
+ * Stick with the ongoing primitive index scan for now.
+ *
+ * It's possible that later tuples will also turn out to have values that
+ * are still < the now-current array keys (or > the current array keys).
+ * Our caller will handle this by performing what amounts to a linear
+ * search of the page, implemented by calling _bt_check_compare and then
+ * _bt_tuple_before_array_skeys for each tuple.
+ *
+ * This approach has various advantages over a binary search of the page.
+ * Repeated binary searches of the page (one binary search for every array
+ * advancement) won't outperform a continuous linear search. While there
+ * are workloads that a naive linear search won't handle well, our caller
+ * has a "look ahead" fallback mechanism to deal with that problem.
+ */
+ pstate->continuescan = true; /* Override _bt_check_compare */
+ so->needPrimScan = false; /* _bt_readpage has more tuples to check */
+
+ if (so->scanBehind)
+ {
+ /* Optimization: skip by setting "look ahead" mechanism's offnum */
+ Assert(ScanDirectionIsForward(dir));
+ pstate->skip = pstate->maxoff + 1;
+ }
+
+ /* Caller's tuple doesn't match the new qual */
+ return false;
+
+new_prim_scan:
+
+ /*
+ * End this primitive index scan, but schedule another.
+ *
+ * Note: If the scan direction happens to change, this scheduled primitive
+ * index scan won't go ahead after all.
+ */
+ pstate->continuescan = false; /* Tell _bt_readpage we're done... */
+ so->needPrimScan = true; /* ...but call _bt_first again */
+
+ if (scan->parallel_scan)
+ _bt_parallel_primscan_schedule(scan, pstate->prev_scan_page);
- if (curArrayKey->cur_elem != mark_elem)
- {
- curArrayKey->cur_elem = mark_elem;
- skey->sk_argument = curArrayKey->elem_values[mark_elem];
- changed = true;
- }
- }
+ /* Caller's tuple doesn't match the new qual */
+ return false;
+
+end_toplevel_scan:
/*
- * If we changed any keys, we must redo _bt_preprocess_keys. That might
- * sound like overkill, but in cases with multiple keys per index column
- * it seems necessary to do the full set of pushups.
+ * End the current primitive index scan, but don't schedule another.
+ *
+ * This ends the entire top-level scan in the current scan direction.
*
- * Also do this whenever the scan's set of array keys "wrapped around" at
- * the end of the last primitive index scan. There won't have been a call
- * to _bt_preprocess_keys from some other place following wrap around, so
- * we do it for ourselves.
+ * Note: The scan's arrays (including any non-required arrays) are now in
+ * their final positions for the current scan direction. If the scan
+ * direction happens to change, then the arrays will already be in their
+ * first positions for what will then be the current scan direction.
*/
- if (changed || !so->arraysStarted)
- {
- _bt_preprocess_keys(scan);
- /* The mark should have been set on a consistent set of keys... */
- Assert(so->qual_ok);
- }
-}
+ pstate->continuescan = false; /* Tell _bt_readpage we're done... */
+ so->needPrimScan = false; /* ...don't call _bt_first again, though */
+ /* Caller's tuple doesn't match any qual */
+ return false;
+}
/*
* _bt_preprocess_keys() -- Preprocess scan keys
*
- * The given search-type keys (in scan->keyData[] or so->arrayKeyData[])
+ * The given search-type keys (taken from scan->keyData[])
* are copied to so->keyData[] with possible transformation.
* scan->numberOfKeys is the number of input keys, so->numberOfKeys gets
* the number of output keys (possibly less, never greater).
* The output keys must be sorted by index attribute. Presently we expect
* (but verify) that the input keys are already so sorted --- this is done
* by match_clauses_to_index() in indxpath.c. Some reordering of the keys
- * within each attribute may be done as a byproduct of the processing here,
- * but no other code depends on that.
+ * within each attribute may be done as a byproduct of the processing here.
+ * That process must leave array scan keys (within an attribute) in the same
+ * order as corresponding entries from the scan's BTArrayKeyInfo array info.
*
* The output keys are marked with flags SK_BT_REQFWD and/or SK_BT_REQBKWD
* if they must be satisfied in order to continue the scan forward or backward
*
* Note: the reason we have to copy the preprocessed scan keys into private
* storage is that we are modifying the array based on comparisons of the
- * key argument values, which could change on a rescan or after moving to
- * new elements of array keys. Therefore we can't overwrite the source data.
+ * key argument values, which could change on a rescan. Therefore we can't
+ * overwrite the source data.
*/
void
_bt_preprocess_keys(IndexScanDesc scan)
ScanKey inkeys;
ScanKey outkeys;
ScanKey cur;
- ScanKey xform[BTMaxStrategyNumber];
+ BTScanKeyPreproc xform[BTMaxStrategyNumber];
bool test_result;
int i,
j;
AttrNumber attno;
+ ScanKey arrayKeyData;
+ int *keyDataMap = NULL;
+ int arrayidx = 0;
+
+ /*
+ * We're called at the start of each primitive index scan during scans
+ * that use equality array keys. We can just reuse the scan keys that
+ * were output at the start of the scan's first primitive index scan.
+ */
+ if (so->numberOfKeys > 0)
+ {
+ /*
+ * An earlier call to _bt_advance_array_keys already set everything
+ * up. Just assert that the scan's existing output scan keys are
+ * consistent with its current array elements.
+ */
+ Assert(so->numArrayKeys);
+ Assert(_bt_verify_keys_with_arraykeys(scan));
+ return;
+ }
/* initialize result variables */
so->qual_ok = true;
if (numberOfKeys < 1)
return; /* done if qual-less scan */
+ /* If any keys are SK_SEARCHARRAY type, set up array-key info */
+ arrayKeyData = _bt_preprocess_array_keys(scan);
+ if (!so->qual_ok)
+ {
+ /* unmatchable array, so give up */
+ return;
+ }
+
/*
- * Read so->arrayKeyData if array keys are present, else scan->keyData
+ * Treat arrayKeyData[] (a partially preprocessed copy of scan->keyData[])
+ * as our input if _bt_preprocess_array_keys just allocated it, else just
+ * use scan->keyData[]
*/
- if (so->arrayKeyData != NULL)
- inkeys = so->arrayKeyData;
+ if (arrayKeyData)
+ {
+ inkeys = arrayKeyData;
+
+ /* Also maintain keyDataMap for remapping so->orderProcs[] later */
+ keyDataMap = MemoryContextAlloc(so->arrayContext,
+ numberOfKeys * sizeof(int));
+ }
else
inkeys = scan->keyData;
/* We can mark the qual as required if it's for first index col */
if (cur->sk_attno == 1)
_bt_mark_scankey_required(outkeys);
+ if (arrayKeyData)
+ {
+ /*
+ * Don't call _bt_preprocess_array_keys_final in this fast path
+ * (we'll miss out on the single value array transformation, but
+ * that's not nearly as important when there's only one scan key)
+ */
+ Assert(cur->sk_flags & SK_SEARCHARRAY);
+ Assert(cur->sk_strategy != BTEqualStrategyNumber ||
+ (so->arrayKeys[0].scan_key == 0 &&
+ OidIsValid(so->orderProcs[0].fn_oid)));
+ }
+
return;
}
* check, and we've rejected any combination of it with a regular
* equality condition; but not with other types of conditions.
*/
- if (xform[BTEqualStrategyNumber - 1])
+ if (xform[BTEqualStrategyNumber - 1].skey)
{
- ScanKey eq = xform[BTEqualStrategyNumber - 1];
+ ScanKey eq = xform[BTEqualStrategyNumber - 1].skey;
+ BTArrayKeyInfo *array = NULL;
+ FmgrInfo *orderproc = NULL;
+
+ if (arrayKeyData && (eq->sk_flags & SK_SEARCHARRAY))
+ {
+ int eq_in_ikey,
+ eq_arrayidx;
+
+ eq_in_ikey = xform[BTEqualStrategyNumber - 1].ikey;
+ eq_arrayidx = xform[BTEqualStrategyNumber - 1].arrayidx;
+ array = &so->arrayKeys[eq_arrayidx - 1];
+ orderproc = so->orderProcs + eq_in_ikey;
+
+ Assert(array->scan_key == eq_in_ikey);
+ Assert(OidIsValid(orderproc->fn_oid));
+ }
for (j = BTMaxStrategyNumber; --j >= 0;)
{
- ScanKey chk = xform[j];
+ ScanKey chk = xform[j].skey;
if (!chk || j == (BTEqualStrategyNumber - 1))
continue;
}
if (_bt_compare_scankey_args(scan, chk, eq, chk,
+ array, orderproc,
&test_result))
{
if (!test_result)
return;
}
/* else discard the redundant non-equality key */
- xform[j] = NULL;
+ Assert(!array || array->num_elems > 0);
+ xform[j].skey = NULL;
+ xform[j].ikey = -1;
}
/* else, cannot determine redundancy, keep both keys */
}
}
/* try to keep only one of <, <= */
- if (xform[BTLessStrategyNumber - 1]
- && xform[BTLessEqualStrategyNumber - 1])
+ if (xform[BTLessStrategyNumber - 1].skey
+ && xform[BTLessEqualStrategyNumber - 1].skey)
{
- ScanKey lt = xform[BTLessStrategyNumber - 1];
- ScanKey le = xform[BTLessEqualStrategyNumber - 1];
+ ScanKey lt = xform[BTLessStrategyNumber - 1].skey;
+ ScanKey le = xform[BTLessEqualStrategyNumber - 1].skey;
- if (_bt_compare_scankey_args(scan, le, lt, le,
+ if (_bt_compare_scankey_args(scan, le, lt, le, NULL, NULL,
&test_result))
{
if (test_result)
- xform[BTLessEqualStrategyNumber - 1] = NULL;
+ xform[BTLessEqualStrategyNumber - 1].skey = NULL;
else
- xform[BTLessStrategyNumber - 1] = NULL;
+ xform[BTLessStrategyNumber - 1].skey = NULL;
}
}
/* try to keep only one of >, >= */
- if (xform[BTGreaterStrategyNumber - 1]
- && xform[BTGreaterEqualStrategyNumber - 1])
+ if (xform[BTGreaterStrategyNumber - 1].skey
+ && xform[BTGreaterEqualStrategyNumber - 1].skey)
{
- ScanKey gt = xform[BTGreaterStrategyNumber - 1];
- ScanKey ge = xform[BTGreaterEqualStrategyNumber - 1];
+ ScanKey gt = xform[BTGreaterStrategyNumber - 1].skey;
+ ScanKey ge = xform[BTGreaterEqualStrategyNumber - 1].skey;
- if (_bt_compare_scankey_args(scan, ge, gt, ge,
+ if (_bt_compare_scankey_args(scan, ge, gt, ge, NULL, NULL,
&test_result))
{
if (test_result)
- xform[BTGreaterEqualStrategyNumber - 1] = NULL;
+ xform[BTGreaterEqualStrategyNumber - 1].skey = NULL;
else
- xform[BTGreaterStrategyNumber - 1] = NULL;
+ xform[BTGreaterStrategyNumber - 1].skey = NULL;
}
}
*/
for (j = BTMaxStrategyNumber; --j >= 0;)
{
- if (xform[j])
+ if (xform[j].skey)
{
ScanKey outkey = &outkeys[new_numberOfKeys++];
- memcpy(outkey, xform[j], sizeof(ScanKeyData));
+ memcpy(outkey, xform[j].skey, sizeof(ScanKeyData));
+ if (arrayKeyData)
+ keyDataMap[new_numberOfKeys - 1] = xform[j].ikey;
if (priorNumberOfEqualCols == attno - 1)
_bt_mark_scankey_required(outkey);
}
ScanKey outkey = &outkeys[new_numberOfKeys++];
memcpy(outkey, cur, sizeof(ScanKeyData));
+ if (arrayKeyData)
+ keyDataMap[new_numberOfKeys - 1] = i;
if (numberOfEqualCols == attno - 1)
_bt_mark_scankey_required(outkey);
continue;
}
- /* have we seen one of these before? */
- if (xform[j] == NULL)
+ /*
+ * Does this input scan key require further processing as an array?
+ */
+ if (cur->sk_strategy == InvalidStrategy)
+ {
+ /* _bt_preprocess_array_keys marked this array key redundant */
+ Assert(arrayKeyData);
+ Assert(cur->sk_flags & SK_SEARCHARRAY);
+ continue;
+ }
+
+ if (cur->sk_strategy == BTEqualStrategyNumber &&
+ (cur->sk_flags & SK_SEARCHARRAY))
{
- /* nope, so remember this scankey */
- xform[j] = cur;
+ /* _bt_preprocess_array_keys kept this array key */
+ Assert(arrayKeyData);
+ arrayidx++;
+ }
+
+ /*
+ * have we seen a scan key for this same attribute and using this same
+ * operator strategy before now?
+ */
+ if (xform[j].skey == NULL)
+ {
+ /* nope, so this scan key wins by default (at least for now) */
+ xform[j].skey = cur;
+ xform[j].ikey = i;
+ xform[j].arrayidx = arrayidx;
}
else
{
- /* yup, keep only the more restrictive key */
- if (_bt_compare_scankey_args(scan, cur, cur, xform[j],
- &test_result))
+ FmgrInfo *orderproc = NULL;
+ BTArrayKeyInfo *array = NULL;
+
+ /*
+ * Seen one of these before, so keep only the more restrictive key
+ * if possible
+ */
+ if (j == (BTEqualStrategyNumber - 1) && arrayKeyData)
+ {
+ /*
+ * Have to set up array keys
+ */
+ if ((cur->sk_flags & SK_SEARCHARRAY))
+ {
+ array = &so->arrayKeys[arrayidx - 1];
+ orderproc = so->orderProcs + i;
+
+ Assert(array->scan_key == i);
+ Assert(OidIsValid(orderproc->fn_oid));
+ }
+ else if ((xform[j].skey->sk_flags & SK_SEARCHARRAY))
+ {
+ array = &so->arrayKeys[xform[j].arrayidx - 1];
+ orderproc = so->orderProcs + xform[j].ikey;
+
+ Assert(array->scan_key == xform[j].ikey);
+ Assert(OidIsValid(orderproc->fn_oid));
+ }
+
+ /*
+ * Both scan keys might have arrays, in which case we'll
+ * arbitrarily pass only one of the arrays. That won't
+ * matter, since _bt_compare_scankey_args is aware that two
+ * SEARCHARRAY scan keys mean that _bt_preprocess_array_keys
+ * failed to eliminate redundant arrays through array merging.
+ * _bt_compare_scankey_args just returns false when it sees
+ * this; it won't even try to examine either array.
+ */
+ }
+
+ if (_bt_compare_scankey_args(scan, cur, cur, xform[j].skey,
+ array, orderproc, &test_result))
{
+ /* Have all we need to determine redundancy */
if (test_result)
- xform[j] = cur;
+ {
+ Assert(!array || array->num_elems > 0);
+
+ /*
+ * New key is more restrictive, and so replaces old key...
+ */
+ if (j != (BTEqualStrategyNumber - 1) ||
+ !(xform[j].skey->sk_flags & SK_SEARCHARRAY))
+ {
+ Assert(!array || array->scan_key == i);
+ xform[j].skey = cur;
+ xform[j].ikey = i;
+ xform[j].arrayidx = arrayidx;
+ }
+ else
+ {
+ /*
+ * ...unless we have to keep the old key because it's
+ * an array that rendered the new key redundant. We
+ * need to make sure that we don't throw away an array
+ * scan key. _bt_compare_scankey_args expects us to
+ * always keep arrays (and discard non-arrays).
+ */
+ Assert(j == (BTEqualStrategyNumber - 1));
+ Assert(xform[j].skey->sk_flags & SK_SEARCHARRAY);
+ Assert(xform[j].ikey == array->scan_key);
+ Assert(!(cur->sk_flags & SK_SEARCHARRAY));
+ }
+ }
else if (j == (BTEqualStrategyNumber - 1))
{
/* key == a && key == b, but a != b */
else
{
/*
- * We can't determine which key is more restrictive. Keep the
- * previous one in xform[j] and push this one directly to the
- * output array.
+ * We can't determine which key is more restrictive. Push
+ * xform[j] directly to the output array, then set xform[j] to
+ * the new scan key.
+ *
+ * Note: We do things this way around so that our arrays are
+ * always in the same order as their corresponding scan keys,
+ * even with incomplete opfamilies. _bt_advance_array_keys
+ * depends on this.
*/
ScanKey outkey = &outkeys[new_numberOfKeys++];
- memcpy(outkey, cur, sizeof(ScanKeyData));
+ memcpy(outkey, xform[j].skey, sizeof(ScanKeyData));
+ if (arrayKeyData)
+ keyDataMap[new_numberOfKeys - 1] = xform[j].ikey;
if (numberOfEqualCols == attno - 1)
_bt_mark_scankey_required(outkey);
+ xform[j].skey = cur;
+ xform[j].ikey = i;
+ xform[j].arrayidx = arrayidx;
}
}
}
so->numberOfKeys = new_numberOfKeys;
+
+ /*
+ * Now that we've built a temporary mapping from so->keyData[] (output
+ * scan keys) to scan->keyData[] (input scan keys), fix array->scan_key
+ * references. Also consolidate the so->orderProcs[] array such that it
+ * can be subscripted using so->keyData[]-wise offsets.
+ */
+ if (arrayKeyData)
+ _bt_preprocess_array_keys_final(scan, keyDataMap);
+
+ /* Could pfree arrayKeyData/keyDataMap now, but not worth the cycles */
+}
+
+#ifdef USE_ASSERT_CHECKING
+/*
+ * Verify that the scan's qual state matches what we expect at the point that
+ * _bt_start_prim_scan is about to start a just-scheduled new primitive scan.
+ *
+ * We enforce a rule against non-required array scan keys: they must start out
+ * with whatever element is the first for the scan's current scan direction.
+ * See _bt_rewind_nonrequired_arrays comments for an explanation.
+ *
+ * Returns false as soon as an inconsistency is found.  That includes finding
+ * more equality-type array scan keys in so->keyData[] than there are entries
+ * in so->arrayKeys[] (as counted by so->numArrayKeys), so that we never
+ * subscript past the end of the array metadata that we're verifying.  When no
+ * problem is found here, the result is whatever
+ * _bt_verify_keys_with_arraykeys reports for the scan.
+ */
+static bool
+_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	int			arrayidx = 0;
+
+	for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
+	{
+		ScanKey		cur = so->keyData + ikey;
+		BTArrayKeyInfo *array = NULL;
+		int			first_elem_dir;
+
+		/* Only equality-type array scan keys have a so->arrayKeys[] entry */
+		if (!(cur->sk_flags & SK_SEARCHARRAY) ||
+			cur->sk_strategy != BTEqualStrategyNumber)
+			continue;
+
+		/* Scan keys and arrays are out of sync -- don't read out of bounds */
+		if (arrayidx >= so->numArrayKeys)
+			return false;
+
+		array = &so->arrayKeys[arrayidx++];
+
+		/* The rule only applies to arrays not required in this direction */
+		if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
+			((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
+			continue;
+
+		/* First element in scan order: lowest offset forward, highest back */
+		if (ScanDirectionIsForward(dir))
+			first_elem_dir = 0;
+		else
+			first_elem_dir = array->num_elems - 1;
+
+		if (array->cur_elem != first_elem_dir)
+			return false;
+	}
+
+	return _bt_verify_keys_with_arraykeys(scan);
+}
+
+/*
+ * Verify that the scan's "so->keyData[]" scan keys are in agreement with
+ * its array key state
+ *
+ * Returns true only when so->qual_ok is set and every equality-type
+ * SK_SEARCHARRAY scan key in so->keyData[] lines up with the next
+ * so->arrayKeys[] entry: the entry's scan_key must be the key's offset, the
+ * array must be non-empty, the key's sk_argument must equal the array's
+ * current element, and keys must appear in non-decreasing sk_attno order.
+ * Every so->arrayKeys[] entry must be accounted for in this way.
+ *
+ * Since the whole point is to detect inconsistent state, subscripts are
+ * range checked before use; out-of-range values make us return false rather
+ * than read outside of so->arrayKeys[] or the array's elem_values[].
+ */
+static bool
+_bt_verify_keys_with_arraykeys(IndexScanDesc scan)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	int			last_sk_attno = InvalidAttrNumber,
+				arrayidx = 0;
+
+	if (!so->qual_ok)
+		return false;
+
+	for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
+	{
+		ScanKey		cur = so->keyData + ikey;
+		BTArrayKeyInfo *array;
+
+		if (cur->sk_strategy != BTEqualStrategyNumber ||
+			!(cur->sk_flags & SK_SEARCHARRAY))
+			continue;
+
+		/* More array scan keys than arrays?  Don't index past the end */
+		if (arrayidx >= so->numArrayKeys)
+			return false;
+
+		array = &so->arrayKeys[arrayidx++];
+		if (array->scan_key != ikey)
+			return false;
+
+		if (array->num_elems <= 0)
+			return false;
+
+		/*
+		 * cur_elem has to be usable as an elem_values[] subscript (this
+		 * assumes elem_values[] holds num_elems entries)
+		 */
+		if (array->cur_elem < 0 || array->cur_elem >= array->num_elems)
+			return false;
+
+		if (cur->sk_argument != array->elem_values[array->cur_elem])
+			return false;
+		if (last_sk_attno > cur->sk_attno)
+			return false;
+		last_sk_attno = cur->sk_attno;
+	}
+
+	/* Every array must have had a matching scan key */
+	if (arrayidx != so->numArrayKeys)
+		return false;
+
+	return true;
}
+#endif
/*
* Compare two scankey values using a specified operator.
* we store the operator result in *result and return true. We return false
* if the comparison could not be made.
*
+ * If either leftarg or rightarg are an array, we'll apply array-specific
+ * rules to determine which array elements are redundant on behalf of caller.
+ * It is up to our caller to save whichever of the two scan keys is the array,
+ * and discard the non-array scan key (the non-array scan key is guaranteed to
+ * be redundant with any complete opfamily). Caller isn't expected to call
+ * here with a pair of array scan keys provided we're dealing with a complete
+ * opfamily (_bt_preprocess_array_keys will merge array keys together to make
+ * sure of that).
+ *
+ * Note: we'll also shrink caller's array as needed to eliminate redundant
+ * array elements. One reason why caller should prefer to discard non-array
+ * scan keys is so that we'll have the opportunity to shrink the array
+ * multiple times, in multiple calls (for each of several other scan keys on
+ * the same index attribute).
+ *
* Note: op always points at the same ScanKey as either leftarg or rightarg.
- * Since we don't scribble on the scankeys, this aliasing should cause no
- * trouble.
+ * Since we don't scribble on the scankeys themselves, this aliasing should
+ * cause no trouble.
*
* Note: this routine needs to be insensitive to any DESC option applied
* to the index column. For example, "x < 4" is a tighter constraint than
static bool
_bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
ScanKey leftarg, ScanKey rightarg,
+ BTArrayKeyInfo *array, FmgrInfo *orderproc,
bool *result)
{
Relation rel = scan->indexRelation;
return true;
}
+ /*
+ * If either leftarg or rightarg are equality-type array scankeys, we need
+ * specialized handling (since by now we know that IS NULL wasn't used)
+ */
+ if (array)
+ {
+ bool leftarray,
+ rightarray;
+
+ leftarray = ((leftarg->sk_flags & SK_SEARCHARRAY) &&
+ leftarg->sk_strategy == BTEqualStrategyNumber);
+ rightarray = ((rightarg->sk_flags & SK_SEARCHARRAY) &&
+ rightarg->sk_strategy == BTEqualStrategyNumber);
+
+ /*
+ * _bt_preprocess_array_keys is responsible for merging together array
+ * scan keys, and will do so whenever the opfamily has the required
+ * cross-type support. If it failed to do that, we handle it just
+ * like the case where we can't make the comparison ourselves.
+ */
+ if (leftarray && rightarray)
+ {
+ /* Can't make the comparison */
+ *result = false; /* suppress compiler warnings */
+ return false;
+ }
+
+ /*
+ * Otherwise we need to determine if either one of leftarg or rightarg
+ * uses an array, then pass this through to a dedicated helper
+ * function.
+ */
+ if (leftarray)
+ return _bt_compare_array_scankey_args(scan, leftarg, rightarg,
+ orderproc, array, result);
+ else if (rightarray)
+ return _bt_compare_array_scankey_args(scan, rightarg, leftarg,
+ orderproc, array, result);
+
+ /* FALL THRU */
+ }
+
/*
* The opfamily we need to worry about is identified by the index column.
*/
*
* Return true if so, false if not. If the tuple fails to pass the qual,
* we also determine whether there's any need to continue the scan beyond
- * this tuple, and set *continuescan accordingly. See comments for
+ * this tuple, and set pstate.continuescan accordingly. See comments for
* _bt_preprocess_keys(), above, about how this is done.
*
* Forward scan callers can pass a high key tuple in the hopes of having
* us set pstate.continuescan to false, and avoiding an unnecessary visit to
* the page to the right.
*
+ * Advances the scan's array keys when necessary for arrayKeys=true callers.
+ * Caller can avoid all array related side-effects when calling just to do a
+ * page continuescan precheck -- pass arrayKeys=false for that. Scans without
+ * any array keys must always pass arrayKeys=false.
+ *
+ * Also stops and starts primitive index scans for arrayKeys=true callers.
+ * Scans with array keys are required to set up page state that helps us with
+ * this. The page's finaltup tuple (the page high key for a forward scan, or
+ * the page's first non-pivot tuple for a backward scan) must be set in
+ * pstate.finaltup ahead of the first call here for the page (or possibly the
+ * first call after an initial continuescan-setting page precheck call). Set
+ * this to NULL for rightmost page (or the leftmost page for backwards scans).
+ *
* scan: index scan descriptor (containing a search-type scankey)
+ * pstate: page level input and output parameters
+ * arrayKeys: should we advance the scan's array keys if necessary?
* tuple: index tuple to test
* tupnatts: number of attributes in tuple (high key may be truncated)
- * dir: direction we are scanning in
- * continuescan: output parameter (will be set correctly in all cases)
- * continuescanPrechecked: indicates that *continuescan flag is known to
- * be true for the last item on the page
- * haveFirstMatch: indicates that we already have at least one match
- * in the current page
*/
bool
-_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
- ScanDirection dir, bool *continuescan,
- bool continuescanPrechecked, bool haveFirstMatch)
+_bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
+ IndexTuple tuple, int tupnatts)
{
- TupleDesc tupdesc;
- BTScanOpaque so;
- int keysz;
- int ikey;
- ScanKey key;
+ TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ ScanDirection dir = pstate->dir;
+ int ikey = 0;
+ bool res;
Assert(BTreeTupleGetNAtts(tuple, scan->indexRelation) == tupnatts);
- *continuescan = true; /* default assumption */
+ /*
+ * Test the tuple against the scan's keys, starting from the first one.
+ * _bt_check_compare advances ikey as it goes, so when the tuple fails
+ * the qual ikey is left at the so->keyData[] offset of the unsatisfied
+ * scan key. That offset is handed to the array key routines called
+ * further down.
+ */
+ res = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc,
+ arrayKeys, pstate->prechecked, pstate->firstmatch,
+ &pstate->continuescan, &ikey);
+
+#ifdef USE_ASSERT_CHECKING
+ if (!arrayKeys && so->numArrayKeys)
+ {
+ /*
+ * This is a continuescan precheck call for a scan with array keys.
+ *
+ * Assert that the scan isn't in danger of becoming confused.
+ */
+ Assert(!so->scanBehind && !pstate->prechecked && !pstate->firstmatch);
+ Assert(!_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc,
+ tupnatts, false, 0, NULL));
+ }
+ if (pstate->prechecked || pstate->firstmatch)
+ {
+ bool dcontinuescan;
+ int dikey = 0;
+
+ /*
+ * Call relied on continuescan/firstmatch prechecks -- assert that we
+ * get the same answer without those optimizations
+ */
+ Assert(res == _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc,
+ false, false, false,
+ &dcontinuescan, &dikey));
+ Assert(pstate->continuescan == dcontinuescan);
+ }
+#endif
+
+ /*
+ * Only one _bt_check_compare call is required in the common case where
+ * there are no equality strategy array scan keys. Otherwise we can only
+ * accept _bt_check_compare's answer unreservedly when it didn't set
+ * pstate.continuescan=false.
+ */
+ if (!arrayKeys || pstate->continuescan)
+ return res;
+
+ /*
+ * _bt_check_compare call set continuescan=false in the presence of
+ * equality type array keys. This could mean that the tuple is just past
+ * the end of matches for the current array keys.
+ *
+ * It's also possible that the scan is still _before_ the _start_ of
+ * tuples matching the current set of array keys. Check for that first.
+ */
+ if (_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc, tupnatts, true,
+ ikey, NULL))
+ {
+ /*
+ * Tuple is still before the start of matches according to the scan's
+ * required array keys (according to _all_ of its required equality
+ * strategy keys, actually).
+ *
+ * _bt_advance_array_keys occasionally sets so->scanBehind to signal
+ * that the scan's current position/tuples might be significantly
+ * behind (multiple pages behind) its current array keys. When this
+ * happens, we need to be prepared to recover by starting a new
+ * primitive index scan here, on our own.
+ */
+ Assert(!so->scanBehind ||
+ so->keyData[ikey].sk_strategy == BTEqualStrategyNumber);
+ if (unlikely(so->scanBehind) && pstate->finaltup &&
+ _bt_tuple_before_array_skeys(scan, dir, pstate->finaltup, tupdesc,
+ BTreeTupleGetNAtts(pstate->finaltup,
+ scan->indexRelation),
+ false, 0, NULL))
+ {
+ /* Cut our losses -- start a new primitive index scan now */
+ pstate->continuescan = false;
+ so->needPrimScan = true;
+ }
+ else
+ {
+ /* Override _bt_check_compare, continue primitive scan */
+ pstate->continuescan = true;
+
+ /*
+ * We will end up here repeatedly given a group of tuples > the
+ * previous array keys and < the now-current keys (for a backwards
+ * scan it's just the same, though the operators swap positions).
+ *
+ * We must avoid allowing this linear search process to scan very
+ * many tuples from well before the start of tuples matching the
+ * current array keys (or from well before the point where we'll
+ * once again have to advance the scan's array keys).
+ *
+ * We keep the overhead under control by speculatively "looking
+ * ahead" to later still-unscanned items from this same leaf page.
+ * We'll only attempt this once the number of tuples that the
+ * linear search process has examined starts to get out of hand.
+ */
+ pstate->rechecks++;
+ if (pstate->rechecks >= LOOK_AHEAD_REQUIRED_RECHECKS)
+ {
+ /* See if we should skip ahead within the current leaf page */
+ _bt_checkkeys_look_ahead(scan, pstate, tupnatts, tupdesc);
+
+ /*
+ * Might have set pstate.skip to a later page offset. When
+ * that happens then _bt_readpage caller will inexpensively
+ * skip ahead to a later tuple from the same page (the one
+ * just after the tuple we successfully "looked ahead" to).
+ */
+ }
+ }
- tupdesc = RelationGetDescr(scan->indexRelation);
- so = (BTScanOpaque) scan->opaque;
- keysz = so->numberOfKeys;
+ /* This indextuple doesn't match the current qual, in any case */
+ return false;
+ }
+
+ /*
+ * Caller's tuple is >= the current set of array keys and other equality
+ * constraint scan keys (or <= if this is a backwards scan). It's now
+ * clear that we _must_ advance any required array keys in lockstep with
+ * the scan.
+ */
+ return _bt_advance_array_keys(scan, pstate, tuple, tupnatts, tupdesc,
+ ikey, true);
+}
+
+/*
+ * Test whether an indextuple satisfies current scan condition.
+ *
+ * Return true if so, false if not. If not, also sets *continuescan to false
+ * when it's also not possible for any later tuples to pass the current qual
+ * (with the scan's current set of array keys, in the current scan direction),
+ * in addition to setting *ikey to the so->keyData[] subscript/offset for the
+ * unsatisfied scan key (needed when caller must consider advancing the scan's
+ * array keys).
+ *
+ * This is a subroutine for _bt_checkkeys. We provisionally assume that
+ * reaching the end of the current set of required keys (in particular the
+ * current required array keys) ends the ongoing (primitive) index scan.
+ * Callers without array keys should just end the scan right away when they
+ * find that continuescan has been set to false here by us. Things are more
+ * complicated for callers with array keys.
+ *
+ * Callers with array keys must first consider advancing the arrays when
+ * continuescan has been set to false here by us. They must then consider if
+ * it really does make sense to end the current (primitive) index scan, in
+ * light of everything that is known at that point. (In general when we set
+ * continuescan=false for these callers it must be treated as provisional.)
+ *
+ * We deal with advancing unsatisfied non-required arrays directly, though.
+ * This is safe, since by definition non-required keys can't end the scan.
+ * This is just how we determine if non-required arrays are just unsatisfied
+ * by the current array key, or if they're truly unsatisfied (that is, if
+ * they're unsatisfied by every possible array key).
+ *
+ * Though we advance non-required array keys on our own, that shouldn't have
+ * any lasting consequences for the scan. By definition, non-required arrays
+ * have no fixed relationship with the scan's progress. (There are delicate
+ * considerations for non-required arrays when the arrays need to be advanced
+ * following our setting continuescan to false, but that doesn't concern us.)
+ *
+ * Pass advancenonrequired=false to avoid all array related side effects.
+ * This allows _bt_advance_array_keys caller to avoid infinite recursion.
+ */
+static bool
+_bt_check_compare(IndexScanDesc scan, ScanDirection dir,
+ IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
+ bool advancenonrequired, bool prechecked, bool firstmatch,
+ bool *continuescan, int *ikey)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
+ *continuescan = true; /* default assumption */
- for (key = so->keyData, ikey = 0; ikey < keysz; key++, ikey++)
+ for (; *ikey < so->numberOfKeys; (*ikey)++)
{
+ ScanKey key = so->keyData + *ikey;
Datum datum;
bool isNull;
- Datum test;
bool requiredSameDir = false,
- requiredOppositeDir = false;
+ requiredOppositeDirOnly = false;
/*
- * Check if the key is required for ordered scan in the same or
- * opposite direction. Save as flag variables for future usage.
+ * Check if the key is required in the current scan direction, in the
+ * opposite scan direction _only_, or in neither direction
*/
if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
requiredSameDir = true;
else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsBackward(dir)) ||
((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsForward(dir)))
- requiredOppositeDir = true;
+ requiredOppositeDirOnly = true;
/*
* If the caller told us the *continuescan flag is known to be true
* Both cases above work except for the row keys, where NULLs could be
* found in the middle of matching values.
*/
- if ((requiredSameDir || (requiredOppositeDir && haveFirstMatch)) &&
- !(key->sk_flags & SK_ROW_HEADER) && continuescanPrechecked)
+ if (prechecked &&
+ (requiredSameDir || (requiredOppositeDirOnly && firstmatch)) &&
+ !(key->sk_flags & SK_ROW_HEADER))
continue;
if (key->sk_attno > tupnatts)
* right could be any possible value. Assume that truncated
* attribute passes the qual.
*/
- Assert(ScanDirectionIsForward(dir));
Assert(BTreeTupleIsPivot(tuple));
continue;
}
* because it's not possible for any future tuples to pass. On
* a forward scan, however, we must keep going, because we may
* have initially positioned to the start of the index.
+ * (_bt_advance_array_keys also relies on this behavior during
+ * forward scans.)
*/
if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
ScanDirectionIsBackward(dir))
* because it's not possible for any future tuples to pass. On
* a backward scan, however, we must keep going, because we
* may have initially positioned to the end of the index.
+ * (_bt_advance_array_keys also relies on this behavior during
+ * backward scans.)
*/
if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
ScanDirectionIsForward(dir))
}
/*
- * Apply the key-checking function. When the key is required for the
- * opposite direction scan, it must be already satisfied as soon as
- * there is already match on the page. Except for the NULLs checking,
- * which have already done above.
+ * Apply the key-checking function, though only if we must.
+ *
+ * When a key is required in the opposite-of-scan direction _only_,
+ * then it must already be satisfied if firstmatch=true indicates that
+ * an earlier tuple from this same page satisfied it earlier on.
*/
- if (!(requiredOppositeDir && haveFirstMatch))
- {
- test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
- datum, key->sk_argument);
- }
- else
- {
- test = true;
- Assert(test == FunctionCall2Coll(&key->sk_func, key->sk_collation,
- datum, key->sk_argument));
- }
-
- if (!DatumGetBool(test))
+ if (!(requiredOppositeDirOnly && firstmatch) &&
+ !DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation,
+ datum, key->sk_argument)))
{
/*
* Tuple fails this qual. If it's a required qual for the current
*continuescan = false;
/*
- * In any case, this indextuple doesn't match the qual.
+ * If this is a non-required equality-type array key, the tuple
+ * needs to be checked against every possible array key. Handle
+ * this by "advancing" the scan key's array to a matching value
+ * (if we're successful then the tuple might match the qual).
+ */
+ else if (advancenonrequired &&
+ key->sk_strategy == BTEqualStrategyNumber &&
+ (key->sk_flags & SK_SEARCHARRAY))
+ return _bt_advance_array_keys(scan, NULL, tuple, tupnatts,
+ tupdesc, *ikey, false);
+
+ /*
+ * This indextuple doesn't match the qual.
*/
return false;
}
* it's not possible for any future tuples in the current scan direction
* to pass the qual.
*
- * This is a subroutine for _bt_checkkeys, which see for more info.
+ * This is a subroutine for _bt_checkkeys/_bt_check_compare.
*/
static bool
_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
* right could be any possible value. Assume that truncated
* attribute passes the qual.
*/
- Assert(ScanDirectionIsForward(dir));
Assert(BTreeTupleIsPivot(tuple));
cmpresult = 0;
if (subkey->sk_flags & SK_ROW_END)
* because it's not possible for any future tuples to pass. On
* a forward scan, however, we must keep going, because we may
* have initially positioned to the start of the index.
+ * (_bt_advance_array_keys also relies on this behavior during
+ * forward scans.)
*/
if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
ScanDirectionIsBackward(dir))
* because it's not possible for any future tuples to pass. On
* a backward scan, however, we must keep going, because we
* may have initially positioned to the end of the index.
+ * (_bt_advance_array_keys also relies on this behavior during
+ * backward scans.)
*/
if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
ScanDirectionIsForward(dir))
return result;
}
+/*
+ * Determine if a scan with array keys should skip over uninteresting tuples.
+ *
+ * This is a subroutine for _bt_checkkeys.  Called when _bt_readpage's linear
+ * search process (started after it finishes reading an initial group of
+ * matching tuples, used to locate the start of the next group of tuples
+ * matching the next set of required array keys) has already scanned an
+ * excessive number of tuples whose key space is "between arrays".
+ *
+ * When we perform look ahead successfully, we'll set pstate.skip, which
+ * instructs _bt_readpage to skip ahead to that tuple next (could be past the
+ * end of the scan's leaf page).  Pages where the optimization is effective
+ * will generally still need to skip several times.  Each call here performs
+ * only a single "look ahead" comparison of a later tuple, whose distance from
+ * the current tuple's offset number is determined by applying heuristics.
+ *
+ * scan: index scan descriptor
+ * pstate: page level state; we read dir, offnum, minoff, maxoff and page,
+ *		and update targetdistance plus either skip or rechecks
+ * tupnatts: attribute count passed along when comparing the "ahead" tuple
+ * tupdesc: tuple descriptor passed along for that same comparison
+ */
+static void
+_bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
+						 int tupnatts, TupleDesc tupdesc)
+{
+	ScanDirection dir = pstate->dir;
+	OffsetNumber aheadoffnum;
+	IndexTuple	ahead;
+
+	/* Avoid looking ahead when comparing the page high key */
+	if (pstate->offnum < pstate->minoff)
+		return;
+
+	/*
+	 * Don't look ahead when there aren't enough tuples remaining on the page
+	 * (in the current scan direction) for it to be worth our while
+	 */
+	if (ScanDirectionIsForward(dir) &&
+		pstate->offnum >= pstate->maxoff - LOOK_AHEAD_DEFAULT_DISTANCE)
+		return;
+	else if (ScanDirectionIsBackward(dir) &&
+			 pstate->offnum <= pstate->minoff + LOOK_AHEAD_DEFAULT_DISTANCE)
+		return;
+
+	/*
+	 * The look ahead distance starts small, and ramps up as each call here
+	 * allows _bt_readpage to skip over more tuples.
+	 *
+	 * Stop ramping up once the distance already spans the whole page.  The
+	 * target offset is clamped to the page's bounds below, so a still-larger
+	 * distance couldn't let us skip any further.  Doubling without any limit
+	 * would also risk overflowing targetdistance (NOTE(review): its declared
+	 * width isn't visible from here -- confirm against BTReadPageState).
+	 */
+	if (!pstate->targetdistance)
+		pstate->targetdistance = LOOK_AHEAD_DEFAULT_DISTANCE;
+	else if (pstate->targetdistance < (int) pstate->maxoff)
+		pstate->targetdistance *= 2;
+
+	/* Don't read past the end (or before the start) of the page, though */
+	if (ScanDirectionIsForward(dir))
+		aheadoffnum = Min((int) pstate->maxoff,
+						  (int) pstate->offnum + pstate->targetdistance);
+	else
+		aheadoffnum = Max((int) pstate->minoff,
+						  (int) pstate->offnum - pstate->targetdistance);
+
+	ahead = (IndexTuple) PageGetItem(pstate->page,
+									 PageGetItemId(pstate->page, aheadoffnum));
+	if (_bt_tuple_before_array_skeys(scan, dir, ahead, tupdesc, tupnatts,
+									 false, 0, NULL))
+	{
+		/*
+		 * Success -- instruct _bt_readpage to skip ahead to very next tuple
+		 * after the one we determined was still before the current array keys
+		 */
+		if (ScanDirectionIsForward(dir))
+			pstate->skip = aheadoffnum + 1;
+		else
+			pstate->skip = aheadoffnum - 1;
+	}
+	else
+	{
+		/*
+		 * Failure -- "ahead" tuple is too far ahead (we were too aggressive).
+		 *
+		 * Reset the number of rechecks, and aggressively reduce the target
+		 * distance (we're much more aggressive here than we were when the
+		 * distance was initially ramped up).
+		 */
+		pstate->rechecks = 0;
+		pstate->targetdistance = Max(pstate->targetdistance / 8, 1);
+	}
+}
+
/*
* _bt_killitems - set LP_DEAD state for items an indexscan caller has
* told us were killed