
Commit dc7c4fb

Erik Froseth authored and dahlerlend committed
Bug#30224582 ASSERTION `M_INDEX_CURSOR.IS_POSITIONED()' FAILED
In some cases, hash join may run the build phase multiple times. This in turn may trigger cases where we ask the build iterator for rows after it has returned EOF. Most iterators can handle this, but the RefIterator was not prepared for it. This patch makes the hash join stop asking for more rows once it has seen EOF.

A different (and maybe more correct) approach would be to handle this in the RefIterator, as iterators generally should be able to handle multiple calls to Read() after EOF. However, that would have forced us to add a flag in the RefIterator that must be evaluated on every call to Read(). We chose not to do this, as the RefIterator is considered rather performance critical.

Change-Id: If231d3390e52803b314e59be0f62b5c73d14af20
1 parent 566133b commit dc7c4fb
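In other words, the fix threads a single guard flag through the hash join iterator. The sketch below is a simplified, hypothetical illustration of that pattern, not the patched class itself: names such as HashJoinSketch, ReadBuildRow() and StoreRowInHashTable() are stand-ins, and the real change is the small hunks in sql/hash_join_iterator.cc and .h shown further down.

// Simplified sketch of the guard-flag pattern described in the commit message.
// All names here are illustrative stand-ins; see the actual hunks below.
class HashJoinSketch {
 public:
  bool Init() {
    // A new build phase can start several times; assume the build input has
    // rows again until it tells us otherwise.
    m_build_iterator_has_more_rows = true;
    return BuildHashTable();
  }

  bool BuildHashTable() {
    // Once the build input has reported EOF, never read from it again. This
    // is what keeps a RefIterator-style build input from being asked for rows
    // past the end.
    if (!m_build_iterator_has_more_rows) {
      m_state = State::END_OF_ROWS;
      return false;
    }
    for (;;) {
      const int res = ReadBuildRow();  // 0 = row, -1 = EOF, 1 = error
      if (res == 1) return true;       // propagate the error
      if (res == -1) {                 // EOF: remember it for the next build phase
        m_build_iterator_has_more_rows = false;
        break;
      }
      StoreRowInHashTable();
    }
    return false;
  }

 private:
  enum class State { READING_ROWS, END_OF_ROWS };
  State m_state{State::READING_ROWS};
  bool m_build_iterator_has_more_rows{true};

  // Hypothetical stand-ins for the real build input and row buffer.
  int ReadBuildRow() { return -1; }
  void StoreRowInHashTable() {}
};

Placing the check here keeps the extra branch out of RefIterator::Read(), which the commit message calls out as performance critical.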

File tree: 4 files changed, +91 -1 lines changed


mysql-test/r/hash_join.result

Lines changed: 47 additions & 0 deletions
@@ -1804,3 +1804,50 @@ WHERE t2.col2 > 0 OR t2.col3 > 0 LIMIT 10;
 col1
 1
 DROP TABLE t1, t2;
+#
+# Bug#30224582 ASSERTION `M_INDEX_CURSOR.IS_POSITIONED()' FAILED
+#
+# Set up a query where the hash join build input consists of a
+# materialized table, where we do an index lookup on the materialized
+# table. The LIMIT is also needed in order to trigger a second build
+# phase in the hash join.
+CREATE TABLE t1 (col1 INTEGER);
+CREATE TABLE t2 (col1 INTEGER);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+EXPLAIN FORMAT=tree SELECT /*+ JOIN_ORDER(table1, t2) */
+*
+FROM
+(
+SELECT
+DISTINCT t1.*
+FROM
+t1
+) AS table1 JOIN t2
+WHERE table1.col1 = 1
+LIMIT 50;
+EXPLAIN
+-> Limit: 50 row(s)
+    -> Inner hash join
+        -> Table scan on t2 (cost=0.35 rows=1)
+        -> Hash
+            -> Index lookup on table1 using <auto_key0> (col1=1)
+                -> Materialize
+                    -> Table scan on <temporary>
+                        -> Temporary table with deduplication
+                            -> Table scan on t1 (cost=0.35 rows=1)
+
+SELECT /*+ JOIN_ORDER(table1, t2) */
+*
+FROM
+(
+SELECT
+DISTINCT t1.*
+FROM
+t1
+) AS table1 JOIN t2
+WHERE table1.col1 = 1
+LIMIT 50;
+col1 col1
+1 1
+DROP TABLE t1, t2;

mysql-test/t/hash_join.test

Lines changed: 29 additions & 0 deletions
@@ -1289,3 +1289,32 @@ let $query = SELECT /*+ JOIN_ORDER(t2, t1) INDEX_MERGE(t2) */ t1.col1
 eval EXPLAIN FORMAT=tree $query;
 eval $query;
 DROP TABLE t1, t2;
+
+
+--echo #
+--echo # Bug#30224582 ASSERTION `M_INDEX_CURSOR.IS_POSITIONED()' FAILED
+--echo #
+--echo # Set up a query where the hash join build input consists of a
+--echo # materialized table, where we do an index lookup on the materialized
+--echo # table. The LIMIT is also needed in order to trigger a second build
+--echo # phase in the hash join.
+CREATE TABLE t1 (col1 INTEGER);
+CREATE TABLE t2 (col1 INTEGER);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+
+let $query = SELECT /*+ JOIN_ORDER(table1, t2) */
+*
+FROM
+(
+SELECT
+DISTINCT t1.*
+FROM
+t1
+) AS table1 JOIN t2
+WHERE table1.col1 = 1
+LIMIT 50;
+
+eval EXPLAIN FORMAT=tree $query;
+eval $query;
+DROP TABLE t1, t2;

sql/hash_join_iterator.cc

Lines changed: 7 additions & 0 deletions
@@ -162,6 +162,7 @@ bool HashJoinIterator::Init() {
     DBUG_ASSERT(thd()->is_error());  // my_error should have been called.
     return true;
   }
+  m_build_iterator_has_more_rows = true;

   // Set up the buffer that is used when
   // a) moving a row between the tables' record buffers, and,
@@ -350,6 +351,11 @@ static bool InitializeChunkFiles(
 }

 bool HashJoinIterator::BuildHashTable() {
+  if (!m_build_iterator_has_more_rows) {
+    m_state = State::END_OF_ROWS;
+    return false;
+  }
+
   if (InitRowBuffer()) {
     return true;
   }
@@ -362,6 +368,7 @@ bool HashJoinIterator::BuildHashTable() {
    }

    if (res == -1) {
+      m_build_iterator_has_more_rows = false;
      if (m_row_buffer.empty()) {
        m_state = State::END_OF_ROWS;
        return false;

sql/hash_join_iterator.h

Lines changed: 8 additions & 1 deletion
@@ -364,8 +364,15 @@ class HashJoinIterator final : public RowIterator {
   // read the first row from the probe input.
   bool m_enable_batch_mode_for_probe_input{false};

-  // Wether we are allowed to spill to disk.
+  // Whether we are allowed to spill to disk.
   bool m_allow_spill_to_disk{true};
+
+  // Whether the build iterator has more rows. This is used to stop the hash
+  // join iterator asking for more rows when we know for sure that the entire
+  // build input is consumed. The variable is only used if m_allow_spill_to_disk
+  // is false, as we have to see if there are more rows in the build input after
+  // the probe input is consumed.
+  bool m_build_iterator_has_more_rows{true};
 };

 #endif  // SQL_HASH_JOIN_ITERATOR_H_
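For contrast, the alternative the commit message rejects would put the EOF bookkeeping inside the RefIterator itself, adding a flag that has to be checked on every Read(). A rough, hypothetical sketch of what that would look like (not code from this patch, and not the real RefIterator API):

// Hypothetical sketch of the rejected approach: a RefIterator-style Read()
// that tolerates further calls after EOF by checking a flag every time.
class RefIteratorSketch {
 public:
  int Read() {
    if (m_seen_eof) return -1;  // extra branch on every single row read
    const int res = ReadFromIndex();
    if (res == -1) m_seen_eof = true;
    return res;
  }

 private:
  bool m_seen_eof{false};
  // Stand-in for the actual index lookup; not the real RefIterator internals.
  int ReadFromIndex() { return -1; }
};

Because the RefIterator sits on the hot row-reading path, the patch instead pays the cost once per build phase in HashJoinIterator::BuildHashTable().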
