
Commit dc7c4fb

Erik Froseth authored and dahlerlend committed
Bug#30224582 ASSERTION `M_INDEX_CURSOR.IS_POSITIONED()' FAILED
In some cases, hash join may run the build phase multiple times. This in turn may trigger cases where we ask the build iterator for rows after it has returned EOF. Most iterators can handle this, but the RefIterator was not prepared for it. This patch makes the hash join stop asking for more rows once it has seen EOF.

A different (and maybe more correct) approach would be to handle this in the RefIterator, as iterators generally should be able to handle multiple calls to Read() after EOF. However, that would have forced us to add a flag in the RefIterator that must be evaluated on every call to Read(). We chose not to do this, as the RefIterator is considered rather performance critical.

Change-Id: If231d3390e52803b314e59be0f62b5c73d14af20
1 parent 566133b commit dc7c4fb
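In other words, the fix threads a single guard flag through the hash join iterator. The sketch below is a simplified, hypothetical illustration of that pattern, not the patched class itself: names such as HashJoinSketch, ReadBuildRow() and StoreRowInHashTable() are stand-ins, and the real change is the small hunks in sql/hash_join_iterator.cc and .h shown further down.

// Simplified sketch of the guard-flag pattern described in the commit message.
// All names here are illustrative stand-ins; see the actual hunks below.
class HashJoinSketch {
 public:
  bool Init() {
    // A new build phase can start several times; assume the build input has
    // rows again until it tells us otherwise.
    m_build_iterator_has_more_rows = true;
    return BuildHashTable();
  }

  bool BuildHashTable() {
    // Once the build input has reported EOF, never read from it again. This
    // is what keeps a RefIterator-style build input from being asked for rows
    // past the end.
    if (!m_build_iterator_has_more_rows) {
      m_state = State::END_OF_ROWS;
      return false;
    }
    for (;;) {
      const int res = ReadBuildRow();  // 0 = row, -1 = EOF, 1 = error
      if (res == 1) return true;       // propagate the error
      if (res == -1) {                 // EOF: remember it for the next build phase
        m_build_iterator_has_more_rows = false;
        break;
      }
      StoreRowInHashTable();
    }
    return false;
  }

 private:
  enum class State { READING_ROWS, END_OF_ROWS };
  State m_state{State::READING_ROWS};
  bool m_build_iterator_has_more_rows{true};

  // Hypothetical stand-ins for the real build input and row buffer.
  int ReadBuildRow() { return -1; }
  void StoreRowInHashTable() {}
};

Placing the check here keeps the extra branch out of RefIterator::Read(), which the commit message calls out as performance critical.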

File tree: 4 files changed, +91 -1 lines changed


mysql-test/r/hash_join.result

Lines changed: 47 additions & 0 deletions
@@ -1804,3 +1804,50 @@ WHERE t2.col2 > 0 OR t2.col3 > 0 LIMIT 10;
 col1
 1
 DROP TABLE t1, t2;
+#
+# Bug#30224582 ASSERTION `M_INDEX_CURSOR.IS_POSITIONED()' FAILED
+#
+# Set up a query where the hash join build input consists of a
+# materialized table, where we do an index lookup on the materialized
+# table. The LIMIT is also needed in order to trigger a second build
+# phase in the hash join.
+CREATE TABLE t1 (col1 INTEGER);
+CREATE TABLE t2 (col1 INTEGER);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+EXPLAIN FORMAT=tree SELECT /*+ JOIN_ORDER(table1, t2) */
+*
+FROM
+(
+SELECT
+DISTINCT t1.*
+FROM
+t1
+) AS table1 JOIN t2
+WHERE table1.col1 = 1
+LIMIT 50;
+EXPLAIN
+-> Limit: 50 row(s)
+    -> Inner hash join
+        -> Table scan on t2 (cost=0.35 rows=1)
+        -> Hash
+            -> Index lookup on table1 using <auto_key0> (col1=1)
+                -> Materialize
+                    -> Table scan on <temporary>
+                        -> Temporary table with deduplication
+                            -> Table scan on t1 (cost=0.35 rows=1)
+
+SELECT /*+ JOIN_ORDER(table1, t2) */
+*
+FROM
+(
+SELECT
+DISTINCT t1.*
+FROM
+t1
+) AS table1 JOIN t2
+WHERE table1.col1 = 1
+LIMIT 50;
+col1 col1
+1 1
+DROP TABLE t1, t2;

mysql-test/t/hash_join.test

Lines changed: 29 additions & 0 deletions
@@ -1289,3 +1289,32 @@ let $query = SELECT /*+ JOIN_ORDER(t2, t1) INDEX_MERGE(t2) */ t1.col1
 eval EXPLAIN FORMAT=tree $query;
 eval $query;
 DROP TABLE t1, t2;
+
+
+--echo #
+--echo # Bug#30224582 ASSERTION `M_INDEX_CURSOR.IS_POSITIONED()' FAILED
+--echo #
+--echo # Set up a query where the hash join build input consists of a
+--echo # materialized table, where we do an index lookup on the materialized
+--echo # table. The LIMIT is also needed in order to trigger a second build
+--echo # phase in the hash join.
+CREATE TABLE t1 (col1 INTEGER);
+CREATE TABLE t2 (col1 INTEGER);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+
+let $query = SELECT /*+ JOIN_ORDER(table1, t2) */
+*
+FROM
+(
+SELECT
+DISTINCT t1.*
+FROM
+t1
+) AS table1 JOIN t2
+WHERE table1.col1 = 1
+LIMIT 50;
+
+eval EXPLAIN FORMAT=tree $query;
+eval $query;
+DROP TABLE t1, t2;

sql/hash_join_iterator.cc

Lines changed: 7 additions & 0 deletions
@@ -162,6 +162,7 @@ bool HashJoinIterator::Init() {
     DBUG_ASSERT(thd()->is_error());  // my_error should have been called.
     return true;
   }
+  m_build_iterator_has_more_rows = true;

   // Set up the buffer that is used when
   // a) moving a row between the tables' record buffers, and,
@@ -350,6 +351,11 @@ static bool InitializeChunkFiles(
 }

 bool HashJoinIterator::BuildHashTable() {
+  if (!m_build_iterator_has_more_rows) {
+    m_state = State::END_OF_ROWS;
+    return false;
+  }
+
   if (InitRowBuffer()) {
     return true;
   }
@@ -362,6 +368,7 @@ bool HashJoinIterator::BuildHashTable() {
    }

    if (res == -1) {
+      m_build_iterator_has_more_rows = false;
      if (m_row_buffer.empty()) {
        m_state = State::END_OF_ROWS;
        return false;

sql/hash_join_iterator.h

Lines changed: 8 additions & 1 deletion
@@ -364,8 +364,15 @@ class HashJoinIterator final : public RowIterator {
   // read the first row from the probe input.
   bool m_enable_batch_mode_for_probe_input{false};

-  // Wether we are allowed to spill to disk.
+  // Whether we are allowed to spill to disk.
   bool m_allow_spill_to_disk{true};
+
+  // Whether the build iterator has more rows. This is used to stop the hash
+  // join iterator asking for more rows when we know for sure that the entire
+  // build input is consumed. The variable is only used if m_allow_spill_to_disk
+  // is false, as we have to see if there are more rows in the build input after
+  // the probe input is consumed.
+  bool m_build_iterator_has_more_rows{true};
 };

 #endif  // SQL_HASH_JOIN_ITERATOR_H_
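For contrast, the alternative the commit message rejects would put the EOF bookkeeping inside the RefIterator itself, adding a flag that has to be checked on every Read(). A rough, hypothetical sketch of what that would look like (not code from this patch, and not the real RefIterator API):

// Hypothetical sketch of the rejected approach: a RefIterator-style Read()
// that tolerates further calls after EOF by checking a flag every time.
class RefIteratorSketch {
 public:
  int Read() {
    if (m_seen_eof) return -1;  // extra branch on every single row read
    const int res = ReadFromIndex();
    if (res == -1) m_seen_eof = true;
    return res;
  }

 private:
  bool m_seen_eof{false};
  // Stand-in for the actual index lookup; not the real RefIterator internals.
  int ReadFromIndex() { return -1; }
};

Because the RefIterator sits on the hot row-reading path, the patch instead pays the cost once per build phase in HashJoinIterator::BuildHashTable().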
