Skip to content

Commit c7d0df6

Browse files
author
Yasufumi Kinoshita
committed
Bug#32912868: dict_table_x_lock_indexes() can cause deadlock with concurrent purge operations
Confirmed carefully also other possibility with index latch order and implicit latch order. This bug has the both 2 aspects. [A] Indexes latch order in same table is [secondary --> primary] We should be along with description in sync0type.h. The current dict_table_x_lock_indexes() violates the order. Better to be fixed. [B] Even if dict_table_x_lock_indexes() is atomic operation, still might cause deadlock with "[secondary index page]-->[primary index latch]-->[primary index page]" operation. (if dict_table_x_lock_indexes() caller will access to [secondary index page].) That seems to be allowed order, and included to purge and rollback paths. So, dict_table_x_lock_indexes() should exclude other threads' concurrent accesses. - User threads: already excluded by mysqld MDL - Purge: ## needs to stop during getting latch ## - Rollback after recovery: ## needed to be ended already ## Fixing [B] is enough to fix this bug. But also [A] is better to be fixed for future fail-tolerance. (for 5.7 only) "TRUNCATE TABLE" path also calls dict_table_x_lock_indexes(), and fixed. Change-Id: I0673af125aef2f4cc14d8b86a3816c71c5becc8a
1 parent 7d49622 commit c7d0df6

File tree

10 files changed

+78
-4
lines changed

10 files changed

+78
-4
lines changed

mysql-test/suite/innodb/r/innodb-truncate-debug.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,6 @@ SET DEBUG_SYNC= 'now SIGNAL finish_scan';
112112
connection con1
113113
connection default
114114
drop table t1;
115-
Pattern "InnoDB: Record with space id \d+ belongs to table which is being truncated or tablespace which is missing therefore skipping this undo record." found
115+
Pattern "InnoDB: Record with space id \d+ belongs to table which is being truncated or tablespace which is missing therefore skipping this undo record." not found
116116
# restart server
117117
# restart:

mysql-test/suite/innodb/t/innodb-truncate-debug.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ disconnect con1;
179179

180180
drop table t1;
181181

182+
# Note: Because of fix for bug#32912868,
183+
# truncate table doesn't allow concurrent purge operations.
184+
# So, this test might not work as intended, or not needed.
182185
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
183186
let SEARCH_PATTERN = InnoDB: Record with space id \d+ belongs to table which is being truncated or tablespace which is missing therefore skipping this undo record.;
184187
--source include/search_pattern.inc

storage/innobase/dict/dict0dict.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ ib_warn_row_too_big(const dict_table_t* table);
9292
#include "sync0sync.h"
9393
#include "trx0undo.h"
9494
#include "ut0new.h"
95+
#ifdef UNIV_DEBUG
96+
#include "trx0purge.h"
97+
#endif /* !UNIV_DEBUG */
9598

9699
#include <vector>
97100
#include <algorithm>
@@ -7188,3 +7191,17 @@ uint32_t dict_vcol_base_is_foreign_key(dict_v_col_t *vcol,
71887191
}
71897192
return foreign_col_count;
71907193
}
7194+
7195+
#ifndef UNIV_HOTBACKUP
7196+
#ifdef UNIV_DEBUG
7197+
/** Validate no active background threads to cause purge or rollback
7198+
operations. */
7199+
void
7200+
dict_validate_no_purge_rollback_threads() {
7201+
/* No concurrent background threads to access to the table */
7202+
ut_ad(trx_purge_state() == PURGE_STATE_STOP
7203+
|| trx_purge_state() == PURGE_STATE_DISABLED);
7204+
ut_ad(!trx_rollback_or_clean_is_active);
7205+
}
7206+
#endif /* UNIV_DEBUG */
7207+
#endif /* !UNIV_HOTBACKUP */

storage/innobase/include/dict0dict.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,12 @@ dict_table_analyze_index_lock(
371371
void
372372
dict_table_analyze_index_unlock(
373373
dict_table_t* table);
374+
#ifdef UNIV_DEBUG
375+
/** Validate no active background threads to cause purge or rollback
376+
operations. */
377+
void
378+
dict_validate_no_purge_rollback_threads();
379+
#endif /* UNIV_DEBUG */
374380
#endif /* !UNIV_HOTBACKUP */
375381
/**********************************************************************//**
376382
Adds system columns to a table object. */

storage/innobase/include/dict0dict.ic

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1083,16 +1083,20 @@ dict_table_x_lock_indexes(
10831083
dict_table_t* table) /*!< in: table */
10841084
{
10851085
dict_index_t* index;
1086+
dict_index_t* clustered_index;
10861087

10871088
ut_a(table);
10881089
ut_ad(mutex_own(&dict_sys->mutex));
1090+
ut_d(dict_validate_no_purge_rollback_threads());
10891091

10901092
/* Loop through each index of the table and lock them */
1091-
for (index = dict_table_get_first_index(table);
1093+
clustered_index = dict_table_get_first_index(table);
1094+
for (index = dict_table_get_next_index(clustered_index);
10921095
index != NULL;
10931096
index = dict_table_get_next_index(index)) {
10941097
rw_lock_x_lock(dict_index_get_lock(index));
10951098
}
1099+
rw_lock_x_lock(dict_index_get_lock(clustered_index));
10961100
}
10971101
/*********************************************************************//**
10981102
Returns true if the particular FTS index in the table is still syncing

storage/innobase/include/trx0roll.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,12 @@ trx_roll_savepoints_free(
207207
trx_named_savept_t* savep); /*!< in: free all savepoints > this one;
208208
if this is NULL, free all savepoints
209209
of trx */
210+
/*******************************************************************//**
211+
Waits for rollback after recovery end. */
212+
UNIV_INLINE
213+
void
214+
trx_rollback_or_clean_wait();
215+
210216
/** Rollback node states */
211217
enum roll_node_state {
212218
ROLL_NODE_NONE = 0, /*!< Unknown state */

storage/innobase/include/trx0roll.ic

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,12 @@ trx_roll_check_undo_rec_ordering(
6868
}
6969
#endif /* UNIV_DEBUG */
7070

71+
/*******************************************************************//**
72+
Waits for rollback after recovery end. */
73+
UNIV_INLINE
74+
void
75+
trx_rollback_or_clean_wait() {
76+
while (trx_rollback_or_clean_is_active) {
77+
os_thread_sleep(100000);
78+
}
79+
}

storage/innobase/log/log0log.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2096,10 +2096,10 @@ logs_empty_and_mark_files_at_shutdown(void)
20962096

20972097
ib::info() << "Starting shutdown...";
20982098

2099-
while (srv_fast_shutdown == 0 && trx_rollback_or_clean_is_active) {
2099+
if (srv_fast_shutdown == 0) {
21002100
/* we should wait until rollback after recovery end
21012101
for slow shutdown */
2102-
os_thread_sleep(100000);
2102+
trx_rollback_or_clean_wait();
21032103
}
21042104

21052105
/* Wait until the master thread and all other operations are idle: our

storage/innobase/row/row0quiesce.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Created 2012-02-08 by Sunny Bains.
4242
#include "ibuf0ibuf.h"
4343
#include "srv0start.h"
4444
#include "trx0purge.h"
45+
#include "trx0roll.h"
4546
#include "fsp0sysspace.h"
4647

4748
#include <my_aes.h>
@@ -913,10 +914,23 @@ row_quiesce_set_state(
913914
" FTS auxiliary tables will not be flushed.");
914915
}
915916

917+
/* We should wait until rollback after recovery end,
918+
to lock the table consistently. */
919+
trx_rollback_or_clean_wait();
920+
921+
if (trx_purge_state() != PURGE_STATE_DISABLED) {
922+
/* We should stop purge to lock the table consistently. */
923+
trx_purge_stop();
924+
}
925+
916926
row_mysql_lock_data_dictionary(trx);
917927

918928
dict_table_x_lock_indexes(table);
919929

930+
if (trx_purge_state() != PURGE_STATE_DISABLED) {
931+
trx_purge_run();
932+
}
933+
920934
switch (state) {
921935
case QUIESCE_START:
922936
break;

storage/innobase/row/row0trunc.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ Created 2013-04-12 Sunny Bains
4343
#include "srv0start.h"
4444
#include "row0trunc.h"
4545
#include "os0file.h"
46+
#include "trx0purge.h"
47+
#include "trx0roll.h"
4648
#include <vector>
4749

4850
bool truncate_t::s_fix_up_active = false;
@@ -1235,6 +1237,10 @@ row_truncate_complete(
12351237
TruncateLogger* &logger,
12361238
dberr_t err)
12371239
{
1240+
if (trx_purge_state() != PURGE_STATE_DISABLED) {
1241+
trx_purge_run();
1242+
}
1243+
12381244
bool is_file_per_table = dict_table_is_file_per_table(table);
12391245

12401246
if (table->memcached_sync_count == DICT_TABLE_IN_DDL) {
@@ -1843,6 +1849,15 @@ row_truncate_table_for_mysql(
18431849
trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
18441850
}
18451851

1852+
/* We should wait until rollback after recovery end,
1853+
to lock the table consistently. */
1854+
trx_rollback_or_clean_wait();
1855+
1856+
if (trx_purge_state() != PURGE_STATE_DISABLED) {
1857+
/* We should stop purge to lock the table consistently. */
1858+
trx_purge_stop();
1859+
}
1860+
18461861
/* Step-3: Validate ownership of needed locks (Exclusive lock).
18471862
Ownership will also ensure there is no active SQL queries, INSERT,
18481863
SELECT, .....*/

0 commit comments

Comments
 (0)