Skip to content

Commit 62b7dd1

Browse files
committed
Bug#36008973 [HCS]Bulk load from many threads crashes server
- This is a regression caused by fix for Bug#35889669 bulkload doesn't provide innodb stats hence autodb gives incorrect estimates. - In dict_stats_exec_sql() don't rollback the trx that was passed to it in error situations. - Add retry mechanism if statistics update fails because of DB_LOCK_WAIT_TIMEOUT. Change-Id: I6591633cb9a5fd87d2a74972b39287cfadaf31dc
1 parent ca43fae commit 62b7dd1

File tree

2 files changed

+65
-46
lines changed

2 files changed

+65
-46
lines changed

storage/innobase/ddl/ddl0bulk.cc

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ BULK Data Load. Currently treated like DDL */
3434
#include "dict0stats.h"
3535
#include "field_types.h"
3636
#include "mach0data.h"
37+
#include "trx0roll.h"
3738
#include "trx0sys.h"
3839
#include "trx0undo.h"
3940

@@ -303,11 +304,24 @@ dberr_t Loader::end(const row_prebuilt_t *prebuilt, bool is_error) {
303304
dict_stats_is_persistent_enabled(table) ? DICT_STATS_RECALC_PERSISTENT
304305
: DICT_STATS_RECALC_TRANSIENT;
305306

306-
const auto st = dict_stats_update(table, option, prebuilt->trx);
307+
const size_t MAX_RETRY = 5;
308+
for (size_t retry = 0; retry < MAX_RETRY; ++retry) {
309+
auto savept = trx_savept_take(prebuilt->trx);
310+
const auto st = dict_stats_update(table, option, prebuilt->trx);
311+
312+
if (st != DB_SUCCESS) {
313+
LogErr(WARNING_LEVEL, ER_IB_BULK_LOAD_STATS_WARN,
314+
"ddl_bulk::Loader::end()", table->name.m_name,
315+
static_cast<size_t>(st));
316+
if (st == DB_LOCK_WAIT_TIMEOUT) {
317+
const auto ms = std::chrono::milliseconds{10 * (1 + retry)};
318+
std::this_thread::sleep_for(ms);
319+
trx_rollback_to_savepoint(prebuilt->trx, &savept);
320+
continue;
321+
}
322+
}
307323

308-
if (st != DB_SUCCESS) {
309-
LogErr(WARNING_LEVEL, ER_IB_BULK_LOAD_STATS_WARN,
310-
"ddl_bulk::Loader::end()", table->name.m_name, (size_t)st);
324+
break;
311325
}
312326
}
313327

storage/innobase/dict/dict0stats.cc

Lines changed: 47 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
3737
#include <map>
3838
#include <vector>
3939

40+
#include <scope_guard.h>
4041
#include "dict0stats.h"
4142
#include "dyn0buf.h"
4243
#include "ha_prototypes.h"
@@ -171,22 +172,20 @@ static inline bool dict_stats_should_ignore_index(
171172
This function will free the pinfo object.
172173
@param[in,out] pinfo pinfo to pass to que_eval_sql() must already
173174
have any literals bound to it
174-
@param[in] sql SQL string to execute
175-
@param[in,out] trx in case of NULL the function will allocate and
176-
free the trx object. If it is not NULL then it will be rolled back
177-
only in the case of error, but not freed.
175+
@param[in] sql SQL string to execute
176+
@param[in,out] trx in case of NULL the function will allocate and free the
177+
trx object.
178178
@return DB_SUCCESS or error code */
179179
static dberr_t dict_stats_exec_sql(pars_info_t *pinfo, const char *sql,
180180
trx_t *trx) {
181181
dberr_t err;
182-
bool trx_started = false;
182+
const bool trx_started = (trx == nullptr);
183183

184184
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
185185
ut_ad(!dict_sys_mutex_own());
186186

187187
if (trx == nullptr) {
188188
trx = trx_allocate_for_background();
189-
trx_started = true;
190189

191190
if (srv_read_only_mode) {
192191
trx_start_internal_read_only(trx, UT_LOCATION_HERE);
@@ -197,14 +196,25 @@ static dberr_t dict_stats_exec_sql(pars_info_t *pinfo, const char *sql,
197196

198197
err = que_eval_sql(pinfo, sql, trx); /* pinfo is freed here */
199198

199+
DBUG_EXECUTE_IF("dict_stats_exec_sql_lock_timeout", {
200+
static thread_local int execution_counter{0};
201+
202+
execution_counter++;
203+
204+
err = DB_LOCK_WAIT_TIMEOUT;
205+
if (execution_counter > 3) {
206+
DBUG_SET("-d,dict_stats_exec_sql_lock_timeout");
207+
}
208+
});
209+
200210
DBUG_EXECUTE_IF(
201211
"stats_index_error", if (!trx_started) {
202212
err = DB_STATS_DO_NOT_EXIST;
203213
trx->error_state = DB_STATS_DO_NOT_EXIST;
204214
});
205215

206-
if (!trx_started && err == DB_SUCCESS) {
207-
return (DB_SUCCESS);
216+
if (!trx_started) {
217+
return err;
208218
}
209219

210220
if (err == DB_SUCCESS) {
@@ -218,9 +228,7 @@ static dberr_t dict_stats_exec_sql(pars_info_t *pinfo, const char *sql,
218228
ut_a(trx->error_state == DB_SUCCESS);
219229
}
220230

221-
if (trx_started) {
222-
trx_free_for_background(trx);
223-
}
231+
trx_free_for_background(trx);
224232

225233
return (err);
226234
}
@@ -1662,7 +1670,7 @@ static inline void dict_stats_index_set_n_diff(const n_diff_data_t *n_diff_data,
16621670
members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
16631671
stat_n_leaf_pages, based on the specified n_sample_pages.
16641672
@param[in,out] n_sample_pages number of leaf pages to sample. and suggested
1665-
next value to retry if aborted.
1673+
next value to retry if aborted.
16661674
@param[in,out] index index to analyze.
16671675
@return false if aborted */
16681676
static bool dict_stats_analyze_index_low(uint64_t &n_sample_pages,
@@ -2218,6 +2226,30 @@ static dberr_t dict_stats_save(dict_table_t *table_orig,
22182226
pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
22192227
table->stat_sum_of_other_index_sizes);
22202228

2229+
if (local_trx) {
2230+
trx = trx_allocate_for_background();
2231+
2232+
if (srv_read_only_mode) {
2233+
trx_start_internal_read_only(trx, UT_LOCATION_HERE);
2234+
} else {
2235+
trx_start_internal(trx, UT_LOCATION_HERE);
2236+
}
2237+
}
2238+
2239+
auto guard = create_scope_guard([&]() {
2240+
if (local_trx) {
2241+
if (ret == DB_SUCCESS) {
2242+
trx_commit_for_mysql(trx);
2243+
} else {
2244+
trx_rollback_to_savepoint(trx, nullptr);
2245+
}
2246+
trx_free_for_background(trx);
2247+
}
2248+
rw_lock_x_unlock(dict_operation_lock);
2249+
2250+
dict_stats_snapshot_free(table);
2251+
});
2252+
22212253
ret = dict_stats_exec_sql(pinfo,
22222254
"PROCEDURE TABLE_STATS_SAVE () IS\n"
22232255
"BEGIN\n"
@@ -2246,23 +2278,9 @@ static dberr_t dict_stats_save(dict_table_t *table_orig,
22462278
ib::error(ER_IB_MSG_223) << "Cannot save table statistics for table "
22472279
<< table->name << ": " << ut_strerr(ret);
22482280

2249-
rw_lock_x_unlock(dict_operation_lock);
2250-
2251-
dict_stats_snapshot_free(table);
2252-
22532281
return (ret);
22542282
}
22552283

2256-
if (local_trx) {
2257-
trx = trx_allocate_for_background();
2258-
2259-
if (srv_read_only_mode) {
2260-
trx_start_internal_read_only(trx, UT_LOCATION_HERE);
2261-
} else {
2262-
trx_start_internal(trx, UT_LOCATION_HERE);
2263-
}
2264-
}
2265-
22662284
dict_index_t *index;
22672285
index_map_t indexes((ut_strcmp_functor()),
22682286
index_map_t_allocator(mem_key_dict_stats_index_map_t));
@@ -2324,7 +2342,7 @@ static dberr_t dict_stats_save(dict_table_t *table_orig,
23242342
&index->stat_n_sample_sizes[i], stat_description, trx);
23252343

23262344
if (ret != DB_SUCCESS) {
2327-
goto end;
2345+
return ret;
23282346
}
23292347
}
23302348

@@ -2334,7 +2352,7 @@ static dberr_t dict_stats_save(dict_table_t *table_orig,
23342352
"in the index",
23352353
trx);
23362354
if (ret != DB_SUCCESS) {
2337-
goto end;
2355+
return ret;
23382356
}
23392357

23402358
ret = dict_stats_save_index_stat(index, now, "size", index->stat_index_size,
@@ -2343,23 +2361,10 @@ static dberr_t dict_stats_save(dict_table_t *table_orig,
23432361
"in the index",
23442362
trx);
23452363
if (ret != DB_SUCCESS) {
2346-
goto end;
2364+
return ret;
23472365
}
23482366
}
23492367

2350-
if (local_trx) {
2351-
trx_commit_for_mysql(trx);
2352-
}
2353-
2354-
end:
2355-
if (local_trx) {
2356-
trx_free_for_background(trx);
2357-
}
2358-
2359-
rw_lock_x_unlock(dict_operation_lock);
2360-
2361-
dict_stats_snapshot_free(table);
2362-
23632368
return (ret);
23642369
}
23652370

0 commit comments

Comments
 (0)