Skip to content

Commit d77b518

Browse files
Marcin Babijdahlerlend
authored andcommitted
Bug #33343690 ER_IB_MSG_277 and ER_IB_MSG_275 messages are excessively logged
After bug#32634620 the server counts the files that are added to LRU of files against `innodb_open_files` limit. However, this leads to a problem. On a duration of an IO the file is being temporarily removed from the LRU. This allows additional files to be opened within the `innodb_open_files` limit. After the IOs are done, the number of opened files can exceed the `innodb_open_files` limit by 10-20. Assuming this case, there are a lot of files in the LRU that can be easily closed. As a call to `close_file_in_all_LRU` will succeed in this case, the loop in `Fil_shard::mutex_acquire_and_get_space` tries to close only 3 files from the LRU before giving up (and allowing the file to be opened either way). Closing 3 files is not enough to get LRU size below the limit. Also, additionally the `close_file_in_all_LRU` while closing 3rd file will print an abundant amount of info messages to log. This is fixed by moving closer to logic before the WL#8619, we constantly try to close files for 5 seconds before reporting it takes too long and retrying with flushing the tablespaces. Added throttling to messages printed by `Fil_system::close_file_in_all_LRU`. Don't print message `ER_IB_MSG_277` when there is no files in current shard. Changed message to warn about the low `innodb_open_files`. RB#27009
1 parent 08f2ccd commit d77b518

File tree

2 files changed

+62
-5
lines changed

2 files changed

+62
-5
lines changed

storage/innobase/fil/fil0fil.cc

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ The tablespace memory cache */
5252
#include "mtr0log.h"
5353
#include "my_dbug.h"
5454
#include "ut0new.h"
55+
#include "ut0ut.h"
5556

5657
#include "clone0api.h"
5758
#include "os0file.h"
@@ -1570,7 +1571,7 @@ class Fil_system {
15701571
The caller must hold the mutex.
15711572
@param[in] print_info if true, prints information why it close a file
15721573
@return true if success, false if should retry later */
1573-
[[nodiscard]] bool close_file_in_all_LRU(bool print_info);
1574+
bool close_file_in_all_LRU(bool print_info);
15741575

15751576
/** Opens all log files and system tablespace data files in
15761577
all shards. */
@@ -1765,6 +1766,11 @@ class Fil_system {
17651766
/** Free the data structures required for recovery. */
17661767
void free_scanned_files() { m_dirs.clear(); }
17671768

1769+
/** Throttles info messages when closing files in LRU lists. */
1770+
bool should_print_close_by_lru_info() {
1771+
return m_close_by_LRU_info_throttler.apply();
1772+
}
1773+
17681774
#ifdef UNIV_HOTBACKUP
17691775
/** Extends all tablespaces to the size stored in the space header.
17701776
During the mysqlbackup --apply-log phase we extended the spaces
@@ -1818,6 +1824,9 @@ class Fil_system {
18181824
/** n_open is not allowed to exceed this */
18191825
const size_t m_max_n_open;
18201826

1827+
/** Throttles info messages when closing files in LRU lists. */
1828+
ib::Throttler m_close_by_LRU_info_throttler;
1829+
18211830
/** Maximum space id in the existing tables, or assigned during
18221831
the time mysqld has been up; at an InnoDB startup we scan the
18231832
data dictionary and set here the maximum of the space id's of
@@ -2904,8 +2913,10 @@ bool Fil_system::close_file_in_all_LRU(bool print_info) {
29042913
shard->mutex_acquire();
29052914

29062915
if (print_info) {
2907-
ib::info(ER_IB_MSG_277, shard->id(),
2908-
ulonglong{UT_LIST_GET_LEN(shard->m_LRU)});
2916+
const auto len = ulonglong{UT_LIST_GET_LEN(shard->m_LRU)};
2917+
if (len > 0) {
2918+
ib::info(ER_IB_MSG_277, shard->id(), len);
2919+
}
29092920
}
29102921

29112922
bool success = shard->close_files_in_LRU(print_info);
@@ -3046,15 +3057,20 @@ bool Fil_shard::mutex_acquire_and_get_space(space_id_t space_id,
30463057
/* Reserve an open slot for this shard. So that this
30473058
shard's open file succeeds. */
30483059

3049-
while (fil_system->m_max_n_open <= s_n_spaces_in_lru &&
3050-
!fil_system->close_file_in_all_LRU(i > 1)) {
3060+
while (fil_system->m_max_n_open <= s_n_spaces_in_lru) {
3061+
if (!fil_system->close_file_in_all_LRU(false)) {
3062+
std::this_thread::yield();
3063+
}
30513064
if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
30523065
start_time = ut_time_monotonic();
30533066

30543067
ib::warn(ER_IB_MSG_279) << "Trying to close a file for "
30553068
<< start_time - begin_time << " seconds"
30563069
<< ". Configuration only allows for "
30573070
<< fil_system->m_max_n_open << " open files.";
3071+
fil_system->close_file_in_all_LRU(
3072+
fil_system->should_print_close_by_lru_info());
3073+
break;
30583074
}
30593075
}
30603076

storage/innobase/include/ut0ut.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,47 @@ struct Wait_stats {
942942
bool any_waits() const { return (wait_loops != 0); }
943943
};
944944

945+
namespace ib {
946+
947+
/** Allows to monitor an event processing times, allowing to throttle the
948+
processing to one per THROTTLE_DELAY_SEC. */
949+
class Throttler {
950+
public:
951+
Throttler() : m_last_applied_time(0) {}
952+
953+
/** Checks if the item should be processed or ignored to not process them more
954+
frequently than one per THROTTLE_DELAY_SEC. */
955+
bool apply() {
956+
const auto current_time = std::chrono::steady_clock::now();
957+
const auto current_time_in_sec =
958+
std::chrono::duration_cast<std::chrono::seconds>(
959+
current_time.time_since_epoch())
960+
.count();
961+
auto last_apply_time = m_last_applied_time.load();
962+
if (last_apply_time + THROTTLE_DELAY_SEC <
963+
static_cast<uint64_t>(current_time_in_sec)) {
964+
if (m_last_applied_time.compare_exchange_strong(last_apply_time,
965+
current_time_in_sec)) {
966+
return true;
967+
}
968+
/* Any race condition with other threads would mean someone just changed
969+
the `m_last_apply_time` and will print the message. We don't want
970+
to retry the operation again. */
971+
}
972+
return false;
973+
}
974+
975+
private:
976+
/* Time when the last item was not throttled. Stored as number of seconds
977+
since epoch. */
978+
std::atomic<uint64_t> m_last_applied_time;
979+
980+
/** Throttle all items within that amount seconds from the last non throttled
981+
one. */
982+
static constexpr uint64_t THROTTLE_DELAY_SEC = 10;
983+
};
984+
} // namespace ib
985+
945986
#include "ut0ut.ic"
946987

947988
#endif /* !ut0ut_h */

0 commit comments

Comments
 (0)