Skip to content

Commit 89c42b7

Browse files
nacarvalho authored and bjornmu committed
BUG#25055190: FLOW-CONTROL APPLIER QUEUE SIZE INCORRECT IN JOINS ON BUSY GROUPS
When a new member joins a group that is busy, one of the statistics used by flow control - the applier queue size - may be incorrectly calculated. This comes from the fact that the queue size is incremented once the transaction is queued into the relay log, but it is only decremented on execution if the member is considered ONLINE. If the member is in RECOVERING status, the queue size is not decremented. To fix the above issue, the applier queue counters are only incremented and decremented when the member status is ONLINE. The certifier queue counter is always sent; to prevent the throttling from reaching too small values when one or more members are in RECOVERING status, we cap the throttling at 5% of the queue thresholds specified by the DBA in the group_replication_flow_control_certifier_threshold and group_replication_flow_control_applier_threshold options. A second issue was observed on flow control disable, which was not enforcing its immediate disable. Now, when flow control is disabled, it resets its used quota, which ensures throttling is disabled immediately. (cherry picked from commit d78bb00532032355fc29af70012422d6ede9e992)
1 parent 2b9dd4d commit 89c42b7

File tree

3 files changed

+23
-11
lines changed

3 files changed

+23
-11
lines changed

rapid/plugin/group_replication/src/certifier.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,8 +1604,11 @@ void Certifier::update_certified_transaction_count(bool result)
16041604
else
16051605
negative_cert++;
16061606

1607-
applier_module->get_pipeline_stats_member_collector()
1608-
->increment_transactions_certified();
1607+
if (local_member_info->get_recovery_status() == Group_member_info::MEMBER_ONLINE)
1608+
{
1609+
applier_module->get_pipeline_stats_member_collector()
1610+
->increment_transactions_certified();
1611+
}
16091612
}
16101613

16111614
ulonglong Certifier::get_positive_certified()

rapid/plugin/group_replication/src/handlers/applier_handler.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ int Applier_handler::handle_event(Pipeline_event *event,Continuation *cont)
157157
{
158158
error= channel_interface.queue_packet((const char*)p->payload, p->len);
159159

160-
if (event->get_event_type() == binary_log::GTID_LOG_EVENT)
160+
if (event->get_event_type() == binary_log::GTID_LOG_EVENT &&
161+
local_member_info->get_recovery_status() == Group_member_info::MEMBER_ONLINE)
161162
{
162163
applier_module->get_pipeline_stats_member_collector()
163164
->increment_transactions_waiting_apply();

rapid/plugin/group_replication/src/pipeline_stats.cc

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,14 +205,8 @@ Pipeline_stats_member_message::decode_payload(const unsigned char *buffer,
205205
}
206206

207207

208-
/*
209-
m_transactions_waiting_apply is initialized with -1 to take
210-
View_change_log_event transaction into account, that despite
211-
being queued on applier channel is applied through recovery
212-
channel.
213-
*/
214208
Pipeline_stats_member_collector::Pipeline_stats_member_collector()
215-
: m_transactions_waiting_apply(-1), m_transactions_certified(0),
209+
: m_transactions_waiting_apply(0), m_transactions_certified(0),
216210
m_transactions_applied(0), m_transactions_local(0)
217211
{}
218212

@@ -455,7 +449,7 @@ Flow_control_module::flow_control_step()
455449
if (holds > 0)
456450
{
457451
uint num_writing_members= 0;
458-
int64 min_certifier_capacity= MAXTPS, min_applier_capacity= MAXTPS;
452+
int64 min_certifier_capacity= MAXTPS, min_applier_capacity= MAXTPS, safe_capacity= MAXTPS;
459453

460454
Flow_control_module_info::iterator it= m_info.begin();
461455
while (it != m_info.end())
@@ -471,15 +465,23 @@ Flow_control_module::flow_control_step()
471465
else
472466
{
473467
if (flow_control_certifier_threshold_var > 0
468+
&& it->second.get_delta_transactions_certified() > 0
474469
&& it->second.get_transactions_waiting_certification() - flow_control_certifier_threshold_var > 0
475470
&& min_certifier_capacity > it->second.get_delta_transactions_certified())
476471
min_certifier_capacity= it->second.get_delta_transactions_certified();
477472

473+
if (it->second.get_delta_transactions_certified() > 0)
474+
safe_capacity= std::min(safe_capacity, it->second.get_delta_transactions_certified());
475+
478476
if (flow_control_applier_threshold_var > 0
477+
&& it->second.get_delta_transactions_applied() > 0
479478
&& it->second.get_transactions_waiting_apply() - flow_control_applier_threshold_var > 0
480479
&& min_applier_capacity > it->second.get_delta_transactions_applied())
481480
min_applier_capacity= it->second.get_delta_transactions_applied();
482481

482+
if (it->second.get_delta_transactions_applied() > 0)
483+
safe_capacity= std::min(safe_capacity, it->second.get_delta_transactions_applied());
484+
483485
if (it->second.get_delta_transactions_local() > 0)
484486
num_writing_members++;
485487

@@ -492,6 +494,10 @@ Flow_control_module::flow_control_step()
492494
int64 min_capacity= (min_certifier_capacity > 0 && min_certifier_capacity < min_applier_capacity)
493495
? min_certifier_capacity : min_applier_capacity;
494496

497+
// Minimum capacity will never be less than lim_throttle.
498+
int64 lim_throttle= 0.05 * std::min(flow_control_certifier_threshold_var,
499+
flow_control_applier_threshold_var);
500+
min_capacity= std::max(std::min(min_capacity, safe_capacity), lim_throttle);
495501
quota_size= (min_capacity * HOLD_FACTOR) / num_writing_members - extra_quota;
496502
my_atomic_store64(&m_quota_size, quota_size > 1 ? quota_size : 1);
497503
}
@@ -510,6 +516,8 @@ Flow_control_module::flow_control_step()
510516
}
511517

512518
case FCM_DISABLED:
519+
my_atomic_store64(&m_quota_size, 0);
520+
my_atomic_store64(&m_quota_used, 0);
513521
break;
514522

515523
default:

0 commit comments

Comments
 (0)