
Commit 2546fe1

will62794 authored and Evergreen Agent committed

SERVER-47949 Don't fetch or install a newer config via heartbeat while in drain mode

1 parent 276ac45 commit 2546fe1

File tree: 5 files changed, +239 −12 lines changed

src/mongo/db/repl/optime.h

Lines changed: 1 addition & 1 deletion

```diff
@@ -56,7 +56,7 @@ class OpTime {
     static const char kTermFieldName[];
 
     // The term of an OpTime generated by old protocol version.
-    static const long long kUninitializedTerm = -1;
+    static constexpr long long kUninitializedTerm = -1;
 
     // The initial term after the first time upgrading from protocol version 0.
     //
```
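The commit does not explain this one-line change, but a plausible reading: a `static const` integral member needs an out-of-class definition once it is ODR-used (for instance, bound to a `const&` parameter, which is how assertion macros in the new tests consume `OpTime::kUninitializedTerm`), whereas a `constexpr` static data member is implicitly `inline` since C++17 and needs no separate definition. A minimal sketch of the distinction, with illustrative names, not MongoDB code:

```cpp
// Sketch: why constexpr avoids link errors for in-class constants.
struct OpTimeLike {
    // With 'static const long long' and no out-of-class definition, ODR-using
    // this member can fail at link time ("undefined reference"). 'constexpr'
    // makes it implicitly inline in C++17, so no separate definition is needed.
    static constexpr long long kUninitializedTerm = -1;
};

// Takes its arguments by const reference, which ODR-uses them -- the same
// shape as many test assertion macros.
template <typename T>
bool equal(const T& a, const T& b) {
    return a == b;
}

int main() {
    return equal(OpTimeLike::kUninitializedTerm, -1LL) ? 0 : 1;
}
```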

src/mongo/db/repl/replication_coordinator_impl.cpp

Lines changed: 25 additions & 8 deletions

```diff
@@ -3337,6 +3337,14 @@ Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx,
     }
     auto topCoordTerm = _topCoord->getTerm();
 
+    if (!force) {
+        // For the safety of reconfig: since we must commit a config in our own term before
+        // executing a reconfig, we should never have a config in an older term. If the current
+        // config was installed via a force reconfig, we aren't concerned about this safety
+        // guarantee.
+        invariant(_rsConfig.getConfigTerm() == OpTime::kUninitializedTerm ||
+                  _rsConfig.getConfigTerm() == topCoordTerm);
+    }
+
     auto configWriteConcern = _getConfigReplicationWriteConcern();
     // Construct a fake OpTime that can be accepted but isn't used.
     OpTime fakeOpTime(Timestamp(1, 1), topCoordTerm);
@@ -5157,18 +5165,27 @@ Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgs
         }
     } else if (result.isOK() &&
                response->getConfigVersionAndTerm() < args.getConfigVersionAndTerm()) {
+        logv2::DynamicAttributes attr;
+        attr.add("configTerm", args.getConfigTerm());
+        attr.add("configVersion", args.getConfigVersion());
+        attr.add("senderHost", senderHost);
+
+        // If we are currently in drain mode, we won't allow installing newer configs, so we don't
+        // schedule a heartbeat to fetch one. We do allow force reconfigs to proceed even if we are
+        // in drain mode.
+        if (_memberState.primary() && !_readWriteAbility->canAcceptNonLocalWrites(lk) &&
+            args.getConfigTerm() != OpTime::kUninitializedTerm) {
+            LOGV2(4794901,
+                  "Not scheduling a heartbeat to fetch a newer config since we are in PRIMARY "
+                  "state but cannot accept writes yet.",
+                  attr);
+        }
         // Schedule a heartbeat to the sender to fetch the new config.
         // Only send this if the sender's config is newer.
         // We cannot cancel the enqueued heartbeat, but either this one or the enqueued heartbeat
         // will trigger reconfig, which cancels and reschedules all heartbeats.
-        if (args.hasSender()) {
-            LOGV2(21401,
-                  "Scheduling heartbeat to fetch a newer config with term {configTerm} and "
-                  "version {configVersion} from member: {senderHost}",
-                  "Scheduling heartbeat to fetch a newer config",
-                  "configTerm"_attr = args.getConfigTerm(),
-                  "configVersion"_attr = args.getConfigVersion(),
-                  "senderHost"_attr = senderHost);
+        else if (args.hasSender()) {
+            LOGV2(21401, "Scheduling heartbeat to fetch a newer config", attr);
             int senderIndex = _rsConfig.findMemberIndexByHostAndPort(senderHost);
             _scheduleHeartbeatToTarget_inlock(senderHost, senderIndex, now);
         }
```
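For readers skimming the hunk above: the new guard encodes "in drain mode" as "member state is PRIMARY but the node cannot yet accept non-local writes", and exempts force configs, which carry an uninitialized config term. A standalone, runnable restatement of that predicate (the helper names are illustrative, not MongoDB APIs):

```cpp
constexpr long long kUninitializedTerm = -1;

// A node is in drain mode when it has won an election (state PRIMARY) but is
// still applying its oplog backlog, so it cannot accept non-local writes yet.
bool isInDrainMode(bool statePrimary, bool canAcceptNonLocalWrites) {
    return statePrimary && !canAcceptNonLocalWrites;
}

// Skip fetching the sender's newer config during drain mode, unless the
// config came from a force reconfig (identified by an uninitialized term).
bool shouldSkipConfigFetch(bool statePrimary,
                           bool canAcceptNonLocalWrites,
                           long long senderConfigTerm) {
    return isInDrainMode(statePrimary, canAcceptNonLocalWrites) &&
           senderConfigTerm != kUninitializedTerm;
}

int main() {
    // During drain mode, a normal config (term != -1) is skipped,
    // but a force config (term == -1) is not.
    bool skipNormal = shouldSkipConfigFetch(true, false, /*term=*/5);
    bool skipForce = shouldSkipConfigFetch(true, false, kUninitializedTerm);
    return (skipNormal && !skipForce) ? 0 : 1;
}
```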

src/mongo/db/repl/replication_coordinator_impl.h

Lines changed: 1 addition & 1 deletion

```diff
@@ -1162,7 +1162,7 @@ class ReplicationCoordinatorImpl : public ReplicationCoordinator {
     /**
      * Schedules a replica set config change.
      */
-    void _scheduleHeartbeatReconfig_inlock(const ReplSetConfig& newConfig);
+    void _scheduleHeartbeatReconfig(WithLock lk, const ReplSetConfig& newConfig);
 
     /**
      * Method to write a configuration transmitted via heartbeat message to stable storage.
```
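This rename moves the "caller must hold the mutex" contract from the `_inlock` naming convention into the type system; MongoDB's `WithLock` lives in `mongo/util/concurrency/with_lock.h`. A simplified sketch of the idiom, assuming standard library lock types:

```cpp
#include <mutex>

// Simplified WithLock (the real one does more checking): constructible only
// from a live lock object, so any function that takes a WithLock parameter
// can only be called while the mutex is held.
class WithLock {
public:
    WithLock(const std::lock_guard<std::mutex>&) {}   // implicit on purpose
    WithLock(const std::unique_lock<std::mutex>&) {}  // implicit on purpose
};

class Coordinator {
public:
    void onHeartbeat(int newConfigVersion) {
        std::unique_lock<std::mutex> lk(_mutex);
        _scheduleReconfig(lk, newConfigVersion);  // lk converts to WithLock
    }

private:
    // The WithLock parameter documents -- and enforces at the call site --
    // that _mutex is held, replacing the old "_inlock" suffix convention.
    void _scheduleReconfig(WithLock, int /*newConfigVersion*/) {}

    std::mutex _mutex;
};

int main() {
    Coordinator c;
    c.onHeartbeat(3);
}
```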

src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp

Lines changed: 14 additions & 2 deletions

```diff
@@ -356,7 +356,7 @@ stdx::unique_lock<Latch> ReplicationCoordinatorImpl::_handleHeartbeatResponseAct
             break;
         case HeartbeatResponseAction::Reconfig:
             invariant(responseStatus.isOK());
-            _scheduleHeartbeatReconfig_inlock(responseStatus.getValue().getConfig());
+            _scheduleHeartbeatReconfig(lock, responseStatus.getValue().getConfig());
             break;
         case HeartbeatResponseAction::StepDownSelf:
             invariant(action.getPrimaryConfigIndex() == _selfIndex);
@@ -530,7 +530,8 @@ bool ReplicationCoordinatorImpl::_shouldStepDownOnReconfig(WithLock,
         !(myIndex.isOK() && newConfig.getMemberAt(myIndex.getValue()).isElectable());
 }
 
-void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig_inlock(const ReplSetConfig& newConfig) {
+void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(WithLock lk,
+                                                            const ReplSetConfig& newConfig) {
     if (_inShutdown) {
         return;
     }
@@ -566,6 +567,17 @@ void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig_inlock(const ReplSet
                     "Aborting reconfiguration request",
                     "_rsConfigState"_attr = int(_rsConfigState));
     }
+
+    // Allow force reconfigs to proceed even if we are not a writable primary yet.
+    if (_memberState.primary() && !_readWriteAbility->canAcceptNonLocalWrites(lk) &&
+        newConfig.getConfigTerm() != OpTime::kUninitializedTerm) {
+        LOGV2_FOR_HEARTBEATS(
+            4794900,
+            1,
+            "Not scheduling a heartbeat reconfig since we are in PRIMARY state but "
+            "cannot accept writes yet.");
+        return;
+    }
     _setConfigState_inlock(kConfigHBReconfiguring);
     invariant(!_rsConfig.isInitialized() ||
               _rsConfig.getConfigVersionAndTerm() < newConfig.getConfigVersionAndTerm());
```
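One detail worth noting in the hunk above (an observation, not commit text): the drain-mode check runs before `_setConfigState_inlock(kConfigHBReconfiguring)`, so the early return leaves the config state machine untouched and a later heartbeat can retry the reconfig once the node becomes a writable primary. A toy model of that ordering, with illustrative names:

```cpp
// Toy model of the guard ordering above: the drain-mode early return happens
// before any config-state transition, so a skipped reconfig leaves the state
// machine ready for a later retry.
enum class ConfigState { kConfigSteady, kConfigHBReconfiguring };

struct Node {
    ConfigState configState = ConfigState::kConfigSteady;
    bool inDrainMode = false;
};

// Returns true if the reconfig was scheduled.
bool tryScheduleHeartbeatReconfig(Node& node, bool isForceConfig) {
    if (node.inDrainMode && !isForceConfig) {
        return false;  // early return: configState is untouched
    }
    node.configState = ConfigState::kConfigHBReconfiguring;
    return true;
}

int main() {
    Node node;
    node.inDrainMode = true;
    bool scheduled = tryScheduleHeartbeatReconfig(node, /*isForceConfig=*/false);
    // Not scheduled, and still kConfigSteady: a later heartbeat can retry
    // after drain mode completes.
    return (!scheduled && node.configState == ConfigState::kConfigSteady) ? 0 : 1;
}
```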

src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp

Lines changed: 198 additions & 0 deletions

```diff
@@ -2037,6 +2037,204 @@ TEST_F(ReplCoordReconfigTest, ForceReconfigShouldThrowIfArbiterNodesHaveNewlyAdd
         getReplCoord()->processReplSetReconfig(opCtx.get(), args, &result));
 }
 
+TEST_F(ReplCoordTest, StepUpReconfigConcurrentWithHeartbeatReconfig) {
+    auto severityGuard = unittest::MinimumLoggedSeverityGuard{logv2::LogComponent::kReplication,
+                                                              logv2::LogSeverity::Debug(2)};
+    assertStartSuccess(BSON("_id"
+                            << "mySet"
+                            << "version" << 2 << "term" << 0 << "members"
+                            << BSON_ARRAY(BSON("_id" << 1 << "host"
+                                               << "node1:12345")
+                                          << BSON("_id" << 2 << "host"
+                                                        << "node2:12345"))),
+                       HostAndPort("node1", 12345));
+    ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+    ASSERT_EQUALS(getReplCoord()->getTerm(), 0);
+    replCoordSetMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0), Date_t() + Seconds(100));
+    replCoordSetMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0), Date_t() + Seconds(100));
+
+    // Win election but don't exit drain mode.
+    auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest();
+    const auto opCtx = makeOperationContext();
+    simulateSuccessfulV1ElectionWithoutExitingDrainMode(electionTimeoutWhen, opCtx.get());
+
+    // Receive a heartbeat that should NOT schedule a new heartbeat to fetch a newer config.
+    ReplSetHeartbeatArgsV1 hbArgs;
+    auto rsConfig = getReplCoord()->getConfig();
+    hbArgs.setConfigVersion(3);  // simulate a newer config version.
+    hbArgs.setConfigTerm(rsConfig.getConfigTerm());
+    hbArgs.setSetName(rsConfig.getReplSetName());
+    hbArgs.setSenderHost(HostAndPort("node2", 12345));
+    hbArgs.setSenderId(2);
+    hbArgs.setTerm(0);
+    ASSERT(hbArgs.isInitialized());
+
+    ReplSetHeartbeatResponse response;
+    ASSERT_OK(getReplCoord()->processHeartbeatV1(hbArgs, &response));
+
+    // No requests should have been scheduled.
+    getNet()->enterNetwork();
+    ASSERT_FALSE(getNet()->hasReadyRequests());
+    getNet()->exitNetwork();
+
+    // Receive a heartbeat that schedules a new heartbeat to fetch a newer config. We simulate a
+    // newer config version and an uninitialized term, so that a heartbeat will be scheduled to
+    // fetch a new config. When we mock the heartbeat response below, we will respond with a
+    // non-force config, which is to test the case where the sending node installed a non force
+    // config after we scheduled a heartbeat to it to fetch a force config. For safety, the
+    // important aspect is that we don't accept/install configs during drain mode, even if we try
+    // to fetch them.
+    hbArgs.setConfigVersion(3);
+    hbArgs.setConfigTerm(OpTime::kUninitializedTerm);
+    hbArgs.setSetName(rsConfig.getReplSetName());
+    hbArgs.setSenderHost(HostAndPort("node2", 12345));
+    hbArgs.setSenderId(2);
+    hbArgs.setTerm(0);
+    ASSERT(hbArgs.isInitialized());
+
+    ASSERT_OK(getReplCoord()->processHeartbeatV1(hbArgs, &response));
+
+    // Schedule a response with a newer config.
+    auto newerConfigVersion = 3;
+    auto newerConfig = BSON("_id"
+                            << "mySet"
+                            << "version" << newerConfigVersion << "term" << 0 << "members"
+                            << BSON_ARRAY(BSON("_id" << 1 << "host"
+                                               << "node1:12345")
+                                          << BSON("_id" << 2 << "host"
+                                                        << "node2:12345")));
+    auto net = getNet();
+    net->enterNetwork();
+    auto noi = net->getNextReadyRequest();
+    auto& request = noi->getRequest();
+
+    ReplSetHeartbeatArgsV1 args;
+    ASSERT_OK(args.initialize(request.cmdObj));
+
+    startCapturingLogMessages();
+    OpTime lastApplied(Timestamp(100, 1), 0);
+    ReplSetHeartbeatResponse hbResp;
+    ASSERT_OK(rsConfig.initialize(newerConfig));
+    hbResp.setConfig(rsConfig);
+    hbResp.setSetName(rsConfig.getReplSetName());
+    hbResp.setState(MemberState::RS_SECONDARY);
+    hbResp.setConfigVersion(rsConfig.getConfigVersion());
+    hbResp.setConfigTerm(rsConfig.getConfigTerm());
+    hbResp.setAppliedOpTimeAndWallTime({lastApplied, Date_t() + Seconds(lastApplied.getSecs())});
+    hbResp.setDurableOpTimeAndWallTime({lastApplied, Date_t() + Seconds(lastApplied.getSecs())});
+    net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON()));
+    net->runReadyNetworkOperations();
+    net->exitNetwork();
+    stopCapturingLogMessages();
+
+    // Make sure the heartbeat reconfig has not been scheduled.
+    ASSERT_EQUALS(1, countTextFormatLogLinesContaining("Not scheduling a heartbeat reconfig"));
+
+    // Let drain mode complete.
+    signalDrainComplete(opCtx.get());
+
+    // We should have moved to a new term in the election, and our config should have the same
+    // term.
+    ASSERT_EQUALS(getReplCoord()->getTerm(), 1);
+    ASSERT_EQUALS(getReplCoord()->getConfig().getConfigTerm(), 1);
+}
+
+TEST_F(ReplCoordTest, StepUpReconfigConcurrentWithForceHeartbeatReconfig) {
+    auto severityGuard = unittest::MinimumLoggedSeverityGuard{logv2::LogComponent::kReplication,
+                                                              logv2::LogSeverity::Debug(2)};
+    assertStartSuccess(BSON("_id"
+                            << "mySet"
+                            << "version" << 2 << "term" << 0 << "members"
+                            << BSON_ARRAY(BSON("_id" << 1 << "host"
+                                               << "node1:12345")
+                                          << BSON("_id" << 2 << "host"
+                                                        << "node2:12345"))),
+                       HostAndPort("node1", 12345));
+    ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+    ASSERT_EQUALS(getReplCoord()->getTerm(), 0);
+    replCoordSetMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0), Date_t() + Seconds(100));
+    replCoordSetMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0), Date_t() + Seconds(100));
+
+    // Win election but don't exit drain mode.
+    auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest();
+    const auto opCtx = makeOperationContext();
+    simulateSuccessfulV1ElectionWithoutExitingDrainMode(electionTimeoutWhen, opCtx.get());
+
+    // Receive a heartbeat that schedules a new heartbeat to fetch a newer config.
+    ReplSetHeartbeatArgsV1 hbArgs;
+    auto rsConfig = getReplCoord()->getConfig();
+    hbArgs.setConfigVersion(3);                        // simulate a newer config version.
+    hbArgs.setConfigTerm(OpTime::kUninitializedTerm);  // force config.
+    hbArgs.setSetName(rsConfig.getReplSetName());
+    hbArgs.setSenderHost(HostAndPort("node2", 12345));
+    hbArgs.setSenderId(2);
+    hbArgs.setTerm(0);
+    ASSERT(hbArgs.isInitialized());
+
+    ReplSetHeartbeatResponse response;
+    ASSERT_OK(getReplCoord()->processHeartbeatV1(hbArgs, &response));
+
+    // Schedule a response with a newer config.
+    auto newerConfigVersion = 3;
+    auto newerConfig =
+        BSON("_id"
+             << "mySet"
+             << "version" << newerConfigVersion << "term" << OpTime::kUninitializedTerm << "members"
+             << BSON_ARRAY(BSON("_id" << 1 << "host"
+                                << "node1:12345")
+                           << BSON("_id" << 2 << "host"
+                                         << "node2:12345")));
+    auto net = getNet();
+    net->enterNetwork();
+    auto noi = net->getNextReadyRequest();
+    auto& request = noi->getRequest();
+
+    ReplSetHeartbeatArgsV1 args;
+    ASSERT_OK(args.initialize(request.cmdObj));
+
+    OpTime lastApplied(Timestamp(100, 1), 0);
+    ReplSetHeartbeatResponse hbResp;
+    ASSERT_OK(rsConfig.initialize(newerConfig));
+    hbResp.setConfig(rsConfig);
+    hbResp.setSetName(rsConfig.getReplSetName());
+    hbResp.setState(MemberState::RS_SECONDARY);
+    hbResp.setConfigVersion(rsConfig.getConfigVersion());
+    hbResp.setConfigTerm(rsConfig.getConfigTerm());
+    hbResp.setAppliedOpTimeAndWallTime({lastApplied, Date_t() + Seconds(lastApplied.getSecs())});
+    hbResp.setDurableOpTimeAndWallTime({lastApplied, Date_t() + Seconds(lastApplied.getSecs())});
+    net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON()));
+    net->exitNetwork();
+
+    {
+        // Prevent the heartbeat reconfig from completing.
+        FailPointEnableBlock fpb("blockHeartbeatReconfigFinish");
+
+        // Let the heartbeat reconfig begin.
+        net->enterNetwork();
+        net->runReadyNetworkOperations();
+        net->exitNetwork();
+
+        // For force reconfigs, we do allow them to proceed even if we are in drain mode, so make
+        // sure it is in progress, stuck at the failpoint before completion.
+        fpb->waitForTimesEntered(1);
+
+        // At this point the heartbeat reconfig should be in progress but blocked from completion
+        // by the failpoint. We now let drain mode complete. The step up reconfig should be
+        // interrupted by the in progress heartbeat reconfig.
+        signalDrainComplete(opCtx.get());
+    }
+
+    // The failpoint should be released now, allowing the heartbeat reconfig to complete. We run
+    // the clock forward so the re-scheduled heartbeat reconfig will complete.
+    net->enterNetwork();
+    net->runUntil(net->now() + Milliseconds(100));
+    net->exitNetwork();
+
+    // We should have moved to a new term in the election, but our config should have the term
+    // from the force config.
+    ASSERT_EQUALS(getReplCoord()->getTerm(), 1);
+    ASSERT_EQUALS(getReplCoord()->getConfig().getConfigTerm(), OpTime::kUninitializedTerm);
+}
+
 }  // anonymous namespace
 }  // namespace repl
 }  // namespace mongo
```
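Both tests drive replication through the deterministic mock network rather than real sockets: outbound heartbeats queue up until the test enters the network, the test pairs each request with a canned response, and completion handlers run only when the test says so. A self-contained toy version of that pattern (names are illustrative, not MongoDB's `NetworkInterfaceMock` API):

```cpp
#include <deque>
#include <functional>
#include <iostream>
#include <string>
#include <utility>

// Toy deterministic mock network: nothing is asynchronous, so every
// interleaving is chosen explicitly by the test, which is what lets the
// tests above freeze a reconfig mid-flight and then unblock it.
struct MockNetwork {
    using Callback = std::function<void(const std::string& response)>;
    std::deque<std::pair<std::string, Callback>> ready;

    void send(std::string request, Callback cb) {
        ready.emplace_back(std::move(request), std::move(cb));
    }
    bool hasReadyRequests() const { return !ready.empty(); }
    void respondToNext(const std::string& response) {
        auto [request, callback] = ready.front();
        ready.pop_front();
        std::cout << "responding to " << request << "\n";
        callback(response);  // runs the response handler synchronously
    }
};

int main() {
    MockNetwork net;
    net.send("replSetHeartbeat", [](const std::string& r) {
        std::cout << "handled: " << r << "\n";
    });
    // The test decides exactly when the response arrives:
    if (net.hasReadyRequests())
        net.respondToNext("newer config, version 3");
}
```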
