Skip to content

Commit aee0de0

Browse files
committed
SERVER-20928 Remove HeartbeatResponseAction::ScheduleElection action
1 parent 289d877 commit aee0de0

6 files changed: 52 additions and 236 deletions

src/mongo/db/repl/heartbeat_response_action.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -43,12 +43,6 @@ HeartbeatResponseAction HeartbeatResponseAction::makeReconfigAction() {
4343
return result;
4444
}
4545

46-
HeartbeatResponseAction HeartbeatResponseAction::makeScheduleElectionAction() {
47-
HeartbeatResponseAction result;
48-
result._action = ScheduleElection;
49-
return result;
50-
}
51-
5246
HeartbeatResponseAction HeartbeatResponseAction::makePriorityTakeoverAction() {
5347
HeartbeatResponseAction result;
5448
result._action = PriorityTakeover;

src/mongo/db/repl/heartbeat_response_action.h

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ class HeartbeatResponseAction {
4747
enum Action {
4848
NoAction,
4949
Reconfig,
50-
ScheduleElection,
5150
StartElection,
5251
StepDownSelf,
5352
StepDownRemotePrimary,
@@ -64,13 +63,6 @@ class HeartbeatResponseAction {
6463
*/
6564
static HeartbeatResponseAction makeReconfigAction();
6665

67-
/**
68-
* Makes a new action telling the current node to schedule an election due to election timeout
69-
* expiry. If an election timeout is already scheduled, the current node should not reschedule
70-
* the timeout. Valid under protocol version 1 only.
71-
*/
72-
static HeartbeatResponseAction makeScheduleElectionAction();
73-
7466
/**
7567
* Makes a new action telling the current node to attempt to elect itself primary.
7668
*/

src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp

Lines changed: 1 addition & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -85,26 +85,6 @@ void ReplCoordElectV1Test::simulateEnoughHeartbeatsForElectability() {
8585
net->exitNetwork();
8686
}
8787

88-
TEST_F(ReplCoordElectV1Test, StartElectionDoesNotStartAnElectionWhenNodeHasNoOplogEntries) {
89-
logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
90-
// Election never starts because we haven't set a lastOpTimeApplied value yet, via a
91-
// heartbeat.
92-
startCapturingLogMessages();
93-
assertStartSuccess(BSON("_id"
94-
<< "mySet"
95-
<< "version" << 1 << "members"
96-
<< BSON_ARRAY(BSON("_id" << 1 << "host"
97-
<< "node1:12345")
98-
<< BSON("_id" << 2 << "host"
99-
<< "node2:12345")) << "protocolVersion"
100-
<< 1),
101-
HostAndPort("node1", 12345));
102-
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
103-
simulateEnoughHeartbeatsForElectability();
104-
stopCapturingLogMessages();
105-
ASSERT_EQUALS(1, countLogLinesContaining("node has no applied oplog entries"));
106-
}
107-
10888
TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenNodeIsTheOnlyElectableNode) {
10989
OperationContextReplMock txn;
11090
assertStartSuccess(
@@ -643,6 +623,7 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenTermChangesDuringDryRun) {
643623
<< BSON("_id" << 3 << "host"
644624
<< "node3:12345")) << "protocolVersion"
645625
<< 1);
626+
646627
assertStartSuccess(configObj, HostAndPort("node1", 12345));
647628
ReplicaSetConfig config = assertMakeRSConfig(configObj);
648629

@@ -715,42 +696,6 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenTermChangesDuringActualElection) {
715696
countLogLinesContaining("not becoming primary, we have been superceded already"));
716697
}
717698

718-
TEST_F(ReplCoordElectV1Test, ElectionWillNotStartWhenNodeHasRecentlyLearnedAboutANewTerm) {
719-
startCapturingLogMessages();
720-
BSONObj configObj = BSON("_id"
721-
<< "mySet"
722-
<< "version" << 1 << "members"
723-
<< BSON_ARRAY(BSON("_id" << 1 << "host"
724-
<< "node1:12345")
725-
<< BSON("_id" << 2 << "host"
726-
<< "node2:12345")
727-
<< BSON("_id" << 3 << "host"
728-
<< "node3:12345")) << "protocolVersion"
729-
<< 1);
730-
assertStartSuccess(configObj, HostAndPort("node1", 12345));
731-
ReplicaSetConfig config = assertMakeRSConfig(configObj);
732-
733-
OperationContextNoop txn;
734-
OpTime time1(Timestamp(100, 1), 0);
735-
getReplCoord()->setMyLastOptime(time1);
736-
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
737-
738-
logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
739-
// Learned about a new term. The following HB won't trigger election during a timeout interval.
740-
getReplCoord()->updateTerm(10);
741-
simulateEnoughHeartbeatsForElectability();
742-
stopCapturingLogMessages();
743-
ASSERT(getReplCoord()->getMemberState().secondary())
744-
<< getReplCoord()->getMemberState().toString();
745-
ASSERT_EQ(
746-
2, countLogLinesContaining("because I stood up or learned about a new term too recently"));
747-
logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
748-
749-
simulateSuccessfulV1Election();
750-
ASSERT(getReplCoord()->getMemberState().primary())
751-
<< getReplCoord()->getMemberState().toString();
752-
}
753-
754699
TEST_F(ReplCoordElectV1Test, SchedulesPriorityTakeoverIfNodeHasHigherPriorityThanCurrentPrimary) {
755700
startCapturingLogMessages();
756701
BSONObj configObj = BSON("_id"

src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -215,13 +215,6 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
215215
invariant(responseStatus.isOK());
216216
_scheduleHeartbeatReconfig(responseStatus.getValue().getConfig());
217217
break;
218-
case HeartbeatResponseAction::ScheduleElection:
219-
DEV {
220-
// Election timer should already be periodically scheduled at this point.
221-
stdx::unique_lock<stdx::mutex> lk(_mutex);
222-
fassert(28813, _handleElectionTimeoutCbh.isValid());
223-
}
224-
break;
225218
case HeartbeatResponseAction::StartElection:
226219
_startElectSelf();
227220
break;
@@ -250,9 +243,6 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
250243
}
251244
break;
252245
}
253-
default:
254-
severe() << "Illegal heartbeat response action code " << int(action.getAction());
255-
invariant(false);
256246
}
257247
}
258248

src/mongo/db/repl/topology_coordinator_impl.cpp

Lines changed: 32 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -1043,107 +1043,56 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBDataV1(
10431043
const MemberState& originalState,
10441044
Date_t now,
10451045
const OpTime& lastOpApplied) {
1046-
// This method has two interrelated responsibilities, performed in two phases.
10471046
//
1048-
// First, it updates the local notion of which remote node, if any is primary.
1047+
// Updates the local notion of which remote node, if any, is primary.
1048+
// Start the priority takeover process if we are eligible.
10491049
//
1050-
// Second, if there is no remote primary, and the local node is not primary, it considers
1051-
// whether or not to stand for election.
1050+
10521051
invariant(updatedConfigIndex != _selfIndex);
10531052

1054-
// We are missing from the config, so do not participate in primary maintenance or election.
1053+
// If we are missing from the config, do not participate in primary maintenance or election.
10551054
if (_selfIndex == -1) {
10561055
return HeartbeatResponseAction::makeNoAction();
10571056
}
1058-
1059-
////////////////////
1060-
// Phase 1
1061-
////////////////////
1062-
1063-
// If we believe the node whose data was just updated is primary, confirm that
1064-
// the updated data supports that notion. If not, erase our notion of who is primary.
1065-
if (updatedConfigIndex == _currentPrimaryIndex) {
1066-
const MemberHeartbeatData& updatedHBData = _hbdata[updatedConfigIndex];
1067-
if (!updatedHBData.up() || !updatedHBData.getState().primary()) {
1068-
_currentPrimaryIndex = -1;
1069-
}
1057+
// If we are the primary, there must be no other primary, otherwise its higher term would
1058+
// have already made us step down.
1059+
if (_currentPrimaryIndex == _selfIndex) {
1060+
return HeartbeatResponseAction::makeNoAction();
10701061
}
10711062

10721063
// Scan the member list's heartbeat data for who is primary, and update _currentPrimaryIndex.
1073-
if (_currentPrimaryIndex != _selfIndex) {
1074-
int remotePrimaryIndex = -1;
1075-
for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
1076-
it != _hbdata.end();
1077-
++it) {
1078-
const int itIndex = indexOfIterator(_hbdata, it);
1079-
if (itIndex == _selfIndex) {
1080-
continue;
1081-
}
1082-
1083-
if (it->getState().primary() && it->up()) {
1084-
if (remotePrimaryIndex == -1 ||
1085-
_hbdata[remotePrimaryIndex].getTerm() < it->getTerm()) {
1086-
remotePrimaryIndex = itIndex;
1087-
}
1064+
int primaryIndex = -1;
1065+
for (size_t i = 0; i < _hbdata.size(); i++) {
1066+
const MemberHeartbeatData& member = _hbdata[i];
1067+
if (member.getState().primary() && member.up()) {
1068+
if (primaryIndex == -1 || _hbdata[primaryIndex].getTerm() < member.getTerm()) {
1069+
primaryIndex = i;
10881070
}
10891071
}
1090-
1091-
if (remotePrimaryIndex != -1) {
1092-
// Clear last heartbeat message on ourselves.
1093-
setMyHeartbeatMessage(now, "");
1094-
1095-
_currentPrimaryIndex = remotePrimaryIndex;
1096-
1097-
// Priority takeover when the replset is stable.
1098-
//
1099-
// Take over the primary only if the remote primary is in the latest term I know.
1100-
// Otherwise, there must be an outstanding election, which may succeed or not, but
1101-
// the remote primary will become aware of that election eventually and step down.
1102-
if (_hbdata[remotePrimaryIndex].getTerm() == _term &&
1103-
_rsConfig.getMemberAt(remotePrimaryIndex).getPriority() <
1104-
_rsConfig.getMemberAt(_selfIndex).getPriority()) {
1105-
LOG(4) << "I can take over the primary due to higher priority."
1106-
<< " Current primary index: " << remotePrimaryIndex << " in term "
1107-
<< _hbdata[remotePrimaryIndex].getTerm();
1108-
1109-
return HeartbeatResponseAction::makePriorityTakeoverAction();
1110-
}
1111-
return HeartbeatResponseAction::makeNoAction();
1112-
}
11131072
}
1114-
1115-
////////////////////
1116-
// Phase 2
1117-
////////////////////
1118-
1119-
// We do not believe any remote to be primary.
1120-
1121-
// Return if we are primary. The stepdown decision is based on liveness rather than
1122-
// heartbeats in pv 1.
1123-
if (_iAmPrimary()) {
1073+
_currentPrimaryIndex = primaryIndex;
1074+
if (_currentPrimaryIndex == -1) {
11241075
return HeartbeatResponseAction::makeNoAction();
11251076
}
11261077

1127-
fassert(28798, _currentPrimaryIndex == -1);
1078+
// Clear last heartbeat message on ourselves.
1079+
setMyHeartbeatMessage(now, "");
11281080

1129-
const MemberState currentState = getMemberState();
1130-
if (originalState.recovering() && currentState.secondary()) {
1131-
// We just transitioned from RECOVERING to SECONDARY, this can only happen if we
1132-
// received a heartbeat with an auth error when previously all the heartbeats we'd
1133-
// received had auth errors. In this case, don't return makeElectAction() because
1134-
// that could cause the election to start before the ReplicationCoordinator has updated
1135-
// its notion of the member state to SECONDARY. Instead return noAction so that the
1136-
// ReplicationCoordinator knows to update its tracking of the member state off of the
1137-
// TopologyCoordinator, and leave starting the election until the next heartbeat comes
1138-
// back.
1139-
return HeartbeatResponseAction::makeNoAction();
1140-
}
1141-
1142-
// At this point, there is no primary anywhere. Check to see if we should become a candidate.
1143-
if (!checkShouldStandForElection(now, lastOpApplied)) {
1144-
return HeartbeatResponseAction::makeNoAction();
1081+
// Priority takeover when the replset is stable.
1082+
//
1083+
// Take over the primary only if the remote primary is in the latest term I know.
1084+
// Otherwise, there must be an outstanding election, which may succeed or not, but
1085+
// the remote primary will become aware of that election eventually and step down.
1086+
if (_hbdata[primaryIndex].getTerm() == _term &&
1087+
_rsConfig.getMemberAt(primaryIndex).getPriority() <
1088+
_rsConfig.getMemberAt(_selfIndex).getPriority()) {
1089+
LOG(4) << "I can take over the primary due to higher priority."
1090+
<< " Current primary index: " << primaryIndex << " in term "
1091+
<< _hbdata[primaryIndex].getTerm();
1092+
1093+
return HeartbeatResponseAction::makePriorityTakeoverAction();
11451094
}
1146-
return HeartbeatResponseAction::makeScheduleElectionAction();
1095+
return HeartbeatResponseAction::makeNoAction();
11471096
}
11481097

11491098
HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData(

0 commit comments

Comments
 (0)