/**
 * Test that a node does not take a stable checkpoint at a timestamp earlier than minValid if it
 * crashes after rollbackViaRefetch has completed but before it has caught up to minValid. This
 * test exercises that behavior when run with enableMajorityReadConcern:false.
 *
 * @tags: [requires_persistence]
 */
8+ ( function ( ) {
9+ "use strict" ;
10+
11+ load ( "jstests/replsets/libs/rollback_test.js" ) ;
12+ load ( "jstests/libs/fail_point_util.js" ) ;
13+
// NOTE(review): presumably read by the shared replication test libraries to tolerate the unclean
// shutdowns this test performs around rollback - confirm against rollback_test.js.
TestData.rollbackShutdowns = true;

// Namespace targeted by every write in this test.
const dbName = "test";
const sourceCollName = "coll";

// The document that is inserted everywhere, then deleted on the rollback node.
const doc1 = {_id: 1, x: "document_of_interest"};
19+
/**
 * Writes applied before the nodes diverge.
 *
 * @param {Object} node - connection to write through; must expose getDB(). The caller in this
 *     file passes the current primary.
 */
const CommonOps = (node) => {
    // Insert a document that will exist on all nodes.
    assert.commandWorked(node.getDB(dbName)[sourceCollName].insert(doc1));
};
24+
/**
 * Writes applied only on the sync source while the rollback node is isolated.
 *
 * @param {Object} node - connection to the sync source; must expose getDB().
 */
const SyncSourceOps = (node) => {
    // Insert some documents on the sync source so the rollback node will have a minValid it needs
    // to catch up to.
    const coll = node.getDB(dbName)[sourceCollName];
    for (const x of [1, 2, 3]) {
        assert.commandWorked(coll.insert({x, sync_source: 1}));
    }
};
32+
/**
 * Writes applied only on the node that will roll back.
 *
 * @param {Object} node - connection to the rollback node; must expose getDB().
 */
const RollbackOps = (node) => {
    // Delete the document on the rollback node so it will be refetched from sync source.
    assert.commandWorked(node.getDB(dbName)[sourceCollName].remove(doc1));
};
37+
// Start a three-node replica set with useBridge enabled.
const replTest = new ReplSetTest({nodes: 3, useBridge: true});
replTest.startSet();

// Speed up the test.
replTest.nodes.forEach((node) => {
    assert.commandWorked(
        node.adminCommand({configureFailPoint: 'setSmallOplogGetMoreMaxTimeMS', mode: 'alwaysOn'}));
});

const config = replTest.getReplSetConfig();
// A priority-0 member can never be elected primary.
config.members[2].priority = 0;
// With chaining disabled, secondaries may only sync from the primary.
config.settings = {
    chainingAllowed: false
};
replTest.initiateWithHighElectionTimeout(config);

const rollbackTest = new RollbackTest("rollback_crash_before_reaching_minvalid", replTest);
CommonOps(rollbackTest.getPrimary());
53+
const rollbackNode = rollbackTest.transitionToRollbackOperations();

// Have the node hang after rollback has completed but before it starts applying ops again.
// The result is asserted so that a failure to arm the failpoint is reported here rather than as a
// timeout while waiting for the failpoint's log line below.
assert.commandWorked(
    rollbackNode.adminCommand({configureFailPoint: 'bgSyncHangAfterRunRollback', mode: 'alwaysOn'}));
RollbackOps(rollbackNode);

const syncSource = rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
SyncSourceOps(syncSource);

// Let the rollback run.
rollbackTest.transitionToSyncSourceOperationsDuringRollback();

jsTestLog("Waiting for the rollback node to hit the failpoint.");
checkLog.contains(rollbackNode, "bgSyncHangAfterRunRollback failpoint is set");
68+
// Kill the rollback node before it has reached minValid. Sending a shutdown signal to the node
// should cause us to break out of the hung failpoint, so we don't need to explicitly turn the
// failpoint off.
// NOTE(review): index 0 is assumed to be the rollback node - confirm it is always the initial
// primary.
jsTestLog("Killing the rollback node.");
replTest.stop(0, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true});
replTest.start(
    0,
    {
        setParameter: {
            // Pause oplog fetching so the node doesn't advance past minValid after restart.
            "failpoint.stopReplProducer": "{'mode':'alwaysOn'}"
        }
    },
    true /* restart */);
83+
// Wait long enough for the initial stable checkpoint to be triggered if it was going to be. We
// expect that no stable checkpoints are taken. If they are, we expect the test to fail when we
// restart below and recover from a stable checkpoint.
//
// First we wait until the node has a commit point, since learning of one should trigger an update
// to the stable timestamp. Then, we wait for a bit after this for any potential checkpoint to
// occur. In the worst case, if the checkpoint was very slow to complete, we might produce a false
// negative test result (the test would pass even though a bug existed), but we consider this
// acceptable if it happens rarely.
assert.soonNoExcept(() => {
    const status = assert.commandWorked(replTest.nodes[0].adminCommand({replSetGetStatus: 1}));
    // Timestamps are objects, so `!==` would compare identity and be true for any two instances.
    // Compare by value so that we really wait for a non-null commit point.
    const nullCommitPoint = {ts: Timestamp(0, 0)};
    return bsonWoCompare({ts: status.optimes.lastCommittedOpTime.ts}, nullCommitPoint) !== 0;
});
sleep(5000);
98+
// Kill and restart the node to test that we don't recover from an inconsistent stable checkpoint
// taken above.
replTest.stop(0, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true});
replTest.start(
    0,
    {
        setParameter: {
            // Make sure this failpoint is not still enabled in the saved startup options.
            "failpoint.stopReplProducer": "{'mode':'off'}"
        }
    },
    true /* restart */);

// With oplog fetching re-enabled, let the rollback test fixture return to steady state.
rollbackTest.transitionToSteadyStateOperations();

// Check the replica set.
rollbackTest.stop();
116+ } ( ) ) ;
0 commit comments