Skip to content

Commit 6935119

Browse files
committed
SERVER-22262 Do not truncate the last applied oplog entry during batch recovery
1 parent 3cc599e commit 6935119

File tree

3 files changed

+24
-22
lines changed

3 files changed

+24
-22
lines changed

jstests/replsets/oplog_truncated_on_recovery.js

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
/**
2-
* This test will ensure that a failed a batch apply will remove the any oplog
2+
* This test will ensure that recovery from a failed batch application will remove the oplog
33
* entries from that batch.
44
*
55
* To do this we:
66
* -- Create single node replica set
7-
* -- Set minvalid manually on primary way ahead (5 minutes)
7+
* -- Set minvalid manually on primary way ahead (5 days)
88
* -- Write some oplog entries newer than minvalid.start
99
* -- Ensure restarted primary comes up in recovering and truncates the oplog
1010
* -- Success!
@@ -54,29 +54,21 @@
5454
}
5555
})));
5656

57-
// Set minvalid to something far in the future for the current primary, to
58-
// simulate recovery.
59-
// Note: This is so far in the future (5 days) that it will never become
60-
// secondary.
57+
// Set minvalid to something far in the future for the current primary, to simulate recovery.
58+
// Note: This is so far in the future (5 days) that it will never become secondary.
6159
var farFutureTS = new Timestamp(Math.floor(new Date().getTime() / 1000)
6260
+ (60 * 60 * 24 * 5 /* in five days */), 0);
6361
var rsgs = assert.commandWorked(localDB.adminCommand("replSetGetStatus"));
6462
log(rsgs);
6563
var primaryOpTime = rsgs.members[0].optime;
66-
var primaryLastTS = rsgs.members[0].optime.ts;
67-
log(primaryLastTS);
64+
log(primaryOpTime);
6865

6966
// Set the start of the failed batch
70-
primaryOpTime.ts = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1);
7167

72-
log(primaryLastTS);
7368
jsTest.log("future TS: " + tojson(farFutureTS) + ", date:" + tsToDate(farFutureTS));
74-
// We do an update in case there is a minvalid document on the primary
75-
// already.
76-
// If the doc doesn't exist then upsert:true will create it, and the
77-
// writeConcern ensures
78-
// that update returns details of the write, like whether an update or
79-
// insert was performed.
69+
// We do an update in case there is a minvalid document on the primary already.
70+
// If the doc doesn't exist then upsert:true will create it, and the writeConcern ensures
71+
// that update returns details of the write, like whether an update or insert was performed.
8072
log(assert.writeOK(minvalidColl.update(
8173
{},
8274
{
@@ -92,12 +84,15 @@
9284
}
9385
})));
9486

87+
// Insert a diverged oplog entry that will be truncated after restart.
88+
var divergedTS = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1);
9589
log(assert.writeOK(localDB.oplog.rs.insert(
9690
{
9791
_id : 0,
98-
ts : primaryOpTime.ts,
92+
ts : divergedTS,
9993
op : "n",
100-
term : -1
94+
h: NumberLong(0),
95+
t : NumberLong(-1)
10196
})));
10297
log(localDB.oplog.rs.find().toArray());
10398
log(assert.commandWorked(localDB.adminCommand("replSetGetStatus")));
@@ -122,7 +117,7 @@
122117
$natural : -1
123118
}).limit(-1).next().ts;
124119
log(localDB.oplog.rs.find().toArray());
125-
assert.eq(primaryLastTS, lastTS);
120+
assert.eq(primaryOpTime.ts, lastTS);
126121
return true;
127122
});
128123

src/mongo/db/repl/bgsync.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,8 @@ long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) {
773773
BSONElement hashElement = oplogEntry[hashFieldName];
774774
if (hashElement.eoo()) {
775775
severe() << "Most recent entry in " << rsOplogName << " missing \"" << hashFieldName
776-
<< "\" field";
776+
<< "\" field. Oplog entry: " << oplogEntry;
777+
777778
fassertFailed(18902);
778779
}
779780
if (hashElement.type() != NumberLong) {

src/mongo/db/repl/oplog.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ unique_ptr<OplogDocWriter> _logOpWriter(OperationContext* txn,
305305
}
306306
} // end anon namespace
307307

308-
// Truncates the oplog to and including the "truncateTimestamp" entry.
308+
// Truncates the oplog back to, but not including, the "truncateTimestamp" entry (that entry is kept).
309309
void truncateOplogTo(OperationContext* txn, Timestamp truncateTimestamp) {
310310
const NamespaceString oplogNss(rsOplogName);
311311
ScopedTransaction transaction(txn, MODE_IX);
@@ -339,8 +339,14 @@ void truncateOplogTo(OperationContext* txn, Timestamp truncateTimestamp) {
339339
first = false;
340340
}
341341

342-
if (tsElem.timestamp() < truncateTimestamp) {
342+
if (tsElem.timestamp() == truncateTimestamp) {
343343
break;
344+
} else if (tsElem.timestamp() < truncateTimestamp) {
345+
fassertFailedWithStatusNoTrace(34411,
346+
Status(ErrorCodes::OplogOutOfOrder,
347+
str::stream() << "Can't find "
348+
<< truncateTimestamp.toString()
349+
<< " to truncate from!"));
344350
}
345351

346352
foundSomethingToTruncate = true;

0 commit comments

Comments
 (0)