Skip to content

Commit 0ab7000

Browse files
committed
SERVER-31030 Use full OpTime instead of just Timestamps to refer to oplog entries
1 parent d6267ee commit 0ab7000

32 files changed

+655 additions, -539 deletions (lines changed)

jstests/replsets/transaction_table_oplog_replay.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
let res = table.findOne({"_id.id": lsid.id});
1414

1515
assert.eq(res.txnNum, txnNumber);
16-
assert.eq(res.lastWriteOpTimeTs, ts);
16+
assert.eq(res.lastWriteOpTime.ts, ts);
1717
}
1818

1919
/**

jstests/sharding/session_info_in_oplog.js

Lines changed: 65 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -6,24 +6,31 @@
66
(function() {
77
"use strict";
88

9-
var checkOplog = function(oplog, lsid, uid, txnNum, stmtId, prevTs) {
9+
var checkOplog = function(oplog, lsid, uid, txnNum, stmtId, prevTs, prevTerm) {
1010
assert(oplog != null);
1111
assert(oplog.lsid != null);
1212
assert.eq(lsid, oplog.lsid.id);
1313
assert.eq(uid, oplog.lsid.uid);
1414
assert.eq(txnNum, oplog.txnNumber);
1515
assert.eq(stmtId, oplog.stmtId);
16-
assert.eq(prevTs.getTime(), oplog.prevTs.getTime());
17-
assert.eq(prevTs.getInc(), oplog.prevTs.getInc());
16+
17+
var oplogPrevTs = oplog.prevOpTime.ts;
18+
assert.eq(prevTs.getTime(), oplogPrevTs.getTime());
19+
assert.eq(prevTs.getInc(), oplogPrevTs.getInc());
20+
assert.eq(prevTerm, oplog.prevOpTime.t);
1821
};
1922

20-
var checkSessionCatalog = function(conn, sessionId, uid, txnNum, expectedTs) {
23+
var checkSessionCatalog = function(conn, sessionId, uid, txnNum, expectedTs, expectedTerm) {
2124
var coll = conn.getDB('config').transactions;
2225
var sessionDoc = coll.findOne({'_id': {id: sessionId, uid: uid}});
2326

2427
assert.eq(txnNum, sessionDoc.txnNum);
25-
assert.eq(expectedTs.getTime(), sessionDoc.lastWriteOpTimeTs.getTime());
26-
assert.eq(expectedTs.getInc(), sessionDoc.lastWriteOpTimeTs.getInc());
28+
29+
var oplogTs = sessionDoc.lastWriteOpTime.ts;
30+
assert.eq(expectedTs.getTime(), oplogTs.getTime());
31+
assert.eq(expectedTs.getInc(), oplogTs.getInc());
32+
33+
assert.eq(expectedTerm, sessionDoc.lastWriteOpTime.t);
2734
};
2835

2936
var runTests = function(mainConn, priConn) {
@@ -61,12 +68,12 @@
6168
var oplog = priConn.getDB('local').oplog.rs;
6269

6370
var firstDoc = oplog.findOne({ns: 'test.user', 'o._id': 10});
64-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
71+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
6572

6673
var secondDoc = oplog.findOne({ns: 'test.user', 'o._id': 30});
67-
checkOplog(secondDoc, lsid, uid, txnNumber, 1, firstDoc.ts);
74+
checkOplog(secondDoc, lsid, uid, txnNumber, 1, firstDoc.ts, firstDoc.t);
6875

69-
checkSessionCatalog(priConn, lsid, uid, txnNumber, secondDoc.ts);
76+
checkSessionCatalog(priConn, lsid, uid, txnNumber, secondDoc.ts, secondDoc.t);
7077

7178
////////////////////////////////////////////////////////////////////////
7279
// Test update command
@@ -87,15 +94,15 @@
8794
assert.commandWorked(mainConn.getDB('test').runCommand(cmd));
8895

8996
firstDoc = oplog.findOne({ns: 'test.user', op: 'u', 'o2._id': 10});
90-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
97+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
9198

9299
secondDoc = oplog.findOne({ns: 'test.user', op: 'i', 'o._id': 20});
93-
checkOplog(secondDoc, lsid, uid, txnNumber, 1, firstDoc.ts);
100+
checkOplog(secondDoc, lsid, uid, txnNumber, 1, firstDoc.ts, firstDoc.t);
94101

95102
var thirdDoc = oplog.findOne({ns: 'test.user', op: 'u', 'o2._id': 30});
96-
checkOplog(thirdDoc, lsid, uid, txnNumber, 2, secondDoc.ts);
103+
checkOplog(thirdDoc, lsid, uid, txnNumber, 2, secondDoc.ts, secondDoc.t);
97104

98-
checkSessionCatalog(priConn, lsid, uid, txnNumber, thirdDoc.ts);
105+
checkSessionCatalog(priConn, lsid, uid, txnNumber, thirdDoc.ts, thirdDoc.t);
99106

100107
////////////////////////////////////////////////////////////////////////
101108
// Test delete command
@@ -112,12 +119,12 @@
112119
assert.commandWorked(mainConn.getDB('test').runCommand(cmd));
113120

114121
firstDoc = oplog.findOne({ns: 'test.user', op: 'd', 'o._id': 10});
115-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
122+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
116123

117124
secondDoc = oplog.findOne({ns: 'test.user', op: 'd', 'o._id': 20});
118-
checkOplog(secondDoc, lsid, uid, txnNumber, 1, firstDoc.ts);
125+
checkOplog(secondDoc, lsid, uid, txnNumber, 1, firstDoc.ts, firstDoc.t);
119126

120-
checkSessionCatalog(priConn, lsid, uid, txnNumber, secondDoc.ts);
127+
checkSessionCatalog(priConn, lsid, uid, txnNumber, secondDoc.ts, secondDoc.t);
121128

122129
////////////////////////////////////////////////////////////////////////
123130
// Test findAndModify command (upsert)
@@ -136,12 +143,12 @@
136143
assert.commandWorked(mainConn.getDB('test').runCommand(cmd));
137144

138145
firstDoc = oplog.findOne({ns: 'test.user', op: 'i', 'o._id': 40});
139-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
146+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
140147

141148
assert.eq(null, firstDoc.preImageTs);
142149
assert.eq(null, firstDoc.postImageTs);
143150

144-
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts);
151+
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts, firstDoc.t);
145152
var lastTs = firstDoc.ts;
146153

147154
////////////////////////////////////////////////////////////////////////
@@ -162,14 +169,19 @@
162169
var res = assert.commandWorked(mainConn.getDB('test').runCommand(cmd));
163170

164171
firstDoc = oplog.findOne({ns: 'test.user', op: 'u', 'o2._id': 40, ts: {$gt: lastTs}});
165-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
172+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
166173

167174
assert.eq(null, firstDoc.postImageTs);
168175

169-
var savedDoc = oplog.findOne({ns: 'test.user', op: 'n', ts: firstDoc.preImageTs});
176+
var savedDoc = oplog.findOne({
177+
ns: 'test.user',
178+
op: 'n',
179+
ts: firstDoc.preImageOpTime.ts,
180+
t: firstDoc.preImageOpTime.t
181+
});
170182
assert.eq(beforeDoc, savedDoc.o);
171183

172-
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts);
184+
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts, firstDoc.t);
173185
lastTs = firstDoc.ts;
174186

175187
////////////////////////////////////////////////////////////////////////
@@ -190,14 +202,19 @@
190202
var afterDoc = mainConn.getDB('test').user.findOne({_id: 40});
191203

192204
firstDoc = oplog.findOne({ns: 'test.user', op: 'u', 'o2._id': 40, ts: {$gt: lastTs}});
193-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
205+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
194206

195207
assert.eq(null, firstDoc.preImageTs);
196208

197-
savedDoc = oplog.findOne({ns: 'test.user', op: 'n', ts: firstDoc.postImageTs});
209+
savedDoc = oplog.findOne({
210+
ns: 'test.user',
211+
op: 'n',
212+
ts: firstDoc.postImageOpTime.ts,
213+
t: firstDoc.postImageOpTime.t
214+
});
198215
assert.eq(afterDoc, savedDoc.o);
199216

200-
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts);
217+
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts, firstDoc.t);
201218
lastTs = firstDoc.ts;
202219

203220
////////////////////////////////////////////////////////////////////////
@@ -218,14 +235,19 @@
218235
res = assert.commandWorked(mainConn.getDB('test').runCommand(cmd));
219236

220237
firstDoc = oplog.findOne({ns: 'test.user', op: 'u', 'o2._id': 40, ts: {$gt: lastTs}});
221-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
238+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
222239

223240
assert.eq(null, firstDoc.postImageTs);
224241

225-
savedDoc = oplog.findOne({ns: 'test.user', op: 'n', ts: firstDoc.preImageTs});
242+
savedDoc = oplog.findOne({
243+
ns: 'test.user',
244+
op: 'n',
245+
ts: firstDoc.preImageOpTime.ts,
246+
t: firstDoc.preImageOpTime.t
247+
});
226248
assert.eq(beforeDoc, savedDoc.o);
227249

228-
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts);
250+
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts, firstDoc.t);
229251
lastTs = firstDoc.ts;
230252

231253
////////////////////////////////////////////////////////////////////////
@@ -246,14 +268,19 @@
246268
afterDoc = mainConn.getDB('test').user.findOne({_id: 40});
247269

248270
firstDoc = oplog.findOne({ns: 'test.user', op: 'u', 'o2._id': 40, ts: {$gt: lastTs}});
249-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
271+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
250272

251273
assert.eq(null, firstDoc.preImageTs);
252274

253-
savedDoc = oplog.findOne({ns: 'test.user', op: 'n', ts: firstDoc.postImageTs});
275+
savedDoc = oplog.findOne({
276+
ns: 'test.user',
277+
op: 'n',
278+
ts: firstDoc.postImageOpTime.ts,
279+
t: firstDoc.postImageOpTime.t
280+
});
254281
assert.eq(afterDoc, savedDoc.o);
255282

256-
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts);
283+
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts, firstDoc.t);
257284
lastTs = firstDoc.ts;
258285

259286
////////////////////////////////////////////////////////////////////////
@@ -273,14 +300,19 @@
273300
res = assert.commandWorked(mainConn.getDB('test').runCommand(cmd));
274301

275302
firstDoc = oplog.findOne({ns: 'test.user', op: 'd', 'o._id': 40, ts: {$gt: lastTs}});
276-
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0));
303+
checkOplog(firstDoc, lsid, uid, txnNumber, 0, Timestamp(0, 0), -1);
277304

278305
assert.eq(null, firstDoc.postImageTs);
279306

280-
savedDoc = oplog.findOne({ns: 'test.user', op: 'n', ts: firstDoc.preImageTs});
307+
savedDoc = oplog.findOne({
308+
ns: 'test.user',
309+
op: 'n',
310+
ts: firstDoc.preImageOpTime.ts,
311+
t: firstDoc.preImageOpTime.t
312+
});
281313
assert.eq(beforeDoc, savedDoc.o);
282314

283-
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts);
315+
checkSessionCatalog(priConn, lsid, uid, txnNumber, firstDoc.ts, firstDoc.t);
284316
lastTs = firstDoc.ts;
285317
};
286318

src/mongo/db/op_observer_impl.cpp

Lines changed: 14 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -75,13 +75,12 @@ void onWriteOpCompleted(OperationContext* opCtx,
7575
Session* session,
7676
std::vector<StmtId> stmtIdsWritten,
7777
const repl::OpTime& lastStmtIdWriteOpTime) {
78-
const auto lastStmtIdWriteTs = lastStmtIdWriteOpTime.getTimestamp();
79-
if (lastStmtIdWriteTs.isNull())
78+
if (lastStmtIdWriteOpTime.isNull())
8079
return;
8180

8281
if (session) {
8382
session->onWriteOpCompletedOnPrimary(
84-
opCtx, *opCtx->getTxnNumber(), std::move(stmtIdsWritten), lastStmtIdWriteTs);
83+
opCtx, *opCtx->getTxnNumber(), std::move(stmtIdsWritten), lastStmtIdWriteOpTime);
8584
}
8685
}
8786

@@ -140,7 +139,7 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
140139
if (session) {
141140
sessionInfo.setSessionId(*opCtx->getLogicalSessionId());
142141
sessionInfo.setTxnNumber(*opCtx->getTxnNumber());
143-
oplogLink.prevTs = session->getLastWriteOpTimeTs(*opCtx->getTxnNumber());
142+
oplogLink.prevOpTime = session->getLastWriteOpTime(*opCtx->getTxnNumber());
144143
}
145144

146145
OpTimeBundle opTimes;
@@ -160,9 +159,9 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
160159
opTimes.prePostImageOpTime = noteUpdateOpTime;
161160

162161
if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PreImage) {
163-
oplogLink.preImageTs = noteUpdateOpTime.getTimestamp();
162+
oplogLink.preImageOpTime = noteUpdateOpTime;
164163
} else if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PostImage) {
165-
oplogLink.postImageTs = noteUpdateOpTime.getTimestamp();
164+
oplogLink.postImageOpTime = noteUpdateOpTime;
166165
}
167166
}
168167

@@ -197,7 +196,7 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
197196
if (session) {
198197
sessionInfo.setSessionId(*opCtx->getLogicalSessionId());
199198
sessionInfo.setTxnNumber(*opCtx->getTxnNumber());
200-
oplogLink.prevTs = session->getLastWriteOpTimeTs(*opCtx->getTxnNumber());
199+
oplogLink.prevOpTime = session->getLastWriteOpTime(*opCtx->getTxnNumber());
201200
}
202201

203202
OpTimeBundle opTimes;
@@ -206,7 +205,7 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
206205
auto noteOplog = repl::logOp(
207206
opCtx, "n", nss, uuid, deletedDoc.get(), nullptr, false, sessionInfo, stmtId, {});
208207
opTimes.prePostImageOpTime = noteOplog;
209-
oplogLink.preImageTs = noteOplog.getTimestamp();
208+
oplogLink.preImageOpTime = noteOplog;
210209
}
211210

212211
opTimes.writeOpTime = repl::logOp(opCtx,
@@ -280,10 +279,7 @@ void OpObserverImpl::onInserts(OperationContext* opCtx,
280279
bool fromMigrate) {
281280
Session* const session = opCtx->getTxnNumber() ? OperationContextSession::get(opCtx) : nullptr;
282281

283-
const size_t count = end - begin;
284-
auto timestamps = stdx::make_unique<Timestamp[]>(count);
285-
const auto lastOpTime =
286-
repl::logInsertOps(opCtx, nss, uuid, session, begin, end, timestamps.get(), fromMigrate);
282+
const auto opTimeList = repl::logInsertOps(opCtx, nss, uuid, session, begin, end, fromMigrate);
287283

288284
auto css = CollectionShardingState::get(opCtx, nss.ns());
289285

@@ -292,10 +288,12 @@ void OpObserverImpl::onInserts(OperationContext* opCtx,
292288
AuthorizationManager::get(opCtx->getServiceContext())
293289
->logOp(opCtx, "i", nss, it->doc, nullptr);
294290
if (!fromMigrate) {
295-
css->onInsertOp(opCtx, it->doc, timestamps[index]);
291+
auto opTime = opTimeList.empty() ? repl::OpTime() : opTimeList[index];
292+
css->onInsertOp(opCtx, it->doc, opTime);
296293
}
297294
}
298295

296+
auto lastOpTime = opTimeList.empty() ? repl::OpTime() : opTimeList.back();
299297
if (nss.coll() == "system.js") {
300298
Scope::storedFuncMod(opCtx);
301299
} else if (nss.coll() == DurableViewCatalog::viewsCollectionName()) {
@@ -336,8 +334,8 @@ void OpObserverImpl::onUpdate(OperationContext* opCtx, const OplogUpdateEntryArg
336334
args.criteria,
337335
args.update,
338336
args.updatedDoc,
339-
opTime.writeOpTime.getTimestamp(),
340-
opTime.prePostImageOpTime.getTimestamp());
337+
opTime.writeOpTime,
338+
opTime.prePostImageOpTime);
341339
}
342340

343341
if (args.nss.coll() == "system.js") {
@@ -351,7 +349,6 @@ void OpObserverImpl::onUpdate(OperationContext* opCtx, const OplogUpdateEntryArg
351349
SessionCatalog::get(opCtx)->invalidateSessions(opCtx, args.updatedDoc);
352350
}
353351

354-
355352
onWriteOpCompleted(
356353
opCtx, args.nss, session, std::vector<StmtId>{args.stmtId}, opTime.writeOpTime);
357354
}
@@ -383,10 +380,7 @@ void OpObserverImpl::onDelete(OperationContext* opCtx,
383380

384381
auto css = CollectionShardingState::get(opCtx, nss.ns());
385382
if (!fromMigrate) {
386-
css->onDeleteOp(opCtx,
387-
deleteState,
388-
opTime.writeOpTime.getTimestamp(),
389-
opTime.prePostImageOpTime.getTimestamp());
383+
css->onDeleteOp(opCtx, deleteState, opTime.writeOpTime, opTime.prePostImageOpTime);
390384
}
391385

392386
if (nss.coll() == "system.js") {

src/mongo/db/ops/write_ops_retryability.cpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ void validateFindAndModifyRetryability(const FindAndModifyRequest& request,
6767
uassert(40607,
6868
str::stream() << "No pre-image available for findAndModify retry request:"
6969
<< redact(request.toBSON()),
70-
oplogWithCorrectLinks.getPreImageTs());
70+
oplogWithCorrectLinks.getPreImageOpTime());
7171
} else if (opType == repl::OpTypeEnum::kInsert) {
7272
uassert(
7373
40608,
@@ -99,7 +99,7 @@ void validateFindAndModifyRetryability(const FindAndModifyRequest& request,
9999
<< ts.toString()
100100
<< ", oplog: "
101101
<< redact(oplogEntry.toBSON()),
102-
oplogWithCorrectLinks.getPostImageTs());
102+
oplogWithCorrectLinks.getPostImageOpTime());
103103
} else {
104104
uassert(40612,
105105
str::stream() << "findAndModify retry request: " << redact(request.toBSON())
@@ -108,7 +108,7 @@ void validateFindAndModifyRetryability(const FindAndModifyRequest& request,
108108
<< ts.toString()
109109
<< ", oplog: "
110110
<< redact(oplogEntry.toBSON()),
111-
oplogWithCorrectLinks.getPreImageTs());
111+
oplogWithCorrectLinks.getPreImageOpTime());
112112
}
113113
}
114114
}
@@ -118,21 +118,21 @@ void validateFindAndModifyRetryability(const FindAndModifyRequest& request,
118118
* oplog.
119119
*/
120120
BSONObj extractPreOrPostImage(OperationContext* opCtx, const repl::OplogEntry& oplog) {
121-
invariant(oplog.getPreImageTs() || oplog.getPostImageTs());
122-
auto ts =
123-
oplog.getPreImageTs() ? oplog.getPreImageTs().value() : oplog.getPostImageTs().value();
121+
invariant(oplog.getPreImageOpTime() || oplog.getPostImageOpTime());
122+
auto opTime = oplog.getPreImageOpTime() ? oplog.getPreImageOpTime().value()
123+
: oplog.getPostImageOpTime().value();
124124

125125
DBDirectClient client(opCtx);
126-
auto oplogDoc = client.findOne(NamespaceString::kRsOplogNamespace.ns(), BSON("ts" << ts));
126+
auto oplogDoc = client.findOne(NamespaceString::kRsOplogNamespace.ns(), opTime.asQuery());
127127

128128
uassert(40613,
129129
str::stream() << "oplog no longer contains the complete write history of this "
130-
"transaction, log with ts "
131-
<< ts.toString()
130+
"transaction, log with opTime "
131+
<< opTime.toString()
132132
<< " cannot be found",
133133
!oplogDoc.isEmpty());
134-
auto oplogEntry = uassertStatusOK(repl::OplogEntry::parse(oplogDoc));
135134

135+
auto oplogEntry = uassertStatusOK(repl::OplogEntry::parse(oplogDoc));
136136
return oplogEntry.getObject().getOwned();
137137
}
138138

0 commit comments

Comments
 (0)