Skip to content

Commit 94b35f4

Browse files
committed
Generalize recloning docs on initial oplog application SERVER-4270
Conflicts: db/repl.cpp db/repl.h db/repl/rs_sync.cpp
1 parent 1519ef3 commit 94b35f4

File tree

5 files changed

+48
-41
lines changed

5 files changed

+48
-41
lines changed

db/oplog.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,40 @@ namespace mongo {
480480
}
481481
}
482482

483+
/**
 * Called after an update op failed to apply: tries to fetch the document the
 * update targets from the sync source and insert it locally.
 *
 * @param o  the oplog entry; its "ns" field names the collection and the "_id"
 *           element of its "o2" object is used as the lookup query.
 * @param hn host name of the sync source to connect to.
 * @return true if the document was found on the source and inserted locally;
 *         false if the source returned an empty object.
 * Throws via uassert 15916 if the connection to hn fails and via uassert 15917
 * if the local insert returns a null DiskLoc; a DBException raised by the
 * remote findOne is logged and rethrown.
 */
bool shouldRetry(const BSONObj& o, const string& hn) {
484+
OplogReader missingObjReader;
485+
486+
// we don't have the object yet, which is possible on initial sync. get it.
487+
log() << "replication info adding missing object" << endl; // rare enough we can log
488+
uassert(15916, str::stream() << "Can no longer connect to initial sync source: " << hn, missingObjReader.connect(hn));
489+
490+
const char *ns = o.getStringField("ns");
491+
// might be more than just _id in the update criteria
492+
BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
493+
BSONObj missingObj;
494+
try {
495+
missingObj = missingObjReader.findOne(ns, query);
496+
} catch(DBException& e) {
497+
log() << "replication assertion fetching missing object: " << e.what() << endl;
498+
throw;
499+
}
500+
501+
if( missingObj.isEmpty() ) {
502+
log() << "replication missing object not found on source. presumably deleted later in oplog" << endl;
503+
log() << "replication object: " << o.getObjectField("o2").toString() << endl;
504+
log() << "replication o object: " << o.getObjectField("o").toString() << endl;
505+
506+
return false;
507+
}
508+
else {
509+
Client::Context ctx(ns);
510+
DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize());
511+
uassert(15917, "Got bad disk location when attempting to insert", !d.isNull());
512+
513+
return true;
514+
}
515+
}
516+
483517
/** @param fromRepl false if from ApplyOpsCmd
484518
@return true if the op was an update that should have happened but the document does not exist. See replset initial sync code.
485519
*/

db/oplog.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ namespace mongo {
212212
* take an op and apply locally
213213
* used for applying from an oplog
214214
* @param fromRepl really from replication or for testing/internal/command/etc...
215+
* @return true if the op was an update that could not be applied (i.e. true on failure)
215216
*/
216217
bool applyOperation_inlock(const BSONObj& op , bool fromRepl = true );
218+
219+
/**
220+
* Fetches the document missing for a failed update from host hn and inserts it locally; returns true if applyOperation_inlock should then be called again.
221+
*/
222+
bool shouldRetry(const BSONObj& op , const string& hn);
217223
}

db/repl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,10 @@ namespace mongo {
851851

852852
void ReplSource::applyOperation(const BSONObj& op) {
853853
try {
854-
applyOperation_inlock( op );
854+
bool failedUpdate = applyOperation_inlock( op );
855+
if (failedUpdate && shouldRetry(op, hostName)) {
856+
// applyOperation_inlock returns true when the update FAILED, while uassert takes the condition that must hold; negate so we throw only if the retry still fails.
uassert(15914, "Failure retrying initial sync update", !applyOperation_inlock(op));
857+
}
855858
}
856859
catch ( UserException& e ) {
857860
log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;;
@@ -1351,6 +1354,7 @@ namespace mongo {
13511354
setLastSavedLocalTs( nextLastSaved );
13521355
}
13531356
}
1357+
13541358
if( oplogReader.awaitCapable() && tailing )
13551359
okResultCode = 0; // don't sleep
13561360
syncedTo = nextOpTime;

db/repl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ namespace mongo {
134134
public:
135135
OplogReader oplogReader;
136136

137-
static void applyOperation(const BSONObj& op);
137+
void applyOperation(const BSONObj& op);
138138
bool replacing; // in "replace mode" -- see CmdReplacePeer
139139
bool paired; // --pair in use
140140
string hostName; // ip addr or hostname plus optionally, ":<port>"

db/repl/rs_sync.cpp

Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ namespace mongo {
5858

5959
const string hn = source->h().toString();
6060
OplogReader r;
61-
OplogReader missingObjReader;
6261
try {
6362
if( !r.connect(hn) ) {
6463
log() << "replSet initial sync error can't connect to " << hn << " to read " << rsoplog << rsLog;
@@ -137,44 +136,8 @@ namespace mongo {
137136

138137
if( ts >= applyGTE ) { // optimes before we started copying need not be applied.
139138
bool failedUpdate = syncApply(o);
140-
if( failedUpdate ) {
141-
// we don't have the object yet, which is possible on initial sync. get it.
142-
log() << "replSet info adding missing object" << endl; // rare enough we can log
143-
if( !missingObjReader.connect(hn) ) { // ok to call more than once
144-
log() << "replSet initial sync fails, couldn't connect to " << hn << endl;
145-
return false;
146-
}
147-
const char *ns = o.getStringField("ns");
148-
BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj(); // might be more than just _id in the update criteria
149-
BSONObj missingObj;
150-
try {
151-
missingObj = missingObjReader.findOne(
152-
ns,
153-
query );
154-
} catch(...) {
155-
log() << "replSet assertion fetching missing object" << endl;
156-
throw;
157-
}
158-
if( missingObj.isEmpty() ) {
159-
log() << "replSet missing object not found on source. presumably deleted later in oplog" << endl;
160-
log() << "replSet op: " << o.toString() << endl;
161-
}
162-
else {
163-
Client::Context ctx(ns);
164-
try {
165-
DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize());
166-
assert( !d.isNull() );
167-
} catch(...) {
168-
log() << "replSet assertion during insert of missing object" << endl;
169-
throw;
170-
}
171-
// now reapply the update from above
172-
bool failed = syncApply(o);
173-
if( failed ) {
174-
log() << "replSet update still fails after adding missing object " << ns << endl;
175-
assert(false);
176-
}
177-
}
139+
if( failedUpdate && shouldRetry(o, hn)) {
140+
// syncApply returns true when the update FAILED, while uassert takes the condition that must hold; negate so we throw only if the retry still fails.
uassert(15915, "replSet update still fails after adding missing object", !syncApply(o));
178141
}
179142
}
180143
_logOpObjRS(o); /* with repl sets we write the ops to our oplog too */

0 commit comments

Comments
 (0)