Skip to content

Commit ba0ee77

Browse files
committed
SERVER-30733 make distlocks hierarchical for metadata commands on databases and collections
1 parent 2b10c28 commit ba0ee77

11 files changed

+73
-127
lines changed

src/mongo/db/s/config/configsvr_create_database_command.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,16 @@ class ConfigSvrCreateDatabaseCommand : public BasicCommand {
115115
// Make sure to force update of any stale metadata
116116
ON_BLOCK_EXIT([opCtx, dbname] { Grid::get(opCtx)->catalogCache()->purgeDatabase(dbname); });
117117

118+
// Remove the backwards compatible lock after 3.6 ships.
119+
auto const catalogClient = Grid::get(opCtx)->catalogClient();
120+
auto backwardsCompatibleDbDistLock = uassertStatusOK(
121+
catalogClient->getDistLockManager()->lock(opCtx,
122+
dbname + "-movePrimary",
123+
"createDatabase",
124+
DistLockManager::kDefaultLockTimeout));
125+
auto dbDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock(
126+
opCtx, dbname, "createDatabase", DistLockManager::kDefaultLockTimeout));
127+
118128
uassertStatusOK(ShardingCatalogManager::get(opCtx)->createDatabase(opCtx, dbname));
119129

120130
return true;

src/mongo/db/s/config/configsvr_enable_sharding_command.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,16 @@ class ConfigSvrEnableShardingCommand : public BasicCommand {
121121
// Make sure to force update of any stale metadata
122122
ON_BLOCK_EXIT([opCtx, dbname] { Grid::get(opCtx)->catalogCache()->purgeDatabase(dbname); });
123123

124+
// Remove the backwards compatible lock after 3.6 ships.
125+
auto const catalogClient = Grid::get(opCtx)->catalogClient();
126+
auto backwardsCompatibleDbDistLock = uassertStatusOK(
127+
catalogClient->getDistLockManager()->lock(opCtx,
128+
dbname + "-movePrimary",
129+
"enableSharding",
130+
DistLockManager::kDefaultLockTimeout));
131+
auto dbDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock(
132+
opCtx, dbname, "enableSharding", DistLockManager::kDefaultLockTimeout));
133+
124134
uassertStatusOK(ShardingCatalogManager::get(opCtx)->enableSharding(opCtx, dbname));
125135
audit::logEnableSharding(Client::getCurrent(), dbname);
126136

src/mongo/db/s/config/configsvr_move_primary_command.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -136,15 +136,14 @@ class ConfigSvrMovePrimaryCommand : public BasicCommand {
136136
auto const catalogCache = Grid::get(opCtx)->catalogCache();
137137
auto const shardRegistry = Grid::get(opCtx)->shardRegistry();
138138

139-
// The first lock is taken to ensure that different movePrimary commands cannot run
140-
// concurrently in mixed 3.4 and 3.6 MongoS versions. The second lock is what is
141-
// consistently used to lock the actual database.
142-
const std::string whyMessage(str::stream() << "Moving primary shard of " << dbname);
143-
auto backwardsCompatibleLock = uassertStatusOK(catalogClient->getDistLockManager()->lock(
144-
opCtx, dbname + "-movePrimary", whyMessage, DistLockManager::kDefaultLockTimeout));
145-
146-
auto scopedDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock(
147-
opCtx, dbname, whyMessage, DistLockManager::kDefaultLockTimeout));
139+
// Remove the backwards compatible lock after 3.6 ships.
140+
auto backwardsCompatibleDbDistLock = uassertStatusOK(
141+
catalogClient->getDistLockManager()->lock(opCtx,
142+
dbname + "-movePrimary",
143+
"movePrimary",
144+
DistLockManager::kDefaultLockTimeout));
145+
auto dbDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock(
146+
opCtx, dbname, "movePrimary", DistLockManager::kDefaultLockTimeout));
148147

149148
auto dbType = uassertStatusOK(catalogClient->getDatabase(
150149
opCtx, dbname, repl::ReadConcernLevel::kLocalReadConcern))

src/mongo/db/s/config/configsvr_shard_collection_command.cpp

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -744,27 +744,21 @@ class ConfigSvrShardCollectionCommand : public BasicCommand {
744744

745745
auto const catalogManager = ShardingCatalogManager::get(opCtx);
746746
auto const catalogCache = Grid::get(opCtx)->catalogCache();
747+
auto const catalogClient = Grid::get(opCtx)->catalogClient();
747748

748-
// Take a lock to ensure that different movePrimary and shardCollection commands cannot run
749-
// concurrently in mixed 3.4 and 3.6 MongoS versions.
750-
boost::optional<DistLockManager::ScopedDistLock> backwardsCompatibleLock(
751-
uassertStatusOK(Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock(
752-
opCtx,
753-
nss.db() + "-movePrimary",
754-
"shardCollection",
755-
DistLockManager::kDefaultLockTimeout)));
756-
757-
// If shardCollection is called concurrently with movePrimary, which changes the UUID of a
758-
// collection, shardCollection may persist the original UUID on the config server, leaving
759-
// the collection UUIDs inconsistent between the moved collection and the config server.
760-
boost::optional<DistLockManager::ScopedDistLock> scopedDatabaseDistLock(
761-
uassertStatusOK(Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock(
749+
// Make the distlocks boost::optional so that they can be released by being reset below.
750+
// Remove the backwards compatible lock after 3.6 ships.
751+
boost::optional<DistLockManager::ScopedDistLock> backwardsCompatibleDbDistLock(
752+
uassertStatusOK(
753+
catalogClient->getDistLockManager()->lock(opCtx,
754+
nss.db() + "-movePrimary",
755+
"shardCollection",
756+
DistLockManager::kDefaultLockTimeout)));
757+
boost::optional<DistLockManager::ScopedDistLock> dbDistLock(
758+
uassertStatusOK(catalogClient->getDistLockManager()->lock(
762759
opCtx, nss.db(), "shardCollection", DistLockManager::kDefaultLockTimeout)));
763-
764-
// Lock the collection to prevent older mongos instances from trying to shard or drop it
765-
// concurrently.
766-
boost::optional<DistLockManager::ScopedDistLock> scopedCollectionDistLock(
767-
uassertStatusOK(Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock(
760+
boost::optional<DistLockManager::ScopedDistLock> collDistLock(
761+
uassertStatusOK(catalogClient->getDistLockManager()->lock(
768762
opCtx, nss.ns(), "shardCollection", DistLockManager::kDefaultLockTimeout)));
769763

770764
// Ensure sharding is allowed on the database.
@@ -894,13 +888,10 @@ class ConfigSvrShardCollectionCommand : public BasicCommand {
894888
// Make sure the cached metadata for the collection knows that we are now sharded
895889
catalogCache->invalidateShardedCollection(nss);
896890

897-
// Free the collection dist lock in order to allow the initial splits and moves below to
898-
// proceed.
899-
scopedCollectionDistLock.reset();
900-
901-
// Free the database and backwards compatibility dist locks, as they are no longer needed.
902-
scopedDatabaseDistLock.reset();
903-
backwardsCompatibleLock.reset();
891+
// Free the distlocks to allow the splits and migrations below to proceed.
892+
collDistLock.reset();
893+
dbDistLock.reset();
894+
backwardsCompatibleDbDistLock.reset();
904895

905896
// Step 7. Migrate initial chunks to distribute them across shards.
906897
migrateAndFurtherSplitInitialChunks(

src/mongo/s/catalog/sharding_catalog_client_impl.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@
8282
namespace mongo {
8383

8484
MONGO_FP_DECLARE(failApplyChunkOps);
85-
MONGO_FP_DECLARE(setDropCollDistLockWait);
8685

8786
using repl::OpTime;
8887
using std::set;
@@ -440,20 +439,6 @@ Status ShardingCatalogClientImpl::dropCollection(OperationContext* opCtx,
440439

441440
LOG(1) << "dropCollection " << ns << " started";
442441

443-
// Lock the collection globally so that split/migrate cannot run
444-
Seconds waitFor(DistLockManager::kDefaultLockTimeout);
445-
MONGO_FAIL_POINT_BLOCK(setDropCollDistLockWait, customWait) {
446-
const BSONObj& data = customWait.getData();
447-
waitFor = Seconds(data["waitForSecs"].numberInt());
448-
}
449-
450-
auto scopedDistLock = getDistLockManager()->lock(opCtx, ns.ns(), "drop", waitFor);
451-
if (!scopedDistLock.isOK()) {
452-
return scopedDistLock.getStatus();
453-
}
454-
455-
LOG(1) << "dropCollection " << ns << " locked";
456-
457442
const auto dropCommandBSON = [opCtx, &ns] {
458443
BSONObjBuilder builder;
459444
builder.append("drop", ns.coll());

src/mongo/s/catalog/sharding_catalog_create_database_test.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -153,24 +153,6 @@ TEST_F(CreateDatabaseTest, createDatabaseSuccess) {
153153
future.timed_get(kFutureTimeout);
154154
}
155155

156-
TEST_F(CreateDatabaseTest, createDatabaseDistLockHeld) {
157-
const std::string dbname = "db2";
158-
159-
ASSERT_OK(distLockCatalog()
160-
->grabLock(operationContext(),
161-
dbname,
162-
OID::gen(),
163-
"dummyWho",
164-
"dummyProcessId",
165-
Date_t::now(),
166-
"dummyReason")
167-
.getStatus());
168-
169-
Status status =
170-
ShardingCatalogManager::get(operationContext())->createDatabase(operationContext(), dbname);
171-
ASSERT_EQUALS(ErrorCodes::LockBusy, status);
172-
}
173-
174156
TEST_F(CreateDatabaseTest, createDatabaseDBExists) {
175157
const std::string dbname = "db3";
176158

src/mongo/s/catalog/sharding_catalog_drop_coll_test.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -271,25 +271,6 @@ TEST_F(DropColl2ShardTest, ConfigTargeterError) {
271271
future.timed_get(kFutureTimeout);
272272
}
273273

274-
TEST_F(DropColl2ShardTest, DistLockBusy) {
275-
distLock()->expectLock([](StringData, StringData, Milliseconds) {},
276-
{ErrorCodes::LockBusy, "test lock taken"});
277-
278-
auto future = launchAsync([this] {
279-
auto status = catalogClient()->dropCollection(operationContext(), dropNS());
280-
ASSERT_EQ(ErrorCodes::LockBusy, status.code());
281-
ASSERT_FALSE(status.reason().empty());
282-
});
283-
284-
expectChangeLogCreate(configHost(), BSON("ok" << 1));
285-
expectChangeLogInsert(
286-
configHost(), network()->now(), "dropCollection.start", dropNS().ns(), BSONObj());
287-
288-
expectGetShards({shard1(), shard2()});
289-
290-
future.timed_get(kFutureTimeout);
291-
}
292-
293274
TEST_F(DropColl2ShardTest, FirstShardTargeterError) {
294275
auto shard1Targeter = RemoteCommandTargeterMock::get(
295276
uassertStatusOK(shardRegistry()->getShard(operationContext(), shard1().getName()))

src/mongo/s/catalog/sharding_catalog_enable_sharding_test.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -104,23 +104,6 @@ TEST_F(EnableShardingTest, noDBExists) {
104104
future.timed_get(kFutureTimeout);
105105
}
106106

107-
TEST_F(EnableShardingTest, lockBusy) {
108-
std::string db = "db2";
109-
ASSERT_OK(distLockCatalog()
110-
->grabLock(operationContext(),
111-
db,
112-
OID::gen(),
113-
"dummyWho",
114-
"dummyProcessId",
115-
Date_t::now(),
116-
"dummyReason")
117-
.getStatus());
118-
119-
auto status =
120-
ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), db);
121-
ASSERT_EQ(ErrorCodes::LockBusy, status.code());
122-
}
123-
124107
TEST_F(EnableShardingTest, dbExistsWithDifferentCase) {
125108
ShardType shard;
126109
shard.setName("shard0");

src/mongo/s/catalog/sharding_catalog_manager_database_operations.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,6 @@ Status ShardingCatalogManager::createDatabase(OperationContext* opCtx, const std
6161
str::stream() << "cannot manually create database '" << dbName << "'");
6262
}
6363

64-
// Lock the database globally to prevent conflicts with simultaneous database creation.
65-
auto scopedDistLock = Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock(
66-
opCtx, dbName, "createDatabase", DistLockManager::kDefaultLockTimeout);
67-
if (!scopedDistLock.isOK()) {
68-
return scopedDistLock.getStatus();
69-
}
70-
7164
// check for case sensitivity violations
7265
Status status = _checkDbDoesNotExist(opCtx, dbName, nullptr);
7366
if (!status.isOK()) {
@@ -115,14 +108,6 @@ Status ShardingCatalogManager::enableSharding(OperationContext* opCtx, const std
115108
return Status::OK();
116109
}
117110

118-
// Lock the database globally to prevent conflicts with simultaneous database
119-
// creation/modification.
120-
auto scopedDistLock = Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock(
121-
opCtx, dbName, "enableSharding", DistLockManager::kDefaultLockTimeout);
122-
if (!scopedDistLock.isOK()) {
123-
return scopedDistLock.getStatus();
124-
}
125-
126111
// Check for case sensitivity violations
127112
DatabaseType db;
128113
Status status = _checkDbDoesNotExist(opCtx, dbName, &db);

src/mongo/s/commands/cluster_drop_cmd.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,14 @@
4141
#include "mongo/s/commands/cluster_commands_helpers.h"
4242
#include "mongo/s/grid.h"
4343
#include "mongo/s/stale_exception.h"
44+
#include "mongo/util/fail_point_service.h"
4445
#include "mongo/util/log.h"
4546

4647
namespace mongo {
4748
namespace {
4849

50+
MONGO_FP_DECLARE(setDropCollDistLockWait);
51+
4952
class DropCmd : public BasicCommand {
5053
public:
5154
DropCmd() : BasicCommand("drop") {}
@@ -77,6 +80,21 @@ class DropCmd : public BasicCommand {
7780
const NamespaceString nss(parseNsCollectionRequired(dbname, cmdObj));
7881

7982
auto const catalogCache = Grid::get(opCtx)->catalogCache();
83+
auto const catalogClient = Grid::get(opCtx)->catalogClient();
84+
85+
// Remove the backwards compatible lock after 3.6 ships.
86+
Seconds waitFor(DistLockManager::kDefaultLockTimeout);
87+
MONGO_FAIL_POINT_BLOCK(setDropCollDistLockWait, customWait) {
88+
const BSONObj& data = customWait.getData();
89+
waitFor = Seconds(data["waitForSecs"].numberInt());
90+
}
91+
auto backwardsCompatibleDbDistLock =
92+
uassertStatusOK(catalogClient->getDistLockManager()->lock(
93+
opCtx, nss.db() + "-movePrimary", "dropCollection", waitFor));
94+
auto dbDistLock = uassertStatusOK(
95+
catalogClient->getDistLockManager()->lock(opCtx, nss.db(), "dropCollection", waitFor));
96+
auto collDistLock = uassertStatusOK(
97+
catalogClient->getDistLockManager()->lock(opCtx, nss.ns(), "dropCollection", waitFor));
8098

8199
auto routingInfoStatus = catalogCache->getCollectionRoutingInfo(opCtx, nss);
82100
if (routingInfoStatus == ErrorCodes::NamespaceNotFound) {
@@ -88,7 +106,7 @@ class DropCmd : public BasicCommand {
88106
if (!routingInfo.cm()) {
89107
_dropUnshardedCollectionFromShard(opCtx, routingInfo.primaryId(), nss, &result);
90108
} else {
91-
uassertStatusOK(Grid::get(opCtx)->catalogClient()->dropCollection(opCtx, nss));
109+
uassertStatusOK(catalogClient->dropCollection(opCtx, nss));
92110
catalogCache->invalidateShardedCollection(nss);
93111
}
94112

@@ -104,12 +122,8 @@ class DropCmd : public BasicCommand {
104122
const ShardId& shardId,
105123
const NamespaceString& nss,
106124
BSONObjBuilder* result) {
107-
const auto catalogClient = Grid::get(opCtx)->catalogClient();
108125
const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
109126

110-
auto scopedDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock(
111-
opCtx, nss.ns(), "drop", DistLockManager::kDefaultLockTimeout));
112-
113127
const auto dropCommandBSON = [shardRegistry, opCtx, &shardId, &nss] {
114128
BSONObjBuilder builder;
115129
builder.append("drop", nss.coll());

0 commit comments

Comments
 (0)