Skip to content

Commit d43207b

Browse files
committed
HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold, leads to NN safemode. Contributed by Vinay.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1541773 13f79535-47bb-0310-9956-ffa450edef68
1 parent 1df87eb commit d43207b

File tree

4 files changed

+73
-4
lines changed

4 files changed

+73
-4
lines changed

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,9 @@ Release 2.3.0 - UNRELEASED
550550
HDFS-5075. httpfs-config.sh calls out incorrect env script name
551551
(Timothy St. Clair via stevel)
552552

553+
HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold,
554+
leads to NN safemode. (Vinay via jing9)
555+
553556
Release 2.2.1 - UNRELEASED
554557

555558
INCOMPATIBLE CHANGES

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
593593
fsNamesys.getSnapshotManager().deleteSnapshot(
594594
deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
595595
collectedBlocks, removedINodes);
596-
fsNamesys.removeBlocks(collectedBlocks);
596+
fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
597597
collectedBlocks.clear();
598598
fsNamesys.dir.removeFromInodeMap(removedINodes);
599599
removedINodes.clear();

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3316,6 +3316,18 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
33163316
return;
33173317
}
33183318

3319+
removeBlocksAndUpdateSafemodeTotal(blocks);
3320+
}
3321+
3322+
/**
3323+
* Removes the blocks from blocksmap and updates the safemode blocks total
3324+
*
3325+
* @param blocks
3326+
* An instance of {@link BlocksMapUpdateInfo} which contains a list
3327+
* of blocks that need to be removed from blocksMap
3328+
*/
3329+
void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
3330+
assert hasWriteLock();
33193331
// In the case that we are a Standby tailing edits from the
33203332
// active while in safe-mode, we need to track the total number
33213333
// of blocks and safe blocks in the system.
@@ -3336,9 +3348,9 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
33363348
}
33373349
if (trackBlockCounts) {
33383350
if (LOG.isDebugEnabled()) {
3339-
LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" +
3340-
"decreasing safeBlocks by " + numRemovedSafe +
3341-
", totalBlocks by " + numRemovedComplete);
3351+
LOG.debug("Adjusting safe-mode totals for deletion."
3352+
+ "decreasing safeBlocks by " + numRemovedSafe
3353+
+ ", totalBlocks by " + numRemovedComplete);
33423354
}
33433355
adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
33443356
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.apache.hadoop.hdfs.DFSUtil;
3939
import org.apache.hadoop.hdfs.DistributedFileSystem;
4040
import org.apache.hadoop.hdfs.MiniDFSCluster;
41+
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
4142
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
4243
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
4344
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
@@ -47,7 +48,10 @@
4748
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
4849
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryWithQuota;
4950
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
51+
import org.apache.hadoop.hdfs.server.namenode.NameNode;
52+
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
5053
import org.apache.hadoop.hdfs.server.namenode.Quota;
54+
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
5155
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList;
5256
import org.apache.hadoop.hdfs.util.ReadOnlyList;
5357
import org.apache.hadoop.io.IOUtils;
@@ -949,4 +953,54 @@ public void testDeleteSnapshotCommandWithIllegalArguments() throws Exception {
949953
psOut.close();
950954
out.close();
951955
}
956+
957+
/*
958+
* Regression test for HDFS-5504: the OP_DELETE_SNAPSHOT edits op was not decrementing the
959+
* safemode threshold when the NameNode was restarted in HA mode. Restarts nn0 after a snapshot deletion.
960+
*/
961+
@Test(timeout = 60000)
962+
public void testHANNRestartAfterSnapshotDeletion() throws Exception {
963+
hdfs.close();
964+
cluster.shutdown();
965+
conf = new Configuration();
966+
cluster = new MiniDFSCluster.Builder(conf)
967+
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
968+
.build();
969+
cluster.transitionToActive(0);
970+
// stop the standby namenode (index 1); only namenode 0 is used from here on
971+
NameNode snn = cluster.getNameNode(1);
972+
snn.stop();
973+
974+
hdfs = (DistributedFileSystem) HATestUtil
975+
.configureFailoverFs(cluster, conf);
976+
Path dir = new Path("/dir");
977+
Path subDir = new Path(dir, "sub");
978+
hdfs.mkdirs(dir);
979+
hdfs.allowSnapshot(dir);
980+
for (int i = 0; i < 5; i++) {
981+
DFSTestUtil.createFile(hdfs, new Path(subDir, "" + i), 100, (short) 1,
982+
1024L);
983+
}
984+
985+
// take snapshot s0 of /dir while the five files under /dir/sub still exist
986+
hdfs.createSnapshot(dir, "s0");
987+
988+
// recursively delete the subdir that was captured in snapshot s0
989+
hdfs.delete(subDir, true);
990+
991+
// roll the edit log on the active namenode before deleting the snapshot
992+
NameNode ann = cluster.getNameNode(0);
993+
ann.getRpcServer().rollEditLog();
994+
995+
hdfs.deleteSnapshot(dir, "s0");
996+
// wait for the blocks deletion at namenode (fixed 2s sleep; NOTE(review): timing-dependent, may be flaky)
997+
Thread.sleep(2000);
998+
999+
NameNodeAdapter.abortEditLogs(ann);
1000+
cluster.restartNameNode(0, false);
1001+
cluster.transitionToActive(0);
1002+
1003+
// wait till the cluster becomes active; presumably blocks until the 60s test timeout if nn0 stays in safemode (TODO confirm)
1004+
cluster.waitClusterUp();
1005+
}
9521006
}

0 commit comments

Comments
 (0)