Skip to content

Commit 16a3998

Browse files
committed
HDFS-3442. Incorrect count for Missing Replicas in FSCK report. Contributed by Andrew Wang.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1345408 13f79535-47bb-0310-9956-ffa450edef68
1 parent 973b4b9 commit 16a3998

File tree

3 files changed

+126
-27
lines changed

3 files changed

+126
-27
lines changed

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,9 @@ Release 2.0.1-alpha - UNRELEASED
298298
HDFS-3487. offlineimageviewer should give byte offset information
299299
when it encounters an exception. (Colin Patrick McCabe via eli)
300300

301+
HDFS-3442. Incorrect count for Missing Replicas in FSCK report. (Andrew
302+
Wang via atm)
303+
301304
Release 2.0.0-alpha - UNRELEASED
302305

303306
INCOMPATIBLE CHANGES

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
import org.apache.hadoop.net.NodeBase;
5454
import org.apache.hadoop.security.UserGroupInformation;
5555

56+
import com.google.common.annotations.VisibleForTesting;
57+
5658
/**
5759
* This class provides rudimentary checking of DFS volumes for errors and
5860
* sub-optimal conditions.
@@ -244,7 +246,8 @@ private void listCorruptFileBlocks() throws IOException {
244246
out.println();
245247
}
246248

247-
private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
249+
@VisibleForTesting
250+
void check(String parent, HdfsFileStatus file, Result res) throws IOException {
248251
String path = file.getFullName(parent);
249252
boolean isOpen = false;
250253

@@ -313,6 +316,7 @@ private void check(String parent, HdfsFileStatus file, Result res) throws IOExce
313316
DatanodeInfo[] locs = lBlk.getLocations();
314317
res.totalReplicas += locs.length;
315318
short targetFileReplication = file.getReplication();
319+
res.numExpectedReplicas += targetFileReplication;
316320
if (locs.length > targetFileReplication) {
317321
res.excessiveReplicas += (locs.length - targetFileReplication);
318322
res.numOverReplicatedBlocks += 1;
@@ -608,29 +612,31 @@ private void lostFoundInit(DFSClient dfs) {
608612
/**
609613
* FsckResult of checking, plus overall DFS statistics.
610614
*/
611-
private static class Result {
612-
private List<String> missingIds = new ArrayList<String>();
613-
private long missingSize = 0L;
614-
private long corruptFiles = 0L;
615-
private long corruptBlocks = 0L;
616-
private long excessiveReplicas = 0L;
617-
private long missingReplicas = 0L;
618-
private long numOverReplicatedBlocks = 0L;
619-
private long numUnderReplicatedBlocks = 0L;
620-
private long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
621-
private long numMinReplicatedBlocks = 0L; // minimally replicated blocks
622-
private long totalBlocks = 0L;
623-
private long totalOpenFilesBlocks = 0L;
624-
private long totalFiles = 0L;
625-
private long totalOpenFiles = 0L;
626-
private long totalDirs = 0L;
627-
private long totalSize = 0L;
628-
private long totalOpenFilesSize = 0L;
629-
private long totalReplicas = 0L;
615+
@VisibleForTesting
616+
static class Result {
617+
List<String> missingIds = new ArrayList<String>();
618+
long missingSize = 0L;
619+
long corruptFiles = 0L;
620+
long corruptBlocks = 0L;
621+
long excessiveReplicas = 0L;
622+
long missingReplicas = 0L;
623+
long numOverReplicatedBlocks = 0L;
624+
long numUnderReplicatedBlocks = 0L;
625+
long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
626+
long numMinReplicatedBlocks = 0L; // minimally replicated blocks
627+
long totalBlocks = 0L;
628+
long numExpectedReplicas = 0L;
629+
long totalOpenFilesBlocks = 0L;
630+
long totalFiles = 0L;
631+
long totalOpenFiles = 0L;
632+
long totalDirs = 0L;
633+
long totalSize = 0L;
634+
long totalOpenFilesSize = 0L;
635+
long totalReplicas = 0L;
630636

631637
final short replication;
632638

633-
private Result(Configuration conf) {
639+
Result(Configuration conf) {
634640
this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
635641
DFSConfigKeys.DFS_REPLICATION_DEFAULT);
636642
}
@@ -726,7 +732,7 @@ public String toString() {
726732
missingReplicas);
727733
if (totalReplicas > 0) {
728734
res.append(" (").append(
729-
((float) (missingReplicas * 100) / (float) totalReplicas)).append(
735+
((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append(
730736
" %)");
731737
}
732738
return res.toString();

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,27 @@
1818

1919
package org.apache.hadoop.hdfs.server.namenode;
2020

21+
import static org.junit.Assert.*;
22+
2123
import java.io.BufferedReader;
2224
import java.io.ByteArrayOutputStream;
2325
import java.io.File;
2426
import java.io.FileReader;
2527
import java.io.IOException;
2628
import java.io.PrintStream;
29+
import java.io.PrintWriter;
2730
import java.io.RandomAccessFile;
31+
import java.io.StringWriter;
32+
import java.io.Writer;
33+
import java.net.InetAddress;
2834
import java.net.InetSocketAddress;
2935
import java.nio.channels.FileChannel;
3036
import java.security.PrivilegedExceptionAction;
37+
import java.util.HashMap;
38+
import java.util.Map;
3139
import java.util.Random;
3240
import java.util.regex.Pattern;
3341

34-
import junit.framework.TestCase;
35-
3642
import org.apache.commons.logging.impl.Log4JLogger;
3743
import org.apache.hadoop.conf.Configuration;
3844
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -42,25 +48,30 @@
4248
import org.apache.hadoop.hdfs.DFSClient;
4349
import org.apache.hadoop.hdfs.DFSConfigKeys;
4450
import org.apache.hadoop.hdfs.DFSTestUtil;
51+
import org.apache.hadoop.hdfs.DistributedFileSystem;
4552
import org.apache.hadoop.hdfs.HdfsConfiguration;
4653
import org.apache.hadoop.hdfs.MiniDFSCluster;
47-
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
4854
import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
55+
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
56+
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
4957
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
58+
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
5059
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
5160
import org.apache.hadoop.hdfs.tools.DFSck;
5261
import org.apache.hadoop.io.IOUtils;
62+
import org.apache.hadoop.net.NetworkTopology;
5363
import org.apache.hadoop.security.UserGroupInformation;
5464
import org.apache.hadoop.util.ToolRunner;
5565
import org.apache.log4j.Level;
5666
import org.apache.log4j.Logger;
5767
import org.apache.log4j.PatternLayout;
5868
import org.apache.log4j.RollingFileAppender;
69+
import org.junit.Test;
5970

6071
/**
6172
* A JUnit test for doing fsck
6273
*/
63-
public class TestFsck extends TestCase {
74+
public class TestFsck {
6475
static final String auditLogFile = System.getProperty("test.build.dir",
6576
"build/test") + "/audit.log";
6677

@@ -79,13 +90,15 @@ static String runFsck(Configuration conf, int expectedErrCode,
7990
PrintStream out = new PrintStream(bStream, true);
8091
((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL);
8192
int errCode = ToolRunner.run(new DFSck(conf, out), path);
82-
if (checkErrorCode)
93+
if (checkErrorCode) {
8394
assertEquals(expectedErrCode, errCode);
95+
}
8496
((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO);
8597
return bStream.toString();
8698
}
8799

88100
/** do fsck */
101+
@Test
89102
public void testFsck() throws Exception {
90103
DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
91104
MiniDFSCluster cluster = null;
@@ -158,6 +171,7 @@ private void verifyAuditLogs() throws IOException {
158171
assertNull("Unexpected event in audit log", reader.readLine());
159172
}
160173

174+
@Test
161175
public void testFsckNonExistent() throws Exception {
162176
DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
163177
MiniDFSCluster cluster = null;
@@ -180,6 +194,7 @@ public void testFsckNonExistent() throws Exception {
180194
}
181195

182196
/** Test fsck with permission set on inodes */
197+
@Test
183198
public void testFsckPermission() throws Exception {
184199
final DFSTestUtil util = new DFSTestUtil(getClass().getSimpleName(), 20, 3, 8*1024);
185200
final Configuration conf = new HdfsConfiguration();
@@ -227,6 +242,7 @@ public Object run() throws Exception {
227242
}
228243
}
229244

245+
@Test
230246
public void testFsckMoveAndDelete() throws Exception {
231247
final int MAX_MOVE_TRIES = 5;
232248
DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
@@ -300,6 +316,7 @@ public void testFsckMoveAndDelete() throws Exception {
300316
}
301317
}
302318

319+
@Test
303320
public void testFsckOpenFiles() throws Exception {
304321
DFSTestUtil util = new DFSTestUtil("TestFsck", 4, 3, 8*1024);
305322
MiniDFSCluster cluster = null;
@@ -350,6 +367,7 @@ public void testFsckOpenFiles() throws Exception {
350367
}
351368
}
352369

370+
@Test
353371
public void testCorruptBlock() throws Exception {
354372
Configuration conf = new HdfsConfiguration();
355373
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -426,6 +444,7 @@ public void testCorruptBlock() throws Exception {
426444
*
427445
* @throws Exception
428446
*/
447+
@Test
429448
public void testFsckError() throws Exception {
430449
MiniDFSCluster cluster = null;
431450
try {
@@ -460,6 +479,7 @@ public void testFsckError() throws Exception {
460479
}
461480

462481
/** check if option -list-corruptfiles of fsck command works properly */
482+
@Test
463483
public void testFsckListCorruptFilesBlocks() throws Exception {
464484
Configuration conf = new Configuration();
465485
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -529,6 +549,7 @@ public void testFsckListCorruptFilesBlocks() throws Exception {
529549
* Test for checking fsck command on illegal arguments should print the proper
530550
* usage.
531551
*/
552+
@Test
532553
public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
533554
MiniDFSCluster cluster = null;
534555
try {
@@ -560,4 +581,73 @@ public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
560581
}
561582
}
562583
}
584+
585+
/**
586+
* Tests that the # of missing block replicas and expected replicas is correct
587+
* @throws IOException
588+
*/
589+
@Test
590+
public void testFsckMissingReplicas() throws IOException {
591+
// Desired replication factor
592+
// Set this higher than NUM_REPLICAS so it's under-replicated
593+
final short REPL_FACTOR = 2;
594+
// Number of replicas to actually start
595+
final short NUM_REPLICAS = 1;
596+
// Number of blocks to write
597+
final short NUM_BLOCKS = 3;
598+
// Set a small-ish blocksize
599+
final long blockSize = 512;
600+
601+
Configuration conf = new Configuration();
602+
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
603+
604+
MiniDFSCluster cluster = null;
605+
DistributedFileSystem dfs = null;
606+
607+
try {
608+
// Startup a minicluster
609+
cluster =
610+
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_REPLICAS).build();
611+
assertNotNull("Failed Cluster Creation", cluster);
612+
cluster.waitClusterUp();
613+
dfs = (DistributedFileSystem) cluster.getFileSystem();
614+
assertNotNull("Failed to get FileSystem", dfs);
615+
616+
// Create a file that will be intentionally under-replicated
617+
final String pathString = new String("/testfile");
618+
final Path path = new Path(pathString);
619+
long fileLen = blockSize * NUM_BLOCKS;
620+
DFSTestUtil.createFile(dfs, path, fileLen, REPL_FACTOR, 1);
621+
622+
// Create an under-replicated file
623+
NameNode namenode = cluster.getNameNode();
624+
NetworkTopology nettop = cluster.getNamesystem().getBlockManager()
625+
.getDatanodeManager().getNetworkTopology();
626+
Map<String,String[]> pmap = new HashMap<String, String[]>();
627+
Writer result = new StringWriter();
628+
PrintWriter out = new PrintWriter(result, true);
629+
InetAddress remoteAddress = InetAddress.getLocalHost();
630+
NamenodeFsck fsck = new NamenodeFsck(conf, namenode, nettop, pmap, out,
631+
NUM_REPLICAS, (short)1, remoteAddress);
632+
633+
// Run the fsck and check the Result
634+
final HdfsFileStatus file =
635+
namenode.getRpcServer().getFileInfo(pathString);
636+
assertNotNull(file);
637+
Result res = new Result(conf);
638+
fsck.check(pathString, file, res);
639+
// Also print the output from the fsck, for ex post facto sanity checks
640+
System.out.println(result.toString());
641+
assertEquals(res.missingReplicas,
642+
(NUM_BLOCKS*REPL_FACTOR) - (NUM_BLOCKS*NUM_REPLICAS));
643+
assertEquals(res.numExpectedReplicas, NUM_BLOCKS*REPL_FACTOR);
644+
} finally {
645+
if(dfs != null) {
646+
dfs.close();
647+
}
648+
if(cluster != null) {
649+
cluster.shutdown();
650+
}
651+
}
652+
}
563653
}

0 commit comments

Comments
 (0)