Skip to content

Commit fcb7884

Browse files
fapifta authored and jojochuang committed
HDFS-8178. QJM doesn't move aside stale inprogress edits files. Contributed by Istvan Fajth.
Signed-off-by: Wei-Chiu Chuang <[email protected]>
1 parent 3b22fcd commit fcb7884

File tree

4 files changed

+180
-67
lines changed

4 files changed

+180
-67
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -52,12 +52,7 @@ class JNStorage extends Storage {
5252
private final StorageDirectory sd;
5353
private StorageState state;
5454

55-
private static final List<Pattern> CURRENT_DIR_PURGE_REGEXES =
56-
ImmutableList.of(
57-
Pattern.compile("edits_\\d+-(\\d+)"),
58-
Pattern.compile("edits_inprogress_(\\d+)(?:\\..*)?"));
59-
60-
private static final List<Pattern> PAXOS_DIR_PURGE_REGEXES =
55+
private static final List<Pattern> PAXOS_DIR_PURGE_REGEXES =
6156
ImmutableList.of(Pattern.compile("(\\d+)"));
6257

6358
private static final String STORAGE_EDITS_SYNC = "edits.sync";
@@ -181,8 +176,8 @@ File getRoot() {
181176
* the given txid.
182177
*/
183178
void purgeDataOlderThan(long minTxIdToKeep) throws IOException {
184-
purgeMatching(sd.getCurrentDir(),
185-
CURRENT_DIR_PURGE_REGEXES, minTxIdToKeep);
179+
fjm.purgeLogsOlderThan(minTxIdToKeep);
180+
186181
purgeMatching(getOrCreatePaxosDir(),
187182
PAXOS_DIR_PURGE_REGEXES, minTxIdToKeep);
188183
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ public class FileJournalManager implements JournalManager {
7575
private static final Pattern EDITS_INPROGRESS_STALE_REGEX = Pattern.compile(
7676
NameNodeFile.EDITS_INPROGRESS.getName() + "_(\\d+).*(\\S+)");
7777

78-
private File currentInProgress = null;
78+
@VisibleForTesting
79+
File currentInProgress = null;
7980

8081
/**
8182
* A FileJournalManager should maintain the largest Tx ID that has been
@@ -178,20 +179,50 @@ public void setLastReadableTxId(long id) {
178179
this.lastReadableTxId = id;
179180
}
180181

182+
/**
183+
* Purges the unnecessary edits and edits_inprogress files.
184+
*
185+
* Edits files that are ending before the minTxIdToKeep are purged.
186+
* Edits in progress files that are starting before minTxIdToKeep are purged.
187+
* Edits in progress files that are marked as empty, trash, corrupted or
188+
* stale by file extension and starting before minTxIdToKeep are purged.
189+
* Edits in progress files that are after minTxIdToKeep, but before the
190+
* current edits in progress files are marked as stale for clarity.
191+
*
192+
* In case file removal or rename is failing a warning is logged, but that
193+
* does not fail the operation.
194+
*
195+
* @param minTxIdToKeep the lowest transaction ID that should be retained
196+
* @throws IOException if listing the storage directory fails.
197+
*/
181198
@Override
182199
public void purgeLogsOlderThan(long minTxIdToKeep)
183200
throws IOException {
184201
LOG.info("Purging logs older than " + minTxIdToKeep);
185202
File[] files = FileUtil.listFiles(sd.getCurrentDir());
186203
List<EditLogFile> editLogs = matchEditLogs(files, true);
187-
for (EditLogFile log : editLogs) {
188-
if (log.getFirstTxId() < minTxIdToKeep &&
189-
log.getLastTxId() < minTxIdToKeep) {
190-
purger.purgeLog(log);
204+
synchronized (this) {
205+
for (EditLogFile log : editLogs) {
206+
if (log.getFirstTxId() < minTxIdToKeep &&
207+
log.getLastTxId() < minTxIdToKeep) {
208+
purger.purgeLog(log);
209+
} else if (isStaleInProgressLog(minTxIdToKeep, log)) {
210+
purger.markStale(log);
211+
}
191212
}
192213
}
193214
}
194215

216+
private boolean isStaleInProgressLog(long minTxIdToKeep, EditLogFile log) {
217+
return log.isInProgress() &&
218+
!log.getFile().equals(currentInProgress) &&
219+
log.getFirstTxId() >= minTxIdToKeep &&
220+
// at last we check if this segment is not already marked as .trash,
221+
// .empty or .corrupted, in which case it does not match the strict
222+
// regex pattern.
223+
EDITS_INPROGRESS_REGEX.matcher(log.getFile().getName()).matches();
224+
}
225+
195226
/**
196227
* Find all editlog segments starting at or above the given txid.
197228
* @param firstTxId the txnid which to start looking
@@ -596,7 +627,12 @@ public void moveAsideEmptyFile() throws IOException {
596627
assert lastTxId == HdfsServerConstants.INVALID_TXID;
597628
renameSelf(".empty");
598629
}
599-
630+
631+
public void moveAsideStaleInprogressFile() throws IOException {
632+
assert isInProgress;
633+
renameSelf(".stale");
634+
}
635+
600636
private void renameSelf(String newSuffix) throws IOException {
601637
File src = file;
602638
File dst = new File(src.getParent(), src.getName() + newSuffix);

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,21 +207,22 @@ private long getImageTxIdToRetain(
207207
/**
208208
* Interface responsible for disposing of old checkpoints and edit logs.
209209
*/
210-
static interface StoragePurger {
210+
interface StoragePurger {
211211
void purgeLog(EditLogFile log);
212212
void purgeImage(FSImageFile image);
213+
void markStale(EditLogFile log);
213214
}
214215

215216
static class DeletionStoragePurger implements StoragePurger {
216217
@Override
217218
public void purgeLog(EditLogFile log) {
218-
LOG.info("Purging old edit log " + log);
219+
LOG.info("Purging old edit log {}", log);
219220
deleteOrWarn(log.getFile());
220221
}
221222

222223
@Override
223224
public void purgeImage(FSImageFile image) {
224-
LOG.info("Purging old image " + image);
225+
LOG.info("Purging old image {}", image);
225226
deleteOrWarn(image.getFile());
226227
deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
227228
}
@@ -230,9 +231,19 @@ private static void deleteOrWarn(File file) {
230231
if (!file.delete()) {
231232
// It's OK if we fail to delete something -- we'll catch it
232233
// next time we swing through this directory.
233-
LOG.warn("Could not delete " + file);
234+
LOG.warn("Could not delete {}", file);
234235
}
235236
}
237+
238+
public void markStale(EditLogFile log){
239+
try {
240+
log.moveAsideStaleInprogressFile();
241+
} catch (IOException e) {
242+
// It is ok to just log the rename failure and go on, we will try next
243+
// time just as with deletions.
244+
LOG.warn("Could not mark {} as stale", log, e);
245+
}
246+
}
236247
}
237248

238249
/**

0 commit comments

Comments
 (0)