Skip to content

Commit 3c0cc22

Browse files
committed
Fix possible race condition during snapshot deletion
1 parent 0a7c6c9 commit 3c0cc22

File tree

1 file changed

+35
-22
lines changed

1 file changed

+35
-22
lines changed

src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
* <li>Once shard snapshot is created data node updates state of the shard in the cluster state using the {@link #updateIndexShardSnapshotStatus(UpdateIndexShardSnapshotStatusRequest)} method</li>
7575
* <li>When last shard is completed master node in {@link #innerUpdateSnapshotState} method marks the snapshot as completed</li>
7676
* <li>After cluster state is updated, the {@link #endSnapshot(SnapshotMetaData.Entry)} finalizes snapshot in the repository,
77-
* notifies all {@link #snapshotCompletionListeners} that snapshot is completed, and finally calls {@link #removeSnapshotFromClusterState(SnapshotId)} to remove snapshot from cluster state</li>
77+
* notifies all {@link #snapshotCompletionListeners} that snapshot is completed, and finally calls {@link #removeSnapshotFromClusterState(SnapshotId, SnapshotInfo, Throwable)} to remove snapshot from cluster state</li>
7878
* </ul>
7979
*/
8080
public class SnapshotsService extends AbstractComponent implements ClusterStateListener {
@@ -241,8 +241,8 @@ private void validate(SnapshotRequest request, ClusterState state) throws Elasti
241241
* <p/>
242242
* Creates snapshot in repository and updates snapshot metadata record with list of shards that needs to be processed.
243243
*
244-
* @param clusterState cluster state
245-
* @param snapshot snapshot meta data
244+
* @param clusterState cluster state
245+
* @param snapshot snapshot meta data
246246
* @param userCreateSnapshotListener listener
247247
*/
248248
private void beginSnapshot(ClusterState clusterState, final SnapshotMetaData.Entry snapshot, final CreateSnapshotListener userCreateSnapshotListener) {
@@ -681,24 +681,11 @@ public void run() {
681681
}
682682
}
683683
Snapshot snapshot = repository.finalizeSnapshot(snapshotId, null, entry.shards().size(), ImmutableList.copyOf(shardFailures));
684-
for (SnapshotCompletionListener listener : snapshotCompletionListeners) {
685-
try {
686-
listener.onSnapshotCompletion(snapshotId, new SnapshotInfo(snapshot));
687-
} catch (Throwable t) {
688-
logger.warn("failed to refresh settings for [{}]", t, listener);
689-
}
690-
}
684+
removeSnapshotFromClusterState(snapshotId, new SnapshotInfo(snapshot), null);
691685
} catch (Throwable t) {
692686
logger.warn("[{}] failed to finalize snapshot", t, snapshotId);
693-
for (SnapshotCompletionListener listener : snapshotCompletionListeners) {
694-
try {
695-
listener.onSnapshotFailure(snapshotId, t);
696-
} catch (Throwable t2) {
697-
logger.warn("failed to update snapshot status for [{}]", t2, listener);
698-
}
699-
}
687+
removeSnapshotFromClusterState(snapshotId, null, t);
700688
}
701-
removeSnapshotFromClusterState(snapshotId);
702689
}
703690
});
704691
}
@@ -707,9 +694,11 @@ public void run() {
707694
* Removes record of running snapshot from cluster state
708695
*
709696
* @param snapshotId snapshot id
697+
* @param snapshot snapshot info if snapshot was successful
698+
* @param t exception if snapshot failed
710699
*/
711-
private void removeSnapshotFromClusterState(final SnapshotId snapshotId) {
712-
clusterService.submitStateUpdateTask("remove snapshot metadata", new ClusterStateUpdateTask() {
700+
private void removeSnapshotFromClusterState(final SnapshotId snapshotId, final SnapshotInfo snapshot, final Throwable t) {
701+
clusterService.submitStateUpdateTask("remove snapshot metadata", new ProcessedClusterStateUpdateTask() {
713702
@Override
714703
public ClusterState execute(ClusterState currentState) {
715704
MetaData metaData = currentState.metaData();
@@ -738,6 +727,22 @@ public ClusterState execute(ClusterState currentState) {
738727
public void onFailure(String source, Throwable t) {
739728
logger.warn("[{}][{}] failed to remove snapshot metadata", t, snapshotId);
740729
}
730+
731+
@Override
732+
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
733+
for (SnapshotCompletionListener listener : snapshotCompletionListeners) {
734+
try {
735+
if (snapshot != null) {
736+
listener.onSnapshotCompletion(snapshotId, snapshot);
737+
} else {
738+
listener.onSnapshotFailure(snapshotId, t);
739+
}
740+
} catch (Throwable t) {
741+
logger.warn("failed to refresh settings for [{}]", t, listener);
742+
}
743+
}
744+
745+
}
741746
});
742747
}
743748

@@ -787,10 +792,14 @@ public ClusterState execute(ClusterState currentState) throws Exception {
787792
}
788793
}
789794
shards = shardsBuilder.build();
790-
} else {
791-
// snapshot hasn't started yet or already finished - just end it
795+
} else if (snapshot.state() == State.INIT) {
796+
// snapshot hasn't started yet - end it
792797
shards = snapshot.shards();
793798
endSnapshot(snapshot);
799+
} else {
800+
// snapshot is being finalized - wait for it
801+
logger.trace("trying to delete completed snapshot - save to delete");
802+
return currentState;
794803
}
795804
SnapshotMetaData.Entry newSnapshot = new SnapshotMetaData.Entry(snapshotId, snapshot.includeGlobalState(), State.ABORTED, snapshot.indices(), shards);
796805
snapshots = new SnapshotMetaData(newSnapshot);
@@ -807,20 +816,24 @@ public void onFailure(String source, Throwable t) {
807816
@Override
808817
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
809818
if (waitForSnapshot) {
819+
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
810820
addListener(new SnapshotCompletionListener() {
811821
@Override
812822
public void onSnapshotCompletion(SnapshotId snapshotId, SnapshotInfo snapshot) {
823+
logger.trace("deleted snapshot completed - deleting files");
813824
removeListener(this);
814825
deleteSnapshotFromRepository(snapshotId, listener);
815826
}
816827

817828
@Override
818829
public void onSnapshotFailure(SnapshotId snapshotId, Throwable t) {
830+
logger.trace("deleted snapshot failed - deleting files", t);
819831
removeListener(this);
820832
deleteSnapshotFromRepository(snapshotId, listener);
821833
}
822834
});
823835
} else {
836+
logger.trace("deleted snapshot is not running - deleting files");
824837
deleteSnapshotFromRepository(snapshotId, listener);
825838
}
826839
}

0 commit comments

Comments
 (0)