Skip to content

Commit c1b74df

Browse files
author
Andrew Or
committed
[SPARK-5771] Master UI inconsistently displays application cores
If the user calls `sc.stop()`, then the number of cores under "Completed Applications" will be 0. If the user does not call `sc.stop()`, then the number of cores will be however many cores were being used before the application exited. This PR makes both cases have the behavior of the latter. Note that there have been a series of PRs that attempted to fix this. For the full discussion, please refer to apache#4841. The unregister event is necessary because of a subtle race condition explained in that PR. Tested this locally with and without calling `sc.stop()`. Author: Andrew Or <[email protected]> Closes apache#5177 from andrewor14/master-ui-cores and squashes the following commits: 62449d1 [Andrew Or] Freeze application state before finishing it
1 parent acef51d commit c1b74df

File tree

4 files changed

+16
-1
lines changed

4 files changed

+16
-1
lines changed

core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ private[deploy] object DeployMessages {
101101
case class RegisterApplication(appDescription: ApplicationDescription)
102102
extends DeployMessage
103103

104+
case class UnregisterApplication(appId: String)
105+
104106
case class MasterChangeAcknowledged(appId: String)
105107

106108
// Master to AppClient

core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ private[spark] class AppClient(
157157

158158
case StopAppClient =>
159159
markDead("Application has been stopped.")
160+
master ! UnregisterApplication(appId)
160161
sender ! true
161162
context.stop(self)
162163
}

core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ private[deploy] class ApplicationInfo(
111111
endTime = System.currentTimeMillis()
112112
}
113113

114+
private[master] def isFinished: Boolean = {
115+
state != ApplicationState.WAITING && state != ApplicationState.RUNNING
116+
}
117+
114118
def duration: Long = {
115119
if (endTime != -1) {
116120
endTime - startTime

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,11 @@ private[master] class Master(
339339
if (ExecutorState.isFinished(state)) {
340340
// Remove this executor from the worker and app
341341
logInfo(s"Removing executor ${exec.fullId} because it is $state")
342-
appInfo.removeExecutor(exec)
342+
// If an application has already finished, preserve its
343+
// state to display its information properly on the UI
344+
if (!appInfo.isFinished) {
345+
appInfo.removeExecutor(exec)
346+
}
343347
exec.worker.removeExecutor(exec)
344348

345349
val normalExit = exitStatus == Some(0)
@@ -428,6 +432,10 @@ private[master] class Master(
428432
if (canCompleteRecovery) { completeRecovery() }
429433
}
430434

435+
case UnregisterApplication(applicationId) =>
436+
logInfo(s"Received unregister request from application $applicationId")
437+
idToApp.get(applicationId).foreach(finishApplication)
438+
431439
case DisassociatedEvent(_, address, _) => {
432440
// The disconnected client could've been either a worker or an app; remove whichever it was
433441
logInfo(s"$address got disassociated, removing it.")

0 commit comments

Comments
 (0)