
Commit df3d559

Marcelo Vanzin authored and srowen committed
[SPARK-5801] [core] Avoid creating nested directories.
Cache the value of the local root dirs to use for storing local data, so that the same directories are reused. Also, to avoid an extra level of nesting, use a different env variable to propagate the local dirs from the Worker to the executors. And make the executor directory use a different name.

Author: Marcelo Vanzin <[email protected]>

Closes apache#4747 from vanzin/SPARK-5801 and squashes the following commits:

e0114e1 [Marcelo Vanzin] Update unit test.
18ee0a7 [Marcelo Vanzin] [SPARK-5801] [core] Avoid creating nested directories.
1 parent 192e42a commit df3d559
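
To make the "extra level of nesting" concrete: before this change, the Worker created a per-application directory and handed it to the executor through SPARK_LOCAL_DIRS, the same variable the executor treats as a list of roots in which to create fresh unique directories, so each hand-off added one more directory level. A hypothetical sketch of that effect (createUniqueDir stands in for Utils.createDirectory; none of these names are from the patch):

    import java.io.File
    import java.util.UUID

    object NestingSketch extends App {
      // Stand-in for Utils.createDirectory: make a fresh, uniquely named dir.
      def createUniqueDir(root: String): File = {
        val dir = new File(root, s"spark-${UUID.randomUUID().toString.take(8)}")
        require(dir.mkdirs(), s"failed to create $dir")
        dir
      }

      val root = System.getProperty("java.io.tmpdir")        // e.g. /tmp
      val appDir = createUniqueDir(root)                     // /tmp/spark-aaaaaaaa
      // Pre-patch behavior: the executor received appDir via SPARK_LOCAL_DIRS
      // and, treating it as another root, nested a second unique dir inside it.
      val nested = createUniqueDir(appDir.getAbsolutePath)   // /tmp/spark-aaaaaaaa/spark-bbbbbbbb
      println(nested)
      // Post-patch: the Worker exports appDir via SPARK_EXECUTOR_DIRS, which
      // getOrCreateLocalRootDirs returns as-is, so the second level never appears.
    }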

4 files changed (+32, -5 lines)


core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ private[spark] class ExecutorRunner(
     logInfo("Launch command: " + command.mkString("\"", "\" \"", "\""))

     builder.directory(executorDir)
-    builder.environment.put("SPARK_LOCAL_DIRS", appLocalDirs.mkString(","))
+    builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator))
     // In case we are running this from within the Spark Shell, avoid creating a "scala"
     // parent process for the executor command
     builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0")

core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

Lines changed: 2 additions & 2 deletions
@@ -345,11 +345,11 @@ private[spark] class Worker(
     }

     // Create local dirs for the executor. These are passed to the executor via the
-    // SPARK_LOCAL_DIRS environment variable, and deleted by the Worker when the
+    // SPARK_EXECUTOR_DIRS environment variable, and deleted by the Worker when the
     // application finishes.
     val appLocalDirs = appDirectories.get(appId).getOrElse {
       Utils.getOrCreateLocalRootDirs(conf).map { dir =>
-        Utils.createDirectory(dir).getAbsolutePath()
+        Utils.createDirectory(dir, namePrefix = "executor").getAbsolutePath()
       }.toSeq
     }
     appDirectories(appId) = appLocalDirs
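
One detail worth noting in the hunk above: appDirectories.get(appId).getOrElse creates the directories only for the first executor of an application and reuses them for the rest. A minimal standalone sketch of that reuse pattern (names and paths are illustrative, not Spark's):

    import scala.collection.mutable

    object AppDirReuseSketch extends App {
      val appDirectories = mutable.HashMap[String, Seq[String]]()
      var dirsCreated = 0

      def dirsFor(appId: String): Seq[String] = {
        val dirs = appDirectories.get(appId).getOrElse {
          dirsCreated += 1 // stands in for the Utils.createDirectory side effect
          Seq(s"/tmp/$appId/executor-$dirsCreated")
        }
        appDirectories(appId) = dirs
        dirs
      }

      assert(dirsFor("app-1") == dirsFor("app-1")) // second call reuses the cache
      assert(dirsCreated == 1)
    }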

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 23 additions & 0 deletions
@@ -63,6 +63,7 @@ private[spark] object Utils extends Logging {
   val random = new Random()

   private val MAX_DIR_CREATION_ATTEMPTS: Int = 10
+  @volatile private var localRootDirs: Array[String] = null

   /** Serialize an object using Java serialization */
   def serialize[T](o: T): Array[Byte] = {

@@ -683,14 +684,31 @@
    * and returns only the directories that exist / could be created.
    *
    * If no directories could be created, this will return an empty list.
+   *
+   * This method will cache the local directories for the application when it's first invoked.
+   * So calling it multiple times with a different configuration will always return the same
+   * set of directories.
    */
   private[spark] def getOrCreateLocalRootDirs(conf: SparkConf): Array[String] = {
+    if (localRootDirs == null) {
+      this.synchronized {
+        if (localRootDirs == null) {
+          localRootDirs = getOrCreateLocalRootDirsImpl(conf)
+        }
+      }
+    }
+    localRootDirs
+  }
+
+  private def getOrCreateLocalRootDirsImpl(conf: SparkConf): Array[String] = {
     if (isRunningInYarnContainer(conf)) {
       // If we are in yarn mode, systems can have different disk layouts so we must set it
       // to what Yarn on this system said was available. Note this assumes that Yarn has
       // created the directories already, and that they are secured so that only the
       // user has access to them.
       getYarnLocalDirs(conf).split(",")
+    } else if (conf.getenv("SPARK_EXECUTOR_DIRS") != null) {
+      conf.getenv("SPARK_EXECUTOR_DIRS").split(File.pathSeparator)
     } else {
       // In non-Yarn mode (or for the driver in yarn-client mode), we cannot trust the user
       // configuration to point to a secure directory. So create a subdirectory with restricted

@@ -734,6 +752,11 @@
     localDirs
   }

+  /** Used by unit tests. Do not call from other places. */
+  private[spark] def clearLocalRootDirs(): Unit = {
+    localRootDirs = null
+  }
+
   /**
    * Shuffle the elements of a collection into a random order, returning the
    * result in a new collection. Unlike scala.util.Random.shuffle, this method
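
The caching added to getOrCreateLocalRootDirs is standard double-checked locking: the @volatile field makes the initialized value safely visible without taking the lock on the fast path, and the second null check inside synchronized stops two racing threads from both computing it. A generic, self-contained sketch of the same shape (hypothetical names, not the Spark code):

    object CachedDirsSketch {
      // @volatile publishes the initialized value to all threads, so readers
      // skip the lock once the cache is set.
      @volatile private var cached: Array[String] = null

      def get(compute: () => Array[String]): Array[String] = {
        if (cached == null) {         // fast path: lock-free once initialized
          this.synchronized {
            if (cached == null) {     // re-check: another thread may have won the race
              cached = compute()
            }
          }
        }
        cached
      }

      /** Test hook, mirroring Utils.clearLocalRootDirs(). */
      def clear(): Unit = { cached = null }
    }

This is also why the new scaladoc warns that later calls with a different SparkConf still return the first result: the compute step runs at most once per JVM until the test-only clear hook resets it.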

core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala

Lines changed: 6 additions & 2 deletions
@@ -20,15 +20,19 @@ package org.apache.spark.storage
 import java.io.File

 import org.apache.spark.util.Utils
-import org.scalatest.FunSuite
+import org.scalatest.{BeforeAndAfter, FunSuite}

 import org.apache.spark.SparkConf


 /**
  * Tests for the spark.local.dir and SPARK_LOCAL_DIRS configuration options.
  */
-class LocalDirsSuite extends FunSuite {
+class LocalDirsSuite extends FunSuite with BeforeAndAfter {
+
+  before {
+    Utils.clearLocalRootDirs()
+  }

   test("Utils.getLocalDir() returns a valid directory, even if some local dirs are missing") {
     // Regression test for SPARK-2974
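
The new before hook exists because localRootDirs is JVM-wide state: without a reset, whichever test runs first would pin the directories for every later test. A hedged sketch of that failure mode and its fix, where TinyCache is hypothetical and stands in for Utils:

    import org.scalatest.{BeforeAndAfter, FunSuite}

    object TinyCache {
      @volatile private var value: String = null
      def getOrSet(v: => String): String = {
        if (value == null) { value = v } // first caller wins, like localRootDirs
        value
      }
      def clear(): Unit = { value = null } // analogous to Utils.clearLocalRootDirs()
    }

    class TinyCacheSuite extends FunSuite with BeforeAndAfter {
      before {
        TinyCache.clear() // reset shared state so tests stay order-independent
      }

      test("a fresh cache sees this test's value") {
        assert(TinyCache.getOrSet("a") == "a")
      }

      test("without clear(), this would still see the previous test's value") {
        assert(TinyCache.getOrSet("b") == "b")
      }
    }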
