
Commit

Add pre-installed conda configuration and use to find rlib directory (#700)

* Add pre-installed conda environment variable

* Update FORK.md

Co-authored-by: Jeremy Liu <[email protected]>
jeremyjliu and Jeremy Liu authored Jul 13, 2020
1 parent 5154b7b commit 597d5a2
Showing 3 changed files with 18 additions and 2 deletions.
1 change: 1 addition & 0 deletions FORK.md
@@ -26,6 +26,7 @@
 * Gradle plugin to easily create custom docker images for use with k8s
 * Filter rLibDir by exists so that daemon.R references the correct file [460](https://github.com/palantir/spark/pull/460)
 * Implementation of the shuffle I/O plugins from SPARK-25299 that asynchronously backs up shuffle files to remote storage
+* Add pre-installed conda configuration and use to find rlib directory [700](https://github.com/palantir/spark/pull/700)
 
 # Reverted
 * [SPARK-25908](https://issues.apache.org/jira/browse/SPARK-25908) - Removal of `monotonicall_increasing_id`, `toDegree`, `toRadians`, `approxCountDistinct`, `unionAll`
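The new property is an ordinary string conf, so it can be set like any other Spark setting. A minimal driver-side sketch in Scala, assuming the executor image ships a conda environment at /opt/conda (the path is purely illustrative):

import org.apache.spark.SparkConf

// /opt/conda is a stand-in for wherever the image's conda environment lives.
val conf = new SparkConf()
  .setAppName("sparkr-with-preinstalled-conda")
  .set("spark.conda.preInstalledPath", "/opt/conda")

With this set, executors probe /opt/conda/lib/R/library, keeping it only if the directory exists, when assembling the R library path, as the RRunner change below shows.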
14 changes: 12 additions & 2 deletions core/src/main/scala/org/apache/spark/api/r/RRunner.scala
@@ -31,6 +31,7 @@ import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.Common.Provenance
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config.BUFFER_SIZE
+import org.apache.spark.internal.config.CONDA_PRE_INSTALLED_PATH
 import org.apache.spark.internal.config.R._
 import org.apache.spark.util.Utils

@@ -363,6 +364,7 @@ private[r] object RRunner {
     val sparkConf = SparkEnv.get.conf
     val requestedRCommand = Provenance.fromConfOpt(sparkConf, R_COMMAND)
       .getOrElse(Provenance.fromConf(sparkConf, SPARKR_COMMAND))
+    val preInstalledCondaPath = Provenance.fromConfOpt(sparkConf, CONDA_PRE_INSTALLED_PATH)
     val condaEnv = condaSetupInstructions.map(CondaEnvironmentManager.getOrCreateCondaEnvironment)
     val rCommand = condaEnv.map { conda =>
       if (requestedRCommand.value != SPARKR_COMMAND.defaultValue.get) {
@@ -375,9 +377,17 @@

     val rConnectionTimeout = sparkConf.get(R_BACKEND_CONNECTION_TIMEOUT)
     val rOptions = "--vanilla"
+    val rLibPath = "/lib/R/library"
     val rLibDir = condaEnv.map(conda =>
-      RUtils.sparkRPackagePath(isDriver = false) :+ (conda.condaEnvDir + "/lib/R/library"))
-      .getOrElse(RUtils.sparkRPackagePath(isDriver = false))
+      RUtils.sparkRPackagePath(isDriver = false) :+ (conda.condaEnvDir + rLibPath))
+      .getOrElse({
+        val sparkRPackagePaths = RUtils.sparkRPackagePath(isDriver = false)
+        if (preInstalledCondaPath.isDefined) {
+          sparkRPackagePaths :+ (preInstalledCondaPath.get + rLibPath)
+        } else {
+          sparkRPackagePaths
+        }
+      })
       .filter(dir => new File(dir).exists)
     if (rLibDir.isEmpty) {
       throw new SparkException("SparkR package is not installed on executor.")
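Restated outside the diff, the resolution order RRunner now applies is: an on-the-fly conda environment wins, then the pre-installed conda path, then the bundled SparkR package paths alone, with non-existent directories filtered out. A standalone Scala sketch, where the parameter names are simplified stand-ins for the Provenance-wrapped values above:

import java.io.File

def resolveRLibDirs(
    sparkRPackagePaths: Seq[String],
    condaEnvDir: Option[String],
    preInstalledCondaPath: Option[String]): Seq[String] = {
  val rLibPath = "/lib/R/library"
  val candidates = condaEnvDir
    // A conda environment created for this job takes precedence.
    .map(envDir => sparkRPackagePaths :+ (envDir + rLibPath))
    .getOrElse {
      // Otherwise fall back to the pre-installed conda path, if configured.
      preInstalledCondaPath
        .map(path => sparkRPackagePaths :+ (path + rLibPath))
        .getOrElse(sparkRPackagePaths)
    }
  // Drop directories that do not exist, so daemon.R only sees real paths.
  candidates.filter(dir => new File(dir).exists)
}

If the existence filter leaves nothing, RRunner raises the "SparkR package is not installed on executor." error shown above.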
5 changes: 5 additions & 0 deletions core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -543,6 +543,11 @@ package object config {
     .stringConf
     .createOptional
 
+  private[spark] val CONDA_PRE_INSTALLED_PATH = ConfigBuilder("spark.conda.preInstalledPath")
+    .doc("The path to the pre-installed conda directory.")
+    .stringConf
+    .createOptional
+
   private[spark] val CONDA_VERBOSITY = ConfigBuilder("spark.conda.verbosity")
     .doc("How many times to apply -v to conda. A number between 0 and 3, with 0 being default.")
     .intConf
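Because the entry is built with createOptional, reading it yields an Option. RRunner itself goes through Provenance.fromConfOpt, but the underlying entry behaves as sketched below; note this only compiles inside org.apache.spark code, since the typed ConfigEntry accessors are private[spark]:

import org.apache.spark.SparkEnv
import org.apache.spark.internal.config.CONDA_PRE_INSTALLED_PATH

val sparkConf = SparkEnv.get.conf
// None means spark.conda.preInstalledPath was never set, in which case
// RRunner falls back to the bundled SparkR package paths alone.
val preInstalled: Option[String] = sparkConf.get(CONDA_PRE_INSTALLED_PATH)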
