Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package com.kakao.actionbase.pipeline.runner

import scala.collection.JavaConverters._
import scala.util.matching.Regex

/** Resolves `${{ ... }}` expressions in a parsed YAML tree against a context.
  *
  * Vocabulary:
  *   - `env.<key>` — workflow `env:` value (string)
  *   - `needs.<id>.result` — upstream job result: `success` / `failure` / `skipped` / `cancelled`
  *   - `needs.<id>.outputs.<key>` — value emitted by an upstream job (the key may itself contain dots)
  *   - `presets.<name>` — entry from the workflow `presets:` section (any value, often a map)
  *   - `load('<path>')` — content of another YAML file at `<path>` relative to the workflow file's dir
  *
  * An expression that fills the entire string value (e.g. `"${{ presets.X }}"`) is replaced by the raw evaluated
  * value (which may be a map / list, not just a string). Otherwise expressions are stringified and substituted
  * within the surrounding text.
  */
object Expression {

  /** Inputs the evaluator needs. `needs` is empty at YAML-load time and populated at runtime. `loadYaml` is used by
    * the `load(...)` form; injected so test mocks don't have to touch the file system.
    */
  case class Context(
      env: Map[String, String] = Map.empty,
      presets: Map[String, Any] = Map.empty,
      needs: Map[String, NeedsView] = Map.empty,
      loadYaml: String => Any = _ => sys.error("load(...) not configured")
  )

  /** Result and string outputs of one completed upstream job, addressed via `needs.<id>.…`. */
  case class NeedsView(result: String, outputs: Map[String, String] = Map.empty)

  // `${{ ... }}` anywhere inside a string (body matched non-greedily so adjacent tokens stay separate).
  private val Token: Regex = """\$\{\{\s*(.+?)\s*\}\}""".r
  // A string that is exactly one `${{ ... }}` token (modulo surrounding whitespace).
  private val WholeToken: Regex = """\A\s*\$\{\{\s*(.+?)\s*\}\}\s*\z""".r
  // `load('<path>')` with a single-quoted path (no escaping supported inside the quotes).
  private val LoadCall: Regex = """\Aload\(\s*'([^']*)'\s*\)\z""".r

  /** Resolve every string leaf in a YAML-shaped tree. Map / List structure preserved.
    *
    * `lenient = true` preserves the original `${{ ... }}` token when an expression cannot be evaluated against the
    * current context (typically: `needs.*` at load-time, or operator forms like `needs.X.result == 'success'` inside
    * `when:` that this evaluator does not parse). At runtime the same tree is walked again with full context to
    * finish those.
    */
  def resolveDeep(value: Any, ctx: Context, lenient: Boolean = false): Any = value match {
    case s: String =>
      s match {
        case WholeToken(expr) => evaluate(expr, ctx, lenient) // raw value, may be map / list
        case other =>
          Token.replaceAllIn(other, m => Regex.quoteReplacement(stringify(evaluate(m.group(1), ctx, lenient))))
      }
    case m: java.util.Map[_, _] =>
      m.asInstanceOf[java.util.Map[String, Any]]
        .asScala
        .map { case (k, v) => k -> resolveDeep(v, ctx, lenient) }
        .toMap
        .asJava
    case m: scala.collection.Map[_, _] =>
      m.asInstanceOf[scala.collection.Map[String, Any]].map { case (k, v) => k -> resolveDeep(v, ctx, lenient) }.toMap
    case l: java.util.List[_] =>
      l.asInstanceOf[java.util.List[Any]].asScala.map(resolveDeep(_, ctx, lenient)).asJava
    case l: scala.collection.Iterable[_] =>
      l.map(resolveDeep(_, ctx, lenient)).toSeq
    case other => other
  }

  /** Evaluate an expression body (without the outer `${{ }}`) and return the raw value.
    *
    * In lenient mode, an unresolvable expression (unknown key, unsupported syntax) returns the original `${{ ... }}`
    * string unchanged so it can be evaluated later by a fuller context.
    */
  def evaluate(expr: String, ctx: Context, lenient: Boolean = false): Any = {
    try evaluateImpl(expr, ctx)
    catch {
      case _: NoSuchElementException if lenient => "${{ " + expr.trim + " }}"
      case _: IllegalArgumentException if lenient => "${{ " + expr.trim + " }}"
    }
  }

  private def evaluateImpl(expr: String, ctx: Context): Any = {
    val e = expr.trim
    if (e.startsWith("env.")) {
      val k = e.stripPrefix("env.")
      ctx.env.getOrElse(k, throw new NoSuchElementException(s"unknown env key: $k"))
    } else if (e.startsWith("presets.")) {
      val k = e.stripPrefix("presets.")
      ctx.presets.getOrElse(k, throw new NoSuchElementException(s"unknown preset: $k"))
    } else if (e.startsWith("needs.")) {
      evalNeeds(e.stripPrefix("needs."), ctx)
    } else if (e.startsWith("load(")) {
      e match {
        case LoadCall(path) => ctx.loadYaml(path)
        case _ => throw new IllegalArgumentException(s"malformed load(): $e — expected `load('path')`")
      }
    } else {
      throw new IllegalArgumentException(s"unknown expression: $e")
    }
  }

  /** Evaluate the part after `needs.`: either `<id>.result` or `<id>.outputs.<key>`. */
  private def evalNeeds(rest: String, ctx: Context): Any = {
    // Split into at most 3 segments so an outputs key may itself contain dots,
    // e.g. "a.outputs.spark.sql.partitions" -> ("a", "outputs", "spark.sql.partitions").
    val parts = rest.split("\\.", 3)
    if (parts.length < 2 || parts.exists(_.isEmpty))
      throw new IllegalArgumentException(s"malformed needs.* expression: needs.$rest")
    val id = parts(0)
    val view = ctx.needs.getOrElse(id, throw new NoSuchElementException(s"unknown needs id: $id"))
    parts(1) match {
      case "result" =>
        // `needs.<id>.result` takes no further segments; reject trailing garbage instead of ignoring it.
        if (parts.length != 2)
          throw new IllegalArgumentException(s"malformed needs.* expression: needs.$rest")
        view.result
      case "outputs" =>
        if (parts.length != 3)
          throw new IllegalArgumentException(s"needs.$id.outputs requires a key: needs.$id.outputs.<name>")
        val key = parts(2)
        view.outputs.getOrElse(
          key,
          throw new NoSuchElementException(s"unknown output: needs.$id.outputs.$key")
        )
      case other => throw new IllegalArgumentException(s"unknown needs field: needs.$id.$other")
    }
  }

  // null maps to "" so a missing/None-ish YAML leaf interpolates as empty text rather than "null".
  private def stringify(value: Any): String = value match {
    case null => ""
    case s: String => s
    case other => other.toString
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package com.kakao.actionbase.pipeline.runner

import com.kakao.actionbase.pipeline.runner.Expression.Context

import scala.collection.JavaConverters._

/** Resolves `$extends` directives in a parsed YAML tree.
  *
  * For every map containing `$extends:`:
  *   1. Evaluate the value (must be a single `${{ ... }}` expression that resolves to a map).
  *   2. Recursively resolve `$extends` inside both the extended map and the surrounding sibling keys.
  *   3. Deep-merge: the extended map provides defaults; the sibling map's keys override.
  *
  * `$` keys are reserved as processor directives — only `$extends` is recognized today; unknown `$<name>` keys are
  * left untouched (forward-compatible). Cycles are rejected via a depth limit.
  */
object ExtendsResolver {

  private val ExtendsKey = "$extends"
  // Upper bound on `$extends` chaining; a cycle (A extends B extends A) would otherwise recurse forever.
  private val MaxDepth = 32
  // A string that is exactly one `${{ ... }}` expression (modulo surrounding whitespace).
  private val WholeToken = """\A\s*\$\{\{\s*(.+?)\s*\}\}\s*\z""".r

  /** Entry point: walk `value` and expand every `$extends` directive against `ctx`. */
  def resolve(value: Any, ctx: Context): Any = walk(value, ctx, depth = 0)

  // Structural walk: maps go through walkMap (which handles `$extends`); lists recurse; scalars pass through.
  private def walk(value: Any, ctx: Context, depth: Int): Any = value match {
    case m: java.util.Map[_, _] =>
      walkMap(m.asInstanceOf[java.util.Map[String, Any]].asScala.toMap, ctx, depth)
    case m: scala.collection.Map[_, _] =>
      walkMap(m.asInstanceOf[scala.collection.Map[String, Any]].toMap, ctx, depth)
    case l: java.util.List[_] =>
      l.asInstanceOf[java.util.List[Any]].asScala.map(walk(_, ctx, depth)).asJava
    case l: scala.collection.Iterable[_] =>
      l.map(walk(_, ctx, depth)).toSeq
    case other => other
  }

  // Expand `$extends` at one map level. Always returns a java Map so the tree stays Jackson-bindable.
  private def walkMap(m: Map[String, Any], ctx: Context, depth: Int): java.util.Map[String, Any] = {
    if (depth > MaxDepth) throw new IllegalStateException(s"$$extends nesting exceeds $MaxDepth (cycle?)")

    m.get(ExtendsKey) match {
      case None =>
        m.map { case (k, v) => k -> walk(v, ctx, depth) }.asJava

      case Some(expr) =>
        val incoming = evalExtendsValue(expr, ctx)
        // The extended map may itself contain `$extends`; resolving it counts one level toward MaxDepth.
        val resolvedIncoming = walk(incoming, ctx, depth + 1) match {
          case jm: java.util.Map[_, _] => jm.asInstanceOf[java.util.Map[String, Any]].asScala.toMap
          case null =>
            // Guard: calling .getClass on null below would NPE with a useless message.
            throw new IllegalArgumentException(s"$$extends must resolve to a map, got: null")
          case other =>
            throw new IllegalArgumentException(s"$$extends must resolve to a map, got: ${other.getClass.getSimpleName}")
        }
        // Sibling keys stay at the current depth; only the extension chain itself is depth-limited.
        val sibling = (m - ExtendsKey).map { case (k, v) => k -> walk(v, ctx, depth) }
        deepMerge(resolvedIncoming, sibling).asJava
    }
  }

  // `$extends` values must be exactly one `${{ ... }}` expression, not embedded in surrounding text.
  private def evalExtendsValue(expr: Any, ctx: Context): Any = expr match {
    case s: String =>
      s.trim match {
        case WholeToken(inner) => Expression.evaluate(inner, ctx)
        case other =>
          // `$$` renders a single literal `$` in an s-interpolator.
          throw new IllegalArgumentException(
            s"$$extends value must be a single `$${{ ... }}` expression, got: $other"
          )
      }
    case other =>
      throw new IllegalArgumentException(s"$$extends value must be a string expression, got: $other")
  }

  /** Recursive deep-merge: maps merge key-by-key (sibling wins on conflict at non-map leaves); other values are
    * replaced wholesale by the sibling.
    */
  private def deepMerge(base: Map[String, Any], over: Map[String, Any]): Map[String, Any] = {
    val keys = base.keySet ++ over.keySet
    keys.iterator.map { k =>
      (base.get(k), over.get(k)) match {
        case (Some(a), Some(b)) => k -> mergeValue(a, b)
        case (Some(a), None) => k -> a
        case (None, Some(b)) => k -> b
        case _ => sys.error("unreachable") // k came from one of the two key sets
      }
    }.toMap
  }

  // Merge one conflicting value: two maps merge recursively; anything else is overridden by the sibling `b`.
  private def mergeValue(a: Any, b: Any): Any = (a, b) match {
    case (am: java.util.Map[_, _], bm: java.util.Map[_, _]) =>
      deepMerge(
        am.asInstanceOf[java.util.Map[String, Any]].asScala.toMap,
        bm.asInstanceOf[java.util.Map[String, Any]].asScala.toMap
      ).asJava
    case _ => b
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package com.kakao.actionbase.pipeline.workflow

import com.fasterxml.jackson.annotation.JsonProperty

/** Parsed workflow YAML.
  *
  * Jobs form a job-to-job DAG (edges come from each job's `needs:`). Each `jobs` entry is dispatched by its `kind:`
  * to a runner-side handler. `presets:` is a free-form reusable map referenced from elsewhere via
  * `${{ presets.<name> }}`. `$extends` (a `${{ ... }}` expression that resolves to a map) provides defaults via deep
  * merge — see `ExtendsResolver`.
  */
case class Workflow(
  name: String,
  env: Map[String, String] = Map.empty, // static string values, referenced via `${{ env.<key> }}`
  presets: Map[String, Map[String, Any]] = Map.empty, // each preset is expected to be a map after resolution
  jobs: Map[String, JobSpec]
)

/** A single job in a workflow.
  *
  * - `kind` — discriminator: `spark` | `bash`. Routes to a runner-side handler.
  * - `artifact` — Gradle coord `group:name:version` (spark only). The runner resolves the JAR.
  * - `mainClass` — class to invoke (spark only); short / sub-package / FQN forms all resolve via
  *   `ClassResolver.JobRoots`.
  * - `args` — bound onto the Job's Cfg case class. Values may include `${{ ... }}` expressions.
  * - `submit` — passed through to `spark-submit` CLI (kebab keys → flags). Nested `conf` map → `--conf` repeats.
  * - `run` — shell command (bash only).
  * - `needs` — ids of jobs that must complete before this one runs.
  * - `when` — boolean expression; the job runs only when truthy. Defaults to true when absent.
  */
case class JobSpec(
  kind: String,
  artifact: Option[String] = None,
  mainClass: Option[String] = None,
  args: Map[String, Any] = Map.empty,
  submit: Map[String, Any] = Map.empty,
  run: Option[String] = None,
  needs: Seq[String] = Seq.empty,
  // `when` is a Scala reserved-adjacent soft keyword in 3.x; backticks + explicit JSON name keep binding stable.
  @JsonProperty("when") `when`: Option[String] = None
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package com.kakao.actionbase.pipeline.workflow

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule}
import com.kakao.actionbase.pipeline.runner.{Expression, ExtendsResolver}

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path, Paths}
import scala.collection.JavaConverters._

/** Loads a workflow YAML into a `Workflow` case class.
  *
  * Pipeline:
  *   1. Parse YAML to a raw `Map[String, Any]`.
  *   2. Resolve `presets:` first (so its `$extends`/expressions resolve before downstream uses).
  *   3. Build an `Expression.Context` with the resolved presets and any `env:` block.
  *   4. Resolve `$extends` everywhere else in the tree.
  *   5. Resolve remaining `${{ ... }}` expressions in string leaves (env / presets / load — `needs.*` is deferred
  *      to runtime since job results aren't yet known at load time).
  *   6. Bind the resulting tree onto `Workflow` via Jackson.
  */
object WorkflowLoader {

  // Shared YAML mapper: unknown keys are tolerated (forward-compatible workflows); a YAML null bound to a
  // primitive field is an error rather than silently becoming 0/false.
  private[pipeline] lazy val mapper: ObjectMapper with ClassTagExtensions = {
    val m = new ObjectMapper(new YAMLFactory()) with ClassTagExtensions
    m.registerModule(DefaultScalaModule)
    m.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    m.configure(DeserializationFeature.FAIL_ON_NULL_FOR_PRIMITIVES, true)
    m
  }

  /** Load and fully resolve the workflow file at `path`. Relative `load(...)` paths resolve against its directory. */
  def load(path: Path): Workflow = {
    val raw = parseRaw(readUtf8(path))
    val baseDir = path.toAbsolutePath.getParent
    bind(raw, baseDir)
  }

  private def readUtf8(path: Path): String =
    new String(Files.readAllBytes(path), StandardCharsets.UTF_8)

  /** Used by tests / programmatic callers when YAML is already a string. `baseDir` is needed so `load(...)`
    * expressions can resolve relative paths.
    */
  def loadString(yaml: String, baseDir: Path): Workflow = bind(parseRaw(yaml), baseDir)

  // An empty YAML document parses to null; normalize to an empty map.
  private def parseRaw(yaml: String): Map[String, Any] = {
    val node = mapper.readValue(yaml, classOf[java.util.Map[String, Any]])
    if (node == null) Map.empty else node.asScala.toMap
  }

  /** Run steps 2–6 of the pipeline documented on the object and bind onto `Workflow`. */
  private def bind(raw: Map[String, Any], baseDir: Path): Workflow = {
    // 1. env (raw, no expression evaluation — env values are static strings)
    val env: Map[String, String] = raw.get("env") match {
      case Some(m: java.util.Map[_, _]) =>
        m.asInstanceOf[java.util.Map[String, Any]].asScala.iterator.map { case (k, v) => k -> stringify(v) }.toMap
      case Some(m: scala.collection.Map[_, _]) =>
        m.asInstanceOf[scala.collection.Map[String, Any]].iterator.map { case (k, v) => k -> stringify(v) }.toMap
      case _ => Map.empty
    }

    // `load('<path>')` implementation: read + parse another YAML file, relative paths anchored at baseDir.
    val loader: String => Any = pathStr => {
      val p = Paths.get(pathStr)
      val resolved = if (p.isAbsolute) p else baseDir.resolve(p).normalize()
      parseRaw(readUtf8(resolved))
    }

    // 2. resolve presets first with a context that has only env + load (presets refers to itself recursively
    // only via `$extends: ${{ presets.X }}`, which would loop — current model: presets resolve in declaration order)
    val rawPresets = raw.get("presets") match {
      case Some(m: java.util.Map[_, _]) => m.asInstanceOf[java.util.Map[String, Any]].asScala.toMap
      case Some(m: scala.collection.Map[_, _]) => m.asInstanceOf[scala.collection.Map[String, Any]].toMap
      case _ => Map.empty
    }
    val presetCtx = Expression.Context(env = env, loadYaml = loader)
    // $extends inside presets is strict (non-lenient): presets must be fully resolvable at load time.
    val resolvedPresetsRaw = ExtendsResolver.resolve(rawPresets.asJava, presetCtx) match {
      case jm: java.util.Map[_, _] => jm.asInstanceOf[java.util.Map[String, Any]].asScala.toMap
      case _ => Map.empty[String, Any]
    }
    val resolvedPresets: Map[String, Any] = Expression.resolveDeep(resolvedPresetsRaw.asJava, presetCtx) match {
      case jm: java.util.Map[_, _] => jm.asInstanceOf[java.util.Map[String, Any]].asScala.toMap
      case _ => Map.empty
    }

    // 3. full context for the rest of the tree
    val fullCtx = Expression.Context(env = env, presets = resolvedPresets, loadYaml = loader)

    // 4. resolve $extends everywhere in the rest of the tree
    val withoutPresets = raw - "presets"
    val extendsResolved = ExtendsResolver.resolve(withoutPresets.asJava, fullCtx)

    // 5. resolve remaining expressions in string leaves; needs.* and `when:` operator forms are deferred to runtime
    val resolved = Expression.resolveDeep(extendsResolved, fullCtx, lenient = true) match {
      case jm: java.util.Map[_, _] => jm.asInstanceOf[java.util.Map[String, Any]].asScala.toMap
      case other => sys.error(s"workflow root must be a map, got: $other")
    }

    // 6. attach the resolved presets back so the resulting Workflow case class carries them
    val withPresets = resolved + ("presets" -> resolvedPresets)

    mapper.convertValue(withPresets.asJava, classOf[Workflow])
  }

  // null leaves stringify to "" (consistent with Expression.stringify) so env values never become "null".
  private def stringify(value: Any): String = value match {
    case null => ""
    case s: String => s
    case other => other.toString
  }
}
11 changes: 11 additions & 0 deletions pipeline/src/test/resources/workflows/extends-via-load.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Test fixture: a single spark job whose `submit:` block pulls its defaults from
# another YAML file via `$extends` + `load(...)`, then adds a local `conf` override.
# The merged result should contain the loaded memory flags plus the partitions conf.
name: extends-via-load

jobs:
  pi:
    kind: spark
    artifact: "com.kakao.actionbase:pipeline:0.x"
    mainClass: SparkPiJob
    submit:
      $extends: ${{ load('preset/spark-small.yaml') }}
      conf:
        spark.sql.shuffle.partitions: 8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Test fixture pulled in via `$extends: ${{ load('preset/spark-small.yaml') }}`:
# small-cluster spark-submit defaults that the extending map may override.
driver-memory: 1g
executor-memory: 2g
Loading
Loading