From 323e93784e8f315ad4f43a58be3066d2660a1859 Mon Sep 17 00:00:00 2001 From: Jon Malkin <786705+jmalkin@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:32:57 -0800 Subject: [PATCH 1/4] attempt #1 at ci workflow --- .github/workflows/ci.yaml | 74 +++++++++++++++++++++++++++++++++++++++ build.sbt | 34 ++++++++++-------- 2 files changed, 94 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..b604f7e --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,74 @@ +name: Continuous Integration + +on: + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + workflow_dispatch: + +#env: +# MAVEN_OPTS: -Xmx4g -Xms1g + +jobs: + build: + name: Build and Test + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + jdk: [ 8, 11, 17 ] + spark: [ 3.4.4, 3.5.4 ] + + env: + JDK_VERSION: ${{ matrix.jdk }} + SPARK_VERSION: ${{ matrix.spark }} + + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Print Current workflow + run: > + cat .github/workflows/auto-jdk-matrix.yml + + - name: Cache local Maven repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: build-${{ runner.os }}-maven- + + - name: Setup JDK + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + java-package: jdk + architecture: x64 + cache: sbt + + - name: Setup SBT + uses: sbt/setup-sbt@v1 + + 
- name: Echo Java Version + run: > + java -version + + - name: Build and test + run: > + sbt --batch clean test + +# Architecture options: x86, x64, armv7, aarch64, ppc64le +# setup-java@v4 has a "with cache" option +# Lifecycle: validate, compile, test, package, verify, install, deploy +# -B batch mode, never stops for user input +# -V show Version without stopping +# -X debug mode +# -q quiet, only show errors \ No newline at end of file diff --git a/build.sbt b/build.sbt index 483edbf..2adb823 100644 --- a/build.sbt +++ b/build.sbt @@ -24,28 +24,34 @@ description := "The Apache DataSketches package for Spark" licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0")) +val sparkVersion = settingKey[String]("The version of Spark") +sparkVersion := sys.env.getOrElse("SPARK_VERSION", "3.4.4") + // determine our java version val jvmVersionString = settingKey[String]("The JVM version") jvmVersionString := sys.props("java.version") +// Map of JVM version prefix to: +// (JVM major version, datasketches-java version) +val jvmVersionMap = Map( + "21" -> ("21", "8.0.0"), + "17" -> ("17", "7.0.1"), + "11" -> ("11", "6.2.0"), + "8" -> ("8", "6.2.0"), + "1.8" -> ("8", "6.2.0") +) + +// determine the JVM major version (default: 11) val jvmVersion = settingKey[String]("The JVM major version") -jvmVersion := { - val version = jvmVersionString.value - if (version.startsWith("21")) "21" - else if (version.startsWith("17")) "17" - else if (version.startsWith("11")) "11" - else "8" -} +jvmVersion := jvmVersionMap.collectFirst { + case (prefix, (major, _)) if jvmVersionString.value.startsWith(prefix) => major +}.getOrElse("11") +// look up the associated datasketches-java version val dsJavaVersion = settingKey[String]("The DataSketches Java version") -dsJavaVersion := { - if (jvmVersion.value == "11") "6.2.0" - else if (jvmVersion.value == "17") "7.0.1" - else if (jvmVersion.value == "21") "8.0.0" - else "6.2.0" -} +dsJavaVersion := 
jvmVersionMap.get(jvmVersion.value).map(_._2).get + -// these do not impact code generation in spark javacOptions ++= Seq("-source", jvmVersion.value, "-target", jvmVersion.value) scalacOptions ++= Seq("-encoding", "UTF-8", "-release", jvmVersion.value) Test / javacOptions ++= Seq("-source", jvmVersion.value, "-target", jvmVersion.value) From 1a86eb7970b5f3f5e486e87452348fd2a6cada71 Mon Sep 17 00:00:00 2001 From: Jon Malkin <786705+jmalkin@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:39:07 -0800 Subject: [PATCH 2/4] remove unnecessary cat of workflow --- .github/workflows/ci.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b604f7e..6df2cd6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,10 +34,6 @@ jobs: with: persist-credentials: false - - name: Print Current workflow - run: > - cat .github/workflows/auto-jdk-matrix.yml - - name: Cache local Maven repository uses: actions/cache@v4 with: From c22065fd918e7d7437e1bf9cddd4cb70dae63ed4 Mon Sep 17 00:00:00 2001 From: Jon Malkin <786705+jmalkin@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:46:58 -0800 Subject: [PATCH 3/4] use env variable for spark version, default to 3.5.4 --- build.sbt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sbt b/build.sbt index 2adb823..a399f4f 100644 --- a/build.sbt +++ b/build.sbt @@ -25,7 +25,7 @@ description := "The Apache DataSketches package for Spark" licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0")) val sparkVersion = settingKey[String]("The version of Spark") -sparkVersion := sys.env.getOrElse("SPARK_VERSION", "3.4.4") +sparkVersion := sys.env.getOrElse("SPARK_VERSION", "3.5.4") // determine our java version val jvmVersionString = settingKey[String]("The JVM version") @@ -60,7 +60,7 @@ Test / scalacOptions ++= Seq("-encoding", "UTF-8", "-release", jvmVersion.value) libraryDependencies ++= Seq( 
"org.apache.datasketches" % "datasketches-java" % dsJavaVersion.value % "compile", "org.scala-lang" % "scala-library" % "2.12.6", - "org.apache.spark" %% "spark-sql" % "3.5.4" % "provided", + "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", "org.scalatest" %% "scalatest" % "3.2.19" % "test", "org.scalatestplus" %% "junit-4-13" % "3.2.19.0" % "test" ) From 9663a21fd303798fb8963b19a493008eef6a1b97 Mon Sep 17 00:00:00 2001 From: Jon Malkin <786705+jmalkin@users.noreply.github.com> Date: Wed, 22 Jan 2025 14:53:02 -0800 Subject: [PATCH 4/4] add jdk and spark versions to cache --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Note: restore-keys is the primary cache key minus its trailing hashFiles() segment. actions/cache matches restore-keys by prefix, so the former trailing "maven-" segment, which no longer appears in the key, could never match a saved cache. diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6df2cd6..42d8359 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -38,8 +38,8 @@ jobs: uses: actions/cache@v4 with: path: ~/.m2/repository - key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: build-${{ runner.os }}-maven- + key: build-${{ runner.os }}-jdk-${{ matrix.jdk }}-spark-${{ matrix.spark }}-${{ hashFiles('**/pom.xml') }} + restore-keys: build-${{ runner.os }}-jdk-${{ matrix.jdk }}-spark-${{ matrix.spark }}- - name: Setup JDK uses: actions/setup-java@v4