[Spark][Infra] Drop support for Spark 3.5 and formally pin to released Spark 4.0.1 #5616
Changes from 41 commits
@@ -22,7 +22,7 @@ jobs:
       uses: actions/setup-java@v3
       with:
         distribution: "zulu"
-        java-version: "11"
Contributor
General question @scottsand-db: why is the kernel unitycatalog a separate GitHub action from kernel?
+        java-version: "17"
       if: steps.git-diff.outputs.diff
     - name: Run Unity tests with coverage
       run: |
This file was deleted.
@@ -66,7 +66,7 @@ val sparkVersion = settingKey[String]("Spark version")

 // Dependent library versions
 val defaultSparkVersion = SparkVersionSpec.DEFAULT.fullVersion // Spark version to use for testing in non-delta-spark related modules
-val hadoopVersion = "3.3.4"
+val hadoopVersion = "3.4.0"
 val scalaTestVersion = "3.2.15"
 val scalaTestVersionForConnectors = "3.0.8"
 val parquet4sVersion = "1.9.4"
@@ -257,7 +257,7 @@ lazy val connectClient = (project in file("spark-connect/client"))
       // Create a symlink for the log4j properties
       val confDir = distributionDir / "conf"
       IO.createDirectory(confDir)
-      val log4jProps = (spark / Test / resourceDirectory).value / "log4j2_spark_master.properties"
+      val log4jProps = (spark / Test / resourceDirectory).value / "log4j2.properties"
       val linkedLog4jProps = confDir / "log4j2.properties"
       Files.createSymbolicLink(linkedLog4jProps.toPath, log4jProps.toPath)
     }
@@ -715,22 +715,6 @@ lazy val sharing = (project in file("sharing"))
     releaseSettings,
     CrossSparkVersions.sparkDependentSettings(sparkVersion),
     Test / javaOptions ++= Seq("-ea"),
-    Compile / compile := runTaskOnlyOnSparkMaster(
-      task = Compile / compile,
-      taskName = "compile",
-      projectName = "delta-sharing-spark",
-      emptyValue = Analysis.empty.asInstanceOf[CompileAnalysis]
-    ).value,
-    Test / test := runTaskOnlyOnSparkMaster(
-      task = Test / test,
-      taskName = "test",
-      projectName = "delta-sharing-spark",
-      emptyValue = ()).value,
-    publish := runTaskOnlyOnSparkMaster(
-      task = publish,
-      taskName = "publish",
-      projectName = "delta-sharing-spark",
-      emptyValue = ()).value,
     libraryDependencies ++= Seq(
       "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
@@ -898,7 +882,7 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults"))
       // such as warm runs, cold runs, defining benchmark parameter variables etc.
       "org.openjdk.jmh" % "jmh-core" % "1.37" % "test",
       "org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37" % "test",
-      "io.delta" %% "delta-spark" % "3.3.2" % "test",
+      "io.delta" %% "delta-spark" % "4.0.0" % "test",
       "org.apache.spark" %% "spark-hive" % defaultSparkVersion % "test" classifier "tests",
       "org.apache.spark" %% "spark-sql" % defaultSparkVersion % "test" classifier "tests",
@@ -1010,6 +994,8 @@ lazy val storageS3DynamoDB = (project in file("storage-s3-dynamodb"))
     )
   ).configureUnidoc()

+/*
+TODO: readd delta-iceberg on Spark 4.0+
Collaborator (Author)
@lzlfred Hey Fred, we will be releasing it on both Spark 4.0 and Spark 4.1 in the next release; we will need to update this build to work for that.

Collaborator (Author)
Also tracking the TODO at #5326.
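Since the plan noted above is to build against both Spark 4.0 and Spark 4.1 in the next release, the build will need some way to select a Spark version. A minimal, hypothetical sbt sketch of that idea; the property name and version list below are illustrative assumptions, not part of this PR:

```scala
// Hypothetical sketch only: choose the Spark version to build and test against from
// a JVM property, falling back to the pinned released version. Names are assumed.
val supportedSparkVersions: Seq[String] = Seq("4.0.1", "4.1.0")

val selectedSparkVersion: String = sys.props.get("spark.version") match {
  case Some(v) if supportedSparkVersions.contains(v) => v
  case Some(v) => sys.error(s"Unsupported Spark version: $v, expected one of $supportedSparkVersions")
  case None => supportedSparkVersions.head // pinned released version
}
```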
 val icebergSparkRuntimeArtifactName = {
   val (expMaj, expMin, _) = getMajorMinorPatch(defaultSparkVersion)
   s"iceberg-spark-runtime-$expMaj.$expMin"
@@ -1165,6 +1151,7 @@ lazy val icebergShaded = (project in file("icebergShaded"))
   assembly / assemblyMergeStrategy := updateMergeStrategy((assembly / assemblyMergeStrategy).value),
   assemblyPackageScala / assembleArtifact := false,
 )
+*/

 lazy val hudi = (project in file("hudi"))
   .dependsOn(spark % "compile->compile;test->test;provided->provided")
@@ -1539,6 +1526,7 @@ lazy val sparkGroup = project
     publish / skip := false,
   )

+/*
 lazy val icebergGroup = project
   .aggregate(iceberg, testDeltaIcebergJar)
   .settings(

@@ -1547,6 +1535,7 @@ lazy val icebergGroup = project
     publishArtifact := false,
     publish / skip := false,
   )
+*/

 lazy val kernelGroup = project
   .aggregate(kernelApi, kernelDefaults, kernelBenchmarks)
@@ -91,6 +91,11 @@ public static void checkpoint(Engine engine, Clock clock, SnapshotImpl snapshot)
           numberOfAddFiles = checkpointDataIter.getNumberOfAddActions();
         } catch (FileAlreadyExistsException faee) {
           throw new CheckpointAlreadyExistsException(version);
+        } catch (IOException io) {
Collaborator (Author)
Upgrading the hadoop version changes this error class.

Collaborator
Hm .. I wonder what the change was?

Collaborator (Author)
I'm not sure what the change in hadoop was, but instead of seeing a FileAlreadyExistsException directly we now get an IOException wrapping it.

Collaborator (Author)
They seem similar enough, so I didn't look further into it. Seems like a minor API difference.

Contributor
@scottsand-db can you approve this change?
+          if (io.getCause() instanceof FileAlreadyExistsException) {
+            throw new CheckpointAlreadyExistsException(version);
+          }
+          throw io;
         }

         final CheckpointMetaData checkpointMetaData =
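The thread above describes how, after the Hadoop upgrade, a checkpoint-file collision can surface as an IOException whose cause is FileAlreadyExistsException rather than as a FileAlreadyExistsException itself, which is why the new catch block inspects the cause. A small Scala sketch of the same unwrapping idea; the helper name is illustrative and not part of the kernel codebase:

```scala
import java.io.IOException
import java.nio.file.FileAlreadyExistsException

// Illustrative helper: treat an IOException caused by FileAlreadyExistsException
// the same as a FileAlreadyExistsException thrown directly.
def isAlreadyExistsFailure(t: Throwable): Boolean = t match {
  case _: FileAlreadyExistsException => true
  case io: IOException => io.getCause.isInstanceOf[FileAlreadyExistsException]
  case _ => false
}
```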
@@ -15,6 +15,7 @@
  */
 package io.delta.kernel.defaults.engine

+import java.io.IOException
 import java.nio.file.FileAlreadyExistsException

 import scala.collection.JavaConverters._

@@ -63,11 +64,12 @@ class DefaultParquetHandlerSuite extends AnyFunSuite with ParquetSuiteBase
     writeAndVerify()

     // Try to write as same file and expect an error
-    intercept[FileAlreadyExistsException] {
+    val e = intercept[IOException] {
Contributor
Why is removing Spark 3.5 causing all these kernel code changes?

Collaborator (Author)
I upgraded our hadoop version to match Spark 4.0. More details on #5616 (comment). Kernel changes are only related to this.
       parquetHandler.writeParquetFileAtomically(
         filePath,
         toCloseableIterator(dataToWrite.asJava.iterator()))
     }
+    assert(e.getCause.isInstanceOf[FileAlreadyExistsException])
   }
 }
}
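The new assertion above checks the wrapped cause rather than the exception type itself. A self-contained ScalaTest sketch of that pattern, with a stand-in for the atomic write; the suite and helper names are hypothetical, not from the Delta codebase:

```scala
import java.io.IOException
import java.nio.file.FileAlreadyExistsException

import org.scalatest.funsuite.AnyFunSuite

class AtomicWriteCollisionExampleSuite extends AnyFunSuite {

  // Stand-in for an atomic write against a path that already exists; newer Hadoop
  // surfaces the collision as an IOException wrapping FileAlreadyExistsException.
  private def writeExistingFileAtomically(): Unit =
    throw new IOException(new FileAlreadyExistsException("/tmp/example.parquet"))

  test("collision surfaces FileAlreadyExistsException as the IOException cause") {
    val e = intercept[IOException] {
      writeExistingFileAtomically()
    }
    assert(e.getCause.isInstanceOf[FileAlreadyExistsException])
  }
}
```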