[Spark][Infra] Drop support for Spark 3.5 and formally pin to released Spark 4.0.1 #5616
Changes from 41 commits
@@ -22,7 +22,7 @@ jobs:
       uses: actions/setup-java@v3
       with:
         distribution: "zulu"
-        java-version: "11"
Contributor
General question @scottsand-db: why is the kernel unitycatalog a separate GitHub action from kernel?
+        java-version: "17"
       if: steps.git-diff.outputs.diff
     - name: Run Unity tests with coverage
       run: |
This file was deleted.
@@ -66,7 +66,7 @@ val sparkVersion = settingKey[String]("Spark version")

 // Dependent library versions
 val defaultSparkVersion = SparkVersionSpec.DEFAULT.fullVersion // Spark version to use for testing in non-delta-spark related modules
-val hadoopVersion = "3.3.4"
+val hadoopVersion = "3.4.0"
 val scalaTestVersion = "3.2.15"
 val scalaTestVersionForConnectors = "3.0.8"
 val parquet4sVersion = "1.9.4"
@@ -257,7 +257,7 @@ lazy val connectClient = (project in file("spark-connect/client"))
       // Create a symlink for the log4j properties
       val confDir = distributionDir / "conf"
       IO.createDirectory(confDir)
-      val log4jProps = (spark / Test / resourceDirectory).value / "log4j2_spark_master.properties"
+      val log4jProps = (spark / Test / resourceDirectory).value / "log4j2.properties"
       val linkedLog4jProps = confDir / "log4j2.properties"
       Files.createSymbolicLink(linkedLog4jProps.toPath, log4jProps.toPath)
     }
@@ -715,22 +715,6 @@ lazy val sharing = (project in file("sharing"))
     releaseSettings,
     CrossSparkVersions.sparkDependentSettings(sparkVersion),
     Test / javaOptions ++= Seq("-ea"),
-    Compile / compile := runTaskOnlyOnSparkMaster(
-      task = Compile / compile,
-      taskName = "compile",
-      projectName = "delta-sharing-spark",
-      emptyValue = Analysis.empty.asInstanceOf[CompileAnalysis]
-    ).value,
-    Test / test := runTaskOnlyOnSparkMaster(
-      task = Test / test,
-      taskName = "test",
-      projectName = "delta-sharing-spark",
-      emptyValue = ()).value,
-    publish := runTaskOnlyOnSparkMaster(
-      task = publish,
-      taskName = "publish",
-      projectName = "delta-sharing-spark",
-      emptyValue = ()).value,
     libraryDependencies ++= Seq(
       "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
@@ -898,7 +882,7 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults"))
       // such as warm runs, cold runs, defining benchmark parameter variables etc.
       "org.openjdk.jmh" % "jmh-core" % "1.37" % "test",
       "org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37" % "test",
-      "io.delta" %% "delta-spark" % "3.3.2" % "test",
+      "io.delta" %% "delta-spark" % "4.0.0" % "test",
       "org.apache.spark" %% "spark-hive" % defaultSparkVersion % "test" classifier "tests",
       "org.apache.spark" %% "spark-sql" % defaultSparkVersion % "test" classifier "tests",
@@ -1010,6 +994,8 @@ lazy val storageS3DynamoDB = (project in file("storage-s3-dynamodb"))
     )
   ).configureUnidoc()

+/*
+TODO: readd delta-iceberg on Spark 4.0+
Collaborator (Author)
@lzlfred Hey Fred, we will be releasing it on both Spark 4.0 and Spark 4.1 in the next release; we will need to update this build to work for that.

Collaborator (Author)
Also tracking the TODO at #5326.
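Since the plan noted above is to build against both Spark 4.0 and Spark 4.1 in the next release, the build will need some way to select a Spark version. A minimal, hypothetical sbt sketch of that idea; the property name and version list below are illustrative assumptions, not part of this PR:

```scala
// Hypothetical sketch only: choose the Spark version to build and test against from
// a JVM property, falling back to the pinned released version. Names are assumed.
val supportedSparkVersions: Seq[String] = Seq("4.0.1", "4.1.0")

val selectedSparkVersion: String = sys.props.get("spark.version") match {
  case Some(v) if supportedSparkVersions.contains(v) => v
  case Some(v) => sys.error(s"Unsupported Spark version: $v, expected one of $supportedSparkVersions")
  case None => supportedSparkVersions.head // pinned released version
}
```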
 val icebergSparkRuntimeArtifactName = {
   val (expMaj, expMin, _) = getMajorMinorPatch(defaultSparkVersion)
   s"iceberg-spark-runtime-$expMaj.$expMin"
@@ -1165,6 +1151,7 @@ lazy val icebergShaded = (project in file("icebergShaded"))
   assembly / assemblyMergeStrategy := updateMergeStrategy((assembly / assemblyMergeStrategy).value),
   assemblyPackageScala / assembleArtifact := false,
 )
+*/

 lazy val hudi = (project in file("hudi"))
   .dependsOn(spark % "compile->compile;test->test;provided->provided")
@@ -1539,6 +1526,7 @@ lazy val sparkGroup = project
     publish / skip := false,
   )

+/*
 lazy val icebergGroup = project
   .aggregate(iceberg, testDeltaIcebergJar)
   .settings(

@@ -1547,6 +1535,7 @@ lazy val icebergGroup = project
     publishArtifact := false,
     publish / skip := false,
   )
+*/

 lazy val kernelGroup = project
   .aggregate(kernelApi, kernelDefaults, kernelBenchmarks)
@@ -91,6 +91,11 @@ public static void checkpoint(Engine engine, Clock clock, SnapshotImpl snapshot)
           numberOfAddFiles = checkpointDataIter.getNumberOfAddActions();
         } catch (FileAlreadyExistsException faee) {
           throw new CheckpointAlreadyExistsException(version);
+        } catch (IOException io) {
Collaborator (Author)
Upgrading the hadoop version changes this error class.

Collaborator
Hm .. I wonder what the change was?

Collaborator (Author)
I'm not sure what the change in hadoop was, but instead of seeing a FileAlreadyExistsException directly we now get an IOException wrapping it.

Collaborator (Author)
They seem similar enough, so I didn't look further into it. Seems like a minor API difference.

Contributor
@scottsand-db can you approve this change?
+          if (io.getCause() instanceof FileAlreadyExistsException) {
+            throw new CheckpointAlreadyExistsException(version);
+          }
+          throw io;
         }

         final CheckpointMetaData checkpointMetaData =
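The thread above describes how, after the Hadoop upgrade, a checkpoint-file collision can surface as an IOException whose cause is FileAlreadyExistsException rather than as a FileAlreadyExistsException itself, which is why the new catch block inspects the cause. A small Scala sketch of the same unwrapping idea; the helper name is illustrative and not part of the kernel codebase:

```scala
import java.io.IOException
import java.nio.file.FileAlreadyExistsException

// Illustrative helper: treat an IOException caused by FileAlreadyExistsException
// the same as a FileAlreadyExistsException thrown directly.
def isAlreadyExistsFailure(t: Throwable): Boolean = t match {
  case _: FileAlreadyExistsException => true
  case io: IOException => io.getCause.isInstanceOf[FileAlreadyExistsException]
  case _ => false
}
```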
@@ -15,6 +15,7 @@
  */
 package io.delta.kernel.defaults.engine

+import java.io.IOException
 import java.nio.file.FileAlreadyExistsException

 import scala.collection.JavaConverters._

@@ -63,11 +64,12 @@ class DefaultParquetHandlerSuite extends AnyFunSuite with ParquetSuiteBase
     writeAndVerify()

     // Try to write as same file and expect an error
-    intercept[FileAlreadyExistsException] {
+    val e = intercept[IOException] {
Contributor
Why is removing Spark 3.5 causing all these kernel code changes?

Collaborator (Author)
I upgraded our hadoop version to match Spark 4.0. More details on #5616 (comment). Kernel changes are only related to this.
       parquetHandler.writeParquetFileAtomically(
         filePath,
         toCloseableIterator(dataToWrite.asJava.iterator()))
     }
+    assert(e.getCause.isInstanceOf[FileAlreadyExistsException])
   }
 }
}
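The new assertion above checks the wrapped cause rather than the exception type itself. A self-contained ScalaTest sketch of that pattern, with a stand-in for the atomic write; the suite and helper names are hypothetical, not from the Delta codebase:

```scala
import java.io.IOException
import java.nio.file.FileAlreadyExistsException

import org.scalatest.funsuite.AnyFunSuite

class AtomicWriteCollisionExampleSuite extends AnyFunSuite {

  // Stand-in for an atomic write against a path that already exists; newer Hadoop
  // surfaces the collision as an IOException wrapping FileAlreadyExistsException.
  private def writeExistingFileAtomically(): Unit =
    throw new IOException(new FileAlreadyExistsException("/tmp/example.parquet"))

  test("collision surfaces FileAlreadyExistsException as the IOException cause") {
    val e = intercept[IOException] {
      writeExistingFileAtomically()
    }
    assert(e.getCause.isInstanceOf[FileAlreadyExistsException])
  }
}
```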