diff --git a/.github/workflows/iceberg_test.yaml b/.github/workflows/iceberg_test.yaml
index 001e9a97ede..9c637cd6802 100644
--- a/.github/workflows/iceberg_test.yaml
+++ b/.github/workflows/iceberg_test.yaml
@@ -1,5 +1,5 @@
name: "Delta Iceberg Latest"
-on: [push, pull_request]
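+# Temporarily disabled while delta-iceberg is excluded from the Spark 4.0 build (see the TODO in build.sbt).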
+on: [] # [push, pull_request]
jobs:
test:
name: "DIL: Scala ${{ matrix.scala }}"
@@ -25,7 +25,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: "zulu"
- java-version: "11"
+ java-version: "17"
- name: Cache Scala, SBT
uses: actions/cache@v3
with:
diff --git a/.github/workflows/kernel_test.yaml b/.github/workflows/kernel_test.yaml
index b43fdaca81b..2b3d0211fa1 100644
--- a/.github/workflows/kernel_test.yaml
+++ b/.github/workflows/kernel_test.yaml
@@ -37,11 +37,12 @@ jobs:
echo "Runner arch: ${{ runner.arch }}"
- name: Checkout code
uses: actions/checkout@v4
+ # Run unit tests with JDK 17. These unit tests depend on Spark, and Spark 4.0+ requires JDK 17.
- name: install java
uses: actions/setup-java@v4
with:
distribution: "zulu"
- java-version: "11"
+ java-version: "17"
- name: Cache SBT and dependencies
id: cache-sbt
uses: actions/cache@v4
@@ -59,7 +60,7 @@ jobs:
else
echo "❌ Cache MISS - will download dependencies"
fi
- - name: Run tests
+ - name: Run unit tests
run: |
python run-tests.py --group kernel --coverage --shard ${{ matrix.shard }}
@@ -68,6 +69,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v3
+ # Run integration tests with JDK 11, as they have no Spark dependency
- name: install java
uses: actions/setup-java@v3
with:
diff --git a/.github/workflows/kernel_unitycatalog_test.yaml b/.github/workflows/kernel_unitycatalog_test.yaml
index b53c927c09f..6864cdca46b 100644
--- a/.github/workflows/kernel_unitycatalog_test.yaml
+++ b/.github/workflows/kernel_unitycatalog_test.yaml
@@ -22,7 +22,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: "zulu"
- java-version: "11"
+ java-version: "17"
if: steps.git-diff.outputs.diff
- name: Run Unity tests with coverage
run: |
diff --git a/.github/workflows/spark_examples_test.yaml b/.github/workflows/spark_examples_test.yaml
index 302cc150f3d..d5574761310 100644
--- a/.github/workflows/spark_examples_test.yaml
+++ b/.github/workflows/spark_examples_test.yaml
@@ -24,7 +24,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: "zulu"
- java-version: "11"
+ java-version: "17"
- name: Cache Scala, SBT
uses: actions/cache@v3
with:
diff --git a/.github/workflows/spark_master_test.yaml b/.github/workflows/spark_master_test.yaml
deleted file mode 100644
index b2e88046a60..00000000000
--- a/.github/workflows/spark_master_test.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: "Delta Spark Master"
-on: [push, pull_request]
-jobs:
- test:
- name: "DSM: Scala ${{ matrix.scala }}, Shard ${{ matrix.shard }}"
- runs-on: ubuntu-24.04
- strategy:
- matrix:
- # These Scala versions must match those in the build.sbt
- scala: [2.13.16]
- # Important: This list of shards must be [0..NUM_SHARDS - 1]
- shard: [0, 1, 2, 3]
- env:
- SCALA_VERSION: ${{ matrix.scala }}
- # Important: This must be the same as the length of shards in matrix
- NUM_SHARDS: 4
- steps:
- - uses: actions/checkout@v3
- - uses: technote-space/get-diff-action@v4
- id: git-diff
- with:
- PATTERNS: |
- **
- .github/workflows/**
- !unity/**
- !kernel/**
- !connectors/**
- - name: install java
- uses: actions/setup-java@v3
- with:
- distribution: "zulu"
- java-version: "17"
- - name: Cache Scala, SBT
- uses: actions/cache@v3
- with:
- path: |
- ~/.sbt
- ~/.ivy2
- ~/.cache/coursier
- !~/.cache/coursier/v1/https/repository.apache.org/content/groups/snapshots
- # Change the key if dependencies are changed. For each key, GitHub Actions will cache the
- # the above directories when we use the key for the first time. After that, each run will
- # just use the cache. The cache is immutable so we need to use a new key when trying to
- # cache new stuff.
- key: delta-sbt-cache-spark-master-scala${{ matrix.scala }}
- - name: Install Job dependencies
- run: |
- sudo apt-get update
- sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git
- sudo apt install libedit-dev
- if: steps.git-diff.outputs.diff
- - name: Run Spark Master tests
- # when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_test.yaml
- run: |
- TEST_PARALLELISM_COUNT=4 SHARD_ID=${{matrix.shard}} build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean sparkV2/test
- TEST_PARALLELISM_COUNT=4 SHARD_ID=${{matrix.shard}} build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean spark/test
- TEST_PARALLELISM_COUNT=4 build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean connectServer/test
- TEST_PARALLELISM_COUNT=4 build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean connectServer/assembly connectClient/test
- if: steps.git-diff.outputs.diff
diff --git a/.github/workflows/spark_python_test.yaml b/.github/workflows/spark_python_test.yaml
index 29eef085fbe..6ab3ab23bb8 100644
--- a/.github/workflows/spark_python_test.yaml
+++ b/.github/workflows/spark_python_test.yaml
@@ -25,7 +25,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: "zulu"
- java-version: "11"
+ java-version: "17"
- name: Cache Scala, SBT
uses: actions/cache@v3
with:
@@ -53,23 +53,19 @@ jobs:
export PATH="~/.pyenv/bin:$PATH"
eval "$(pyenv init -)"
eval "$(pyenv virtualenv-init -)"
- pyenv install 3.8.18
- pyenv global system 3.8.18
- pipenv --python 3.8 install
+ pyenv install 3.9
+ pyenv global system 3.9
+ pipenv --python 3.9 install
# Update the pip version to 24.0. By default `pyenv.run` installs the latest pip version
# available. From version 24.1, `pip` doesn't allow installing python packages
# with version string containing `-`. In Delta-Spark case, the pypi package generated has
# `-SNAPSHOT` in version (e.g. `3.3.0-SNAPSHOT`) as the version is picked up from
# the`version.sbt` file.
pipenv run pip install pip==24.0 setuptools==69.5.1 wheel==0.43.0
- # Install PySpark without bundled Scala 2.12 JARs - read more in the future note below
- pipenv run pip install pyspark==3.5.3 --no-deps
- pipenv run pip install py4j==0.10.9.7
- pipenv run pip install flake8==3.5.0 pypandoc==1.3.3
- pipenv run pip install black==23.9.1
+ pipenv run pip install pyspark==4.0.1
+ pipenv run pip install flake8==3.9.0
+ pipenv run pip install black==23.12.1
pipenv run pip install importlib_metadata==3.10.0
- # The mypy versions 0.982 and 1.8.0 have conflicting rules (cannot get style checks to
- # pass for both versions on the same file) so we upgrade this to match Spark 4.0
pipenv run pip install mypy==1.8.0
pipenv run pip install mypy-protobuf==3.3.0
pipenv run pip install cryptography==37.0.4
@@ -77,9 +73,16 @@ jobs:
pipenv run pip install wheel==0.33.4
pipenv run pip install setuptools==41.1.0
pipenv run pip install pydocstyle==3.0.0
- pipenv run pip install pandas==1.1.3
- pipenv run pip install pyarrow==8.0.0
- pipenv run pip install numpy==1.20.3
+ pipenv run pip install pandas==2.2.0
+ pipenv run pip install pyarrow==11.0.0
+ pipenv run pip install pypandoc==1.3.3
+ pipenv run pip install numpy==1.22.4
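+ # gRPC and protobuf packages (plus type stubs) used by the Spark Connect Python client and mypy checks in PySpark 4.x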
+ pipenv run pip install grpcio==1.67.0
+ pipenv run pip install grpcio-status==1.67.0
+ pipenv run pip install googleapis-common-protos==1.65.0
+ pipenv run pip install protobuf==5.29.1
+ pipenv run pip install googleapis-common-protos-stubs==2.2.0
+ pipenv run pip install grpc-stubs==1.24.11
if: steps.git-diff.outputs.diff
- name: Run Python tests
# when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_master_test.yaml
diff --git a/.github/workflows/spark_test.yaml b/.github/workflows/spark_test.yaml
index d0001bdd10e..046eb5f0295 100644
--- a/.github/workflows/spark_test.yaml
+++ b/.github/workflows/spark_test.yaml
@@ -29,7 +29,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: "zulu"
- java-version: "11"
+ java-version: "17"
- name: Cache Scala, SBT
uses: actions/cache@v3
with:
@@ -57,29 +57,36 @@ jobs:
export PATH="~/.pyenv/bin:$PATH"
eval "$(pyenv init -)"
eval "$(pyenv virtualenv-init -)"
- pyenv install 3.8.18
- pyenv global system 3.8.18
- pipenv --python 3.8 install
+ pyenv install 3.9
+ pyenv global system 3.9
+ pipenv --python 3.9 install
# Update the pip version to 24.0. By default `pyenv.run` installs the latest pip version
# available. From version 24.1, `pip` doesn't allow installing python packages
# with version string containing `-`. In Delta-Spark case, the pypi package generated has
# `-SNAPSHOT` in version (e.g. `3.3.0-SNAPSHOT`) as the version is picked up from
# the`version.sbt` file.
pipenv run pip install pip==24.0 setuptools==69.5.1 wheel==0.43.0
- pipenv run pip install pyspark==3.5.3
- pipenv run pip install flake8==3.5.0 pypandoc==1.3.3
- pipenv run pip install black==23.9.1
+ pipenv run pip install pyspark==4.0.1
+ pipenv run pip install flake8==3.9.0
+ pipenv run pip install black==23.12.1
pipenv run pip install importlib_metadata==3.10.0
- pipenv run pip install mypy==0.982
+ pipenv run pip install mypy==1.8.0
pipenv run pip install mypy-protobuf==3.3.0
pipenv run pip install cryptography==37.0.4
pipenv run pip install twine==4.0.1
pipenv run pip install wheel==0.33.4
pipenv run pip install setuptools==41.1.0
pipenv run pip install pydocstyle==3.0.0
- pipenv run pip install pandas==1.1.3
- pipenv run pip install pyarrow==8.0.0
- pipenv run pip install numpy==1.20.3
+ pipenv run pip install pandas==2.2.0
+ pipenv run pip install pyarrow==11.0.0
+ pipenv run pip install pypandoc==1.3.3
+ pipenv run pip install numpy==1.22.4
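+ # gRPC and protobuf packages (plus type stubs) used by the Spark Connect Python client and mypy checks in PySpark 4.x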
+ pipenv run pip install grpcio==1.67.0
+ pipenv run pip install grpcio-status==1.67.0
+ pipenv run pip install googleapis-common-protos==1.65.0
+ pipenv run pip install protobuf==5.29.1
+ pipenv run pip install googleapis-common-protos-stubs==2.2.0
+ pipenv run pip install grpc-stubs==1.24.11
if: steps.git-diff.outputs.diff
- name: Scala structured logging style check
run: |
diff --git a/.github/workflows/unidoc.yaml b/.github/workflows/unidoc.yaml
index 979c1cfa962..735bdf806d4 100644
--- a/.github/workflows/unidoc.yaml
+++ b/.github/workflows/unidoc.yaml
@@ -13,7 +13,7 @@
uses: actions/setup-java@v3
with:
distribution: "zulu"
- java-version: "11"
+ java-version: "17"
- uses: actions/checkout@v3
- name: generate unidoc
run: build/sbt "++ ${{ matrix.scala }}" unidoc
diff --git a/build.sbt b/build.sbt
index ee8f2c122f9..9694aeda251 100644
--- a/build.sbt
+++ b/build.sbt
@@ -66,7 +66,7 @@ val sparkVersion = settingKey[String]("Spark version")
// Dependent library versions
val defaultSparkVersion = SparkVersionSpec.DEFAULT.fullVersion // Spark version to use for testing in non-delta-spark related modules
-val hadoopVersion = "3.3.4"
+val hadoopVersion = "3.4.0"
val scalaTestVersion = "3.2.15"
val scalaTestVersionForConnectors = "3.0.8"
val parquet4sVersion = "1.9.4"
@@ -257,7 +257,7 @@ lazy val connectClient = (project in file("spark-connect/client"))
// Create a symlink for the log4j properties
val confDir = distributionDir / "conf"
IO.createDirectory(confDir)
- val log4jProps = (spark / Test / resourceDirectory).value / "log4j2_spark_master.properties"
+ val log4jProps = (spark / Test / resourceDirectory).value / "log4j2.properties"
val linkedLog4jProps = confDir / "log4j2.properties"
Files.createSymbolicLink(linkedLog4jProps.toPath, log4jProps.toPath)
}
@@ -705,6 +705,8 @@ lazy val contribs = (project in file("contribs"))
Compile / compile := ((Compile / compile) dependsOn createTargetClassesDir).value
).configureUnidoc()
+/*
+TODO: the sharing module does not compile against Spark 4.0; re-enable once fixed
lazy val sharing = (project in file("sharing"))
.dependsOn(spark % "compile->compile;test->test;provided->provided")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
@@ -715,22 +717,6 @@ lazy val sharing = (project in file("sharing"))
releaseSettings,
CrossSparkVersions.sparkDependentSettings(sparkVersion),
Test / javaOptions ++= Seq("-ea"),
- Compile / compile := runTaskOnlyOnSparkMaster(
- task = Compile / compile,
- taskName = "compile",
- projectName = "delta-sharing-spark",
- emptyValue = Analysis.empty.asInstanceOf[CompileAnalysis]
- ).value,
- Test / test := runTaskOnlyOnSparkMaster(
- task = Test / test,
- taskName = "test",
- projectName = "delta-sharing-spark",
- emptyValue = ()).value,
- publish := runTaskOnlyOnSparkMaster(
- task = publish,
- taskName = "publish",
- projectName = "delta-sharing-spark",
- emptyValue = ()).value,
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided",
@@ -747,6 +733,7 @@ lazy val sharing = (project in file("sharing"))
"org.apache.spark" %% "spark-hive" % sparkVersion.value % "test" classifier "tests",
)
).configureUnidoc()
+*/
lazy val kernelApi = (project in file("kernel/kernel-api"))
.enablePlugins(ScalafmtPlugin)
@@ -898,7 +885,7 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults"))
// such as warm runs, cold runs, defining benchmark parameter variables etc.
"org.openjdk.jmh" % "jmh-core" % "1.37" % "test",
"org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37" % "test",
- "io.delta" %% "delta-spark" % "3.3.2" % "test",
+ "io.delta" %% "delta-spark" % "4.0.0" % "test",
"org.apache.spark" %% "spark-hive" % defaultSparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-sql" % defaultSparkVersion % "test" classifier "tests",
@@ -1010,6 +997,8 @@ lazy val storageS3DynamoDB = (project in file("storage-s3-dynamodb"))
)
).configureUnidoc()
+/*
+TODO: re-add delta-iceberg once it supports Spark 4.0+
val icebergSparkRuntimeArtifactName = {
val (expMaj, expMin, _) = getMajorMinorPatch(defaultSparkVersion)
s"iceberg-spark-runtime-$expMaj.$expMin"
@@ -1165,6 +1154,7 @@ lazy val icebergShaded = (project in file("icebergShaded"))
assembly / assemblyMergeStrategy := updateMergeStrategy((assembly / assemblyMergeStrategy).value),
assemblyPackageScala / assembleArtifact := false,
)
+*/
lazy val hudi = (project in file("hudi"))
.dependsOn(spark % "compile->compile;test->test;provided->provided")
@@ -1265,7 +1255,8 @@ val createTargetClassesDir = taskKey[Unit]("create target classes dir")
// Don't use these groups for any other projects
lazy val sparkGroup = project
- .aggregate(spark, sparkV1, sparkV1Filtered, sparkV2, contribs, storage, storageS3DynamoDB, sharing, hudi)
+ // TODO: add sharing back after fixing compilation
+ .aggregate(spark, sparkV1, sparkV1Filtered, sparkV2, contribs, storage, storageS3DynamoDB, hudi)
.settings(
// crossScalaVersions must be set to Nil on the aggregating project
crossScalaVersions := Nil,
@@ -1273,6 +1264,7 @@ lazy val sparkGroup = project
publish / skip := false,
)
+/*
lazy val icebergGroup = project
.aggregate(iceberg, testDeltaIcebergJar)
.settings(
@@ -1281,6 +1273,7 @@ lazy val icebergGroup = project
publishArtifact := false,
publish / skip := false,
)
+*/
lazy val kernelGroup = project
.aggregate(kernelApi, kernelDefaults, kernelBenchmarks)
diff --git a/examples/scala/build.sbt b/examples/scala/build.sbt
index 0f07d46a159..0f242990fe7 100644
--- a/examples/scala/build.sbt
+++ b/examples/scala/build.sbt
@@ -42,8 +42,10 @@ def getMajorMinor(version: String): (Int, Int) = {
}
}
val lookupSparkVersion: PartialFunction[(Int, Int), String] = {
- // version 4.0.0-preview1
- case (major, minor) if major >= 4 => "4.0.0-preview1"
+ // TODO: figure out how to run integration tests against multiple Spark versions
+ case (major, minor) if major >= 4 && minor >= 1 => "4.0.1"
+ // version 4.0.0
+ case (major, minor) if major >= 4 => "4.0.0"
// versions 3.3.x+
case (major, minor) if major >= 3 && minor >=3 => "3.5.3"
// versions 3.0.0 to 3.2.x
diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java
index b651881c25e..5b326f5e000 100644
--- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java
+++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java
@@ -91,6 +91,11 @@ public static void checkpoint(Engine engine, Clock clock, SnapshotImpl snapshot)
numberOfAddFiles = checkpointDataIter.getNumberOfAddActions();
} catch (FileAlreadyExistsException faee) {
throw new CheckpointAlreadyExistsException(version);
+ } catch (IOException io) {
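+ // Some ParquetHandler implementations report a concurrent checkpoint write as an
+ // IOException wrapping FileAlreadyExistsException; unwrap it so callers still get
+ // a CheckpointAlreadyExistsException.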
+ if (io.getCause() instanceof FileAlreadyExistsException) {
+ throw new CheckpointAlreadyExistsException(version);
+ }
+ throw io;
}
final CheckpointMetaData checkpointMetaData =
diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala
index 6d5d04bad7a..e8ea9d844a5 100644
--- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala
+++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala
@@ -15,6 +15,7 @@
*/
package io.delta.kernel.defaults.engine
+import java.io.IOException
import java.nio.file.FileAlreadyExistsException
import scala.collection.JavaConverters._
@@ -63,11 +64,12 @@ class DefaultParquetHandlerSuite extends AnyFunSuite with ParquetSuiteBase {
writeAndVerify()
// Try to write as same file and expect an error
- intercept[FileAlreadyExistsException] {
+ val e = intercept[IOException] {
parquetHandler.writeParquetFileAtomically(
filePath,
toCloseableIterator(dataToWrite.asJava.iterator()))
}
+ assert(e.getCause.isInstanceOf[FileAlreadyExistsException])
}
}
}
diff --git a/project/CrossSparkVersions.scala b/project/CrossSparkVersions.scala
index 16cd96c2c39..2f25458c9f3 100644
--- a/project/CrossSparkVersions.scala
+++ b/project/CrossSparkVersions.scala
@@ -176,7 +176,7 @@ import Unidoc._
case class SparkVersionSpec(
fullVersion: String,
targetJvm: String,
- additionalSourceDir: Option[String],
+ additionalSourceDir: Option[String] = None,
antlr4Version: String,
additionalJavaOptions: Seq[String] = Seq.empty,
jacksonVersion: String = "2.15.2"
@@ -194,11 +194,8 @@ case class SparkVersionSpec(
/** Whether this is the master Spark version */
def isMaster: Boolean = SparkVersionSpec.MASTER.contains(this)
- /** Returns log4j config file based on source directory */
- def log4jConfig: String = {
- if (additionalSourceDir.exists(_.contains("master"))) "log4j2_spark_master.properties"
- else "log4j2.properties"
- }
+ /** Returns log4j config file */
+ def log4jConfig: String = "log4j2.properties"
/** Whether to export JARs instead of class directories (needed for Spark Connect on master) */
def exportJars: Boolean = additionalSourceDir.exists(_.contains("master"))
@@ -209,18 +206,9 @@ case class SparkVersionSpec(
object SparkVersionSpec {
- private val spark35 = SparkVersionSpec(
- fullVersion = "3.5.7",
- targetJvm = "11",
- additionalSourceDir = Some("scala-spark-3.5"),
- antlr4Version = "4.9.3",
- additionalJavaOptions = Seq.empty
- )
-
- private val spark40Snapshot = SparkVersionSpec(
- fullVersion = "4.0.2-SNAPSHOT",
+ private val spark40 = SparkVersionSpec(
+ fullVersion = "4.0.1",
targetJvm = "17",
- additionalSourceDir = Some("scala-spark-master"),
antlr4Version = "4.13.1",
additionalJavaOptions = Seq(
// Copied from SparkBuild.scala to support Java 17 for unit tests (see apache/spark#34153)
@@ -240,13 +228,13 @@ object SparkVersionSpec {
)
/** Default Spark version */
- val DEFAULT = spark35
+ val DEFAULT = spark40
/** Spark master branch version (optional). Release branches should not build against master */
- val MASTER: Option[SparkVersionSpec] = Some(spark40Snapshot)
+ val MASTER: Option[SparkVersionSpec] = None
/** All supported Spark versions - internal use only */
- val ALL_SPECS = Seq(spark35, spark40Snapshot)
+ val ALL_SPECS = Seq(spark40)
}
/** See docs on top of this file */
@@ -263,6 +251,7 @@ object CrossSparkVersions extends AutoPlugin {
// Resolve aliases first
val resolvedInput = input match {
case "default" => SparkVersionSpec.DEFAULT.fullVersion
+ /*
case "master" => SparkVersionSpec.MASTER match {
case Some(masterSpec) => masterSpec.fullVersion
case None => throw new IllegalArgumentException(
@@ -270,6 +259,7 @@ object CrossSparkVersions extends AutoPlugin {
SparkVersionSpec.ALL_SPECS.map(_.fullVersion).mkString(", ")
)
}
+ */
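+ // The "master" alias is disabled while MASTER is None; only released Spark versions can be resolved.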
case other => other
}
diff --git a/project/SparkMimaExcludes.scala b/project/SparkMimaExcludes.scala
index 20af16c8423..21d850b8023 100644
--- a/project/SparkMimaExcludes.scala
+++ b/project/SparkMimaExcludes.scala
@@ -89,7 +89,11 @@ object SparkMimaExcludes {
// Changes in 4.0.0
ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaTable.improveUnsupportedOpError"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaMergeBuilder.improveUnsupportedOpError"),
- ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaMergeBuilder.execute")
+ ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaMergeBuilder.execute"),
+
+ // Changes in 4.1.0
+ // TODO: the exception type hierarchy changed due to the removal of DeltaThrowableConditionShim
+ ProblemFilters.exclude[MissingTypesProblem]("io.delta.exceptions.*")
// scalastyle:on line.size.limit
)
diff --git a/project/tests/test_cross_spark_publish.py b/project/tests/test_cross_spark_publish.py
index 9a317b8c207..47de42c8009 100755
--- a/project/tests/test_cross_spark_publish.py
+++ b/project/tests/test_cross_spark_publish.py
@@ -32,9 +32,9 @@
"delta-connect-common{suffix}_2.13-{version}.jar",
"delta-connect-client{suffix}_2.13-{version}.jar",
"delta-connect-server{suffix}_2.13-{version}.jar",
- "delta-sharing-spark{suffix}_2.13-{version}.jar",
+ # "delta-sharing-spark{suffix}_2.13-{version}.jar", TODO add back after fixing build
"delta-contribs{suffix}_2.13-{version}.jar",
- "delta-iceberg{suffix}_2.13-{version}.jar"
+ # "delta-iceberg{suffix}_2.13-{version}.jar" TODO add back after fixing build
]
# Non-spark-related modules (built once, same for all Spark versions)
@@ -76,12 +76,11 @@ def all_jars(self) -> List[str]:
# Spark versions to test (key = full version string, value = spec with suffix)
SPARK_VERSIONS: Dict[str, SparkVersionSpec] = {
- "3.5.7": SparkVersionSpec(""), # Default Spark version without suffix
- "4.0.2-SNAPSHOT": SparkVersionSpec("_4.0") # Other Spark versions with suffix
+ "4.0.1": SparkVersionSpec("") # Default Spark version without suffix
}
# The default Spark version (no suffix in artifact names)
-DEFAULT_SPARK = "3.5.7"
+DEFAULT_SPARK = "4.0.1"
def substitute_xversion(jar_templates: List[str], delta_version: str) -> Set[str]:
@@ -200,7 +199,7 @@ def test_default_publish(self) -> bool:
def test_run_only_for_spark_modules(self) -> bool:
"""runOnlyForReleasableSparkModules should publish only Spark-dependent modules."""
- spark_version = "4.0.2-SNAPSHOT"
+ spark_version = "4.0.1"
spark_spec = SPARK_VERSIONS[spark_version]
print("\n" + "="*70)
diff --git a/python/delta/pip_utils.py b/python/delta/pip_utils.py
index ad95de096e2..52ca1babbdd 100644
--- a/python/delta/pip_utils.py
+++ b/python/delta/pip_utils.py
@@ -83,7 +83,7 @@ def configure_spark_with_delta_pip(
# Determine the artifact name based on Spark version
# NOTE: When updating LATEST_RELEASED_SPARK_VERSION in project/CrossSparkVersions.scala,
# also update the version check here to match the new latest version.
- latest_released_spark_version_prefix = "3.5."
+ latest_released_spark_version_prefix = "4.0."
artifact_name = f"delta-spark_{scala_version}"
diff --git a/run-tests.py b/run-tests.py
index e8e5f43544f..59778b10dde 100755
--- a/run-tests.py
+++ b/run-tests.py
@@ -86,76 +86,9 @@ def run_sbt_tests(root_dir, test_group, coverage, scala_version=None, shard=None
cmd += ["-J-Xmx6G"]
run_cmd(cmd, stream_output=True)
-def setup_pyspark_scala213_compatibility():
- """
- Setup PySpark with Scala 2.13 compatibility when SCALA_VERSION is set to 2.13.x.
- This downloads Spark with Scala 2.13 and sets up the environment variables.
-
- Download and setup Spark 3.5.3 with Scala 2.13 for compatibility with Delta Scala 2.13
- Future note for Spark 4.0 upgrade: PySpark 3.5.3 from pip includes Scala 2.12 JARs, but
- because of the upgrade to Scala 2.13, it was causing binary incompatibility errors.
- For now (before Spark 4.0), we install PySpark without dependencies and use Spark 3.5.3 compiled
- for Scala 2.13 to ensure compatibility. Remove the four steps below for Spark 4.0 upgrade.
- """
- scala_version = os.getenv("SCALA_VERSION")
- if not scala_version or not scala_version.startswith("2.13"):
- return False
-
- print("##### Setting up PySpark Scala 2.13 compatibility #####")
-
- # Check if Scala 2.13 Spark is already set up
- spark_home = os.getenv("SPARK_HOME")
- if spark_home and "scala2.13" in spark_home:
- print(f"PySpark Scala 2.13 already configured: {spark_home}")
- return True
-
- try:
- import subprocess
- from pathlib import Path
-
- # Download Spark 3.5.3 with Scala 2.13
- SPARK_VERSION = "3.5.3"
- SCALA_SUFFIX = "2.13"
- SPARK_DIR = f"spark-{SPARK_VERSION}-bin-hadoop3-scala{SCALA_SUFFIX}"
-
- spark_url = f"https://archive.apache.org/dist/spark/spark-{SPARK_VERSION}/{SPARK_DIR}.tgz"
- spark_tgz = f"{SPARK_DIR}.tgz"
-
- # Download if not already present
- if not os.path.exists(SPARK_DIR):
- print(f"Downloading Spark with Scala 2.13: {spark_url}")
- run_cmd(["curl", "-LO", spark_url], stream_output=True)
- print(f"Extracting {spark_tgz}")
- run_cmd(["tar", "-xzf", spark_tgz], stream_output=True)
- else:
- print(f"Using existing Spark directory: {SPARK_DIR}")
-
- # Set SPARK_HOME environment variable
- new_spark_home = os.path.abspath(SPARK_DIR)
- os.environ["SPARK_HOME"] = new_spark_home
- print(f"Set SPARK_HOME to: {new_spark_home}")
-
- # Add Spark bin to PATH
- spark_bin = os.path.join(new_spark_home, "bin")
- current_path = os.environ.get("PATH", "")
- if spark_bin not in current_path:
- os.environ["PATH"] = f"{spark_bin}:{current_path}"
- print(f"Added to PATH: {spark_bin}")
-
- print("PySpark Scala 2.13 compatibility setup completed successfully")
- return True
-
- except Exception as e:
- print(f"Warning: Failed to setup PySpark Scala 2.13 compatibility: {e}")
- print("Continuing with existing PySpark installation...")
- return False
-
-
def run_python_tests(root_dir):
print("##### Running Python tests #####")
- # Setup PySpark Scala 2.13 compatibility if needed
- setup_pyspark_scala213_compatibility()
python_test_script = path.join(root_dir, path.join("python", "run-tests.py"))
print("Calling script %s", python_test_script)
run_cmd(["python3", python_test_script], env={'DELTA_TESTING': '1'}, stream_output=True)
diff --git a/setup.py b/setup.py
index 3541c7c447c..03d2443e01f 100644
--- a/setup.py
+++ b/setup.py
@@ -13,19 +13,7 @@ def get_version_from_sbt():
version = fp.read().strip()
return version.split('"')[1]
-
VERSION = get_version_from_sbt()
-MAJOR_VERSION = int(VERSION.split(".")[0])
-
-if MAJOR_VERSION < 4:
- packages_arg = ['delta', 'delta.exceptions']
- install_requires_arg = ['pyspark>=3.5.2,<3.6.0', 'importlib_metadata>=1.0.0']
- python_requires_arg = '>=3.6'
-else: # MAJOR_VERSION >= 4
- # Delta 4.0+ contains Delta Connect code and uses Spark 4.0+
- packages_arg = ['delta', 'delta.connect', 'delta.connect.proto', 'delta.exceptions']
- install_requires_arg = ['pyspark>=4.0.0', 'importlib_metadata>=1.0.0']
- python_requires_arg = '>=3.9'
class VerifyVersionCommand(install):
"""Custom command to verify that the git tag matches our version"""
@@ -44,6 +32,10 @@ def run(self):
with open("python/README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
+# TODO: once we support multiple Spark versions, update this to be compatible with all of them
+install_requires_arg = ['pyspark>=4.0.1', 'importlib_metadata>=1.0.0']
+python_requires_arg = '>=3.9'
+
setup(
name="delta-spark",
version=VERSION,
@@ -70,7 +62,7 @@ def run(self):
],
keywords='delta.io',
package_dir={'': 'python'},
- packages=packages_arg,
+ packages=['delta', 'delta.connect', 'delta.connect.proto', 'delta.exceptions'],
package_data={
'delta': ['py.typed'],
},
diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala
index 6f26f8d2370..04eff52e8d8 100644
--- a/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala
+++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala
@@ -18,7 +18,7 @@ package io.delta.sharing.spark
import java.time.LocalDateTime
-import org.apache.spark.sql.delta.{DeltaExcludedBySparkVersionTestMixinShims, DeltaIllegalStateException, DeltaLog}
+import org.apache.spark.sql.delta.{DeltaIllegalStateException, DeltaLog}
import org.apache.spark.sql.delta.DeltaOptions.{
IGNORE_CHANGES_OPTION,
IGNORE_DELETES_OPTION,
@@ -49,8 +49,7 @@ class DeltaFormatSharingSourceSuite
extends StreamTest
with DeltaSQLCommandTest
with DeltaSharingTestSparkUtils
- with DeltaSharingDataSourceDeltaTestUtils
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaSharingDataSourceDeltaTestUtils {
import testImplicits._
@@ -1216,9 +1215,7 @@ class DeltaFormatSharingSourceSuite
}
}
- testSparkMasterOnly(
- "streaming variant query works"
- ) {
+ test("streaming variant query works") {
withTempDirs { (inputDir, outputDir, checkpointDir) =>
val deltaTableName = "variant_table"
withTable(deltaTableName) {
diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala
index 1cd22b47df0..65f20da8b1d 100644
--- a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala
+++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala
@@ -21,7 +21,7 @@ package io.delta.sharing.spark
import scala.concurrent.duration._
-import org.apache.spark.sql.delta.{DeltaConfigs, DeltaExcludedBySparkVersionTestMixinShims, VariantShreddingPreviewTableFeature, VariantTypePreviewTableFeature, VariantTypeTableFeature}
+import org.apache.spark.sql.delta.{DeltaConfigs, VariantShreddingPreviewTableFeature, VariantTypePreviewTableFeature, VariantTypeTableFeature}
import org.apache.spark.sql.delta.sources.DeltaSQLConf
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest
@@ -43,8 +43,7 @@ trait DeltaSharingDataSourceDeltaSuiteBase
extends QueryTest
with DeltaSQLCommandTest
with DeltaSharingTestSparkUtils
- with DeltaSharingDataSourceDeltaTestUtils
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaSharingDataSourceDeltaTestUtils {
override def beforeEach(): Unit = {
spark.sessionState.conf.setConfString(
@@ -1519,7 +1518,7 @@ trait DeltaSharingDataSourceDeltaSuiteBase
VariantTypeTableFeature,
VariantShreddingPreviewTableFeature
).foreach { feature =>
- testSparkMasterOnly(s"basic variant test - table feature: $feature") {
+ test(s"basic variant test - table feature: $feature") {
withTempDir { tempDir =>
val extraConfs = feature match {
case VariantShreddingPreviewTableFeature => Map(
diff --git a/spark/src/main/scala-spark-3.5/shims/ColumnConversionShims.scala b/spark/src/main/scala-spark-3.5/shims/ColumnConversionShims.scala
deleted file mode 100644
index 97816828d4e..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/ColumnConversionShims.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.Column
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-/**
- * Conversions from a [[org.apache.spark.sql.Column]] to an
- * [[org.apache.spark.sql.catalyst.expressions.Expression]], and vice versa.
- */
-object ClassicColumnConversions {
- def expression(c: Column): Expression = c.expr
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/ColumnDefinitionShims.scala b/spark/src/main/scala-spark-3.5/shims/ColumnDefinitionShims.scala
deleted file mode 100644
index b548a282ca7..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/ColumnDefinitionShims.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.types.{StructField, StructType}
-
-object ColumnDefinitionShims {
-
- /**
- * Helps handle a breaking change in [[org.apache.spark.sql.catalyst.plans.logical.CreateTable]]
- * between Spark 3.5 and Spark 4.0:
- * - In 3.5, `CreateTable` accepts a `tableSchema: StructType`.
- * - In 4.0, `CreateTable` accepts a `columns: Seq[ColumnDefinition]`.
- */
- def parseColumns(columns: Seq[StructField], sqlParser: ParserInterface): StructType = {
- StructType(columns.toSeq)
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/CreatableRelationProviderShims.scala b/spark/src/main/scala-spark-3.5/shims/CreatableRelationProviderShims.scala
deleted file mode 100644
index b5249d349c9..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/CreatableRelationProviderShims.scala
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.sources
-
-import org.apache.spark.sql.types.DataType
-
-trait CreatableRelationProviderShim extends CreatableRelationProvider {
-
- /**
- * The `supportsDataType` method is not defined in Spark 3.5 but is overidden by `DeltaDataSource`
- * in Spark 4.0.
- */
- def supportsDataType(dt: DataType): Boolean = throw new UnsupportedOperationException(
- "This method is not defined in Spark 3.5."
- )
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/DataFrameShims.scala b/spark/src/main/scala-spark-3.5/shims/DataFrameShims.scala
deleted file mode 100644
index 53145e36f70..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/DataFrameShims.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.{Column, DataFrame, Dataset, Encoders, SparkSession}
-import org.apache.spark.sql.execution.QueryExecution
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-
-object DataFrameUtils {
- def ofRows(spark: SparkSession, plan: LogicalPlan): DataFrame = Dataset.ofRows(spark, plan)
- def ofRows(queryExecution: QueryExecution): DataFrame = {
- val ds = new Dataset(queryExecution, Encoders.row(queryExecution.analyzed.schema))
- ds.asInstanceOf[DataFrame]
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/DecimalPrecisionTypeCoercionShims.scala b/spark/src/main/scala-spark-3.5/shims/DecimalPrecisionTypeCoercionShims.scala
deleted file mode 100644
index 41f050828c1..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/DecimalPrecisionTypeCoercionShims.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.catalyst.analysis.DecimalPrecision
-import org.apache.spark.sql.types.DecimalType
-
-object DecimalPrecisionTypeCoercionShims {
- // Returns the wider decimal type that's wider than both of them
- def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType =
- DecimalPrecision.widerDecimalType(d1, d2)
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaInvariantCheckerExecShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaInvariantCheckerExecShims.scala
deleted file mode 100644
index 5e40f5a9ea5..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/DeltaInvariantCheckerExecShims.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.constraints
-
-import org.apache.spark.sql.catalyst.optimizer.ReplaceExpressions
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.RuleExecutor
-
-trait DeltaInvariantCheckerOptimizerShims { self: RuleExecutor[LogicalPlan] =>
- val DELTA_INVARIANT_CHECKER_OPTIMIZER_BATCHES = Seq(
- Batch("Finish Analysis", Once, ReplaceExpressions)
- )
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaSqlParserShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaSqlParserShims.scala
deleted file mode 100644
index 2ecc7e5e65e..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/DeltaSqlParserShims.scala
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.delta.sql.parser
-
-trait DeltaSqlParserShims
diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaTableValueFunctionsShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaTableValueFunctionsShims.scala
deleted file mode 100644
index 5e91563c8b7..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/DeltaTableValueFunctionsShims.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-object DeltaTableValueFunctionsShims {
-
- /**
- * Handles a breaking change between Spark 3.5 and Spark Master (4.0).
- *
- * In Spark 4.0, SPARK-46331 [https://github.com/apache/spark/pull/44261] removed CodegenFallback
- * from a subset of DateTime expressions, making the `now()` expression unevaluable.
- */
- def evaluateTimeOption(value: Expression): String = {
- value.eval().toString
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaThrowableHelperShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaThrowableHelperShims.scala
deleted file mode 100644
index 945141e0907..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/DeltaThrowableHelperShims.scala
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.SparkThrowable
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.errors.QueryCompilationErrors
-
-object DeltaThrowableHelperShims {
- /**
- * Handles a breaking change (SPARK-46810) between Spark 3.5 and Spark Master (4.0) where
- * `error-classes.json` was renamed to `error-conditions.json`.
- */
- val SPARK_ERROR_CLASS_SOURCE_FILE = "error/error-classes.json"
-
- def showColumnsWithConflictDatabasesError(
- db: Seq[String], v1TableName: TableIdentifier): Throwable = {
- QueryCompilationErrors.showColumnsWithConflictDatabasesError(db, v1TableName)
- }
-}
-
-trait DeltaThrowableConditionShim extends SparkThrowable {
- def getCondition(): String = getErrorClass()
- override def getErrorClass(): String
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaTimeTravelSpecShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaTimeTravelSpecShims.scala
deleted file mode 100644
index 8202cbe2bd8..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/DeltaTimeTravelSpecShims.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.SparkSession
-
-object DeltaTimeTravelSpecShims {
-
- /**
- * Ensures only a single time travel syntax is used (i.e. not version AND timestamp).
- *
- * Handles a breaking change between Spark 3.5 and 4.0 which added support for
- * DataFrame-based time travel in Spark (https://github.com/apache/spark/pull/43403).
- *
- * TLDR: Starting in Spark 4.0, we end up with two time travel specifications in DeltaTableV2 if
- * options are used to specify the time travel version/timestamp. This breaks an existing check we
- * had (against Spark 3.5) which ensures only one time travel specification is used.
- *
- * The solution to get around this is just to ignore two specs if they are the same. If the user
- * did actually provide two different time travel specs, that would have been caught by Spark
- * earlier.
- *
- * @param currSpecOpt: The table's current [[DeltaTimeTravelSpec]]
- * @param newSpecOpt: The new [[DeltaTimeTravelSpec]] to be applied to the table
- */
- def validateTimeTravelSpec(
- spark: SparkSession,
- currSpecOpt: Option[DeltaTimeTravelSpec],
- newSpecOpt: Option[DeltaTimeTravelSpec]): Unit = {
- if (currSpecOpt.nonEmpty && newSpecOpt.nonEmpty) {
- throw DeltaErrors.multipleTimeTravelSyntaxUsed
- }
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/IncrementalExecutionShims.scala b/spark/src/main/scala-spark-3.5/shims/IncrementalExecutionShims.scala
deleted file mode 100644
index 9432a5dabcc..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/IncrementalExecutionShims.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.streaming
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-
-object IncrementalExecutionShims {
-
- /**
- * Handles a breaking change in the [[IncrementalExecution]] constructor between Spark 3.5 and
- * 4.0:
- * - Spark 3.5: no `isFirstBatch: Boolean` param
- * - Spark 4.0: adds `isFirstBatch: Boolean` param
- */
- def newInstance(
- sparkSession: SparkSession,
- logicalPlan: LogicalPlan,
- incrementalExecution: IncrementalExecution): IncrementalExecution = new IncrementalExecution(
- sparkSession,
- logicalPlan,
- incrementalExecution.outputMode,
- incrementalExecution.checkpointLocation,
- incrementalExecution.queryId,
- incrementalExecution.runId,
- incrementalExecution.currentBatchId,
- incrementalExecution.prevOffsetSeqMetadata,
- incrementalExecution.offsetSeqMetadata,
- incrementalExecution.watermarkPropagator
- )
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/LogKeyShims.scala b/spark/src/main/scala-spark-3.5/shims/LogKeyShims.scala
deleted file mode 100644
index 3ebdd76038a..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/LogKeyShims.scala
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * This file contains code from the Apache Spark project (original license above).
- * It contains modifications, which are licensed as follows:
- */
-
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.internal
-
-// LogKey is part of Spark's Structured Logging API and is not available in Spark 3.5.
-trait LogKeyShims {
- def name: String = ""
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/LoggingShims.scala b/spark/src/main/scala-spark-3.5/shims/LoggingShims.scala
deleted file mode 100644
index f56a51850f6..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/LoggingShims.scala
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * This file contains code from the Apache Spark project (original license above).
- * It contains modifications, which are licensed as follows:
- */
-
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.internal
-
-// MDC is part of Spark's Structured Logging API and is not available in Spark 3.5.
-case class MDC(key: LogKeyShims, value: Any) {
- require(!value.isInstanceOf[MessageWithContext],
- "the class of value cannot be MessageWithContext")
-}
-
-object MDC {
- def of(key: LogKeyShims, value: Any): MDC = MDC(key, value)
-}
-
-// MessageWithContext is part of Spark's Structured Logging API and is not available in Spark 3.5.
-case class MessageWithContext(message: String, context: java.util.HashMap[String, String]) {
- def +(mdc: MessageWithContext): MessageWithContext = {
- MessageWithContext(message + mdc.message, new java.util.HashMap[String, String]())
- }
-
- def stripMargin: MessageWithContext = copy(message = message.stripMargin)
-}
-
-// LogEntry is part of Spark's Structured Logging API and is not available in Spark 3.5.
-class LogEntry(messageWithContext: => MessageWithContext) {
- def message: String = messageWithContext.message
-
- def context: java.util.HashMap[String, String] = messageWithContext.context
-}
-
-object LogEntry {
- import scala.language.implicitConversions
-
- implicit def from(msgWithCtx: => MessageWithContext): LogEntry =
- new LogEntry(msgWithCtx)
-}
-
-trait LoggingShims extends Logging {
- implicit class LogStringContext(val sc: StringContext) {
- def log(args: MDC*): MessageWithContext = {
- val processedParts = sc.parts.iterator
- val sb = new StringBuilder(processedParts.next())
-
- args.foreach { mdc =>
- val value = if (mdc.value != null) mdc.value.toString else null
- sb.append(value)
-
- if (processedParts.hasNext) {
- sb.append(processedParts.next())
- }
- }
-
- MessageWithContext(sb.toString(), new java.util.HashMap[String, String]())
- }
- }
-
- protected def logInfo(entry: LogEntry): Unit = {
- if (log.isInfoEnabled) {
- log.info(entry.message)
- }
- }
-
- protected def logInfo(entry: LogEntry, throwable: Throwable): Unit = {
- if (log.isInfoEnabled) {
- log.info(entry.message, throwable)
- }
- }
-
- protected def logDebug(entry: LogEntry): Unit = {
- if (log.isDebugEnabled) {
- log.debug(entry.message)
- }
- }
-
- protected def logDebug(entry: LogEntry, throwable: Throwable): Unit = {
- if (log.isDebugEnabled) {
- log.debug(entry.message, throwable)
- }
- }
-
- protected def logTrace(entry: LogEntry): Unit = {
- if (log.isTraceEnabled) {
- log.trace(entry.message)
- }
- }
-
- protected def logTrace(entry: LogEntry, throwable: Throwable): Unit = {
- if (log.isTraceEnabled) {
- log.trace(entry.message, throwable)
- }
- }
-
- protected def logWarning(entry: LogEntry): Unit = {
- if (log.isWarnEnabled) {
- log.warn(entry.message)
- }
- }
-
- protected def logWarning(entry: LogEntry, throwable: Throwable): Unit = {
- if (log.isWarnEnabled) {
- log.warn(entry.message, throwable)
- }
- }
-
- protected def logError(entry: LogEntry): Unit = {
- if (log.isErrorEnabled) {
- log.error(entry.message)
- }
- }
-
- protected def logError(entry: LogEntry, throwable: Throwable): Unit = {
- if (log.isErrorEnabled) {
- log.error(entry.message, throwable)
- }
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/LogicalRelationShims.scala b/spark/src/main/scala-spark-3.5/shims/LogicalRelationShims.scala
deleted file mode 100644
index 171491c01be..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/LogicalRelationShims.scala
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources
-
-import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.sources.BaseRelation
-
-object LogicalRelationShims {
- /**
- * Handles a breaking change in the [[LogicalRelation]] constructor between Spark 3.5 and
- * 4.0:
- * - Spark 3.5: no `stream: Option[SparkDataStream]` param
- * - Spark 4.0: adds `stream: Option[SparkDataStream]` param
- */
- def newInstance(
- relation: BaseRelation,
- output: Seq[AttributeReference],
- catalogTable: Option[CatalogTable],
- isStreaming: Boolean): LogicalRelation = {
- LogicalRelation(relation, output, catalogTable, isStreaming)
- }
-}
-
-// Handles a breaking change between Spark 3.5 and Spark Master (4.0).
-// `LogicalRelationWithTable` is a new object in Spark 4.0.
-
-/**
- * Extract the [[BaseRelation]] and [[CatalogTable]] from [[LogicalRelation]]. You can also
- * retrieve the instance of LogicalRelation like following:
- *
- * case l @ LogicalRelationWithTable(relation, catalogTable) => ...
- */
-object LogicalRelationWithTable {
- def unapply(plan: LogicalRelation): Option[(BaseRelation, Option[CatalogTable])] = {
- Some(plan.relation, plan.catalogTable)
- }
-}
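
A minimal sketch of the call-site pattern these shims enabled, assuming a source tree from before this change where `LogicalRelationShims` and the backported `LogicalRelationWithTable` still exist; `buildRelation` and `describe` are illustrative helpers, not Delta code.

```scala
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.execution.datasources.{LogicalRelation, LogicalRelationShims, LogicalRelationWithTable}
import org.apache.spark.sql.sources.BaseRelation

object RelationShimUsage {
  // Construct a LogicalRelation without naming the version-specific constructor arguments.
  def buildRelation(
      relation: BaseRelation,
      output: Seq[AttributeReference],
      catalogTable: Option[CatalogTable]): LogicalRelation =
    LogicalRelationShims.newInstance(relation, output, catalogTable, isStreaming = false)

  // Destructure a LogicalRelation the same way on both Spark lines via the backported extractor.
  def describe(plan: LogicalRelation): String = plan match {
    case LogicalRelationWithTable(_, Some(table)) => s"catalog table ${table.identifier}"
    case LogicalRelationWithTable(_, None) => "path-based relation"
  }
}
```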
diff --git a/spark/src/main/scala-spark-3.5/shims/MergeIntoMaterializeSourceShims.scala b/spark/src/main/scala-spark-3.5/shims/MergeIntoMaterializeSourceShims.scala
deleted file mode 100644
index 1e0658a47c0..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/MergeIntoMaterializeSourceShims.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.commands.merge
-
-import org.apache.spark.SparkException
-
-object MergeIntoMaterializeSourceShims {
-
- /** In Spark 3.5 we can only check for the error message :( */
- def mergeMaterializedSourceRddBlockLostError(e: SparkException, rddId: Int): Boolean = {
- e.getMessage.matches(s"(?s).*Checkpoint block rdd_${rddId}_[0-9]+ not found!.*")
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/RelocatedClassesShims.scala b/spark/src/main/scala-spark-3.5/shims/RelocatedClassesShims.scala
deleted file mode 100644
index f3ec11d1419..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/RelocatedClassesShims.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.{SparkSession => SparkSessionImpl, DataFrameWriter => DataFrameWriterImpl}
-
-object Relocated {
- type SparkSession = SparkSessionImpl
- def setActiveSession(session: SparkSession): Unit = SparkSessionImpl.setActiveSession(session)
- val dataFrameWriterClassName = classOf[DataFrameWriterImpl[_]].getCanonicalName
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/SqlScriptingLogicalOperatorsShims.scala b/spark/src/main/scala-spark-3.5/shims/SqlScriptingLogicalOperatorsShims.scala
deleted file mode 100644
index 044235116ab..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/SqlScriptingLogicalOperatorsShims.scala
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.parser
-
-// Handles a breaking change between Spark 3.5 and Spark Master (4.0).
-// `CompoundBody` is a new class in Spark 4.0.
-/**
- * Trait for all SQL Scripting logical operators that are product of parsing phase.
- * These operators will be used by the SQL Scripting interpreter to generate execution nodes.
- */
-sealed trait CompoundPlanStatement
-
-/**
- * Logical operator for a compound body. Contains all statements within the compound body.
- * @param collection Collection of statements within the compound body.
- */
-case class CompoundBody(collection: Seq[CompoundPlanStatement]) extends CompoundPlanStatement
diff --git a/spark/src/main/scala-spark-3.5/shims/TableSpecShims.scala b/spark/src/main/scala-spark-3.5/shims/TableSpecShims.scala
deleted file mode 100644
index e383ea300ad..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/TableSpecShims.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.catalyst.plans.logical.TableSpec
-
-object TableSpecUtils {
- def create(
- properties: Map[String, String],
- provider: Option[String],
- location: Option[String],
- comment: Option[String]): TableSpec = {
- TableSpec(
- properties = properties,
- provider = provider,
- options = Map.empty,
- location = location,
- comment = comment,
- serde = None,
- external = false)
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/TypeWideningShims.scala b/spark/src/main/scala-spark-3.5/shims/TypeWideningShims.scala
deleted file mode 100644
index ef2bdc32449..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/TypeWideningShims.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.catalyst.expressions.Cast
-import org.apache.spark.sql.types._
-
-/**
- * Type widening only supports a limited set of type changes with Spark 3.5 due to the parquet
- * readers lacking the corresponding conversions that were added in Spark 4.0.
- * This shim is for Delta on Spark 3.5 which supports:
- * - byte -> short -> int
- */
-object TypeWideningShims {
-
- /**
- * Returns whether the given type change is eligible for widening. This only checks atomic types.
- * It is the responsibility of the caller to recurse into structs, maps and arrays.
- */
- def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean =
- (fromType, toType) match {
- case (from, to) if from == to => true
- // All supported type changes below are supposed to be widening, but to be safe, reject any
- // non-widening change upfront.
- case (from, to) if !Cast.canUpCast(from, to) => false
- case (ByteType, ShortType) => true
- case (ByteType | ShortType, IntegerType) => true
- case _ => false
- }
-
- /**
- * Returns whether the given type change can be applied during schema evolution. Only a
- * subset of supported type changes are considered for schema evolution.
- */
- def isTypeChangeSupportedForSchemaEvolution(fromType: AtomicType, toType: AtomicType): Boolean = {
- // All supported type changes are eligible for schema evolution.
- isTypeChangeSupported(fromType, toType)
- }
-}
diff --git a/spark/src/main/scala-spark-3.5/shims/UnresolvedTableShims.scala b/spark/src/main/scala-spark-3.5/shims/UnresolvedTableShims.scala
deleted file mode 100644
index 85f26843899..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/UnresolvedTableShims.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-object UnresolvedTableImplicits {
-
- /**
- * Handles a breaking change in [[UnresolvedTable]] constructor between Spark 3.5 and 4.0:
- * - Spark 3.5: requires `relationTypeMismatchHint` param
- * - Spark 4.0: gets rid of `relationTypeMismatchHint` param
- */
- implicit class UnresolvedTableShim(self: UnresolvedTable.type) {
- def apply(
- tableNameParts: Seq[String],
- commandName: String): UnresolvedTable = {
- UnresolvedTable(tableNameParts, commandName, relationTypeMismatchHint = None)
- }
- }
-}
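
A minimal sketch of how the enrichment was used at call sites, assuming the implicit is still on the classpath; the table name and command shown are illustrative.

```scala
import org.apache.spark.sql.catalyst.analysis.UnresolvedTable
import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._

object UnresolvedTableShimUsage {
  // With the implicit in scope, this two-argument call compiles on both Spark lines;
  // on Spark 3.5 the enrichment fills in relationTypeMismatchHint = None.
  val target: UnresolvedTable = UnresolvedTable(Seq("main", "events"), "VACUUM")
}
```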
diff --git a/spark/src/main/scala-spark-3.5/shims/VariantShims.scala b/spark/src/main/scala-spark-3.5/shims/VariantShims.scala
deleted file mode 100644
index 6918c409dc1..00000000000
--- a/spark/src/main/scala-spark-3.5/shims/VariantShims.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.types
-
-object VariantShims {
-
- /**
- * Spark's variant type is implemented for Spark 4.0 and is not implemented in Spark 3.5. Thus,
- * any Spark 3.5 DataType cannot be a variant type.
- */
- def isVariantType(dt: DataType): Boolean = false
-}
diff --git a/spark/src/main/scala-spark-master/shims/ColumnDefinitionShims.scala b/spark/src/main/scala-spark-master/shims/ColumnDefinitionShims.scala
deleted file mode 100644
index 9b3b4f55715..00000000000
--- a/spark/src/main/scala-spark-master/shims/ColumnDefinitionShims.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.types.StructField
-
-object ColumnDefinitionShims {
-
- /**
- * Helps handle a breaking change in [[org.apache.spark.sql.catalyst.plans.logical.CreateTable]]
- * between Spark 3.5 and Spark 4.0:
- * - In 3.5, `CreateTable` accepts a `tableSchema: StructType`.
- * - In 4.0, `CreateTable` accepts a `columns: Seq[ColumnDefinition]`.
- */
- def parseColumns(columns: Seq[StructField], sqlParser: ParserInterface): Seq[ColumnDefinition] = {
- columns.map(ColumnDefinition.fromV1Column(_, sqlParser)).toSeq
- }
-}
diff --git a/spark/src/main/scala-spark-master/shims/CreatableRelationProviderShims.scala b/spark/src/main/scala-spark-master/shims/CreatableRelationProviderShims.scala
deleted file mode 100644
index 4720396c6a6..00000000000
--- a/spark/src/main/scala-spark-master/shims/CreatableRelationProviderShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.sources
-
-/**
- * Spark 4.0 added additional methods to `CreatableRelationProvider`, such as `supportsDataType`,
- * that can be overridden by child classes and need to be shimmed when compiling with Spark 3.5.
- */
-trait CreatableRelationProviderShim extends CreatableRelationProvider
diff --git a/spark/src/main/scala-spark-master/shims/DecimalPrecisionTypeCoercionShims.scala b/spark/src/main/scala-spark-master/shims/DecimalPrecisionTypeCoercionShims.scala
deleted file mode 100644
index a907179fdd2..00000000000
--- a/spark/src/main/scala-spark-master/shims/DecimalPrecisionTypeCoercionShims.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.catalyst.analysis.DecimalPrecisionTypeCoercion
-import org.apache.spark.sql.types.DecimalType
-
-object DecimalPrecisionTypeCoercionShims {
- // Returns the wider decimal type that's wider than both of them
- def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType =
- DecimalPrecisionTypeCoercion.widerDecimalType(d1, d2)
-}
diff --git a/spark/src/main/scala-spark-master/shims/DeltaInvariantCheckerExecShims.scala b/spark/src/main/scala-spark-master/shims/DeltaInvariantCheckerExecShims.scala
deleted file mode 100644
index 84b13f984f6..00000000000
--- a/spark/src/main/scala-spark-master/shims/DeltaInvariantCheckerExecShims.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.constraints
-
-import org.apache.spark.sql.catalyst.optimizer.{ReplaceExpressions, RewriteWithExpression}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.RuleExecutor
-
-trait DeltaInvariantCheckerOptimizerShims { self: RuleExecutor[LogicalPlan] =>
- val DELTA_INVARIANT_CHECKER_OPTIMIZER_BATCHES = Seq(
- Batch("Finish Analysis", Once, ReplaceExpressions),
- Batch("Rewrite With expression", Once, RewriteWithExpression)
- )
-}
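
A sketch, under the assumption that the shim is still available, of how a rule executor would pick up the shimmed batches; `ExampleInvariantOptimizer` is illustrative and not the real `DeltaInvariantCheckerOptimizer`.

```scala
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.delta.constraints.DeltaInvariantCheckerOptimizerShims

// Illustrative optimizer that runs the shimmed batches; on Spark 4.0 these include
// RewriteWithExpression, which does not exist in Spark 3.5.
object ExampleInvariantOptimizer
    extends RuleExecutor[LogicalPlan]
    with DeltaInvariantCheckerOptimizerShims {
  override protected def batches: Seq[Batch] = DELTA_INVARIANT_CHECKER_OPTIMIZER_BATCHES
}
```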
diff --git a/spark/src/main/scala-spark-master/shims/DeltaSqlParserShims.scala b/spark/src/main/scala-spark-master/shims/DeltaSqlParserShims.scala
deleted file mode 100644
index 5945fa91eec..00000000000
--- a/spark/src/main/scala-spark-master/shims/DeltaSqlParserShims.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.delta.sql.parser
-
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.types.StructType
-
-trait DeltaSqlParserShims extends ParserInterface {
- def delegate: ParserInterface
- override def parseRoutineParam(sqlText: String): StructType = delegate.parseRoutineParam(sqlText)
-}
diff --git a/spark/src/main/scala-spark-master/shims/DeltaTableValueFunctionsShims.scala b/spark/src/main/scala-spark-master/shims/DeltaTableValueFunctionsShims.scala
deleted file mode 100644
index f8ad0f668fd..00000000000
--- a/spark/src/main/scala-spark-master/shims/DeltaTableValueFunctionsShims.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.delta.util.AnalysisHelper
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime
-
-object DeltaTableValueFunctionsShims {
-
- /**
- * Handles a breaking change between Spark 3.5 and Spark Master (4.0).
- *
- * In Spark 4.0, SPARK-46331 [https://github.com/apache/spark/pull/44261] removed CodegenFallback
- * from a subset of DateTime expressions, making the `now()` expression unevaluable.
- */
- def evaluateTimeOption(value: Expression): String = {
- val fakePlan = AnalysisHelper.FakeLogicalPlan(Seq(value), Nil)
- val timestampExpression = ComputeCurrentTime(fakePlan).expressions.head
- timestampExpression.eval().toString
- }
-}
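
A sketch of the evaluation path the shim provided, assuming a tree from before this change; the expressions passed in are illustrative.

```scala
import org.apache.spark.sql.catalyst.expressions.{CurrentTimestamp, Literal}
import org.apache.spark.sql.delta.DeltaTableValueFunctionsShims

object TimeOptionShimUsage {
  // A plain literal bound evaluates directly...
  val fixedBound: String = DeltaTableValueFunctionsShims.evaluateTimeOption(Literal("2024-01-01"))
  // ...while now() is unevaluable on Spark 4.0 until ComputeCurrentTime folds it to a literal,
  // which is what the shim's FakeLogicalPlan detour arranges.
  val currentBound: String = DeltaTableValueFunctionsShims.evaluateTimeOption(CurrentTimestamp())
}
```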
diff --git a/spark/src/main/scala-spark-master/shims/DeltaThrowableHelperShims.scala b/spark/src/main/scala-spark-master/shims/DeltaThrowableHelperShims.scala
deleted file mode 100644
index a8906ff1784..00000000000
--- a/spark/src/main/scala-spark-master/shims/DeltaThrowableHelperShims.scala
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.SparkThrowable
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.errors.QueryCompilationErrors
-
-object DeltaThrowableHelperShims {
- /**
- * Handles a breaking change (SPARK-46810) between Spark 3.5 and Spark Master (4.0) where
- * `error-classes.json` was renamed to `error-conditions.json`.
- */
- val SPARK_ERROR_CLASS_SOURCE_FILE = "error/error-conditions.json"
-
- def showColumnsWithConflictDatabasesError(
- db: Seq[String], v1TableName: TableIdentifier): Throwable = {
- QueryCompilationErrors.showColumnsWithConflictNamespacesError(
- namespaceA = db,
- namespaceB = v1TableName.database.get :: Nil)
- }
-}
-
-trait DeltaThrowableConditionShim extends SparkThrowable {
- override def getCondition(): String = getErrorClass()
- override def getErrorClass(): String
-}
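
A sketch of the bridging pattern, assuming the shim is still present; `ExampleDeltaShimError` is a hypothetical exception type, not a Delta class.

```scala
import org.apache.spark.sql.delta.DeltaThrowableConditionShim

// Implementing getErrorClass is enough: the Spark 4.0 getCondition API is bridged back to it
// by the shim, so the same exception hierarchy works on both Spark lines.
class ExampleDeltaShimError(message: String)
    extends Exception(message)
    with DeltaThrowableConditionShim {
  override def getErrorClass(): String = "DELTA_EXAMPLE_ERROR"
}
```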
diff --git a/spark/src/main/scala-spark-master/shims/DeltaTimeTravelSpecShims.scala b/spark/src/main/scala-spark-master/shims/DeltaTimeTravelSpecShims.scala
deleted file mode 100644
index 9fcfb0e956a..00000000000
--- a/spark/src/main/scala-spark-master/shims/DeltaTimeTravelSpecShims.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.SparkSession
-
-object DeltaTimeTravelSpecShims {
-
- /**
- * Ensures only a single time travel syntax is used (i.e. not version AND timestamp).
- *
- * Handles a breaking change between Spark 3.5 and 4.0 which added support for
- * DataFrame-based time travel in Spark (https://github.com/apache/spark/pull/43403).
- *
- * TLDR: Starting in Spark 4.0, we end up with two time travel specifications in DeltaTableV2 if
- * options are used to specify the time travel version/timestamp. This breaks an existing check we
- * had (against Spark 3.5) which ensures only one time travel specification is used.
- *
- * The solution to get around this is just to ignore two specs if they are the same. If the user
- * did actually provide two different time travel specs, that would have been caught by Spark
- * earlier.
- *
- * @param currSpecOpt: The table's current [[DeltaTimeTravelSpec]]
- * @param newSpecOpt: The new [[DeltaTimeTravelSpec]] to be applied to the table
- */
- def validateTimeTravelSpec(
- spark: SparkSession,
- currSpecOpt: Option[DeltaTimeTravelSpec],
- newSpecOpt: Option[DeltaTimeTravelSpec]): Unit = (currSpecOpt, newSpecOpt) match {
- case (Some(currSpec), Some(newSpec))
- if currSpec.version != newSpec.version ||
- currSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) !=
- newSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) =>
- throw DeltaErrors.multipleTimeTravelSyntaxUsed
- case _ =>
- }
-}
diff --git a/spark/src/main/scala-spark-master/shims/IncrementalExecutionShims.scala b/spark/src/main/scala-spark-master/shims/IncrementalExecutionShims.scala
deleted file mode 100644
index 21f98a6acea..00000000000
--- a/spark/src/main/scala-spark-master/shims/IncrementalExecutionShims.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.streaming
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.classic.ClassicConversions._
-
-object IncrementalExecutionShims {
-
- /**
- * Handles a breaking change in the [[IncrementalExecution]] constructor between Spark 3.5 and
- * 4.0:
- * - Spark 3.5: no `isFirstBatch: Boolean` param
- * - Spark 4.0: adds `isFirstBatch: Boolean` param
- */
- def newInstance(
- sparkSession: SparkSession,
- logicalPlan: LogicalPlan,
- incrementalExecution: IncrementalExecution): IncrementalExecution = new IncrementalExecution(
- sparkSession,
- logicalPlan,
- incrementalExecution.outputMode,
- incrementalExecution.checkpointLocation,
- incrementalExecution.queryId,
- incrementalExecution.runId,
- incrementalExecution.currentBatchId,
- incrementalExecution.prevOffsetSeqMetadata,
- incrementalExecution.offsetSeqMetadata,
- incrementalExecution.watermarkPropagator,
- incrementalExecution.isFirstBatch // Spark 4.0 API
- )
-}
diff --git a/spark/src/main/scala-spark-master/shims/LogKeyShims.scala b/spark/src/main/scala-spark-master/shims/LogKeyShims.scala
deleted file mode 100644
index eab10fbf6d9..00000000000
--- a/spark/src/main/scala-spark-master/shims/LogKeyShims.scala
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * This file contains code from the Apache Spark project (original license above).
- * It contains modifications, which are licensed as follows:
- */
-
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.internal
-
-/**
- * All structured logging `keys` used in `MDC` must extend `LogKey`.
- *
- * `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs.
- * Follow these guidelines when adding a new LogKey:
- *
- *   - Define all structured logging keys in `DeltaLogKeys.scala`, and sort them alphabetically
- *     for ease of search.
- *   - Use `UPPER_SNAKE_CASE` for key names.
- *   - Key names should be both simple and broad, yet include specific identifiers like
- *     `STAGE_ID`, `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use
- *     `MAX_ATTEMPTS` as a general key instead of creating separate keys for each scenario such
- *     as `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`. This balances simplicity
- *     with the detail needed for effective logging.
- *   - Use abbreviations in names if they are widely understood, such as `APP_ID` for
- *     APPLICATION_ID and `K8S` for KUBERNETES.
- *   - For time-related keys, use milliseconds as the unit of time.
- */
-trait LogKeyShims extends LogKey
diff --git a/spark/src/main/scala-spark-master/shims/LoggingShims.scala b/spark/src/main/scala-spark-master/shims/LoggingShims.scala
deleted file mode 100644
index c20133d5a04..00000000000
--- a/spark/src/main/scala-spark-master/shims/LoggingShims.scala
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * This file contains code from the Apache Spark project (original license above).
- * It contains modifications, which are licensed as follows:
- */
-
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.internal
-
-/**
- * Guidelines for the Structured Logging Framework - Scala Logging
- *
- * Use the `org.apache.spark.internal.Logging` trait for logging in Scala code.
- *
- * Logging messages with variables:
- * When logging a message with variables, wrap all the variables with `MDC`s and they will be
- * automatically added to the Mapped Diagnostic Context (MDC).
- * This allows for structured logging and better log analysis.
- *
- *   logInfo(log"Trying to recover app: ${MDC(DeltaLogKeys.APP_ID, app.id)}")
- *
- * Constant string messages:
- * If you are logging a constant string message, use the log methods that accept a constant
- * string.
- *
- *   logInfo("StateStore stopped")
- *
- * Exceptions:
- * To ensure logs are compatible with Spark SQL and log analysis tools, avoid
- * `Exception.printStackTrace()`. Use the `logError`, `logWarning`, and `logInfo` methods from
- * the `Logging` trait to log exceptions, maintaining structured and parsable logs.
- *
- * If you want to output logs in Scala code through the structured logging framework,
- * you can define a custom `LogKey` and use it as follows:
- *
- *   // To add a custom `LogKey`, implement `LogKey`
- *   case object CUSTOM_LOG_KEY extends LogKey
- *   import org.apache.spark.internal.MDC
- *   logInfo(log"${MDC(CUSTOM_LOG_KEY, "key")}")
- */
-trait LoggingShims extends Logging
diff --git a/spark/src/main/scala-spark-master/shims/LogicalRelationShims.scala b/spark/src/main/scala-spark-master/shims/LogicalRelationShims.scala
deleted file mode 100644
index 54caef60722..00000000000
--- a/spark/src/main/scala-spark-master/shims/LogicalRelationShims.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources
-
-import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.sources.BaseRelation
-
-object LogicalRelationShims {
- /**
- * Handles a breaking change in the [[LogicalRelation]] constructor between Spark 3.5 and
- * 4.0:
- * - Spark 3.5: no `stream: Option[SparkDataStream]` param
- * - Spark 4.0: adds `stream: Option[SparkDataStream]` param
- */
- def newInstance(
- relation: BaseRelation,
- output: Seq[AttributeReference],
- catalogTable: Option[CatalogTable],
- isStreaming: Boolean): LogicalRelation = {
- LogicalRelation(relation, output, catalogTable, isStreaming, stream = None)
- }
-}
diff --git a/spark/src/main/scala-spark-master/shims/MergeIntoMaterializeSourceShims.scala b/spark/src/main/scala-spark-master/shims/MergeIntoMaterializeSourceShims.scala
deleted file mode 100644
index b9452385643..00000000000
--- a/spark/src/main/scala-spark-master/shims/MergeIntoMaterializeSourceShims.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.commands.merge
-
-import org.apache.spark.SparkException
-
-object MergeIntoMaterializeSourceShims {
-
- /** In Spark 4.0+ we can check the error class, which is more stable. */
- def mergeMaterializedSourceRddBlockLostError(e: SparkException, rddId: Int): Boolean = {
- e.getErrorClass == "CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND" &&
- e.getMessageParameters.get("rddBlockId").contains(s"rdd_${rddId}")
- }
-}
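
A sketch of how a caller would use either shim to decide on a retry, assuming a tree from before this change; `shouldRetry` is an illustrative simplification of the kind of check the merge source materialization code performs.

```scala
import org.apache.spark.SparkException
import org.apache.spark.sql.delta.commands.merge.MergeIntoMaterializeSourceShims

object MaterializedSourceRetrySketch {
  // Decide whether a MERGE failure was caused by a lost block of the materialized source RDD;
  // a caller would then retry with a fresh materialization instead of surfacing the error.
  def shouldRetry(error: Throwable, materializedRddId: Int): Boolean = error match {
    case se: SparkException =>
      MergeIntoMaterializeSourceShims.mergeMaterializedSourceRddBlockLostError(se, materializedRddId)
    case _ => false
  }
}
```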
diff --git a/spark/src/main/scala-spark-master/shims/RelocatedClassesShims.scala b/spark/src/main/scala-spark-master/shims/RelocatedClassesShims.scala
deleted file mode 100644
index 86b9d549205..00000000000
--- a/spark/src/main/scala-spark-master/shims/RelocatedClassesShims.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.classic.{SparkSession => SparkSessionImpl, DataFrameWriter => DataFrameWriterImpl}
-
-object Relocated {
- type SparkSession = SparkSessionImpl
- def setActiveSession(session: SparkSession): Unit = SparkSessionImpl.setActiveSession(session)
- val dataFrameWriterClassName = classOf[DataFrameWriterImpl[_]].getCanonicalName
-}
diff --git a/spark/src/main/scala-spark-master/shims/TableSpecShims.scala b/spark/src/main/scala-spark-master/shims/TableSpecShims.scala
deleted file mode 100644
index ded2daa7b8c..00000000000
--- a/spark/src/main/scala-spark-master/shims/TableSpecShims.scala
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.catalyst.plans.logical.TableSpec
-
-object TableSpecUtils {
- def create(
- properties: Map[String, String],
- provider: Option[String],
- location: Option[String],
- comment: Option[String]): TableSpec = {
- TableSpec(
- properties = properties,
- provider = provider,
- options = Map.empty,
- location = location,
- comment = comment,
- collation = None,
- serde = None,
- external = false)
- }
-}
diff --git a/spark/src/main/scala-spark-master/shims/TypeWideningShims.scala b/spark/src/main/scala-spark-master/shims/TypeWideningShims.scala
deleted file mode 100644
index 911123e09cb..00000000000
--- a/spark/src/main/scala-spark-master/shims/TypeWideningShims.scala
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.catalyst.expressions.Cast
-import org.apache.spark.sql.types._
-
-/**
- * Type widening only supports a limited set of type changes with Spark 3.5 due to the parquet
- * readers lacking the corresponding conversions that were added in Spark 4.0.
- * This shim is for Delta on Spark 4.0 which supports:
- * - byte -> short -> int -> long.
- * - float -> double.
- * - date -> timestamp_ntz.
- * - {byte, short, int} -> double.
- * - decimal -> wider decimal.
- * - {byte, short, int} -> decimal(10, 0) and wider.
- * - long -> decimal(20, 0) and wider.
- */
-object TypeWideningShims {
-
- /**
- * Returns whether the given type change is eligible for widening. This only checks atomic types.
- * It is the responsibility of the caller to recurse into structs, maps and arrays.
- */
- def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean =
- (fromType, toType) match {
- case (from, to) if from == to => true
- // All supported type changes below are supposed to be widening, but to be safe, reject any
- // non-widening change upfront.
- case (from, to) if !Cast.canUpCast(from, to) => false
- case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize
- case (FloatType, DoubleType) => true
- case (DateType, TimestampNTZType) => true
- case (ByteType | ShortType | IntegerType, DoubleType) => true
- case (from: DecimalType, to: DecimalType) => to.isWiderThan(from)
- // Byte, Short, Integer are all stored as INT32 in parquet. The parquet readers support
- // converting INT32 to Decimal(10, 0) and wider.
- case (ByteType | ShortType | IntegerType, d: DecimalType) => d.isWiderThan(IntegerType)
- // The parquet readers support converting INT64 to Decimal(20, 0) and wider.
- case (LongType, d: DecimalType) => d.isWiderThan(LongType)
- case _ => false
- }
-
- /**
- * Returns whether the given type change can be applied during schema evolution. Only a
- * subset of supported type changes are considered for schema evolution.
- */
- def isTypeChangeSupportedForSchemaEvolution(fromType: AtomicType, toType: AtomicType): Boolean =
- (fromType, toType) match {
- case (from, to) if from == to => true
- case (from, to) if !isTypeChangeSupported(from, to) => false
- case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize
- case (FloatType, DoubleType) => true
- case (from: DecimalType, to: DecimalType) => to.isWiderThan(from)
- case (DateType, TimestampNTZType) => true
- case _ => false
- }
-}
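
A sketch contrasting the two shims' behavior, assuming Spark types on the classpath; the specific type pairs shown are illustrative.

```scala
import org.apache.spark.sql.types.{IntegerType, LongType, ShortType}
import org.apache.spark.sql.delta.TypeWideningShims

object TypeWideningShimUsage {
  // Supported by both shims: byte -> short -> int.
  val shortToInt: Boolean = TypeWideningShims.isTypeChangeSupported(ShortType, IntegerType)
  // Only the Spark 4.0 shim accepts int -> long (along with float -> double, decimal widening, ...).
  val intToLong: Boolean = TypeWideningShims.isTypeChangeSupported(IntegerType, LongType)
}
```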
diff --git a/spark/src/main/scala-spark-master/shims/UnresolvedTableShims.scala b/spark/src/main/scala-spark-master/shims/UnresolvedTableShims.scala
deleted file mode 100644
index 3521f15c456..00000000000
--- a/spark/src/main/scala-spark-master/shims/UnresolvedTableShims.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-object UnresolvedTableImplicits {
-
- /**
- * Handles a breaking change in [[UnresolvedTable]] constructor between Spark 3.5 and 4.0:
- * - Spark 3.5: requires `relationTypeMismatchHint` param
- * - Spark 4.0: gets rid of `relationTypeMismatchHint` param
- */
- implicit class UnresolvedTableShim(self: UnresolvedTable.type) {
- def apply(
- tableNameParts: Seq[String],
- commandName: String): UnresolvedTable = {
- UnresolvedTable(tableNameParts, commandName)
- }
- }
-}
diff --git a/spark/src/main/scala-spark-master/shims/VariantShims.scala b/spark/src/main/scala-spark-master/shims/VariantShims.scala
deleted file mode 100644
index 7ea201fdd20..00000000000
--- a/spark/src/main/scala-spark-master/shims/VariantShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.types
-
-object VariantShims {
-
- /** Spark's variant type is only implemented in Spark 4.0 and above. */
- def isVariantType(dt: DataType): Boolean = dt.isInstanceOf[VariantType]
-}
diff --git a/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala b/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala
index 173ab2eaa81..2dcea9e6d36 100644
--- a/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala
+++ b/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala
@@ -58,7 +58,6 @@ import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._
import org.apache.spark.sql.catalyst.parser.{ParseErrorListener, ParseException, ParserInterface}
import org.apache.spark.sql.catalyst.parser.ParserUtils.{checkDuplicateClauses, string, withOrigin}
import org.apache.spark.sql.catalyst.plans.logical.{AlterColumnSyncIdentity, AlterTableAddConstraint, AlterTableDropConstraint, AlterTableDropFeature, CloneTableStatement, LogicalPlan, RestoreTableStatement}
@@ -73,8 +72,7 @@ import org.apache.spark.sql.types._
* forward the call to `delegate`.
*/
class DeltaSqlParser(val delegate: ParserInterface)
- extends ParserInterface
- with DeltaSqlParserShims {
+ extends ParserInterface {
private val builder = new DeltaSqlAstBuilder
private val substitution = new VariableSubstitution
@@ -157,6 +155,8 @@ class DeltaSqlParser(val delegate: ParserInterface)
override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText)
override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText)
+
+ override def parseRoutineParam(sqlText: String): StructType = delegate.parseRoutineParam(sqlText)
}
/**
diff --git a/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala b/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala
index 3b24fb92897..fd2733a9428 100644
--- a/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala
+++ b/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala
@@ -18,7 +18,7 @@ package io.delta.tables
import scala.collection.mutable
-import org.apache.spark.sql.delta.{DeltaErrors, DeltaTableUtils, TableSpecUtils}
+import org.apache.spark.sql.delta.{DeltaErrors, DeltaTableUtils}
import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession
import org.apache.spark.sql.delta.sources.DeltaSQLConf
import io.delta.tables.execution._
@@ -26,7 +26,7 @@ import io.delta.tables.execution._
import org.apache.spark.annotation._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.plans.logical.{ColumnDefinitionShims, CreateTable, ReplaceTable}
+import org.apache.spark.sql.catalyst.plans.logical.{ColumnDefinition, CreateTable, ReplaceTable}
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.execution.SQLExecution
@@ -357,19 +357,22 @@ class DeltaTableBuilder private[tables](
DeltaTableUtils.parseColsToClusterByTransform(colNames)
})
- val tableSpec = TableSpecUtils.create(
+ val tableSpec = org.apache.spark.sql.catalyst.plans.logical.TableSpec(
properties = properties,
provider = Some(FORMAT_NAME),
+ options = Map.empty,
location = location,
- comment = tblComment)
+ comment = tblComment,
+ collation = None,
+ serde = None,
+ external = false)
val stmt = builderOption match {
case CreateTableOptions(ifNotExists) =>
val unresolvedTable = org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier(table)
CreateTable(
unresolvedTable,
- // Callout: Spark 3.5 returns StructType, Spark 4.0 returns Seq[ColumnDefinition]
- ColumnDefinitionShims.parseColumns(columns.toSeq, spark.sessionState.sqlParser),
+ columns.map(ColumnDefinition.fromV1Column(_, spark.sessionState.sqlParser)).toSeq,
partitioning,
tableSpec,
ifNotExists)
@@ -377,8 +380,7 @@ class DeltaTableBuilder private[tables](
val unresolvedTable = org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier(table)
ReplaceTable(
unresolvedTable,
- // Callout: Spark 3.5 returns StructType, Spark 4.0 returns Seq[ColumnDefinition]
- ColumnDefinitionShims.parseColumns(columns.toSeq, spark.sessionState.sqlParser),
+ columns.map(ColumnDefinition.fromV1Column(_, spark.sessionState.sqlParser)).toSeq,
partitioning,
tableSpec,
orCreate)
diff --git a/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala b/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala
index d2f9d688e80..e2acc18499d 100644
--- a/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala
+++ b/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala
@@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedTable
-import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._
import org.apache.spark.sql.delta.catalog.DeltaTableV2
import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier, DeltaTableUtils, UnresolvedDeltaPathOrIdentifier}
import org.apache.spark.sql.delta.commands.DeltaCommand
diff --git a/spark/src/main/scala-spark-master/shims/ColumnConversionShims.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ClassicColumnConversions.scala
similarity index 100%
rename from spark/src/main/scala-spark-master/shims/ColumnConversionShims.scala
rename to spark/src/main/scala/org/apache/spark/sql/delta/ClassicColumnConversions.scala
diff --git a/spark/src/main/scala-spark-master/shims/DataFrameShims.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DataFrameUtils.scala
similarity index 100%
rename from spark/src/main/scala-spark-master/shims/DataFrameShims.scala
rename to spark/src/main/scala/org/apache/spark/sql/delta/DataFrameUtils.scala
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala
index 324867ce4d1..14a78f295eb 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala
@@ -66,7 +66,7 @@ import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTrans
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.command.CreateTableLikeCommand
import org.apache.spark.sql.execution.command.RunnableCommand
-import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, LogicalRelationShims, LogicalRelationWithTable}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, LogicalRelationWithTable}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.execution.streaming.StreamingRelation
@@ -622,7 +622,7 @@ class DeltaAnalysis(session: SparkSession)
val v1TableName = child.identifier.asTableIdentifier
namespace.foreach { ns =>
if (v1TableName.database.exists(!resolver(_, ns.head))) {
- throw DeltaThrowableHelperShims.showColumnsWithConflictDatabasesError(ns, v1TableName)
+ throw DeltaThrowableHelper.showColumnsWithConflictDatabasesError(ns, v1TableName)
}
}
ShowDeltaTableColumnsCommand(child)
@@ -1397,7 +1397,7 @@ object DeltaRelation extends DeltaLogging {
} else {
v2Relation.output
}
- LogicalRelationShims.newInstance(relation, output, d.ttSafeCatalogTable, isStreaming = false)
+ LogicalRelation(relation, output, d.ttSafeCatalogTable, isStreaming = false, stream = None)
}
}
}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala
index 2b8a8549735..81e95dfc440 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala
@@ -20,7 +20,7 @@ import org.apache.spark.sql.delta.logging.DeltaLogKeys
import org.apache.spark.sql.delta.util.FileNames
import org.apache.hadoop.fs._
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.datasources.{FileFormat, FileIndex, PartitionDirectory}
@@ -40,7 +40,7 @@ case class DeltaLogFileIndex private (
format: FileFormat,
files: Array[FileStatus])
extends FileIndex
- with LoggingShims {
+ with Logging {
import DeltaLogFileIndex._
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala
index d2f204435bf..d92a07fba5f 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala
@@ -34,7 +34,7 @@ import org.apache.hadoop.mapreduce.Job
import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.parquet.hadoop.util.ContextUtil
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.FileSourceConstantMetadataStructField
@@ -63,7 +63,7 @@ abstract class DeltaParquetFileFormatBase(
protected val optimizationsEnabled: Boolean = true,
protected val tablePath: Option[String] = None,
protected val isCDCRead: Boolean = false)
- extends ParquetFileFormat with LoggingShims {
+ extends ParquetFileFormat with Logging {
// Validate either we have all arguments for DV enabled read or none of them.
if (hasTablePath) {
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala
index 1811c2fe7ee..f64391ec80b 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala
@@ -27,11 +27,10 @@ import org.apache.spark.sql.delta.skipping.clustering.temp.{ClusterByTransform =
import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf}
import org.apache.hadoop.fs.{FileSystem, Path}
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, UnresolvedLeafNode, UnresolvedTable}
-import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala
index 47b0bbbd778..6f210cff1a2 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala
@@ -111,7 +111,10 @@ trait CDCStatementBase extends DeltaTableValueFunction {
protected def getOptions: CaseInsensitiveStringMap = {
def toDeltaOption(keyPrefix: String, value: Expression): (String, String) = {
val evaluated = try {
- DeltaTableValueFunctionsShims.evaluateTimeOption(value)
+ val fakePlan = util.AnalysisHelper.FakeLogicalPlan(Seq(value), Nil)
+ val timestampExpression =
+ org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime(fakePlan).expressions.head
+ timestampExpression.eval().toString
} catch {
case _: NullPointerException => throw DeltaErrors.nullRangeBoundaryInCDCRead()
}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala
index 020a7f586bb..9e9bc74c9d5 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala
@@ -21,7 +21,10 @@ import org.apache.spark.SparkThrowable
/**
* The trait for all exceptions of Delta code path.
*/
-trait DeltaThrowable extends SparkThrowable with DeltaThrowableConditionShim {
+trait DeltaThrowable extends SparkThrowable {
+
+ override def getCondition(): String = getErrorClass()
+
// Portable error identifier across SQL engines
// If null, error class or SQLSTATE is not set
override def getSqlState: String =
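Note: getCondition() is the Spark 4.0 name for what Spark 3.5 exposed as getErrorClass(); the one-liner above keeps both accessors in sync without the removed shim. A hypothetical illustration (the error class name is made up):

    import org.apache.spark.SparkThrowable

    class ExampleDeltaException extends Exception("example") with SparkThrowable {
      override def getErrorClass(): String = "DELTA_EXAMPLE_CONDITION" // hypothetical error class
      override def getCondition(): String = getErrorClass()
    }

    // Both getErrorClass() and getCondition() report the same identifier for this exception.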
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala
index d2455f0bbc3..e6fb1d89a1b 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala
@@ -22,7 +22,9 @@ import java.net.URL
import scala.collection.JavaConverters._
-import org.apache.spark.sql.delta.DeltaThrowableHelperShims._
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.SparkThrowable
import org.apache.spark.ErrorClassesJsonReader
import org.apache.spark.util.Utils
@@ -33,6 +35,19 @@ import org.apache.spark.util.Utils
*/
object DeltaThrowableHelper
{
+ /**
+ * Handles a breaking change (SPARK-46810) between Spark 3.5 and Spark Master (4.0) where
+ * `error-classes.json` was renamed to `error-conditions.json`.
+ */
+ val SPARK_ERROR_CLASS_SOURCE_FILE = "error/error-conditions.json"
+
+ def showColumnsWithConflictDatabasesError(
+ db: Seq[String], v1TableName: TableIdentifier): Throwable = {
+ QueryCompilationErrors.showColumnsWithConflictNamespacesError(
+ namespaceA = db,
+ namespaceB = v1TableName.database.get :: Nil)
+ }
+
/**
* Try to find the error class source file and throw an exception if it is not found.
*/
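Note: the new helper mirrors the removed shim by delegating straight to Spark's QueryCompilationErrors. A usage sketch with made-up database and table names, assuming the call site can see org.apache.spark.sql.delta.DeltaThrowableHelper:

    import org.apache.spark.sql.catalyst.TableIdentifier

    // SHOW COLUMNS was given namespace "db_a" but the table identifier resolves to "db_b".
    val conflict: Throwable = DeltaThrowableHelper.showColumnsWithConflictDatabasesError(
      db = Seq("db_a"),
      v1TableName = TableIdentifier("tbl", Some("db_b")))
    // Callers are expected to `throw conflict`.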
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala b/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala
index 122c47c7b4a..1f6b1c699ea 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.delta.logging.DeltaLogKeys
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.spark.sql.delta.sources.DeltaSQLConf
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.sql.SparkSession
/**
@@ -43,7 +43,7 @@ import org.apache.spark.sql.SparkSession
* Accessed by the thread materializing files and by the thread releasing resources after execution.
*
*/
-class FileMetadataMaterializationTracker extends LoggingShims {
+class FileMetadataMaterializationTracker extends Logging {
/** The number of permits allocated from the global file materialization semaphore */
@volatile private var numPermitsFromSemaphore: Int = 0
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala b/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala
index fb2bc51b449..17e67cde07f 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala
@@ -211,7 +211,7 @@ object GeneratedColumn extends DeltaLogging with AnalysisHelper {
// Generated columns cannot be variant types because the writer must be able to enforce that
// the <=> . Variants are currently not comparable so
// this condition is impossible to enforce.
- if (VariantShims.isVariantType(c.dataType)) {
+ if (c.dataType.isInstanceOf[VariantType]) {
throw DeltaErrors.generatedColumnsUnsupportedType(c.dataType)
}
}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala
index 47e904a9bfe..f6a4c391700 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.delta
import org.apache.spark.sql.delta.actions.{AddFile, Metadata, Protocol, TableFeatureProtocolUtils}
import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.functions.{col, lit}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
@@ -66,13 +67,38 @@ object TypeWidening {
/**
* Returns whether the given type change is eligible for widening. This only checks atomic types.
* It is the responsibility of the caller to recurse into structs, maps and arrays.
+ *
+ * Type widening supports:
+ * - byte -> short -> int -> long.
+ * - float -> double.
+ * - date -> timestamp_ntz.
+ * - {byte, short, int} -> double.
+ * - decimal -> wider decimal.
+ * - {byte, short, int} -> decimal(10, 0) and wider.
+ * - long -> decimal(20, 0) and wider.
*/
def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean =
- TypeWideningShims.isTypeChangeSupported(fromType = fromType, toType = toType)
+ (fromType, toType) match {
+ case (from, to) if from == to => true
+ // All supported type changes below are supposed to be widening, but to be safe, reject any
+ // non-widening change upfront.
+ case (from, to) if !Cast.canUpCast(from, to) => false
+ case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize
+ case (FloatType, DoubleType) => true
+ case (DateType, TimestampNTZType) => true
+ case (ByteType | ShortType | IntegerType, DoubleType) => true
+ case (from: DecimalType, to: DecimalType) => to.isWiderThan(from)
+ // Byte, Short, Integer are all stored as INT32 in parquet. The parquet readers support
+ // converting INT32 to Decimal(10, 0) and wider.
+ case (ByteType | ShortType | IntegerType, d: DecimalType) => d.isWiderThan(IntegerType)
+ // The parquet readers support converting INT64 to Decimal(20, 0) and wider.
+ case (LongType, d: DecimalType) => d.isWiderThan(LongType)
+ case _ => false
+ }
def isTypeChangeSupported(
fromType: AtomicType, toType: AtomicType, uniformIcebergCompatibleOnly: Boolean): Boolean =
- TypeWideningShims.isTypeChangeSupported(fromType = fromType, toType = toType) &&
+ isTypeChangeSupported(fromType, toType) &&
(!uniformIcebergCompatibleOnly ||
isTypeChangeSupportedByIceberg(fromType = fromType, toType = toType))
@@ -83,14 +109,22 @@ object TypeWidening {
def isTypeChangeSupportedForSchemaEvolution(
fromType: AtomicType,
toType: AtomicType,
- uniformIcebergCompatibleOnly: Boolean): Boolean =
- TypeWideningShims.isTypeChangeSupportedForSchemaEvolution(
- fromType = fromType,
- toType = toType
- ) && (
+ uniformIcebergCompatibleOnly: Boolean): Boolean = {
+ val supportedForSchemaEvolution = (fromType, toType) match {
+ case (from, to) if from == to => true
+ case (from, to) if !isTypeChangeSupported(from, to) => false
+ case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize
+ case (FloatType, DoubleType) => true
+ case (from: DecimalType, to: DecimalType) => to.isWiderThan(from)
+ case (DateType, TimestampNTZType) => true
+ case _ => false
+ }
+
+ supportedForSchemaEvolution && (
!uniformIcebergCompatibleOnly ||
isTypeChangeSupportedByIceberg(fromType = fromType, toType = toType)
)
+ }
/**
* Returns whether the given type change is supported by Iceberg, and by extension can be read
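Note: with the shim gone, the supported widenings listed in the scaladoc are encoded directly in the pattern match above. A few illustrative checks (assuming access to org.apache.spark.sql.delta.TypeWidening; expected results follow the rules as written):

    import org.apache.spark.sql.types._

    TypeWidening.isTypeChangeSupported(ByteType, LongType)                    // true: byte -> long
    TypeWidening.isTypeChangeSupported(IntegerType, DoubleType)               // true: int -> double
    TypeWidening.isTypeChangeSupported(LongType, DecimalType(20, 0))          // true: long -> decimal(20, 0)
    TypeWidening.isTypeChangeSupported(LongType, IntegerType)                 // false: narrowing
    TypeWidening.isTypeChangeSupported(DecimalType(10, 2), DecimalType(5, 2)) // false: target is not wider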
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala
index 38601df0862..d38dd19e535 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala
@@ -16,7 +16,7 @@
package org.apache.spark.sql.delta
-import org.apache.spark.sql.delta.DecimalPrecisionTypeCoercionShims
+import org.apache.spark.sql.catalyst.analysis.DecimalPrecisionTypeCoercion
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.spark.sql.delta.sources.DeltaSQLConf.AllowAutomaticWideningMode
import org.apache.spark.sql.util.ScalaExtensions._
@@ -124,7 +124,7 @@ object TypeWideningMode {
case (l, r) if TypeWidening.isTypeChangeSupported(l, r) => Some(r)
case (l, r) if TypeWidening.isTypeChangeSupported(r, l) => Some(l)
case (l: DecimalType, r: DecimalType) =>
- val wider = DecimalPrecisionTypeCoercionShims.widerDecimalType(l, r)
+ val wider = DecimalPrecisionTypeCoercion.widerDecimalType(l, r)
Option.when(
TypeWidening.isTypeChangeSupported(l, wider) &&
TypeWidening.isTypeChangeSupported(r, wider))(wider)
@@ -149,7 +149,7 @@ object TypeWideningMode {
case (l, r) if typeChangeSupported(l, r) => Some(r)
case (l, r) if typeChangeSupported(r, l) => Some(l)
case (l: DecimalType, r: DecimalType) =>
- val wider = DecimalPrecisionTypeCoercionShims.widerDecimalType(l, r)
+ val wider = DecimalPrecisionTypeCoercion.widerDecimalType(l, r)
Option.when(typeChangeSupported(l, wider) && typeChangeSupported(r, wider))(wider)
case _ => None
}
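Note: DecimalPrecisionTypeCoercion.widerDecimalType merges two decimal types before the result is re-checked against the supported widenings. A rough example, assuming Spark's usual rule (max integral digits plus max scale):

    import org.apache.spark.sql.catalyst.analysis.DecimalPrecisionTypeCoercion
    import org.apache.spark.sql.types.DecimalType

    val wider = DecimalPrecisionTypeCoercion.widerDecimalType(DecimalType(10, 2), DecimalType(12, 0))
    // Expected: DecimalType(14, 2) -- 12 integral digits from the second type, 2 fractional from the first.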
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala
index 0f5baea0303..1e04ea7e41d 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala
@@ -38,7 +38,6 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{DataFrame, Dataset, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{ResolvedTable, UnresolvedTable}
-import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils}
import org.apache.spark.sql.catalyst.plans.logical.{AnalysisHelper, LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
@@ -49,7 +48,7 @@ import org.apache.spark.sql.connector.catalog.V1Table
import org.apache.spark.sql.connector.expressions._
import org.apache.spark.sql.connector.write.{LogicalWriteInfo, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1Write, WriteBuilder}
import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.execution.datasources.{LogicalRelation, LogicalRelationShims}
+import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.{BaseRelation, Filter, InsertableRelation}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -328,8 +327,12 @@ class DeltaTableV2 private(
/** Creates a [[LogicalRelation]] that represents this table */
lazy val toLogicalRelation: LogicalRelation = {
val relation = this.toBaseRelation
- LogicalRelationShims.newInstance(
- relation, toAttributes(relation.schema), ttSafeCatalogTable, isStreaming = false)
+ LogicalRelation(
+ relation,
+ toAttributes(relation.schema),
+ ttSafeCatalogTable,
+ isStreaming = false,
+ stream = None)
}
/** Creates a [[DataFrame]] that uses the requested spark session to read from this table */
@@ -351,10 +354,15 @@ class DeltaTableV2 private(
val ttSpec = DeltaDataSource.getTimeTravelVersion(newOptions)
// Spark 4.0 and 3.5 handle time travel options differently.
- DeltaTimeTravelSpecShims.validateTimeTravelSpec(
- spark,
- currSpecOpt = timeTravelOpt,
- newSpecOpt = ttSpec)
+ // Validate that only one time travel spec is being used
+ (timeTravelOpt, ttSpec) match {
+ case (Some(currSpec), Some(newSpec))
+ if currSpec.version != newSpec.version ||
+ currSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) !=
+ newSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) =>
+ throw DeltaErrors.multipleTimeTravelSyntaxUsed
+ case _ =>
+ }
val caseInsensitiveNewOptions = new CaseInsensitiveStringMap(newOptions.asJava)
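Note: the inlined check rejects a table that is already pinned to one time-travel spec when the new options carry a different one. A heavily simplified, hypothetical trigger (path and versions are made up; the exact read paths that reach this code may differ):

    // Path-based time travel pins the table to version 1, while the reader option asks for version 2;
    // with the check above this surfaces DeltaErrors.multipleTimeTravelSyntaxUsed.
    spark.read.format("delta")
      .option("versionAsOf", "2")
      .load("/tmp/delta/events@v1")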
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala
index 753566bf114..7a240587bc7 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.delta.commands
import org.apache.spark.sql.delta.{DeltaErrors, Snapshot}
-import org.apache.spark.sql.delta.Relocated
import org.apache.spark.sql.delta.hooks.{UpdateCatalog, UpdateCatalogFactory}
import org.apache.spark.sql.delta.sources.DeltaSQLConf
@@ -173,6 +172,6 @@ trait CreateDeltaTableLike extends SQLConfHelper {
*/
protected def isV1Writer: Boolean = {
Thread.currentThread().getStackTrace.exists(_.toString.contains(
- Relocated.dataFrameWriterClassName + "."))
+ classOf[org.apache.spark.sql.classic.DataFrameWriter[_]].getCanonicalName + "."))
}
}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala
index 919d3e9f5de..0c9a86bc62d 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala
@@ -176,9 +176,8 @@ trait MergeIntoMaterializeSource extends DeltaLogging with DeltaSparkPlanUtils {
// SparkCoreErrors.checkpointRDDBlockIdNotFoundError from LocalCheckpointRDD.compute.
case s: SparkException
if materializedSourceRDD.nonEmpty &&
- MergeIntoMaterializeSourceShims.mergeMaterializedSourceRddBlockLostError(
- s,
- materializedSourceRDD.get.id) =>
+ s.getErrorClass == "CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND" &&
+ s.getMessageParameters.get("rddBlockId").contains(s"rdd_${materializedSourceRDD.get.id}") =>
logWarning(log"Materialized ${MDC(DeltaLogKeys.OPERATION, operation)} source RDD block " +
log"lost. ${MDC(DeltaLogKeys.OPERATION, operation)} needs to be restarted. " +
log"This was attempt number ${MDC(DeltaLogKeys.ATTEMPT, attempt)}.")
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala
index df013ea6295..3c905a4a898 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala
@@ -119,10 +119,13 @@ object DeltaInvariantCheckerExec extends DeltaLogging {
}
// Specialized optimizer to run necessary rules so that the check expressions can be evaluated.
- object DeltaInvariantCheckerOptimizer
- extends RuleExecutor[LogicalPlan]
- with DeltaInvariantCheckerOptimizerShims {
- final override protected def batches = DELTA_INVARIANT_CHECKER_OPTIMIZER_BATCHES
+ object DeltaInvariantCheckerOptimizer extends RuleExecutor[LogicalPlan] {
+ import org.apache.spark.sql.catalyst.optimizer.{ReplaceExpressions, RewriteWithExpression}
+
+ final override protected def batches = Seq(
+ Batch("Finish Analysis", Once, ReplaceExpressions),
+ Batch("Rewrite With expression", Once, RewriteWithExpression)
+ )
}
/** Build the extractor for a particular column. */
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala b/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala
index 4915497a8c6..54b2fb714ee 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala
@@ -32,7 +32,7 @@ import io.delta.storage.commit.{CommitCoordinatorClient, CommitFailedException =
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path}
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
/**
* An abstract [[CommitCoordinatorClient]] which triggers backfills every n commits.
@@ -40,7 +40,7 @@ import org.apache.spark.internal.{LoggingShims, MDC}
*/
trait AbstractBatchBackfillingCommitCoordinatorClient
extends CommitCoordinatorClient
- with LoggingShims {
+ with Logging {
/**
* Size of batch that should be backfilled. So every commit version which satisfies
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala
index b138f2a16ce..75c73457864 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala
@@ -29,7 +29,7 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
import org.apache.spark._
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils}
import org.apache.spark.shuffle.FetchFailedException
import org.apache.spark.sql.SparkSession
@@ -55,7 +55,7 @@ import org.apache.spark.util.{SerializableConfiguration, Utils}
* values to data files. Specifically L123-126, L132, and L140 where it adds option
* WRITE_PARTITION_COLUMNS
*/
-object DeltaFileFormatWriter extends LoggingShims {
+object DeltaFileFormatWriter extends Logging {
/**
* A variable used in tests to check whether the output ordering of the query matches the
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala b/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala
index 5b2f63afd34..1984a2cf4ec 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala
@@ -38,106 +38,106 @@
package org.apache.spark.sql.delta.logging
-import org.apache.spark.internal.LogKeyShims
+import org.apache.spark.internal.LogKey
/**
* Various keys used for mapped diagnostic contexts (MDC) in logging. All structured logging keys
* should be defined here for standardization.
*/
trait DeltaLogKeysBase {
- case object APP_ID extends LogKeyShims
- case object ATTEMPT extends LogKeyShims
- case object BATCH_ID extends LogKeyShims
- case object BATCH_SIZE extends LogKeyShims
- case object CATALOG extends LogKeyShims
- case object CLONE_SOURCE_DESC extends LogKeyShims
- case object CONFIG extends LogKeyShims
- case object CONFIG_KEY extends LogKeyShims
- case object COORDINATOR_CONF extends LogKeyShims
- case object COORDINATOR_NAME extends LogKeyShims
- case object COUNT extends LogKeyShims
- case object DATA_FILTER extends LogKeyShims
- case object DATE extends LogKeyShims
- case object DELTA_COMMIT_INFO extends LogKeyShims
- case object DELTA_METADATA extends LogKeyShims
- case object DIR extends LogKeyShims
- case object DURATION extends LogKeyShims
- case object ERROR_ID extends LogKeyShims
- case object END_INDEX extends LogKeyShims
- case object END_OFFSET extends LogKeyShims
- case object END_VERSION extends LogKeyShims
- case object ERROR extends LogKeyShims
- case object EXCEPTION extends LogKeyShims
- case object EXECUTOR_ID extends LogKeyShims
- case object EXPR extends LogKeyShims
- case object FILE_INDEX extends LogKeyShims
- case object FILE_NAME extends LogKeyShims
- case object FILE_STATUS extends LogKeyShims
- case object FILE_SYSTEM_SCHEME extends LogKeyShims
- case object FILTER extends LogKeyShims
- case object FILTER2 extends LogKeyShims
- case object HOOK_NAME extends LogKeyShims
- case object INVARIANT_CHECK_INFO extends LogKeyShims
- case object ISOLATION_LEVEL extends LogKeyShims
- case object IS_DRY_RUN extends LogKeyShims
- case object IS_INIT_SNAPSHOT extends LogKeyShims
- case object IS_PATH_TABLE extends LogKeyShims
- case object JOB_ID extends LogKeyShims
- case object LOG_SEGMENT extends LogKeyShims
- case object MAX_SIZE extends LogKeyShims
- case object METADATA_ID extends LogKeyShims
- case object METADATA_NEW extends LogKeyShims
- case object METADATA_OLD extends LogKeyShims
- case object METRICS extends LogKeyShims
- case object METRIC_NAME extends LogKeyShims
- case object MIN_SIZE extends LogKeyShims
- case object NUM_ACTIONS extends LogKeyShims
- case object NUM_ACTIONS2 extends LogKeyShims
- case object NUM_ATTEMPT extends LogKeyShims
- case object NUM_BYTES extends LogKeyShims
- case object NUM_DIRS extends LogKeyShims
- case object NUM_FILES extends LogKeyShims
- case object NUM_FILES2 extends LogKeyShims
- case object NUM_PARTITIONS extends LogKeyShims
- case object NUM_PREDICATES extends LogKeyShims
- case object NUM_RECORDS extends LogKeyShims
- case object NUM_RECORDS2 extends LogKeyShims
- case object NUM_SKIPPED extends LogKeyShims
- case object OFFSET extends LogKeyShims
- case object OPERATION extends LogKeyShims
- case object OP_NAME extends LogKeyShims
- case object PARTITION_FILTER extends LogKeyShims
- case object PATH extends LogKeyShims
- case object PATH2 extends LogKeyShims
- case object PATHS extends LogKeyShims
- case object PATHS2 extends LogKeyShims
- case object PATHS3 extends LogKeyShims
- case object PATHS4 extends LogKeyShims
- case object PROTOCOL extends LogKeyShims
- case object QUERY_ID extends LogKeyShims
- case object SCHEMA extends LogKeyShims
- case object SCHEMA_DIFF extends LogKeyShims
- case object SNAPSHOT extends LogKeyShims
- case object START_INDEX extends LogKeyShims
- case object START_VERSION extends LogKeyShims
- case object STATS extends LogKeyShims
- case object STATUS extends LogKeyShims
- case object STATUS_MESSAGE extends LogKeyShims
- case object SYSTEM_CLASS_NAME extends LogKeyShims
- case object TABLE_FEATURES extends LogKeyShims
- case object TABLE_ID extends LogKeyShims
- case object TABLE_NAME extends LogKeyShims
- case object TBL_PROPERTIES extends LogKeyShims
- case object THREAD_NAME extends LogKeyShims
- case object TIMESTAMP extends LogKeyShims
- case object TIMESTAMP2 extends LogKeyShims
- case object TIME_MS extends LogKeyShims
- case object TIME_STATS extends LogKeyShims
- case object TXN_ID extends LogKeyShims
- case object URI extends LogKeyShims
- case object VACUUM_STATS extends LogKeyShims
- case object VERSION extends LogKeyShims
- case object VERSION2 extends LogKeyShims
+ case object APP_ID extends LogKey
+ case object ATTEMPT extends LogKey
+ case object BATCH_ID extends LogKey
+ case object BATCH_SIZE extends LogKey
+ case object CATALOG extends LogKey
+ case object CLONE_SOURCE_DESC extends LogKey
+ case object CONFIG extends LogKey
+ case object CONFIG_KEY extends LogKey
+ case object COORDINATOR_CONF extends LogKey
+ case object COORDINATOR_NAME extends LogKey
+ case object COUNT extends LogKey
+ case object DATA_FILTER extends LogKey
+ case object DATE extends LogKey
+ case object DELTA_COMMIT_INFO extends LogKey
+ case object DELTA_METADATA extends LogKey
+ case object DIR extends LogKey
+ case object DURATION extends LogKey
+ case object ERROR_ID extends LogKey
+ case object END_INDEX extends LogKey
+ case object END_OFFSET extends LogKey
+ case object END_VERSION extends LogKey
+ case object ERROR extends LogKey
+ case object EXCEPTION extends LogKey
+ case object EXECUTOR_ID extends LogKey
+ case object EXPR extends LogKey
+ case object FILE_INDEX extends LogKey
+ case object FILE_NAME extends LogKey
+ case object FILE_STATUS extends LogKey
+ case object FILE_SYSTEM_SCHEME extends LogKey
+ case object FILTER extends LogKey
+ case object FILTER2 extends LogKey
+ case object HOOK_NAME extends LogKey
+ case object INVARIANT_CHECK_INFO extends LogKey
+ case object ISOLATION_LEVEL extends LogKey
+ case object IS_DRY_RUN extends LogKey
+ case object IS_INIT_SNAPSHOT extends LogKey
+ case object IS_PATH_TABLE extends LogKey
+ case object JOB_ID extends LogKey
+ case object LOG_SEGMENT extends LogKey
+ case object MAX_SIZE extends LogKey
+ case object METADATA_ID extends LogKey
+ case object METADATA_NEW extends LogKey
+ case object METADATA_OLD extends LogKey
+ case object METRICS extends LogKey
+ case object METRIC_NAME extends LogKey
+ case object MIN_SIZE extends LogKey
+ case object NUM_ACTIONS extends LogKey
+ case object NUM_ACTIONS2 extends LogKey
+ case object NUM_ATTEMPT extends LogKey
+ case object NUM_BYTES extends LogKey
+ case object NUM_DIRS extends LogKey
+ case object NUM_FILES extends LogKey
+ case object NUM_FILES2 extends LogKey
+ case object NUM_PARTITIONS extends LogKey
+ case object NUM_PREDICATES extends LogKey
+ case object NUM_RECORDS extends LogKey
+ case object NUM_RECORDS2 extends LogKey
+ case object NUM_SKIPPED extends LogKey
+ case object OFFSET extends LogKey
+ case object OPERATION extends LogKey
+ case object OP_NAME extends LogKey
+ case object PARTITION_FILTER extends LogKey
+ case object PATH extends LogKey
+ case object PATH2 extends LogKey
+ case object PATHS extends LogKey
+ case object PATHS2 extends LogKey
+ case object PATHS3 extends LogKey
+ case object PATHS4 extends LogKey
+ case object PROTOCOL extends LogKey
+ case object QUERY_ID extends LogKey
+ case object SCHEMA extends LogKey
+ case object SCHEMA_DIFF extends LogKey
+ case object SNAPSHOT extends LogKey
+ case object START_INDEX extends LogKey
+ case object START_VERSION extends LogKey
+ case object STATS extends LogKey
+ case object STATUS extends LogKey
+ case object STATUS_MESSAGE extends LogKey
+ case object SYSTEM_CLASS_NAME extends LogKey
+ case object TABLE_FEATURES extends LogKey
+ case object TABLE_ID extends LogKey
+ case object TABLE_NAME extends LogKey
+ case object TBL_PROPERTIES extends LogKey
+ case object THREAD_NAME extends LogKey
+ case object TIMESTAMP extends LogKey
+ case object TIMESTAMP2 extends LogKey
+ case object TIME_MS extends LogKey
+ case object TIME_STATS extends LogKey
+ case object TXN_ID extends LogKey
+ case object URI extends LogKey
+ case object VACUUM_STATS extends LogKey
+ case object VERSION extends LogKey
+ case object VERSION2 extends LogKey
}
object DeltaLogKeys extends DeltaLogKeysBase
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala b/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala
index ba4c26abbd0..fd458dbc9d3 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala
@@ -39,7 +39,7 @@ import org.apache.spark.sql.util.ScalaExtensions._
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkThrowable
-import org.apache.spark.internal.{LoggingShims, MDC, MessageWithContext}
+import org.apache.spark.internal.{Logging, MDC, MessageWithContext}
/**
* Convenience wrappers for logging that include delta specific options and
@@ -233,7 +233,7 @@ object DeltaLogging {
class LogThrottler(
val bucketSize: Int = 100,
val tokenRecoveryInterval: FiniteDuration = 1.second,
- val timeSource: NanoTimeTimeSource = SystemNanoTimeSource) extends LoggingShims {
+ val timeSource: NanoTimeTimeSource = SystemNanoTimeSource) extends Logging {
private var remainingTokens = bucketSize
private var nextRecovery: DeadlineWithTimeSource =
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala b/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala
index 3387ab95f1b..b4de244672c 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala
@@ -16,7 +16,7 @@
package org.apache.spark.sql.delta.perf
-import org.apache.spark.internal.LoggingShims
+import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
@@ -45,7 +45,7 @@ import java.util.Locale
* - Query has no GROUP BY.
* Example of valid query: SELECT COUNT(*), MIN(id), MAX(partition_col) FROM MyDeltaTable
*/
-trait OptimizeMetadataOnlyDeltaQuery extends LoggingShims {
+trait OptimizeMetadataOnlyDeltaQuery extends Logging {
def optimizeQueryWithMetadata(plan: LogicalPlan): LogicalPlan = {
plan.transformUpWithSubqueries {
case agg@MetadataOptimizableAggregate(tahoeLogFileIndex) =>
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala
index f6a9e6dab82..ded73f7b5e0 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala
@@ -1502,7 +1502,7 @@ def normalizeColumnNamesInDataType(
* Returns 'true' if any VariantType exists in the table schema.
*/
def checkForVariantTypeColumnsRecursively(schema: StructType): Boolean = {
- SchemaUtils.typeExistsRecursively(schema)(VariantShims.isVariantType(_))
+ SchemaUtils.typeExistsRecursively(schema)(_.isInstanceOf[VariantType])
}
/**
@@ -1537,7 +1537,7 @@ def normalizeColumnNamesInDataType(
case DateType =>
case TimestampType =>
case TimestampNTZType =>
- case dt if VariantShims.isVariantType(dt) =>
+ case dt if dt.isInstanceOf[VariantType] =>
case BinaryType =>
case _: DecimalType =>
case a: ArrayType =>
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala
index 677733fbd46..6ab2408042c 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala
@@ -45,8 +45,7 @@ import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.execution.streaming.{Sink, Source}
import org.apache.spark.sql.sources._
import org.apache.spark.sql.streaming.OutputMode
-import org.apache.spark.sql.types.{DataType, VariantShims}
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types.{DataType, StructType, VariantType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -55,7 +54,7 @@ class DeltaDataSource
extends RelationProvider
with StreamSourceProvider
with StreamSinkProvider
- with CreatableRelationProviderShim
+ with CreatableRelationProvider
with DataSourceRegister
with TableProvider
with DeltaLogging {
@@ -304,10 +303,9 @@ class DeltaDataSource
/**
* Extend the default `supportsDataType` to allow VariantType.
- * Implemented by `CreatableRelationProviderShim`.
*/
override def supportsDataType(dt: DataType): Boolean = {
- VariantShims.isVariantType(dt) || super.supportsDataType(dt)
+ dt.isInstanceOf[VariantType] || super.supportsDataType(dt)
}
override def shortName(): String = {
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala
index 787992a56cd..89a2852f5a8 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala
@@ -23,8 +23,9 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql.delta.DataFrameUtils
import org.apache.spark.sql.{Column, DataFrame}
+import org.apache.spark.sql.classic.ClassicConversions._
import org.apache.spark.sql.execution.QueryExecution
-import org.apache.spark.sql.execution.streaming.{IncrementalExecution, IncrementalExecutionShims, StreamExecution}
+import org.apache.spark.sql.execution.streaming.{IncrementalExecution, StreamExecution}
object DeltaStreamUtils {
@@ -39,10 +40,18 @@ object DeltaStreamUtils {
df: DataFrame,
cols: Column*): DataFrame = {
val newMicroBatch = df.select(cols: _*)
- val newIncrementalExecution = IncrementalExecutionShims.newInstance(
+ val newIncrementalExecution = new IncrementalExecution(
newMicroBatch.sparkSession,
newMicroBatch.queryExecution.logical,
- incrementalExecution)
+ incrementalExecution.outputMode,
+ incrementalExecution.checkpointLocation,
+ incrementalExecution.queryId,
+ incrementalExecution.runId,
+ incrementalExecution.currentBatchId,
+ incrementalExecution.prevOffsetSeqMetadata,
+ incrementalExecution.offsetSeqMetadata,
+ incrementalExecution.watermarkPropagator,
+ incrementalExecution.isFirstBatch)
newIncrementalExecution.executedPlan // Force the lazy generation of execution plan
DataFrameUtils.ofRows(newIncrementalExecution)
}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala
index 2ffaa1da1e1..2120b35e43b 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala
@@ -39,7 +39,7 @@ import org.apache.parquet.io.api.Binary
import org.apache.parquet.schema.LogicalTypeAnnotation._
import org.apache.parquet.schema.PrimitiveType
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.sql.{Dataset, SparkSession}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.execution.datasources.DataSourceUtils
@@ -49,7 +49,7 @@ import org.apache.spark.util.SerializableConfiguration
object StatsCollectionUtils
- extends LoggingShims
+ extends Logging
{
/**
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala
index 7c838ecc09c..61dbacec7d5 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.delta.logging.DeltaLogKeys
import org.apache.spark.sql.delta.util.JsonUtils
import com.fasterxml.jackson.annotation.JsonIgnore
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.{HDFSMetadataLog, MetadataVersionUtil}
import org.apache.spark.sql.types.{DataType, StructType}
@@ -103,7 +103,7 @@ class SchemaTrackingLog[T <: PartitionAndDataSchema: ClassTag: Manifest](
sparkSession: SparkSession,
path: String,
schemaSerializer: SchemaSerializer[T])
- extends HDFSMetadataLog[T](sparkSession, path) with LoggingShims {
+ extends HDFSMetadataLog[T](sparkSession, path) with Logging {
import SchemaTrackingExceptions._
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala
index 82632cfeb85..42d1133b2bb 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala
@@ -19,10 +19,10 @@ package org.apache.spark.sql.delta.util
import org.apache.spark.sql.delta.logging.DeltaLogKeys
import org.apache.spark.SparkContext
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.sql.SparkSession
-trait DeltaProgressReporter extends LoggingShims {
+trait DeltaProgressReporter extends Logging {
/**
* Report a log to indicate some command is running.
*/
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala
index 09c7984f29a..6bb8d4818ad 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala
@@ -630,7 +630,7 @@ private[delta] object PartitionUtils {
partitionColumnsSchema(schema, partitionColumns, caseSensitive).foreach {
field => field.dataType match {
// Variant types are not orderable and thus cannot be partition columns.
- case a: AtomicType if !VariantShims.isVariantType(a) => // OK
+ case a: AtomicType if !a.isInstanceOf[VariantType] => // OK
case _ => throw DeltaErrors.cannotUseDataTypeForPartitionColumnError(field)
}
}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala
index 109606dab40..6eeaf5a91d2 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala
@@ -24,7 +24,7 @@ import scala.collection.JavaConverters._
import org.apache.spark.sql.delta.logging.DeltaLogKeys
import org.apache.spark.{SparkContext, TaskContext}
-import org.apache.spark.internal.{LoggingShims, MDC}
+import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.util.{Utils => SparkUtils}
/**
@@ -48,7 +48,7 @@ class SparkThreadLocalForwardingThreadPoolExecutor(
}
-trait SparkThreadLocalCapturingHelper extends LoggingShims {
+trait SparkThreadLocalCapturingHelper extends Logging {
// At the time of creating this instance we capture the task context and command context.
val capturedTaskContext = TaskContext.get()
val sparkContext = SparkContext.getActive
diff --git a/spark/src/test/resources/log4j2.properties b/spark/src/test/resources/log4j2.properties
index 43daec1a285..0a8d5bb856f 100644
--- a/spark/src/test/resources/log4j2.properties
+++ b/spark/src/test/resources/log4j2.properties
@@ -38,18 +38,18 @@ appender.file.append = true
appender.file.layout.type = PatternLayout
appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
-# Pattern Logging Appender
-appender.pattern.type = File
-appender.pattern.name = pattern
-appender.pattern.fileName = target/pattern.log
-appender.pattern.layout.type = PatternLayout
-appender.pattern.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex
+# Structured Logging Appender
+appender.structured.type = File
+appender.structured.name = structured
+appender.structured.fileName = target/structured.log
+appender.structured.layout.type = JsonTemplateLayout
+appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json
-# Custom logger for testing structured logging with Spark 3.5 shims
-logger.pattern_logging.name = org.apache.spark.sql.delta.logging.DeltaPatternLoggingSuite
-logger.pattern_logging.level = trace
-logger.pattern_logging.appenderRefs = pattern
-logger.pattern_logging.appenderRef.pattern.ref = pattern
+# Custom logger for testing structured logging with Spark 4.0+
+logger.structured_logging.name = org.apache.spark.sql.delta.logging.DeltaStructuredLoggingSuite
+logger.structured_logging.level = trace
+logger.structured_logging.appenderRefs = structured
+logger.structured_logging.appenderRef.structured.ref = structured
# Tests that launch java subprocesses can set the "test.appender" system property to
# "console" to avoid having the child process's logs overwrite the unit test's
diff --git a/spark/src/test/resources/log4j2_spark_master.properties b/spark/src/test/resources/log4j2_spark_master.properties
deleted file mode 100644
index 95aea7050b7..00000000000
--- a/spark/src/test/resources/log4j2_spark_master.properties
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-# Copyright (2021) The Delta Lake Project Authors.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Set everything to be logged to the file target/unit-tests.log
-rootLogger.level = warn
-rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
-
-appender.file.type = File
-appender.file.name = File
-appender.file.fileName = target/unit-tests.log
-appender.file.append = true
-appender.file.layout.type = PatternLayout
-appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
-
-# Structured Logging Appender
-appender.structured.type = File
-appender.structured.name = structured
-appender.structured.fileName = target/structured.log
-appender.structured.layout.type = JsonTemplateLayout
-appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json
-
-# Custom logger for testing structured logging with Spark master
-logger.structured_logging.name = org.apache.spark.sql.delta.logging.DeltaStructuredLoggingSuite
-logger.structured_logging.level = trace
-logger.structured_logging.appenderRefs = structured
-logger.structured_logging.appenderRef.structured.ref = structured
-
-# Tests that launch java subprocesses can set the "test.appender" system property to
-# "console" to avoid having the child process's logs overwrite the unit test's
-# log file.
-appender.console.type = Console
-appender.console.name = console
-appender.console.target = SYSTEM_ERR
-appender.console.layout.type = PatternLayout
-appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
-
-# Ignore messages below warning level from Jetty, because it's a bit verbose
-logger.jetty.name = org.sparkproject.jetty
-logger.jetty.level = warn
diff --git a/spark/src/test/scala-spark-3.5/org/apache/spark/sql/delta/logging/DeltaPatternLoggingSuite.scala b/spark/src/test/scala-spark-3.5/org/apache/spark/sql/delta/logging/DeltaPatternLoggingSuite.scala
deleted file mode 100644
index 33f126ac3b8..00000000000
--- a/spark/src/test/scala-spark-3.5/org/apache/spark/sql/delta/logging/DeltaPatternLoggingSuite.scala
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * This file contains code from the Apache Spark project (original license above).
- * It contains modifications, which are licensed as follows:
- */
-
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.logging
-
-import java.util.regex.Pattern
-
-import org.apache.logging.log4j.Level
-
-class DeltaPatternLoggingSuite extends DeltaStructuredLoggingSuiteBase {
- override def className: String = classOf[DeltaPatternLoggingSuite].getSimpleName
- override def logFilePath: String = "target/pattern.log"
-
- override def expectedPatternForBasicMsg(level: Level): String = {
- s""".*$level $className: This is a log message\n"""
- }
-
- override def expectedPatternForBasicMsgWithException(level: Level): String = {
- s""".*$level $className: This is a log message\n[\\s\\S]*"""
- }
-
- override def expectedPatternForMsgWithMDC(level: Level): String =
- s""".*$level $className: Lost executor 1.\n"""
-
- override def expectedPatternForMsgWithMDCValueIsNull(level: Level): String =
- s""".*$level $className: Lost executor null.\n"""
-
- override def expectedPatternForMsgWithMDCAndException(level: Level): String =
- s""".*$level $className: Error in executor 1.\njava.lang.RuntimeException: OOM\n[\\s\\S]*"""
-
- override def expectedPatternForCustomLogKey(level: Level): String = {
- s""".*$level $className: Custom log message.\n"""
- }
-
- override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = {
- val pattern =
- s""".*$level $className: Min Size: 2, Max Size: 4. Please double check.\n"""
- assert(Pattern.compile(pattern).matcher(logOutput).matches())
- }
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaExcludedBySparkVersionTestMixinShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaExcludedBySparkVersionTestMixinShims.scala
deleted file mode 100644
index 587e6a32cab..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/DeltaExcludedBySparkVersionTestMixinShims.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.QueryTest
-
-trait DeltaExcludedBySparkVersionTestMixinShims extends QueryTest {
- /**
- * Tests that are meant for Delta compiled against Spark Latest Release only. Executed since this
- * is the Spark Latest Release shim.
- */
- protected def testSparkLatestOnly(
- testName: String, testTags: org.scalatest.Tag*)
- (testFun: => Any)
- (implicit pos: org.scalactic.source.Position): Unit = {
- test(testName, testTags: _*)(testFun)(pos)
- }
-
- /**
- * Tests that are meant for Delta compiled against Spark Master Release only. Ignored since this
- * is the Spark Latest Release shim.
- */
- protected def testSparkMasterOnly(
- testName: String, testTags: org.scalatest.Tag*)
- (testFun: => Any)
- (implicit pos: org.scalactic.source.Position): Unit = {
- ignore(testName, testTags: _*)(testFun)(pos)
- }
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaGenerateSymlinkManifestSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaGenerateSymlinkManifestSuiteShims.scala
deleted file mode 100644
index 41a3acc1340..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/DeltaGenerateSymlinkManifestSuiteShims.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaGenerateSymlinkManifestSuiteShims {
- val FAILS_ON_TEMP_VIEWS_ERROR_MSG = "v is a temp view. 'GENERATE' expects a table"
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaHistoryManagerSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaHistoryManagerSuiteShims.scala
deleted file mode 100644
index 98765c9b667..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/DeltaHistoryManagerSuiteShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaHistoryManagerSuiteShims {
- type MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_TYPE = java.lang.IllegalArgumentException
-
- val MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_MSG = "either provide 'timestampAsOf' or 'versionAsOf'"
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaInsertIntoTableSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaInsertIntoTableSuiteShims.scala
deleted file mode 100644
index 06624c21ea4..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/DeltaInsertIntoTableSuiteShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaInsertIntoTableSuiteShims {
- val INSERT_INTO_TMP_VIEW_ERROR_MSG = "Inserting into a view is not allowed"
-
- val INVALID_COLUMN_DEFAULT_VALUE_ERROR_MSG = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION"
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaSuiteShims.scala
deleted file mode 100644
index 978638f796b..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/DeltaSuiteShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaSuiteShims {
- val THROWS_ON_CORRUPTED_FILE_ERROR_MSG = "is not a Parquet file"
-
- val THROWS_ON_DELETED_FILE_ERROR_MSG = "FileNotFound"
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaVacuumSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaVacuumSuiteShims.scala
deleted file mode 100644
index 8b640829f98..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/DeltaVacuumSuiteShims.scala
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaVacuumSuiteShims {
- val SQL_COMMAND_ON_TEMP_VIEW_NOT_SUPPORTED_ERROR_MSG =
- "v is a temp view. 'VACUUM' expects a table."
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/DescribeDeltaHistorySuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DescribeDeltaHistorySuiteShims.scala
deleted file mode 100644
index f1c693a000c..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/DescribeDeltaHistorySuiteShims.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DescribeDeltaHistorySuiteShims {
- val FAILS_ON_VIEWS_ERROR_MSG =
- "spark_catalog.default.delta_view is a view. 'DESCRIBE HISTORY' expects a table"
-
- val FAILS_ON_TEMP_VIEWS_ERROR_MSG =
- "v is a temp view. 'DESCRIBE HISTORY' expects a table"
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/ImplicitDMLCastingSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/ImplicitDMLCastingSuiteShims.scala
deleted file mode 100644
index 5c2ffe237b3..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/ImplicitDMLCastingSuiteShims.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object ImplicitDMLCastingSuiteShims {
- /**
- * Discrepancy in error message between Spark 3.5 and Master (4.0) due to SPARK-47798
- * (https://github.com/apache/spark/pull/45981)
- */
- val NUMERIC_VALUE_OUT_OF_RANGE_ERROR_MSG = "NUMERIC_VALUE_OUT_OF_RANGE"
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/MergeIntoMetricsShims.scala b/spark/src/test/scala-spark-3.5/shims/MergeIntoMetricsShims.scala
deleted file mode 100644
index 8d8bce1bbc7..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/MergeIntoMetricsShims.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object MergeIntoMetricsShims {
- val DELETE_WITH_DUPLICATE_NUM_TARGET_FILES_ADDED_NON_PARTITIONED_NO_CDF = 1
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/SnapshotManagementSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/SnapshotManagementSuiteShims.scala
deleted file mode 100644
index 6dcea6ba77c..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/SnapshotManagementSuiteShims.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object SnapshotManagementSuiteShims {
- val SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG = ".parquet is not a Parquet file"
-}
diff --git a/spark/src/test/scala-spark-3.5/shims/TypeWideningTestCasesShims.scala b/spark/src/test/scala-spark-3.5/shims/TypeWideningTestCasesShims.scala
deleted file mode 100644
index 753ab18fa6a..00000000000
--- a/spark/src/test/scala-spark-3.5/shims/TypeWideningTestCasesShims.scala
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.typewidening
-
-import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.sql.types._
-
-/**
- * The set of type changes supported by type widening is different between Spark 3.5 and Spark 4.0.
- * See [[TypeWideningShims]]. This shim splits the test cases into supported and unsupported
- * accordingly for delta on Spark 3.5.
- */
-trait TypeWideningTestCasesShims {
- self: TypeWideningTestCases with SQLTestUtils =>
-
- import testImplicits._
-
- // Type changes that are supported by all Parquet readers. Byte, Short, Int are all stored as
- // INT32 in parquet so these changes are guaranteed to be supported.
- protected val supportedTestCases: Seq[TypeEvolutionTestCase] = Seq(
- SupportedTypeEvolutionTestCase(ByteType, ShortType,
- Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
- Seq(4, -4, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short])),
- SupportedTypeEvolutionTestCase(ByteType, IntegerType,
- Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
- Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])),
- SupportedTypeEvolutionTestCase(ShortType, IntegerType,
- Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
- Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]))
- )
-
- // Type changes that are only eligible for automatic widening when
- // spark.databricks.delta.typeWidening.allowAutomaticWidening = ALWAYS.
- protected val restrictedAutomaticWideningTestCases: Seq[TypeEvolutionTestCase] = Seq.empty
-
- // Test type changes that aren't supported.
- protected val unsupportedTestCases: Seq[TypeEvolutionTestCase] = Seq(
- UnsupportedTypeEvolutionTestCase(IntegerType, ByteType,
- Seq(1, 2, Int.MinValue)),
- UnsupportedTypeEvolutionTestCase(LongType, IntegerType,
- Seq(4, 5, Long.MaxValue)),
- UnsupportedTypeEvolutionTestCase(DoubleType, FloatType,
- Seq(987654321.987654321d, Double.NaN, Double.NegativeInfinity,
- Double.PositiveInfinity, Double.MinPositiveValue,
- Double.MinValue, Double.MaxValue)),
- UnsupportedTypeEvolutionTestCase(ByteType, DecimalType(2, 0),
- Seq(1, -1, Byte.MinValue)),
- UnsupportedTypeEvolutionTestCase(ShortType, DecimalType(4, 0),
- Seq(1, -1, Short.MinValue)),
- UnsupportedTypeEvolutionTestCase(IntegerType, DecimalType(9, 0),
- Seq(1, -1, Int.MinValue)),
- UnsupportedTypeEvolutionTestCase(LongType, DecimalType(19, 0),
- Seq(1, -1, Long.MinValue)),
- UnsupportedTypeEvolutionTestCase(TimestampNTZType, DateType,
- Seq("2020-03-17 15:23:15", "2023-12-31 23:59:59", "0001-01-01 00:00:00")),
- // Reduce scale
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS, 3),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
- // Reduce precision
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS - 1, 2),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
- // Reduce precision & scale
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS - 1, 1),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))),
- // Increase scale more than precision
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS + 1, 4),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
- // Smaller scale and larger precision.
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS + 3, 1),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))),
- SupportedTypeEvolutionTestCase(IntegerType, DoubleType,
- Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
- Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity,
- Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue,
- null.asInstanceOf[Double])),
- SupportedTypeEvolutionTestCase(ByteType, DecimalType(10, 0),
- Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
- SupportedTypeEvolutionTestCase(ShortType, DecimalType(10, 0),
- Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
- SupportedTypeEvolutionTestCase(IntegerType, DecimalType(10, 0),
- Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
- SupportedTypeEvolutionTestCase(LongType, DecimalType(20, 0),
- Seq(1L, -1L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Int]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 20), null.asInstanceOf[BigDecimal])),
- SupportedTypeEvolutionTestCase(ShortType, LongType,
- Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
- Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])),
- SupportedTypeEvolutionTestCase(IntegerType, LongType,
- Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
- Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])),
- SupportedTypeEvolutionTestCase(FloatType, DoubleType,
- Seq(1234.56789f, -0f, 0f, Float.NaN, Float.NegativeInfinity, Float.PositiveInfinity,
- Float.MinPositiveValue, Float.MinValue, Float.MaxValue, null.asInstanceOf[Float]),
- Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity,
- Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue,
- null.asInstanceOf[Double])),
- SupportedTypeEvolutionTestCase(DateType, TimestampNTZType,
- Seq("2020-01-01", "2024-02-29", "1312-02-27"),
- Seq("2020-03-17 15:23:15.123456", "2058-12-31 23:59:59.999", "0001-01-01 00:00:00")),
- // Larger precision.
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_LONG_DIGITS, 2),
- Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"),
- null.asInstanceOf[BigDecimal])),
- // Larger precision and scale, same physical type.
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS - 1, 2),
- DecimalType(Decimal.MAX_INT_DIGITS, 3),
- Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 3) + ".99"),
- null.asInstanceOf[BigDecimal])),
- // Larger precision and scale, different physical types.
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_LONG_DIGITS + 1, 3),
- Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"),
- null.asInstanceOf[BigDecimal]))
- )
-}
diff --git a/spark/src/test/scala-spark-master/shims/DeltaExcludedBySparkVersionTestMixinShims.scala b/spark/src/test/scala-spark-master/shims/DeltaExcludedBySparkVersionTestMixinShims.scala
deleted file mode 100644
index 0dea4b2d536..00000000000
--- a/spark/src/test/scala-spark-master/shims/DeltaExcludedBySparkVersionTestMixinShims.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-import org.apache.spark.sql.QueryTest
-
-trait DeltaExcludedBySparkVersionTestMixinShims extends QueryTest {
-
- /**
- * Tests that are meant for Delta compiled against Spark Latest Release only. Ignored since this
- * is the Spark Master shim.
- */
- protected def testSparkLatestOnly(
- testName: String, testTags: org.scalatest.Tag*)
- (testFun: => Any)
- (implicit pos: org.scalactic.source.Position): Unit = {
- ignore(testName + " (Spark Latest Release Only)", testTags: _*)(testFun)(pos)
- }
-
- /**
- * Tests that are meant for Delta compiled against Spark Master (4.0+). Executed since this is the
- * Spark Master shim.
- */
- protected def testSparkMasterOnly(
- testName: String, testTags: org.scalatest.Tag*)
- (testFun: => Any)
- (implicit pos: org.scalactic.source.Position): Unit = {
- test(testName, testTags: _*)(testFun)(pos)
- }
-
-}
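
Note on the removal above: DeltaExcludedBySparkVersionTestMixinShims existed only to gate tests by Spark build, and with a single supported Spark line the callers in this patch switch from testSparkMasterOnly(...) to plain test(...) (see the AutoCompactSuite, DeleteSuiteBase and DeltaInsertIntoTableSuite hunks below). A minimal, self-contained sketch of the old gating pattern versus the new direct registration; the suite name and test bodies here are made up for illustration:

import org.scalatest.funsuite.AnyFunSuite

// Illustrative stand-in for the removed mixin: on the Spark Master build the
// helper delegated to test(); the Spark 3.5 shim delegated to ignore() instead.
class VersionGatingSketchSuite extends AnyFunSuite {
  protected def testSparkMasterOnly(testName: String)(testFun: => Any): Unit =
    test(testName)(testFun)

  // Old style: registration gated through the shim helper.
  testSparkMasterOnly("variant behaviour, old style") { assert(1 + 1 == 2) }

  // New style after this patch: register directly, since only one Spark line is built.
  test("variant behaviour, new style") { assert(1 + 1 == 2) }
}
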
diff --git a/spark/src/test/scala-spark-master/shims/DeltaGenerateSymlinkManifestSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaGenerateSymlinkManifestSuiteShims.scala
deleted file mode 100644
index 72e879122d8..00000000000
--- a/spark/src/test/scala-spark-master/shims/DeltaGenerateSymlinkManifestSuiteShims.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (2024) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaGenerateSymlinkManifestSuiteShims {
- val FAILS_ON_TEMP_VIEWS_ERROR_MSG = "'GENERATE' expects a table but `v` is a view."
-}
diff --git a/spark/src/test/scala-spark-master/shims/DeltaHistoryManagerSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaHistoryManagerSuiteShims.scala
deleted file mode 100644
index cdb9377d550..00000000000
--- a/spark/src/test/scala-spark-master/shims/DeltaHistoryManagerSuiteShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaHistoryManagerSuiteShims {
- type MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_TYPE = org.apache.spark.sql.AnalysisException
-
- val MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_MSG = "Cannot specify both version and timestamp"
-}
diff --git a/spark/src/test/scala-spark-master/shims/DeltaInsertIntoTableSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaInsertIntoTableSuiteShims.scala
deleted file mode 100644
index 55b8b09a0d2..00000000000
--- a/spark/src/test/scala-spark-master/shims/DeltaInsertIntoTableSuiteShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaInsertIntoTableSuiteShims {
- val INSERT_INTO_TMP_VIEW_ERROR_MSG = "[EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE]"
-
- val INVALID_COLUMN_DEFAULT_VALUE_ERROR_MSG = "INVALID_DEFAULT_VALUE.NOT_CONSTANT"
-}
diff --git a/spark/src/test/scala-spark-master/shims/DeltaSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaSuiteShims.scala
deleted file mode 100644
index fcc0a769e64..00000000000
--- a/spark/src/test/scala-spark-master/shims/DeltaSuiteShims.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaSuiteShims {
- val THROWS_ON_CORRUPTED_FILE_ERROR_MSG = "[FAILED_READ_FILE.NO_HINT]"
-
- val THROWS_ON_DELETED_FILE_ERROR_MSG = "[FAILED_READ_FILE.FILE_NOT_EXIST]"
-}
diff --git a/spark/src/test/scala-spark-master/shims/DeltaVacuumSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaVacuumSuiteShims.scala
deleted file mode 100644
index bf45efa28de..00000000000
--- a/spark/src/test/scala-spark-master/shims/DeltaVacuumSuiteShims.scala
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DeltaVacuumSuiteShims {
- val SQL_COMMAND_ON_TEMP_VIEW_NOT_SUPPORTED_ERROR_MSG =
- "'VACUUM' expects a table but `v` is a view"
-}
diff --git a/spark/src/test/scala-spark-master/shims/DescribeDeltaHistorySuiteShims.scala b/spark/src/test/scala-spark-master/shims/DescribeDeltaHistorySuiteShims.scala
deleted file mode 100644
index 81b7aed6b42..00000000000
--- a/spark/src/test/scala-spark-master/shims/DescribeDeltaHistorySuiteShims.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object DescribeDeltaHistorySuiteShims {
- val FAILS_ON_VIEWS_ERROR_MSG =
- "'DESCRIBE HISTORY' expects a table but `spark_catalog`.`default`.`delta_view` is a view."
-
- val FAILS_ON_TEMP_VIEWS_ERROR_MSG =
- "'DESCRIBE HISTORY' expects a table but `v` is a view."
-}
diff --git a/spark/src/test/scala-spark-master/shims/ImplicitDMLCastingSuiteShims.scala b/spark/src/test/scala-spark-master/shims/ImplicitDMLCastingSuiteShims.scala
deleted file mode 100644
index 11be5107df3..00000000000
--- a/spark/src/test/scala-spark-master/shims/ImplicitDMLCastingSuiteShims.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object ImplicitDMLCastingSuiteShims {
- /**
- * Discrepancy in error message between Spark 3.5 and Master (4.0) due to SPARK-47798
- * (https://github.com/apache/spark/pull/45981)
- */
- val NUMERIC_VALUE_OUT_OF_RANGE_ERROR_MSG = "NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION"
-}
diff --git a/spark/src/test/scala-spark-master/shims/MergeIntoMetricsShims.scala b/spark/src/test/scala-spark-master/shims/MergeIntoMetricsShims.scala
deleted file mode 100644
index 33f66c0470f..00000000000
--- a/spark/src/test/scala-spark-master/shims/MergeIntoMetricsShims.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object MergeIntoMetricsShims {
- val DELETE_WITH_DUPLICATE_NUM_TARGET_FILES_ADDED_NON_PARTITIONED_NO_CDF = 1
-}
diff --git a/spark/src/test/scala-spark-master/shims/SnapshotManagementSuiteShims.scala b/spark/src/test/scala-spark-master/shims/SnapshotManagementSuiteShims.scala
deleted file mode 100644
index fa5ed1bb886..00000000000
--- a/spark/src/test/scala-spark-master/shims/SnapshotManagementSuiteShims.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta
-
-object SnapshotManagementSuiteShims {
- val SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG = "Encountered error while reading file"
-}
diff --git a/spark/src/test/scala-spark-master/shims/TypeWideningTestCasesShims.scala b/spark/src/test/scala-spark-master/shims/TypeWideningTestCasesShims.scala
deleted file mode 100644
index d35f41b6f44..00000000000
--- a/spark/src/test/scala-spark-master/shims/TypeWideningTestCasesShims.scala
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.typewidening
-
-import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.sql.types._
-
-/**
- * The set of type changes supported by type widening is different between Spark 3.5 and Spark 4.0.
- * See [[TypeWideningShims]]. This shim splits the test cases into supported and unsupported
- * accordingly for delta on Spark 4.0.
- */
-trait TypeWideningTestCasesShims {
- self: TypeWideningTestCases with SQLTestUtils =>
-
- import testImplicits._
-
- // Type changes that are supported by all Parquet readers. Byte, Short, Int are all stored as
- // INT32 in parquet so these changes are guaranteed to be supported.
- protected val supportedTestCases: Seq[TypeEvolutionTestCase] = Seq(
- SupportedTypeEvolutionTestCase(ByteType, ShortType,
- Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
- Seq(4, -4, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short])),
- SupportedTypeEvolutionTestCase(ByteType, IntegerType,
- Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
- Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])),
- SupportedTypeEvolutionTestCase(ShortType, IntegerType,
- Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
- Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])),
- SupportedTypeEvolutionTestCase(ShortType, LongType,
- Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
- Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])),
- SupportedTypeEvolutionTestCase(IntegerType, LongType,
- Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
- Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])),
- SupportedTypeEvolutionTestCase(FloatType, DoubleType,
- Seq(1234.56789f, -0f, 0f, Float.NaN, Float.NegativeInfinity, Float.PositiveInfinity,
- Float.MinPositiveValue, Float.MinValue, Float.MaxValue, null.asInstanceOf[Float]),
- Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity,
- Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue,
- null.asInstanceOf[Double])),
- SupportedTypeEvolutionTestCase(DateType, TimestampNTZType,
- Seq("2020-01-01", "2024-02-29", "1312-02-27"),
- Seq("2020-03-17 15:23:15.123456", "2058-12-31 23:59:59.999", "0001-01-01 00:00:00")),
- // Larger precision.
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_LONG_DIGITS, 2),
- Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"),
- null.asInstanceOf[BigDecimal])),
- // Larger precision and scale, same physical type.
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS - 1, 2),
- DecimalType(Decimal.MAX_INT_DIGITS, 3),
- Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 3) + ".99"),
- null.asInstanceOf[BigDecimal])),
- // Larger precision and scale, different physical types.
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_LONG_DIGITS + 1, 3),
- Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"),
- null.asInstanceOf[BigDecimal]))
- )
-
- // Type changes that are only eligible for automatic widening when
- // spark.databricks.delta.typeWidening.allowAutomaticWidening = ALWAYS.
- protected val restrictedAutomaticWideningTestCases: Seq[TypeEvolutionTestCase] = Seq(
- SupportedTypeEvolutionTestCase(IntegerType, DoubleType,
- Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
- Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity,
- Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue,
- null.asInstanceOf[Double])),
- SupportedTypeEvolutionTestCase(ByteType, DecimalType(10, 0),
- Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
- SupportedTypeEvolutionTestCase(ShortType, DecimalType(10, 0),
- Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
- SupportedTypeEvolutionTestCase(IntegerType, DecimalType(10, 0),
- Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
- SupportedTypeEvolutionTestCase(LongType, DecimalType(20, 0),
- Seq(1L, -1L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Int]),
- Seq(BigDecimal("1.23"), BigDecimal("9" * 20), null.asInstanceOf[BigDecimal]))
- )
-
- // Test type changes that aren't supported.
- protected val unsupportedTestCases: Seq[TypeEvolutionTestCase] = Seq(
- UnsupportedTypeEvolutionTestCase(IntegerType, ByteType,
- Seq(1, 2, Int.MinValue)),
- UnsupportedTypeEvolutionTestCase(LongType, IntegerType,
- Seq(4, 5, Long.MaxValue)),
- UnsupportedTypeEvolutionTestCase(DoubleType, FloatType,
- Seq(987654321.987654321d, Double.NaN, Double.NegativeInfinity,
- Double.PositiveInfinity, Double.MinPositiveValue,
- Double.MinValue, Double.MaxValue)),
- UnsupportedTypeEvolutionTestCase(ByteType, DecimalType(2, 0),
- Seq(1, -1, Byte.MinValue)),
- UnsupportedTypeEvolutionTestCase(ShortType, DecimalType(4, 0),
- Seq(1, -1, Short.MinValue)),
- UnsupportedTypeEvolutionTestCase(IntegerType, DecimalType(9, 0),
- Seq(1, -1, Int.MinValue)),
- UnsupportedTypeEvolutionTestCase(LongType, DecimalType(19, 0),
- Seq(1, -1, Long.MinValue)),
- UnsupportedTypeEvolutionTestCase(TimestampNTZType, DateType,
- Seq("2020-03-17 15:23:15", "2023-12-31 23:59:59", "0001-01-01 00:00:00")),
- // Reduce scale
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS, 3),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
- // Reduce precision
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS - 1, 2),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
- // Reduce precision & scale
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS - 1, 1),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))),
- // Increase scale more than precision
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS + 1, 4),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
- // Smaller scale and larger precision.
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2),
- DecimalType(Decimal.MAX_INT_DIGITS + 3, 1),
- Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99")))
- )
-}
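
The two TypeWideningTestCasesShims files deleted above existed only to split supported vs. unsupported widenings between Spark 3.5 and 4.0; with one Spark line the split collapses into a single list of test cases. As a hedged, standalone sketch of the kind of change the supported list exercises (IntegerType to LongType), assuming a local SparkSession with the delta-spark package on the classpath; the table name and values are made up:

import org.apache.spark.sql.SparkSession

object TypeWideningSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("type-widening-sketch")
      .master("local[*]")
      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
      .config("spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog")
      .getOrCreate()

    // Opt the table into type widening via its table property.
    spark.sql("CREATE TABLE t (id INT) USING delta " +
      "TBLPROPERTIES ('delta.enableTypeWidening' = 'true')")
    spark.sql("INSERT INTO t VALUES (1), (2147483647)")
    // Widen INT -> BIGINT: a supported change. The reverse (BIGINT -> INT)
    // sits in unsupportedTestCases and is rejected.
    spark.sql("ALTER TABLE t ALTER COLUMN id TYPE BIGINT")
    spark.sql("SELECT * FROM t").show()
    spark.stop()
  }
}
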
diff --git a/spark/src/test/scala-spark-master/shims/logging/DeltaStructuredLoggingSuite.scala b/spark/src/test/scala-spark-master/shims/logging/DeltaStructuredLoggingSuite.scala
deleted file mode 100644
index 852147ec017..00000000000
--- a/spark/src/test/scala-spark-master/shims/logging/DeltaStructuredLoggingSuite.scala
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * This file contains code from the Apache Spark project (original license above).
- * It contains modifications, which are licensed as follows:
- */
-
-/*
- * Copyright (2021) The Delta Lake Project Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.delta.logging
-
-import java.util.regex.Pattern
-
-import com.fasterxml.jackson.databind.ObjectMapper
-import com.fasterxml.jackson.module.scala.DefaultScalaModule
-import org.apache.logging.log4j.Level
-
-import org.apache.spark.internal.Logging
-
-class DeltaStructuredLoggingSuite extends DeltaStructuredLoggingSuiteBase {
- override def className: String = classOf[DeltaStructuredLoggingSuite].getSimpleName
- override def logFilePath: String = "target/structured.log"
-
- override def beforeAll(): Unit = {
- super.beforeAll()
- Logging.enableStructuredLogging()
- }
-
- override def afterAll(): Unit = {
- Logging.disableStructuredLogging()
- super.afterAll()
- }
-
- private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule)
- private def compactAndToRegexPattern(json: String): String = {
- jsonMapper.readTree(json).toString.
-      replace("<timestamp>", """[^"]+""").
-      replace(""""<stacktrace>"""", """.*""").
- replace("{", """\{""") + "\n"
- }
-
- override def expectedPatternForBasicMsg(level: Level): String = {
- compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "This is a log message",
- "logger": "$className"
- }""")
- }
-
- override def expectedPatternForBasicMsgWithException(level: Level): String = {
- compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "This is a log message",
- "exception": {
- "class": "java.lang.RuntimeException",
- "msg": "OOM",
-            "stacktrace": "<stacktrace>"
- },
- "logger": "$className"
- }""")
- }
-
- override def expectedPatternForMsgWithMDC(level: Level): String = {
- compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "Lost executor 1.",
- "context": {
- "executor_id": "1"
- },
- "logger": "$className"
- }""")
- }
-
- def expectedPatternForMsgWithMDCValueIsNull(level: Level): String = {
- compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "Lost executor null.",
- "context": {
- "executor_id": null
- },
- "logger": "$className"
- }""")
- }
-
- override def expectedPatternForMsgWithMDCAndException(level: Level): String = {
- compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "Error in executor 1.",
- "context": {
- "executor_id": "1"
- },
- "exception": {
- "class": "java.lang.RuntimeException",
- "msg": "OOM",
-            "stacktrace": "<stacktrace>"
- },
- "logger": "$className"
- }""")
- }
-
- override def expectedPatternForCustomLogKey(level: Level): String = {
- compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "Custom log message.",
- "logger": "$className"
- }"""
- )
- }
-
- override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = {
- val pattern1 = compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "Min Size: 2, Max Size: 4. Please double check.",
- "context": {
- "min_size": "2",
- "max_size": "4"
- },
- "logger": "$className"
- }""")
-
- val pattern2 = compactAndToRegexPattern(
- s"""
- {
-          "ts": "<timestamp>",
- "level": "$level",
- "msg": "Min Size: 2, Max Size: 4. Please double check.",
- "context": {
- "max_size": "4",
- "min_size": "2"
- },
- "logger": "$className"
- }""")
- assert(Pattern.compile(pattern1).matcher(logOutput).matches() ||
- Pattern.compile(pattern2).matcher(logOutput).matches())
- }
-}
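
The deleted DeltaStructuredLoggingSuite above builds its expectations by compacting a JSON template and swapping the <timestamp> and <stacktrace> placeholders for regex fragments. A standalone sketch of that transformation (assumes jackson-databind and jackson-module-scala on the classpath; the sample log line is made up):

import java.util.regex.Pattern

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule

object StructuredLogPatternSketch {
  private val mapper = new ObjectMapper().registerModule(DefaultScalaModule)

  // Compact the JSON template, then turn the placeholders into regex fragments
  // and escape the opening brace so the result is a valid pattern.
  def compactAndToRegexPattern(json: String): String =
    mapper.readTree(json).toString.
      replace("<timestamp>", """[^"]+""").
      replace(""""<stacktrace>"""", """.*""").
      replace("{", """\{""") + "\n"

  def main(args: Array[String]): Unit = {
    val pattern = compactAndToRegexPattern(
      """
        {
          "ts": "<timestamp>",
          "level": "ERROR",
          "msg": "This is a log message",
          "logger": "DeltaStructuredLoggingSuite"
        }""")
    val line =
      """{"ts":"2024-01-01T00:00:00.000Z","level":"ERROR","msg":"This is a log message","logger":"DeltaStructuredLoggingSuite"}""" + "\n"
    // The timestamp is matched by [^"]+, everything else must match literally.
    assert(Pattern.compile(pattern).matcher(line).matches())
  }
}
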
diff --git a/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala b/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala
index f22e33e0d7b..aa69ae445f4 100644
--- a/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala
+++ b/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala
@@ -28,7 +28,6 @@ import org.apache.spark.sql.delta.commands.{DeltaOptimizeContext, DescribeDeltaD
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.{TableIdentifier, TimeTravel}
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedTable}
-import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.SQLHelper
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala
index 77d8448ca7d..ed9ca3ed997 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala
@@ -21,7 +21,6 @@ import java.io.File
// scalastyle:off import.ordering.noEmptyLine
import com.databricks.spark.util.{Log4jUsageLogger, UsageRecord}
-import org.apache.spark.sql.delta.DeltaExcludedBySparkVersionTestMixinShims
import org.apache.spark.sql.delta.actions.AddFile
import org.apache.spark.sql.delta.commands.optimize._
import org.apache.spark.sql.delta.hooks.{AutoCompact, AutoCompactType}
@@ -62,8 +61,7 @@ class AutoCompactConfigurationSuite extends
CompactionTestHelperForAutoCompaction
with DeltaSQLCommandTest
with SharedSparkSession
- with AutoCompactTestUtils
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with AutoCompactTestUtils {
private def setTableProperty(log: DeltaLog, key: String, value: String): Unit = {
spark.sql(s"ALTER TABLE delta.`${log.dataPath}` SET TBLPROPERTIES " +
@@ -124,8 +122,7 @@ class AutoCompactExecutionSuite extends
CompactionTestHelperForAutoCompaction
with DeltaSQLCommandTest
with SharedSparkSession
- with AutoCompactTestUtils
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with AutoCompactTestUtils {
private def testBothModesViaProperty(testName: String)(f: String => Unit): Unit = {
def runTest(autoCompactConfValue: String): Unit = {
withTempDir { dir =>
@@ -242,7 +239,7 @@ class AutoCompactExecutionSuite extends
checkAutoCompactionWorks(dir, spark.range(10).toDF("id"))
}
- testSparkMasterOnly("variant auto compact kicks in when enabled - table config") {
+ test("variant auto compact kicks in when enabled - table config") {
withTempDir { dir =>
withSQLConf(
"spark.databricks.delta.properties.defaults.autoOptimize.autoCompact" -> "true",
@@ -254,7 +251,7 @@ class AutoCompactExecutionSuite extends
}
}
- testSparkMasterOnly("variant auto compact kicks in when enabled - session config") {
+ test("variant auto compact kicks in when enabled - session config") {
withTempDir { dir =>
withSQLConf(
DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED.key -> "true",
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala
index 0ed8cad08d2..88b7188f70f 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala
@@ -30,8 +30,7 @@ trait DeleteBaseMixin
extends QueryTest
with SharedSparkSession
with DeltaDMLTestUtils
- with DeltaTestUtilsForTempViews
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaTestUtilsForTempViews {
import testImplicits._
@@ -539,7 +538,7 @@ trait DeleteBaseTests extends DeleteBaseMixin {
Some(".*More than one row returned by a subquery used as an expression(?s).*")
)
- testSparkMasterOnly("Variant type") {
+ test("Variant type") {
val dstDf = sql(
"""SELECT parse_json(cast(id as string)) v, id i
FROM range(3)""")
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala
index 3b79f226d3e..5f8e0196f69 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala
@@ -20,7 +20,6 @@ import java.io.File
import java.net.URI
// scalastyle:off import.ordering.noEmptyLine
-import org.apache.spark.sql.delta.DeltaGenerateSymlinkManifestSuiteShims._
import org.apache.spark.sql.delta.DeltaOperations.Delete
import org.apache.spark.sql.delta.commands.DeltaGenerateCommand
import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest
@@ -123,7 +122,7 @@ trait DeltaGenerateSymlinkManifestSuiteBase
val e = intercept[AnalysisException] {
spark.sql(s"GENERATE symlink_format_manifest FOR TABLE v")
}
- assert(e.getMessage.contains(FAILS_ON_TEMP_VIEWS_ERROR_MSG))
+ assert(e.getMessage.contains("'GENERATE' expects a table but `v` is a view."))
}
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala
index 91931ae0738..3ad3f0a841d 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala
@@ -28,7 +28,6 @@ import scala.language.implicitConversions
import com.databricks.spark.util.Log4jUsageLogger
import org.apache.spark.sql.delta.DeltaConfigs.IN_COMMIT_TIMESTAMPS_ENABLED
-import org.apache.spark.sql.delta.DeltaHistoryManagerSuiteShims._
import org.apache.spark.sql.delta.DeltaTestUtils.{createTestAddFile, modifyCommitTimestamp}
import org.apache.spark.sql.delta.catalog.DeltaTableV2
import org.apache.spark.sql.delta.coordinatedcommits.CatalogOwnedTestBaseSuite
@@ -541,8 +540,7 @@ trait DeltaTimeTravelTests extends QueryTest
}
}
-abstract class DeltaHistoryManagerBase extends DeltaTimeTravelTests
- {
+abstract class DeltaHistoryManagerBase extends DeltaTimeTravelTests {
test("cannot time travel target tables of insert/delete/update/merge") {
val tblName = "delta_table"
withTable(tblName) {
@@ -615,14 +613,14 @@ abstract class DeltaHistoryManagerBase extends DeltaTimeTravelTests
}
assert(e1.getMessage.contains("[0, 2]"))
- val e2 = intercept[MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_TYPE] {
+ val e2 = intercept[org.apache.spark.sql.AnalysisException] {
spark.read.format("delta")
.option("versionAsOf", 3)
.option("timestampAsOf", "2020-10-22 23:20:11")
.table(tblName).collect()
}
- assert(e2.getMessage.contains(MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_MSG))
+ assert(e2.getMessage.contains("Cannot specify both version and timestamp"))
}
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala
index 5c9ae2f2e4b..862f7eed616 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala
@@ -22,7 +22,6 @@ import java.util.TimeZone
import scala.collection.JavaConverters._
-import org.apache.spark.sql.delta.DeltaInsertIntoTableSuiteShims._
import org.apache.spark.sql.delta.schema.InvariantViolationException
import org.apache.spark.sql.delta.schema.SchemaUtils
import org.apache.spark.sql.delta.sources.DeltaSQLConf
@@ -45,8 +44,7 @@ class DeltaInsertIntoSQLSuite
extends DeltaInsertIntoTestsWithTempViews(
supportsDynamicOverwrite = true,
includeSQLOnlyTests = true)
- with DeltaSQLCommandTest
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaSQLCommandTest {
import testImplicits._
@@ -59,7 +57,7 @@ class DeltaInsertIntoSQLSuite
}
}
- testSparkMasterOnly("Variant type") {
+ test("Variant type") {
withTable("t") {
sql("CREATE TABLE t (id LONG, v VARIANT) USING delta")
sql("INSERT INTO t (id, v) VALUES (1, parse_json('{\"a\": 1}'))")
@@ -692,7 +690,7 @@ abstract class DeltaInsertIntoTestsWithTempViews(
} catch {
case e: AnalysisException =>
assert(
- e.getMessage.contains(INSERT_INTO_TMP_VIEW_ERROR_MSG) ||
+ e.getMessage.contains("[EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE]") ||
e.getMessage.contains("Inserting into an RDD-based table is not allowed") ||
e.getMessage.contains("Table default.v not found") ||
e.getMessage.contains("Table or view 'v' not found in database 'default'") ||
@@ -872,7 +870,7 @@ class DeltaColumnDefaultsInsertSuite extends InsertIntoSQLOnlyTests with DeltaSQ
sql(s"create table t4 (s int default badvalue) using $v2Format " +
s"$tblPropertiesAllowDefaults")
},
- INVALID_COLUMN_DEFAULT_VALUE_ERROR_MSG,
+ "INVALID_DEFAULT_VALUE.NOT_CONSTANT",
parameters = Map(
"statement" -> "CREATE TABLE",
"colName" -> "`s`",
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala
index 8ee62106c27..b8d9f3baa54 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala
@@ -51,8 +51,7 @@ import org.apache.spark.util.{ManualClock, Utils}
class DeltaSourceSuite extends DeltaSourceSuiteBase
with DeltaColumnMappingTestUtils
- with DeltaSQLCommandTest
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaSQLCommandTest {
import testImplicits._
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
index 804c38a5b1e..675b68e2bbc 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
@@ -20,7 +20,6 @@ import java.io.{File, FileNotFoundException}
import java.util.concurrent.atomic.AtomicInteger
// scalastyle:off import.ordering.noEmptyLine
-import org.apache.spark.sql.delta.DeltaSuiteShims._
import org.apache.spark.sql.delta.actions.{Action, TableFeatureProtocolUtils}
import org.apache.spark.sql.delta.commands.cdc.CDCReader
import org.apache.spark.sql.delta.coordinatedcommits.{CatalogOwnedTableUtils, CatalogOwnedTestBaseSuite}
@@ -1530,7 +1529,7 @@ class DeltaSuite extends QueryTest
val thrown = intercept[SparkException] {
data.toDF().collect()
}
- assert(thrown.getMessage.contains(THROWS_ON_CORRUPTED_FILE_ERROR_MSG))
+ assert(thrown.getMessage.contains("[FAILED_READ_FILE.NO_HINT]"))
}
}
}
@@ -1582,7 +1581,7 @@ class DeltaSuite extends QueryTest
val thrown = intercept[SparkException] {
data.toDF().collect()
}
- assert(thrown.getMessage.contains(THROWS_ON_DELETED_FILE_ERROR_MSG))
+ assert(thrown.getMessage.contains("[FAILED_READ_FILE.FILE_NOT_EXIST]"))
}
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala
index 0dfd8fbad5c..648f7692b96 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala
@@ -26,7 +26,6 @@ import scala.language.implicitConversions
import org.apache.spark.sql.delta.{CatalogOwnedTableFeature, DeltaUnsupportedOperationException}
import org.apache.spark.sql.delta.DeltaOperations.{Delete, Write}
import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile
-import org.apache.spark.sql.delta.DeltaVacuumSuiteShims._
import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, Metadata, RemoveFile}
import org.apache.spark.sql.delta.catalog.DeltaTableV2
import org.apache.spark.sql.delta.commands.VacuumCommand
@@ -550,7 +549,7 @@ class DeltaVacuumSuite extends DeltaVacuumSuiteBase with DeltaSQLCommandTest {
val e = intercept[AnalysisException] {
vacuumSQLTest(table, viewName)
}
- assert(e.getMessage.contains(SQL_COMMAND_ON_TEMP_VIEW_NOT_SUPPORTED_ERROR_MSG))
+ assert(e.getMessage.contains("'VACUUM' expects a table but `v` is a view"))
}
}
}
diff --git a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala
similarity index 84%
rename from spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala
rename to spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala
index 27426a4265f..6287119d821 100644
--- a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala
@@ -161,36 +161,36 @@ class DeltaVariantShreddingSuite
// Table property not present or false
Seq("", s"TBLPROPERTIES ('${DeltaConfigs.ENABLE_VARIANT_SHREDDING.key}' = 'false') ")
.foreach { tblProperties =>
- withTable("tbl") {
- withTempDir { dir =>
- sql("CREATE TABLE tbl (i long, v variant) USING DELTA " + tblProperties +
- s"LOCATION '${dir.getAbsolutePath}'")
- withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString,
- SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString,
- SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) {
-
- val e = intercept[DeltaSparkException] {
- df.write.format("delta").mode("append").saveAsTable("tbl")
+ withTable("tbl") {
+ withTempDir { dir =>
+ sql("CREATE TABLE tbl (i long, v variant) USING DELTA " + tblProperties +
+ s"LOCATION '${dir.getAbsolutePath}'")
+ withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString,
+ SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString,
+ SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) {
+
+ val e = intercept[DeltaSparkException] {
+ df.write.format("delta").mode("append").saveAsTable("tbl")
+ }
+ checkError(e, "DELTA_SHREDDING_TABLE_PROPERTY_DISABLED", parameters = Map())
+ assert(e.getMessage.contains(
+ "Attempted to write shredded Variants but the table does not support shredded " +
+ "writes. Consider setting the table property enableVariantShredding to true."))
+ assert(numShreddedFiles(dir.getAbsolutePath, validation = { field: GroupType =>
+ field.getName == "v" && (field.getType("typed_value") match {
+ case t: GroupType =>
+ t.getFields.asScala.map(_.getName).toSet == Set("a", "b", "c")
+ case _ => false
+ })
+ }) == 0)
+ checkAnswer(
+ spark.read.format("delta").load(dir.getAbsolutePath).selectExpr("i", "to_json(v)"),
+ Seq()
+ )
}
- checkError(e, "DELTA_SHREDDING_TABLE_PROPERTY_DISABLED", parameters = Map())
- assert(e.getMessage.contains(
- "Attempted to write shredded Variants but the table does not support shredded " +
- "writes. Consider setting the table property enableVariantShredding to true."))
- assert(numShreddedFiles(dir.getAbsolutePath, validation = { field: GroupType =>
- field.getName == "v" && (field.getType("typed_value") match {
- case t: GroupType =>
- t.getFields.asScala.map(_.getName).toSet == Set("a", "b", "c")
- case _ => false
- })
- }) == 0)
- checkAnswer(
- spark.read.format("delta").load(dir.getAbsolutePath).selectExpr("i", "to_json(v)"),
- Seq()
- )
}
}
}
- }
}
test("Set table property to invalid value") {
diff --git a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantSuite.scala
similarity index 97%
rename from spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantSuite.scala
rename to spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantSuite.scala
index ea5cea81803..fb313b0b393 100644
--- a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantSuite.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.StructType
class DeltaVariantSuite
- extends QueryTest
+ extends QueryTest
with SharedSparkSession
with DeltaSQLCommandTest
with DeltaSQLTestUtils
@@ -44,9 +44,9 @@ class DeltaVariantSuite
import testImplicits._
private def assertVariantTypeTableFeatures(
- tableName: String,
- expectPreviewFeature: Boolean,
- expectStableFeature: Boolean): Unit = {
+ tableName: String,
+ expectPreviewFeature: Boolean,
+ expectStableFeature: Boolean): Unit = {
val features = getProtocolForTable("tbl").readerAndWriterFeatures
if (expectPreviewFeature) {
assert(features.contains(VariantTypePreviewTableFeature))
@@ -79,8 +79,8 @@ class DeltaVariantSuite
assert(
!deltaLog.unsafeVolatileSnapshot.protocol.isFeatureSupported(
VariantTypePreviewTableFeature) &&
- !deltaLog.unsafeVolatileSnapshot.protocol.isFeatureSupported(
- VariantTypeTableFeature),
+ !deltaLog.unsafeVolatileSnapshot.protocol.isFeatureSupported(
+ VariantTypeTableFeature),
s"Table tbl contains VariantTypeFeature descriptor when its not supposed to"
)
}
@@ -104,10 +104,10 @@ class DeltaVariantSuite
assert(
getProtocolForTable("tbl") ==
- VariantTypeTableFeature.minProtocolVersion
- .withFeature(VariantTypeTableFeature)
- .withFeature(InvariantsTableFeature)
- .withFeature(AppendOnlyTableFeature)
+ VariantTypeTableFeature.minProtocolVersion
+ .withFeature(VariantTypeTableFeature)
+ .withFeature(InvariantsTableFeature)
+ .withFeature(AppendOnlyTableFeature)
)
}
}
@@ -121,7 +121,7 @@ class DeltaVariantSuite
"tbl", expectPreviewFeature = false, expectStableFeature = true)
sql(
s"ALTER TABLE tbl " +
- s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')"
+ s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')"
)
assertVariantTypeTableFeatures(
"tbl", expectPreviewFeature = true, expectStableFeature = true)
@@ -144,7 +144,7 @@ class DeltaVariantSuite
sql("CREATE TABLE tbl(s STRING) USING delta")
sql(
s"ALTER TABLE tbl " +
- s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')"
+ s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')"
)
sql("ALTER TABLE tbl ADD COLUMN v VARIANT")
@@ -214,7 +214,7 @@ class DeltaVariantSuite
}
test("enabling 'FORCE_USE_PREVIEW_VARIANT_FEATURE' on table with stable feature does not " +
- "require adding preview feature") {
+ "require adding preview feature") {
withTable("tbl") {
sql("CREATE TABLE tbl(s STRING, v VARIANT) USING DELTA")
sql("INSERT INTO tbl (SELECT 'foo', parse_json(cast(id + 99 as string)) FROM range(1))")
@@ -456,7 +456,7 @@ class DeltaVariantSuite
sql("""select _change_type, v::int from table_changes('tbl', 0)
where _change_type = 'update_preimage'"""),
Seq(Row("update_preimage", 50))
- )
+ )
checkAnswer(
sql("""select _change_type, v::int from table_changes('tbl', 0)
where _change_type = 'update_postimage'"""),
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala
index 5cfb5f55735..a8a122f85a5 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.delta
// scalastyle:off import.ordering.noEmptyLine
import java.io.File
-import org.apache.spark.sql.delta.DescribeDeltaHistorySuiteShims._
import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, Metadata, Protocol, RemoveFile}
import org.apache.spark.sql.delta.coordinatedcommits.{CatalogOwnedTableUtils, CatalogOwnedTestBaseSuite}
import org.apache.spark.sql.delta.sources.DeltaSQLConf
@@ -272,7 +271,8 @@ trait DescribeDeltaHistorySuiteBase
sql(s"DESCRIBE HISTORY $viewName").collect()
}
- assert(e.getMessage.contains(FAILS_ON_VIEWS_ERROR_MSG))
+ assert(e.getMessage.contains(
+ "'DESCRIBE HISTORY' expects a table but `spark_catalog`.`default`.`delta_view` is a view."))
}
}
@@ -286,7 +286,7 @@ trait DescribeDeltaHistorySuiteBase
sql(s"DESCRIBE HISTORY $viewName").collect()
}
- assert(e.getMessage.contains(FAILS_ON_TEMP_VIEWS_ERROR_MSG))
+ assert(e.getMessage.contains("'DESCRIBE HISTORY' expects a table but `v` is a view."))
}
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala
index 9f7a214e86f..86a928d3b9c 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala
@@ -42,8 +42,7 @@ import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger}
import org.apache.spark.sql.types.{ArrayType, DataType, DateType, IntegerType, LongType, MetadataBuilder, ShortType, StringType, StructField, StructType, TimestampType}
trait GeneratedColumnSuiteBase
- extends GeneratedColumnTest
- with DeltaExcludedBySparkVersionTestMixinShims {
+ extends GeneratedColumnTest {
import GeneratedColumn._
import testImplicits._
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala
index bb8b21dfc9c..34333a78eca 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala
@@ -20,7 +20,6 @@ import scala.annotation.tailrec
import scala.collection.JavaConverters._
import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN
-import org.apache.spark.sql.delta.ImplicitDMLCastingSuiteShims._
import org.apache.spark.sql.delta.sources.DeltaSQLConf
import org.apache.spark.sql.delta.test.{DeltaExceptionTestUtils, DeltaSQLCommandTest}
@@ -149,8 +148,11 @@ abstract class ImplicitDMLCastingSuite extends QueryTest
assert(failureCause.toString.contains(testConfig.exceptionAnsiCast))
val sparkThrowable = failureCause.asInstanceOf[SparkThrowable]
- assert(Seq("CAST_OVERFLOW", NUMERIC_VALUE_OUT_OF_RANGE_ERROR_MSG, "CAST_INVALID_INPUT")
- .contains(sparkThrowable.getErrorClass))
+ assert(Seq(
+ "CAST_OVERFLOW",
+ "NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION",
+ "CAST_INVALID_INPUT"
+ ).contains(sparkThrowable.getErrorClass))
case Some(failureCause) if !sqlConfig.followAnsiEnabled =>
assert(sqlConfig.storeAssignmentPolicy === SQLConf.StoreAssignmentPolicy.ANSI)
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala
index d9587b0c27d..b3166142a50 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala
@@ -24,7 +24,6 @@ import scala.util.control.NonFatal
import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions, UsageRecord}
import org.apache.spark.sql.delta.DeltaTestUtils._
import org.apache.spark.sql.delta.commands.merge.{MergeIntoMaterializeSourceError, MergeIntoMaterializeSourceErrorType, MergeIntoMaterializeSourceReason, MergeStats}
-import org.apache.spark.sql.delta.commands.merge.MergeIntoMaterializeSourceShims
import org.apache.spark.sql.delta.sources.DeltaSQLConf
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest
import org.apache.spark.sql.delta.test.DeltaSQLTestUtils
@@ -49,8 +48,7 @@ trait MergeIntoMaterializeSourceMixin
with SharedSparkSession
with DeltaSQLCommandTest
with DeltaSQLTestUtils
- with DeltaTestUtilsBase
- {
+ with DeltaTestUtilsBase {
override def beforeAll(): Unit = {
super.beforeAll()
@@ -175,10 +173,10 @@ trait MergeIntoMaterializeSourceErrorTests extends MergeIntoMaterializeSourceMix
checkpointedDf.collect()
}
assert(ex.isInstanceOf[SparkException], ex)
+ val sparkEx = ex.asInstanceOf[SparkException]
assert(
- MergeIntoMaterializeSourceShims.mergeMaterializedSourceRddBlockLostError(
- ex.asInstanceOf[SparkException],
- rdd.id))
+ sparkEx.getErrorClass == "CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND" &&
+ sparkEx.getMessageParameters.get("rddBlockId").contains(s"rdd_${rdd.id}"))
}
for {
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala
index 225df6f9f0f..36841308ec6 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala
@@ -16,7 +16,6 @@
package org.apache.spark.sql.delta
-import org.apache.spark.sql.delta.MergeIntoMetricsShims._
import org.apache.spark.sql.delta.sources.DeltaSQLConf
import org.apache.spark.sql.{DataFrame, QueryTest, Row}
@@ -1041,8 +1040,7 @@ trait MergeIntoMetricsBase
((false, true), ("numTargetFilesAdded", 1)),
((false, false), (
"numTargetFilesAdded",
- // Depending on the Spark version, for non-partitioned tables we may add 1 or 2 files.
- DELETE_WITH_DUPLICATE_NUM_TARGET_FILES_ADDED_NON_PARTITIONED_NO_CDF)
+ 1)
)
)
)
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala
index 2935e2c87f7..9c0ebd40735 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala
@@ -46,8 +46,7 @@ trait MergeIntoSuiteBaseMixin
with DeltaSQLTestUtils
with ScanReportHelper
with MergeIntoTestUtils
- with MergeIntoSchemaEvolutionMixin
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with MergeIntoSchemaEvolutionMixin {
import testImplicits._
// Maps expected error classes to actual error classes. Used to handle error classes that are
@@ -2687,7 +2686,7 @@ trait MergeIntoSuiteBaseMiscTests extends MergeIntoSuiteBaseMixin {
}
}
- testSparkMasterOnly("Variant type") {
+ test("Variant type") {
withTable("source") {
// Insert ("0", 0), ("1", 1)
val dstDf = sql(
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala
index 6aa4111bb4a..9e91120a0f3 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala
@@ -24,7 +24,6 @@ import scala.collection.mutable
import com.databricks.spark.util.{Log4jUsageLogger, UsageRecord}
import org.apache.spark.sql.delta.DeltaConfigs.COORDINATED_COMMITS_COORDINATOR_NAME
import org.apache.spark.sql.delta.DeltaTestUtils.{verifyBackfilled, verifyUnbackfilled, BOOLEAN_DOMAIN}
-import org.apache.spark.sql.delta.SnapshotManagementSuiteShims._
import org.apache.spark.sql.delta.coordinatedcommits.{CommitCoordinatorBuilder, CommitCoordinatorProvider, CoordinatedCommitsBaseSuite, CoordinatedCommitsUsageLogs, InMemoryCommitCoordinator}
import org.apache.spark.sql.delta.sources.DeltaSQLConf
import org.apache.spark.sql.delta.storage.LocalLogStore
@@ -204,7 +203,7 @@ class SnapshotManagementSuite extends QueryTest with DeltaSQLTestUtils with Shar
// Guava cache wraps the root cause
assert(e.isInstanceOf[SparkException] &&
e.getMessage.contains("0001.checkpoint") &&
- e.getMessage.contains(SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG))
+ e.getMessage.contains("Encountered error while reading file"))
}
}
}
@@ -261,7 +260,7 @@ class SnapshotManagementSuite extends QueryTest with DeltaSQLTestUtils with Shar
val e = intercept[SparkException] { staleLog.update() }
val version = if (testEmptyCheckpoint) 0 else 1
assert(e.getMessage.contains(f"$version%020d.checkpoint") &&
- e.getMessage.contains(SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG))
+ e.getMessage.contains("Encountered error while reading file"))
}
}
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala
index a262da6da45..549c7758b44 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala
@@ -39,8 +39,7 @@ trait UpdateBaseMixin
with SharedSparkSession
with DeltaDMLTestUtils
with DeltaSQLTestUtils
- with DeltaTestUtilsForTempViews
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaTestUtilsForTempViews {
import testImplicits._
protected def executeUpdate(target: String, set: Seq[String], where: String): Unit = {
@@ -979,7 +978,7 @@ trait UpdateBaseMiscTests extends UpdateBaseMixin {
Some(".*ore than one row returned by a subquery used as an expression(?s).*")
)
- testSparkMasterOnly("Variant type") {
+ test("Variant type") {
val df = sql(
"""SELECT parse_json(cast(id as string)) v, id i
FROM range(2)""")
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala
index df66c12ebed..f570e299681 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.delta.deletionvectors
import java.io.{File, FileNotFoundException}
import java.net.URISyntaxException
-import org.apache.spark.sql.delta.{DeletionVectorsTableFeature, DeletionVectorsTestUtils, DeltaChecksumException, DeltaConfigs, DeltaExcludedBySparkVersionTestMixinShims, DeltaLog, DeltaMetricsUtils, DeltaTestUtilsForTempViews}
+import org.apache.spark.sql.delta.{DeletionVectorsTableFeature, DeletionVectorsTestUtils, DeltaChecksumException, DeltaConfigs, DeltaLog, DeltaMetricsUtils, DeltaTestUtilsForTempViews}
import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile
import org.apache.spark.sql.delta.actions.{AddFile, DeletionVectorDescriptor, RemoveFile}
import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor.EMPTY
@@ -51,8 +51,7 @@ class DeletionVectorsSuite extends QueryTest
with DeltaSQLCommandTest
with DeletionVectorsTestUtils
with DeltaTestUtilsForTempViews
- with DeltaExceptionTestUtils
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaExceptionTestUtils {
import testImplicits._
override def beforeAll(): Unit = {
@@ -300,7 +299,7 @@ class DeletionVectorsSuite extends QueryTest
}
}
- testSparkMasterOnly(s"variant types DELETE with DVs with column mapping mode=$mode") {
+ test(s"variant types DELETE with DVs with column mapping mode=$mode") {
withSQLConf("spark.databricks.delta.properties.defaults.columnMapping.mode" -> mode) {
withTempDir { dirName =>
val path = dirName.getAbsolutePath
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuite.scala
similarity index 58%
rename from spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuiteBase.scala
rename to spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuite.scala
index 6760eb3ed85..103390d2ae4 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuiteBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuite.scala
@@ -43,22 +43,40 @@ import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.util.regex.Pattern
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.fasterxml.jackson.module.scala.DefaultScalaModule
import org.apache.logging.log4j.Level
import org.apache.spark.SparkFunSuite
-import org.apache.spark.internal.{LogEntry, LoggingShims, LogKeyShims, MDC}
+import org.apache.spark.internal.{LogEntry, Logging, LogKey, MDC}
-trait DeltaStructuredLoggingSuiteBase
- extends SparkFunSuite
- with LoggingShims {
- def className: String
- def logFilePath: String
+class DeltaStructuredLoggingSuite extends SparkFunSuite with Logging {
+ private def className: String = classOf[DeltaStructuredLoggingSuite].getSimpleName
+ private def logFilePath: String = "target/structured.log"
private lazy val logFile: File = {
val pwd = new File(".").getCanonicalPath
new File(pwd + "/" + logFilePath)
}
+ override def beforeAll(): Unit = {
+ super.beforeAll()
+ Logging.enableStructuredLogging()
+ }
+
+ override def afterAll(): Unit = {
+ Logging.disableStructuredLogging()
+ super.afterAll()
+ }
+
+ private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule)
+ private def compactAndToRegexPattern(json: String): String = {
+ jsonMapper.readTree(json).toString.
+ replace("", """[^"]+""").
+ replace("""""""", """.*""").
+ replace("{", """\{""") + "\n"
+ }
+
// Return the newly added log contents in the log file after executing the function `f`
private def captureLogOutput(f: () => Unit): String = {
val content = if (logFile.exists()) {
@@ -72,38 +90,137 @@ trait DeltaStructuredLoggingSuiteBase
newContent.substring(content.length)
}
- def basicMsg: String = "This is a log message"
+ private def basicMsg: String = "This is a log message"
- def msgWithMDC: LogEntry = log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, "1")}."
+ private def msgWithMDC: LogEntry = log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, "1")}."
- def msgWithMDCValueIsNull: LogEntry = log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, null)}."
+ private def msgWithMDCValueIsNull: LogEntry =
+ log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, null)}."
- def msgWithMDCAndException: LogEntry =
+ private def msgWithMDCAndException: LogEntry =
log"Error in executor ${MDC(DeltaLogKeys.EXECUTOR_ID, "1")}."
- def msgWithConcat: LogEntry = log"Min Size: ${MDC(DeltaLogKeys.MIN_SIZE, "2")}, " +
+ private def msgWithConcat: LogEntry = log"Min Size: ${MDC(DeltaLogKeys.MIN_SIZE, "2")}, " +
log"Max Size: ${MDC(DeltaLogKeys.MAX_SIZE, "4")}. " +
log"Please double check."
- // test for basic message (without any mdc)
- def expectedPatternForBasicMsg(level: Level): String
+ private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}"
- // test for basic message and exception
- def expectedPatternForBasicMsgWithException(level: Level): String
+ def expectedPatternForBasicMsg(level: Level): String = {
+ compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "This is a log message",
+ "logger": "$className"
+ }""")
+ }
- // test for message (with mdc)
- def expectedPatternForMsgWithMDC(level: Level): String
+ def expectedPatternForBasicMsgWithException(level: Level): String = {
+ compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "This is a log message",
+ "exception": {
+ "class": "java.lang.RuntimeException",
+ "msg": "OOM",
+ "stacktrace": ""
+ },
+ "logger": "$className"
+ }""")
+ }
- // test for message (with mdc - the value is null)
- def expectedPatternForMsgWithMDCValueIsNull(level: Level): String
+ def expectedPatternForMsgWithMDC(level: Level): String = {
+ compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "Lost executor 1.",
+ "context": {
+ "executor_id": "1"
+ },
+ "logger": "$className"
+ }""")
+ }
+
+ def expectedPatternForMsgWithMDCValueIsNull(level: Level): String = {
+ compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "Lost executor null.",
+ "context": {
+ "executor_id": null
+ },
+ "logger": "$className"
+ }""")
+ }
+
+ def expectedPatternForMsgWithMDCAndException(level: Level): String = {
+ compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "Error in executor 1.",
+ "context": {
+ "executor_id": "1"
+ },
+ "exception": {
+ "class": "java.lang.RuntimeException",
+ "msg": "OOM",
+ "stacktrace": ""
+ },
+ "logger": "$className"
+ }""")
+ }
- // test for message and exception
- def expectedPatternForMsgWithMDCAndException(level: Level): String
+ def expectedPatternForCustomLogKey(level: Level): String = {
+ compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "Custom log message.",
+ "logger": "$className"
+ }"""
+ )
+ }
- // test for custom LogKey
- def expectedPatternForCustomLogKey(level: Level): String
+ def verifyMsgWithConcat(level: Level, logOutput: String): Unit = {
+ val pattern1 = compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "Min Size: 2, Max Size: 4. Please double check.",
+ "context": {
+ "min_size": "2",
+ "max_size": "4"
+ },
+ "logger": "$className"
+ }""")
- def verifyMsgWithConcat(level: Level, logOutput: String): Unit
+ val pattern2 = compactAndToRegexPattern(
+ s"""
+ {
+ "ts": "",
+ "level": "$level",
+ "msg": "Min Size: 2, Max Size: 4. Please double check.",
+ "context": {
+ "max_size": "4",
+ "min_size": "2"
+ },
+ "logger": "$className"
+ }""")
+ assert(Pattern.compile(pattern1).matcher(logOutput).matches() ||
+ Pattern.compile(pattern2).matcher(logOutput).matches())
+ }
test("Basic logging") {
Seq(
@@ -167,7 +284,6 @@ trait DeltaStructuredLoggingSuiteBase
}
}
- private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}"
test("Logging with custom LogKey") {
Seq(
(Level.ERROR, () => logError(customLog)),
@@ -192,6 +308,6 @@ trait DeltaStructuredLoggingSuiteBase
}
object CustomLogKeys {
- // Custom `LogKey` must be `extends LogKeyShims`
- case object CUSTOM_LOG_KEY extends LogKeyShims
+ // Custom `LogKey` must extend LogKey
+ case object CUSTOM_LOG_KEY extends LogKey
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala
index 47c99cb0382..98bd8fa65d6 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala
@@ -22,7 +22,7 @@ import java.util.regex.Pattern
import scala.annotation.tailrec
-import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaExcludedBySparkVersionTestMixinShims, DeltaLog, DeltaTestUtils, TypeWideningMode}
+import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaLog, DeltaTestUtils, TypeWideningMode}
import org.apache.spark.sql.delta.RowCommitVersion
import org.apache.spark.sql.delta.RowId
import org.apache.spark.sql.delta.commands.cdc.CDCReader
@@ -49,8 +49,7 @@ class SchemaUtilsSuite extends QueryTest
with SharedSparkSession
with GivenWhenThen
with DeltaSQLTestUtils
- with DeltaSQLCommandTest
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaSQLCommandTest {
import SchemaUtils._
import TypeWideningMode._
import testImplicits._
@@ -2638,7 +2637,7 @@ class SchemaUtilsSuite extends QueryTest
MapType(IntegerType, IntegerType) -> MapType(LongType, LongType),
ArrayType(IntegerType) -> ArrayType(LongType)
))
- testSparkMasterOnly(s"typeWideningMode ${fromType.sql} -> ${toType.sql}") {
+ test(s"typeWideningMode ${fromType.sql} -> ${toType.sql}") {
val narrow = new StructType().add("a", fromType)
val wide = new StructType().add("a", toType)
@@ -2683,7 +2682,7 @@ class SchemaUtilsSuite extends QueryTest
ShortType -> DoubleType,
IntegerType -> DecimalType(10, 0)
))
- testSparkMasterOnly(
+ test(
s"typeWideningMode - blocked type evolution ${fromType.sql} -> ${toType.sql}") {
val narrow = new StructType().add("a", fromType)
val wide = new StructType().add("a", toType)
@@ -2716,7 +2715,7 @@ class SchemaUtilsSuite extends QueryTest
DateType -> TimestampNTZType,
DecimalType(10, 2) -> DecimalType(12, 4)
))
- testSparkMasterOnly(
+ test(
s"typeWideningMode - Uniform Iceberg compatibility ${fromType.sql} -> ${toType.sql}") {
val narrow = new StructType().add("a", fromType)
val wide = new StructType().add("a", toType)
@@ -2771,7 +2770,7 @@ class SchemaUtilsSuite extends QueryTest
}
}
- testSparkMasterOnly(
+ test(
s"typeWideningMode - widen to common wider decimal") {
val left = new StructType().add("a", DecimalType(10, 2))
val right = new StructType().add("a", DecimalType(5, 4))
@@ -2806,7 +2805,7 @@ class SchemaUtilsSuite extends QueryTest
}
- testSparkMasterOnly(
+ test(
s"typeWideningMode - widen to common wider decimal exceeds max decimal precision") {
// We'd need a DecimalType(40, 19) to fit both types, which exceeds max decimal precision of 38.
val left = new StructType().add("a", DecimalType(20, 19))
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala
index 3aa8f444a6d..5feeafb1d80 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala
@@ -20,7 +20,7 @@ import java.io.File
import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions}
import org.apache.spark.sql.delta.skipping.ClusteredTableTestUtils
-import org.apache.spark.sql.delta.{CatalogOwnedTableFeature, DeltaAnalysisException, DeltaColumnMappingEnableIdMode, DeltaColumnMappingEnableNameMode, DeltaConfigs, DeltaExcludedBySparkVersionTestMixinShims, DeltaLog, DeltaUnsupportedOperationException, NoMapping}
+import org.apache.spark.sql.delta.{CatalogOwnedTableFeature, DeltaAnalysisException, DeltaColumnMappingEnableIdMode, DeltaColumnMappingEnableNameMode, DeltaConfigs, DeltaLog, DeltaUnsupportedOperationException, NoMapping}
import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils
import org.apache.spark.sql.delta.clustering.ClusteringMetadataDomain
import org.apache.spark.sql.delta.coordinatedcommits.CatalogOwnedTestBaseSuite
@@ -649,8 +649,7 @@ trait ClusteredTableCreateOrReplaceDDLSuite
trait ClusteredTableDDLSuiteBase
extends ClusteredTableCreateOrReplaceDDLSuite
- with DeltaSQLCommandTest
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with DeltaSQLCommandTest {
import testImplicits._
@@ -1002,7 +1001,7 @@ trait ClusteredTableDDLSuiteBase
}
}
- testSparkMasterOnly("Variant is not supported") {
+ test("Variant is not supported") {
val e = intercept[DeltaAnalysisException] {
createOrReplaceClusteredTable("CREATE", testTable, "id long, v variant", "v")
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala b/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala
index 16bb8da29ec..38686393762 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala
@@ -44,7 +44,7 @@ import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
-trait DataSkippingDeltaTestsBase extends DeltaExcludedBySparkVersionTestMixinShims
+trait DataSkippingDeltaTestsBase extends QueryTest
with SharedSparkSession
with DeltaSQLCommandTest
with DataSkippingDeltaTestsUtils
@@ -1812,7 +1812,7 @@ trait DataSkippingDeltaTestsBase extends DeltaExcludedBySparkVersionTestMixinShi
}
}
- testSparkMasterOnly("data skipping by stats - variant type") {
+ test("data skipping by stats - variant type") {
withTable("tbl") {
sql("""CREATE TABLE tbl(v VARIANT,
v_struct STRUCT,
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala
index ed74b41de25..79d6b5f510e 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala
@@ -16,13 +16,13 @@
package org.apache.spark.sql.delta.test
-import org.apache.spark.sql.delta.Relocated._
import org.apache.spark.sql.delta.catalog.DeltaCatalog
import org.apache.spark.sql.delta.test.DeltaSQLTestUtils
import io.delta.sql.DeltaSparkSessionExtension
import org.scalatest.BeforeAndAfterAll
import org.apache.spark.{SparkContext, SparkFunSuite}
+import org.apache.spark.sql.classic.SparkSession
import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext}
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
@@ -46,7 +46,7 @@ trait DeltaHiveTest extends SparkFunSuite with BeforeAndAfterAll { self: DeltaSQ
_sc = new SparkContext("local", this.getClass.getName, conf)
_hiveContext = new TestHiveContext(_sc)
_session = _hiveContext.sparkSession
- setActiveSession(_session)
+ SparkSession.setActiveSession(_session)
super.beforeAll()
}
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala
index f75996762d6..93c7da959e6 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala
@@ -16,7 +16,6 @@
package org.apache.spark.sql.delta.test
-import org.apache.spark.sql.delta.DeltaExcludedBySparkVersionTestMixinShims
import org.apache.spark.sql.delta.DeltaLog
import org.apache.spark.sql.delta.test.DeltaSQLTestUtils
import org.apache.spark.sql.delta.test.DeltaTestImplicits._
@@ -28,7 +27,7 @@ import org.apache.spark.sql.{Column, DataFrame}
/**
* Provides utilities for testing StatisticsCollection.
*/
-trait TestsStatistics extends DeltaExcludedBySparkVersionTestMixinShims { self: DeltaSQLTestUtils =>
+trait TestsStatistics { self: DeltaSQLTestUtils =>
/** A function to get the reconciled statistics DataFrame from the DeltaLog */
protected var getStatsDf: (DeltaLog, Seq[Column]) => DataFrame = _
@@ -60,7 +59,7 @@ trait TestsStatistics extends DeltaExcludedBySparkVersionTestMixinShims { self:
testTags: org.scalatest.Tag*)(testFun: => Any): Unit = {
import testImplicits._
- testSparkMasterOnly(testName, testTags: _*) {
+ test(testName, testTags: _*) {
getStatsDf = (deltaLog, columns) => {
val snapshot = deltaLog.snapshot
snapshot.allFiles
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala
index 40161c7ab70..dbde24b622d 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala
@@ -32,13 +32,11 @@ import org.apache.spark.sql.types._
* Suite providing core coverage for type widening using ALTER TABLE CHANGE COLUMN TYPE.
*/
class TypeWideningAlterTableSuite
- extends QueryTest
+ extends TypeWideningAlterTableTests
with ParquetTest
with TypeWideningTestMixin
- with TypeWideningAlterTableTests
-trait TypeWideningAlterTableTests
- extends DeltaExcludedBySparkVersionTestMixinShims
+trait TypeWideningAlterTableTests extends QueryTest
with QueryErrorsBase
with TypeWideningTestCases {
self: QueryTest with ParquetTest with TypeWideningTestMixin =>
@@ -155,7 +153,7 @@ trait TypeWideningAlterTableTests
}
}
- testSparkMasterOnly(
+ test(
"widening Date -> TimestampNTZ rejected when TimestampNTZ feature isn't supported") {
withTimestampNTZDisabled {
sql(s"CREATE TABLE delta.`$tempPath` (a date) USING DELTA")
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala
index 11ac059b921..3ea819daa4f 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala
@@ -53,8 +53,7 @@ class TypeWideningInsertSchemaEvolutionBasicSuite
*/
trait TypeWideningInsertSchemaEvolutionBasicTests
extends DeltaInsertIntoTest
- with TypeWideningTestCases
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with TypeWideningTestCases {
self: QueryTest with TypeWideningTestMixin with DeltaDMLTestUtils =>
import testImplicits._
@@ -106,7 +105,7 @@ trait TypeWideningInsertSchemaEvolutionBasicTests
}
}
- testSparkMasterOnly(s"INSERT - logs for missed opportunity for conversion") {
+ test(s"INSERT - logs for missed opportunity for conversion") {
val testCase = restrictedAutomaticWideningTestCases.head
append(testCase.initialValuesDF)
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala
index 0b47eb8e4ff..1ded769d1e4 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala
@@ -30,10 +30,9 @@ import org.apache.spark.sql.types._
* INTO when the type widening table feature is supported.
*/
class TypeWideningMergeIntoSchemaEvolutionSuite
- extends QueryTest
+ extends TypeWideningMergeIntoSchemaEvolutionTests
with DeltaDMLTestUtils
- with TypeWideningTestMixin
- with TypeWideningMergeIntoSchemaEvolutionTests {
+ with TypeWideningTestMixin {
protected override def sparkConf: SparkConf = {
super.sparkConf
@@ -44,8 +43,7 @@ class TypeWideningMergeIntoSchemaEvolutionSuite
/**
* Tests covering type widening during schema evolution in MERGE INTO.
*/
-trait TypeWideningMergeIntoSchemaEvolutionTests
- extends DeltaExcludedBySparkVersionTestMixinShims
+trait TypeWideningMergeIntoSchemaEvolutionTests extends QueryTest
with MergeIntoSQLTestUtils
with MergeIntoSchemaEvolutionMixin
with TypeWideningTestCases {
@@ -53,7 +51,7 @@ trait TypeWideningMergeIntoSchemaEvolutionTests
import testImplicits._
- testSparkMasterOnly(s"MERGE - always automatic type widening TINYINT -> DOUBLE") {
+ test(s"MERGE - always automatic type widening TINYINT -> DOUBLE") {
withTable("source") {
sql(s"CREATE TABLE delta.`$tempPath` (a short) USING DELTA")
sql("CREATE TABLE source (a double) USING DELTA")
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala
index 602c75f148a..11ffa899531 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala
@@ -30,8 +30,7 @@ import org.apache.spark.sql.types._
*/
class TypeWideningStreamingSinkSuite
extends DeltaSinkImplicitCastSuiteBase
- with TypeWideningTestMixin
- with DeltaExcludedBySparkVersionTestMixinShims {
+ with TypeWideningTestMixin {
import testImplicits._
@@ -46,7 +45,7 @@ class TypeWideningStreamingSinkSuite
spark.conf.set(SQLConf.ANSI_ENABLED.key, "true")
}
- testSparkMasterOnly("type is widened if automatic widening set to always") {
+ test("type is widened if automatic widening set to always") {
withDeltaStream[Int] { stream =>
stream.write(17)("CAST(value AS SHORT)")
assert(stream.currentSchema("value").dataType === ShortType)
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala
index a3bed20d755..c8cc173b88e 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala
@@ -35,14 +35,11 @@ import org.apache.spark.sql.types._
/**
* Test suite covering feature enablement and configuration tests.
*/
-class TypeWideningTableFeatureEnablementSuite
- extends QueryTest
+class TypeWideningTableFeatureEnablementSuite extends TypeWideningTableFeatureEnablementTests
with TypeWideningTestMixin
with TypeWideningDropFeatureTestMixin
- with TypeWideningTableFeatureEnablementTests
-trait TypeWideningTableFeatureEnablementTests
- extends DeltaExcludedBySparkVersionTestMixinShims
+trait TypeWideningTableFeatureEnablementTests extends QueryTest
with TypeWideningTestCases {
self: QueryTest
with TypeWideningTestMixin
@@ -159,7 +156,6 @@ class TypeWideningTableFeatureDropSuite
trait TypeWideningTableFeatureDropTests
extends RowTrackingTestUtils
- with DeltaExcludedBySparkVersionTestMixinShims
with TypeWideningTestCases {
self: QueryTest
with TypeWideningTestMixin
@@ -380,13 +376,11 @@ trait TypeWideningTableFeatureDropTests
* Additional tests covering e.g. unsupported type change check, CLONE, RESTORE.
*/
class TypeWideningTableFeatureAdvancedSuite
- extends QueryTest
+ extends TypeWideningTableFeatureAdvancedTests
with TypeWideningTestMixin
with TypeWideningDropFeatureTestMixin
- with TypeWideningTableFeatureAdvancedTests
-trait TypeWideningTableFeatureAdvancedTests
- extends DeltaExcludedBySparkVersionTestMixinShims
+trait TypeWideningTableFeatureAdvancedTests extends QueryTest
with TypeWideningTestCases {
self: QueryTest
with TypeWideningTestMixin
@@ -544,42 +538,6 @@ trait TypeWideningTableFeatureAdvancedTests
readDeltaTable(tempPath).collect()
}
- testSparkLatestOnly(
- "helpful error when reading type changes not supported yet during preview") {
- sql(s"CREATE TABLE delta.`$tempDir` (a int) USING DELTA")
- val metadata = new MetadataBuilder()
- .putMetadataArray("delta.typeChanges", Array(
- new MetadataBuilder()
- .putString("toType", "long")
- .putString("fromType", "int")
- .build()
- )).build()
-
- // Delta 3.2/3.3 doesn't support changing type from int->long, we manually commit that type
- // change to simulate what Delta 4.0 could do.
- deltaLog.withNewTransaction { txn =>
- txn.commit(
- Seq(txn.snapshot.metadata.copy(
- schemaString = new StructType()
- .add("a", LongType, nullable = true, metadata).json
- )),
- ManualUpdate)
- }
-
- checkError(
- exception = intercept[DeltaUnsupportedOperationException] {
- readDeltaTable(tempPath).collect()
- },
- "DELTA_UNSUPPORTED_TYPE_CHANGE_IN_PREVIEW",
- parameters = Map(
- "fieldPath" -> "a",
- "fromType" -> "INT",
- "toType" -> "BIGINT",
- "typeWideningFeatureName" -> "typeWidening"
- )
- )
- }
-
test("type widening rewrite metrics") {
sql(s"CREATE TABLE delta.`$tempDir` (a byte) USING DELTA")
addSingleFile(Seq(1, 2, 3), ByteType)
@@ -691,13 +649,11 @@ trait TypeWideningTableFeatureAdvancedTests
* Test suite covering preview vs stable feature interactions.
*/
class TypeWideningTableFeaturePreviewSuite
- extends QueryTest
+ extends TypeWideningTableFeatureVersionTests
with TypeWideningTestMixin
with TypeWideningDropFeatureTestMixin
- with TypeWideningTableFeatureVersionTests
-trait TypeWideningTableFeatureVersionTests
- extends DeltaExcludedBySparkVersionTestMixinShims
+trait TypeWideningTableFeatureVersionTests extends QueryTest
with TypeWideningTestCases {
self: QueryTest
with TypeWideningTestMixin
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala
index d9829f1150b..d53b0fa3fa5 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala
@@ -17,13 +17,13 @@
package org.apache.spark.sql.delta.typewidening
import org.apache.spark.sql.{DataFrame, Encoder, Row}
-import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
+import org.apache.spark.sql.types._
/**
* Trait collecting supported and unsupported type change test cases.
*/
-trait TypeWideningTestCases extends TypeWideningTestCasesShims { self: SharedSparkSession =>
+trait TypeWideningTestCases extends SQLTestUtils { self: SharedSparkSession =>
import testImplicits._
/**
@@ -86,4 +86,115 @@ trait TypeWideningTestCases extends TypeWideningTestCasesShims { self: SharedSpa
override def expectedResult: DataFrame =
initialValuesDF.select($"value".cast(toType))
}
+
+ // Type changes that are supported by all Parquet readers. Byte, Short, Int are all stored as
+ // INT32 in parquet so these changes are guaranteed to be supported.
+ protected val supportedTestCases: Seq[TypeEvolutionTestCase] = Seq(
+ SupportedTypeEvolutionTestCase(ByteType, ShortType,
+ Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
+ Seq(4, -4, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short])),
+ SupportedTypeEvolutionTestCase(ByteType, IntegerType,
+ Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
+ Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])),
+ SupportedTypeEvolutionTestCase(ShortType, IntegerType,
+ Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
+ Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])),
+ SupportedTypeEvolutionTestCase(ShortType, LongType,
+ Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
+ Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])),
+ SupportedTypeEvolutionTestCase(IntegerType, LongType,
+ Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
+ Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])),
+ SupportedTypeEvolutionTestCase(FloatType, DoubleType,
+ Seq(1234.56789f, -0f, 0f, Float.NaN, Float.NegativeInfinity, Float.PositiveInfinity,
+ Float.MinPositiveValue, Float.MinValue, Float.MaxValue, null.asInstanceOf[Float]),
+ Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity,
+ Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue,
+ null.asInstanceOf[Double])),
+ SupportedTypeEvolutionTestCase(DateType, TimestampNTZType,
+ Seq("2020-01-01", "2024-02-29", "1312-02-27"),
+ Seq("2020-03-17 15:23:15.123456", "2058-12-31 23:59:59.999", "0001-01-01 00:00:00")),
+ // Larger precision.
+ SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
+ DecimalType(Decimal.MAX_LONG_DIGITS, 2),
+ Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"),
+ null.asInstanceOf[BigDecimal])),
+ // Larger precision and scale, same physical type.
+ SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS - 1, 2),
+ DecimalType(Decimal.MAX_INT_DIGITS, 3),
+ Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 3) + ".99"),
+ null.asInstanceOf[BigDecimal])),
+ // Larger precision and scale, different physical types.
+ SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
+ DecimalType(Decimal.MAX_LONG_DIGITS + 1, 3),
+ Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"),
+ null.asInstanceOf[BigDecimal]))
+ )
+
+ // Type changes that are only eligible for automatic widening when
+ // spark.databricks.delta.typeWidening.allowAutomaticWidening = ALWAYS.
+ protected val restrictedAutomaticWideningTestCases: Seq[TypeEvolutionTestCase] = Seq(
+ SupportedTypeEvolutionTestCase(IntegerType, DoubleType,
+ Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
+ Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity,
+ Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue,
+ null.asInstanceOf[Double])),
+ SupportedTypeEvolutionTestCase(ByteType, DecimalType(10, 0),
+ Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]),
+ Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
+ SupportedTypeEvolutionTestCase(ShortType, DecimalType(10, 0),
+ Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]),
+ Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
+ SupportedTypeEvolutionTestCase(IntegerType, DecimalType(10, 0),
+ Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]),
+ Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])),
+ SupportedTypeEvolutionTestCase(LongType, DecimalType(20, 0),
+ Seq(1L, -1L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Int]),
+ Seq(BigDecimal("1.23"), BigDecimal("9" * 20), null.asInstanceOf[BigDecimal]))
+ )
+
+ // Test type changes that aren't supported.
+ protected val unsupportedTestCases: Seq[TypeEvolutionTestCase] = Seq(
+ UnsupportedTypeEvolutionTestCase(IntegerType, ByteType,
+ Seq(1, 2, Int.MinValue)),
+ UnsupportedTypeEvolutionTestCase(LongType, IntegerType,
+ Seq(4, 5, Long.MaxValue)),
+ UnsupportedTypeEvolutionTestCase(DoubleType, FloatType,
+ Seq(987654321.987654321d, Double.NaN, Double.NegativeInfinity,
+ Double.PositiveInfinity, Double.MinPositiveValue,
+ Double.MinValue, Double.MaxValue)),
+ UnsupportedTypeEvolutionTestCase(ByteType, DecimalType(2, 0),
+ Seq(1, -1, Byte.MinValue)),
+ UnsupportedTypeEvolutionTestCase(ShortType, DecimalType(4, 0),
+ Seq(1, -1, Short.MinValue)),
+ UnsupportedTypeEvolutionTestCase(IntegerType, DecimalType(9, 0),
+ Seq(1, -1, Int.MinValue)),
+ UnsupportedTypeEvolutionTestCase(LongType, DecimalType(19, 0),
+ Seq(1, -1, Long.MinValue)),
+ UnsupportedTypeEvolutionTestCase(TimestampNTZType, DateType,
+ Seq("2020-03-17 15:23:15", "2023-12-31 23:59:59", "0001-01-01 00:00:00")),
+ // Reduce scale
+ UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
+ DecimalType(Decimal.MAX_INT_DIGITS, 3),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
+ // Reduce precision
+ UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
+ DecimalType(Decimal.MAX_INT_DIGITS - 1, 2),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
+ // Reduce precision & scale
+ UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2),
+ DecimalType(Decimal.MAX_INT_DIGITS - 1, 1),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))),
+ // Increase scale more than precision
+ UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2),
+ DecimalType(Decimal.MAX_INT_DIGITS + 1, 4),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))),
+ // Smaller scale and larger precision.
+ UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2),
+ DecimalType(Decimal.MAX_INT_DIGITS + 3, 1),
+ Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99")))
+ )
}
diff --git a/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java b/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java
index 198d18ab9b7..b0b9f642f44 100644
--- a/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java
+++ b/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java
@@ -16,7 +16,7 @@
package io.delta.storage.internal;
-import com.amazonaws.services.s3.model.ListObjectsV2Request;
+import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.s3a.*;
@@ -63,11 +63,12 @@ private static RemoteIterator<S3AFileStatus> s3ListFrom(
// List files lexicographically after resolvedPath inclusive within the same directory
return listing.createFileStatusListingIterator(resolvedPath,
S3ListRequest.v2(
- new ListObjectsV2Request()
- .withBucketName(s3afs.getBucket())
- .withMaxKeys(maxKeys)
- .withPrefix(s3afs.pathToKey(parentPath))
- .withStartAfter(keyBefore(s3afs.pathToKey(resolvedPath)))
+ ListObjectsV2Request.builder()
+ .bucket(s3afs.getBucket())
+ .maxKeys(maxKeys)
+ .prefix(s3afs.pathToKey(parentPath))
+ .startAfter(keyBefore(s3afs.pathToKey(resolvedPath)))
+ .build()
), ACCEPT_ALL,
new Listing.AcceptAllButSelfAndS3nDirs(parentPath),
s3afs.getActiveAuditSpan());
@@ -94,7 +95,7 @@ public static FileStatus[] s3ListFromArray(
"The Hadoop file system used for the S3LogStore must be castable to " +
"org.apache.hadoop.fs.s3a.S3AFileSystem.", e);
}
- return iteratorToStatuses(S3LogStoreUtil.s3ListFrom(s3afs, resolvedPath, parentPath), new HashSet<>());
+ return iteratorToStatuses(S3LogStoreUtil.s3ListFrom(s3afs, resolvedPath, parentPath));
}
/**
diff --git a/version.sbt b/version.sbt
index 074f3488404..895ae6915f2 100644
--- a/version.sbt
+++ b/version.sbt
@@ -1 +1 @@
-ThisBuild / version := "3.4.0-SNAPSHOT"
+ThisBuild / version := "4.1.0-SNAPSHOT"