diff --git a/.github/workflows/iceberg_test.yaml b/.github/workflows/iceberg_test.yaml
index 001e9a97ede..9c637cd6802 100644
--- a/.github/workflows/iceberg_test.yaml
+++ b/.github/workflows/iceberg_test.yaml
@@ -1,5 +1,5 @@
 name: "Delta Iceberg Latest"
-on: [push, pull_request]
+on: [] # [push, pull_request]
 jobs:
   test:
     name: "DIL: Scala ${{ matrix.scala }}"
@@ -25,7 +25,7 @@ jobs:
         uses: actions/setup-java@v3
         with:
           distribution: "zulu"
-          java-version: "11"
+          java-version: "17"
       - name: Cache Scala, SBT
         uses: actions/cache@v3
         with:
diff --git a/.github/workflows/kernel_test.yaml b/.github/workflows/kernel_test.yaml
index b43fdaca81b..2b3d0211fa1 100644
--- a/.github/workflows/kernel_test.yaml
+++ b/.github/workflows/kernel_test.yaml
@@ -37,11 +37,12 @@ jobs:
           echo "Runner arch: ${{ runner.arch }}"
       - name: Checkout code
         uses: actions/checkout@v4
+      # Run unit tests with JDK 17. These unit tests depend on Spark, and Spark 4.0+ is JDK 17.
      - name: install java
        uses: actions/setup-java@v4
        with:
          distribution: "zulu"
-         java-version: "11"
+         java-version: "17"
      - name: Cache SBT and dependencies
        id: cache-sbt
        uses: actions/cache@v4
@@ -59,7 +60,7 @@ jobs:
           else
             echo "❌ Cache MISS - will download dependencies"
           fi
-      - name: Run tests
+      - name: Run unit tests
         run: |
           python run-tests.py --group kernel --coverage --shard ${{ matrix.shard }}

@@ -68,6 +69,7 @@ jobs:
     runs-on: ubuntu-24.04
     steps:
       - uses: actions/checkout@v3
+      # Run integration tests with JDK 11, as they have no Spark dependency
      - name: install java
        uses: actions/setup-java@v3
        with:
diff --git a/.github/workflows/kernel_unitycatalog_test.yaml b/.github/workflows/kernel_unitycatalog_test.yaml
index b53c927c09f..6864cdca46b 100644
--- a/.github/workflows/kernel_unitycatalog_test.yaml
+++ b/.github/workflows/kernel_unitycatalog_test.yaml
@@ -22,7 +22,7 @@ jobs:
         uses: actions/setup-java@v3
         with:
           distribution: "zulu"
-          java-version: "11"
+          java-version: "17"
         if: steps.git-diff.outputs.diff
       - name: Run Unity tests with coverage
         run: |
diff --git a/.github/workflows/spark_examples_test.yaml b/.github/workflows/spark_examples_test.yaml
index 302cc150f3d..d5574761310 100644
--- a/.github/workflows/spark_examples_test.yaml
+++ b/.github/workflows/spark_examples_test.yaml
@@ -24,7 +24,7 @@ jobs:
         uses: actions/setup-java@v3
         with:
           distribution: "zulu"
-          java-version: "11"
+          java-version: "17"
       - name: Cache Scala, SBT
         uses: actions/cache@v3
         with:
diff --git a/.github/workflows/spark_master_test.yaml b/.github/workflows/spark_master_test.yaml
deleted file mode 100644
index b2e88046a60..00000000000
--- a/.github/workflows/spark_master_test.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: "Delta Spark Master"
-on: [push, pull_request]
-jobs:
-  test:
-    name: "DSM: Scala ${{ matrix.scala }}, Shard ${{ matrix.shard }}"
-    runs-on: ubuntu-24.04
-    strategy:
-      matrix:
-        # These Scala versions must match those in the build.sbt
-        scala: [2.13.16]
-        # Important: This list of shards must be [0..NUM_SHARDS - 1]
-        shard: [0, 1, 2, 3]
-    env:
-      SCALA_VERSION: ${{ matrix.scala }}
-      # Important: This must be the same as the length of shards in matrix
-      NUM_SHARDS: 4
-    steps:
-      - uses: actions/checkout@v3
-      - uses: technote-space/get-diff-action@v4
-        id: git-diff
-        with:
-          PATTERNS: |
-            **
-            .github/workflows/**
-            !unity/**
-            !kernel/**
-            !connectors/**
-      - name: install java
-        uses: actions/setup-java@v3
-        with:
-          distribution: "zulu"
-          java-version: "17"
-      - name: Cache Scala, SBT
-        uses: actions/cache@v3
-        with:
-          path: |
-            ~/.sbt
-            ~/.ivy2
-            ~/.cache/coursier
-            !~/.cache/coursier/v1/https/repository.apache.org/content/groups/snapshots
-          # Change the key if dependencies are changed. For each key, GitHub Actions will cache the
-          # the above directories when we use the key for the first time. After that, each run will
-          # just use the cache. The cache is immutable so we need to use a new key when trying to
-          # cache new stuff.
-          key: delta-sbt-cache-spark-master-scala${{ matrix.scala }}
-      - name: Install Job dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git
-          sudo apt install libedit-dev
-        if: steps.git-diff.outputs.diff
-      - name: Run Spark Master tests
-        # when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_test.yaml
-        run: |
-          TEST_PARALLELISM_COUNT=4 SHARD_ID=${{matrix.shard}} build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean sparkV2/test
-          TEST_PARALLELISM_COUNT=4 SHARD_ID=${{matrix.shard}} build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean spark/test
-          TEST_PARALLELISM_COUNT=4 build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean connectServer/test
-          TEST_PARALLELISM_COUNT=4 build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean connectServer/assembly connectClient/test
-        if: steps.git-diff.outputs.diff
diff --git a/.github/workflows/spark_python_test.yaml b/.github/workflows/spark_python_test.yaml
index 29eef085fbe..6ab3ab23bb8 100644
--- a/.github/workflows/spark_python_test.yaml
+++ b/.github/workflows/spark_python_test.yaml
@@ -25,7 +25,7 @@ jobs:
         uses: actions/setup-java@v3
         with:
           distribution: "zulu"
-          java-version: "11"
+          java-version: "17"
       - name: Cache Scala, SBT
         uses: actions/cache@v3
         with:
@@ -53,23 +53,19 @@ jobs:
           export PATH="~/.pyenv/bin:$PATH"
           eval "$(pyenv init -)"
           eval "$(pyenv virtualenv-init -)"
-          pyenv install 3.8.18
-          pyenv global system 3.8.18
-          pipenv --python 3.8 install
+          pyenv install 3.9
+          pyenv global system 3.9
+          pipenv --python 3.9 install
           # Update the pip version to 24.0. By default `pyenv.run` installs the latest pip version
           # available. From version 24.1, `pip` doesn't allow installing python packages
           # with version string containing `-`. In Delta-Spark case, the pypi package generated has
           # `-SNAPSHOT` in version (e.g. `3.3.0-SNAPSHOT`) as the version is picked up from
           # the`version.sbt` file.
pipenv run pip install pip==24.0 setuptools==69.5.1 wheel==0.43.0 - # Install PySpark without bundled Scala 2.12 JARs - read more in the future note below - pipenv run pip install pyspark==3.5.3 --no-deps - pipenv run pip install py4j==0.10.9.7 - pipenv run pip install flake8==3.5.0 pypandoc==1.3.3 - pipenv run pip install black==23.9.1 + pipenv run pip install pyspark==4.0.1 + pipenv run pip install flake8==3.9.0 + pipenv run pip install black==23.12.1 pipenv run pip install importlib_metadata==3.10.0 - # The mypy versions 0.982 and 1.8.0 have conflicting rules (cannot get style checks to - # pass for both versions on the same file) so we upgrade this to match Spark 4.0 pipenv run pip install mypy==1.8.0 pipenv run pip install mypy-protobuf==3.3.0 pipenv run pip install cryptography==37.0.4 @@ -77,9 +73,16 @@ jobs: pipenv run pip install wheel==0.33.4 pipenv run pip install setuptools==41.1.0 pipenv run pip install pydocstyle==3.0.0 - pipenv run pip install pandas==1.1.3 - pipenv run pip install pyarrow==8.0.0 - pipenv run pip install numpy==1.20.3 + pipenv run pip install pandas==2.2.0 + pipenv run pip install pyarrow==11.0.0 + pipenv run pip install pypandoc==1.3.3 + pipenv run pip install numpy==1.22.4 + pipenv run pip install grpcio==1.67.0 + pipenv run pip install grpcio-status==1.67.0 + pipenv run pip install googleapis-common-protos==1.65.0 + pipenv run pip install protobuf==5.29.1 + pipenv run pip install googleapis-common-protos-stubs==2.2.0 + pipenv run pip install grpc-stubs==1.24.11 if: steps.git-diff.outputs.diff - name: Run Python tests # when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_master_test.yaml diff --git a/.github/workflows/spark_test.yaml b/.github/workflows/spark_test.yaml index d0001bdd10e..046eb5f0295 100644 --- a/.github/workflows/spark_test.yaml +++ b/.github/workflows/spark_test.yaml @@ -29,7 +29,7 @@ jobs: uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: "11" + java-version: "17" - name: Cache Scala, SBT uses: actions/cache@v3 with: @@ -57,29 +57,36 @@ jobs: export PATH="~/.pyenv/bin:$PATH" eval "$(pyenv init -)" eval "$(pyenv virtualenv-init -)" - pyenv install 3.8.18 - pyenv global system 3.8.18 - pipenv --python 3.8 install + pyenv install 3.9 + pyenv global system 3.9 + pipenv --python 3.9 install # Update the pip version to 24.0. By default `pyenv.run` installs the latest pip version # available. From version 24.1, `pip` doesn't allow installing python packages # with version string containing `-`. In Delta-Spark case, the pypi package generated has # `-SNAPSHOT` in version (e.g. `3.3.0-SNAPSHOT`) as the version is picked up from # the`version.sbt` file. 
pipenv run pip install pip==24.0 setuptools==69.5.1 wheel==0.43.0 - pipenv run pip install pyspark==3.5.3 - pipenv run pip install flake8==3.5.0 pypandoc==1.3.3 - pipenv run pip install black==23.9.1 + pipenv run pip install pyspark==4.0.1 + pipenv run pip install flake8==3.9.0 + pipenv run pip install black==23.12.1 pipenv run pip install importlib_metadata==3.10.0 - pipenv run pip install mypy==0.982 + pipenv run pip install mypy==1.8.0 pipenv run pip install mypy-protobuf==3.3.0 pipenv run pip install cryptography==37.0.4 pipenv run pip install twine==4.0.1 pipenv run pip install wheel==0.33.4 pipenv run pip install setuptools==41.1.0 pipenv run pip install pydocstyle==3.0.0 - pipenv run pip install pandas==1.1.3 - pipenv run pip install pyarrow==8.0.0 - pipenv run pip install numpy==1.20.3 + pipenv run pip install pandas==2.2.0 + pipenv run pip install pyarrow==11.0.0 + pipenv run pip install pypandoc==1.3.3 + pipenv run pip install numpy==1.22.4 + pipenv run pip install grpcio==1.67.0 + pipenv run pip install grpcio-status==1.67.0 + pipenv run pip install googleapis-common-protos==1.65.0 + pipenv run pip install protobuf==5.29.1 + pipenv run pip install googleapis-common-protos-stubs==2.2.0 + pipenv run pip install grpc-stubs==1.24.11 if: steps.git-diff.outputs.diff - name: Scala structured logging style check run: | diff --git a/.github/workflows/unidoc.yaml b/.github/workflows/unidoc.yaml index 979c1cfa962..735bdf806d4 100644 --- a/.github/workflows/unidoc.yaml +++ b/.github/workflows/unidoc.yaml @@ -13,7 +13,7 @@ uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: "11" + java-version: "17" - uses: actions/checkout@v3 - name: generate unidoc run: build/sbt "++ ${{ matrix.scala }}" unidoc diff --git a/build.sbt b/build.sbt index ee8f2c122f9..9694aeda251 100644 --- a/build.sbt +++ b/build.sbt @@ -66,7 +66,7 @@ val sparkVersion = settingKey[String]("Spark version") // Dependent library versions val defaultSparkVersion = SparkVersionSpec.DEFAULT.fullVersion // Spark version to use for testing in non-delta-spark related modules -val hadoopVersion = "3.3.4" +val hadoopVersion = "3.4.0" val scalaTestVersion = "3.2.15" val scalaTestVersionForConnectors = "3.0.8" val parquet4sVersion = "1.9.4" @@ -257,7 +257,7 @@ lazy val connectClient = (project in file("spark-connect/client")) // Create a symlink for the log4j properties val confDir = distributionDir / "conf" IO.createDirectory(confDir) - val log4jProps = (spark / Test / resourceDirectory).value / "log4j2_spark_master.properties" + val log4jProps = (spark / Test / resourceDirectory).value / "log4j2.properties" val linkedLog4jProps = confDir / "log4j2.properties" Files.createSymbolicLink(linkedLog4jProps.toPath, log4jProps.toPath) } @@ -705,6 +705,8 @@ lazy val contribs = (project in file("contribs")) Compile / compile := ((Compile / compile) dependsOn createTargetClassesDir).value ).configureUnidoc() +/* +TODO: compilation broken for Spark 4.0 lazy val sharing = (project in file("sharing")) .dependsOn(spark % "compile->compile;test->test;provided->provided") .disablePlugins(JavaFormatterPlugin, ScalafmtPlugin) @@ -715,22 +717,6 @@ lazy val sharing = (project in file("sharing")) releaseSettings, CrossSparkVersions.sparkDependentSettings(sparkVersion), Test / javaOptions ++= Seq("-ea"), - Compile / compile := runTaskOnlyOnSparkMaster( - task = Compile / compile, - taskName = "compile", - projectName = "delta-sharing-spark", - emptyValue = Analysis.empty.asInstanceOf[CompileAnalysis] - ).value, - Test / test := 
runTaskOnlyOnSparkMaster( - task = Test / test, - taskName = "test", - projectName = "delta-sharing-spark", - emptyValue = ()).value, - publish := runTaskOnlyOnSparkMaster( - task = publish, - taskName = "publish", - projectName = "delta-sharing-spark", - emptyValue = ()).value, libraryDependencies ++= Seq( "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", @@ -747,6 +733,7 @@ lazy val sharing = (project in file("sharing")) "org.apache.spark" %% "spark-hive" % sparkVersion.value % "test" classifier "tests", ) ).configureUnidoc() +*/ lazy val kernelApi = (project in file("kernel/kernel-api")) .enablePlugins(ScalafmtPlugin) @@ -898,7 +885,7 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults")) // such as warm runs, cold runs, defining benchmark parameter variables etc. "org.openjdk.jmh" % "jmh-core" % "1.37" % "test", "org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37" % "test", - "io.delta" %% "delta-spark" % "3.3.2" % "test", + "io.delta" %% "delta-spark" % "4.0.0" % "test", "org.apache.spark" %% "spark-hive" % defaultSparkVersion % "test" classifier "tests", "org.apache.spark" %% "spark-sql" % defaultSparkVersion % "test" classifier "tests", @@ -1010,6 +997,8 @@ lazy val storageS3DynamoDB = (project in file("storage-s3-dynamodb")) ) ).configureUnidoc() +/* +TODO: readd delta-iceberg on Spark 4.0+ val icebergSparkRuntimeArtifactName = { val (expMaj, expMin, _) = getMajorMinorPatch(defaultSparkVersion) s"iceberg-spark-runtime-$expMaj.$expMin" @@ -1165,6 +1154,7 @@ lazy val icebergShaded = (project in file("icebergShaded")) assembly / assemblyMergeStrategy := updateMergeStrategy((assembly / assemblyMergeStrategy).value), assemblyPackageScala / assembleArtifact := false, ) +*/ lazy val hudi = (project in file("hudi")) .dependsOn(spark % "compile->compile;test->test;provided->provided") @@ -1265,7 +1255,8 @@ val createTargetClassesDir = taskKey[Unit]("create target classes dir") // Don't use these groups for any other projects lazy val sparkGroup = project - .aggregate(spark, sparkV1, sparkV1Filtered, sparkV2, contribs, storage, storageS3DynamoDB, sharing, hudi) + // TODO: add sharing back after fixing compilation + .aggregate(spark, sparkV1, sparkV1Filtered, sparkV2, contribs, storage, storageS3DynamoDB, hudi) .settings( // crossScalaVersions must be set to Nil on the aggregating project crossScalaVersions := Nil, @@ -1273,6 +1264,7 @@ lazy val sparkGroup = project publish / skip := false, ) +/* lazy val icebergGroup = project .aggregate(iceberg, testDeltaIcebergJar) .settings( @@ -1281,6 +1273,7 @@ lazy val icebergGroup = project publishArtifact := false, publish / skip := false, ) +*/ lazy val kernelGroup = project .aggregate(kernelApi, kernelDefaults, kernelBenchmarks) diff --git a/examples/scala/build.sbt b/examples/scala/build.sbt index 0f07d46a159..0f242990fe7 100644 --- a/examples/scala/build.sbt +++ b/examples/scala/build.sbt @@ -42,8 +42,10 @@ def getMajorMinor(version: String): (Int, Int) = { } } val lookupSparkVersion: PartialFunction[(Int, Int), String] = { - // version 4.0.0-preview1 - case (major, minor) if major >= 4 => "4.0.0-preview1" + // TODO: how to run integration tests for multiple Spark versions + case (major, minor) if major >= 4 && minor >= 1 => "4.0.1" + // version 4.0.0 + case (major, minor) if major >= 4 => "4.0.0" // versions 3.3.x+ case (major, minor) if major >= 3 && minor >=3 => "3.5.3" // versions 3.0.0 to 3.2.x diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java 
b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java index b651881c25e..5b326f5e000 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java @@ -91,6 +91,11 @@ public static void checkpoint(Engine engine, Clock clock, SnapshotImpl snapshot) numberOfAddFiles = checkpointDataIter.getNumberOfAddActions(); } catch (FileAlreadyExistsException faee) { throw new CheckpointAlreadyExistsException(version); + } catch (IOException io) { + if (io.getCause() instanceof FileAlreadyExistsException) { + throw new CheckpointAlreadyExistsException(version); + } + throw io; } final CheckpointMetaData checkpointMetaData = diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala index 6d5d04bad7a..e8ea9d844a5 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/engine/DefaultParquetHandlerSuite.scala @@ -15,6 +15,7 @@ */ package io.delta.kernel.defaults.engine +import java.io.IOException import java.nio.file.FileAlreadyExistsException import scala.collection.JavaConverters._ @@ -63,11 +64,12 @@ class DefaultParquetHandlerSuite extends AnyFunSuite with ParquetSuiteBase { writeAndVerify() // Try to write as same file and expect an error - intercept[FileAlreadyExistsException] { + val e = intercept[IOException] { parquetHandler.writeParquetFileAtomically( filePath, toCloseableIterator(dataToWrite.asJava.iterator())) } + assert(e.getCause.isInstanceOf[FileAlreadyExistsException]) } } } diff --git a/project/CrossSparkVersions.scala b/project/CrossSparkVersions.scala index 16cd96c2c39..2f25458c9f3 100644 --- a/project/CrossSparkVersions.scala +++ b/project/CrossSparkVersions.scala @@ -176,7 +176,7 @@ import Unidoc._ case class SparkVersionSpec( fullVersion: String, targetJvm: String, - additionalSourceDir: Option[String], + additionalSourceDir: Option[String] = None, antlr4Version: String, additionalJavaOptions: Seq[String] = Seq.empty, jacksonVersion: String = "2.15.2" @@ -194,11 +194,8 @@ case class SparkVersionSpec( /** Whether this is the master Spark version */ def isMaster: Boolean = SparkVersionSpec.MASTER.contains(this) - /** Returns log4j config file based on source directory */ - def log4jConfig: String = { - if (additionalSourceDir.exists(_.contains("master"))) "log4j2_spark_master.properties" - else "log4j2.properties" - } + /** Returns log4j config file */ + def log4jConfig: String = "log4j2.properties" /** Whether to export JARs instead of class directories (needed for Spark Connect on master) */ def exportJars: Boolean = additionalSourceDir.exists(_.contains("master")) @@ -209,18 +206,9 @@ case class SparkVersionSpec( object SparkVersionSpec { - private val spark35 = SparkVersionSpec( - fullVersion = "3.5.7", - targetJvm = "11", - additionalSourceDir = Some("scala-spark-3.5"), - antlr4Version = "4.9.3", - additionalJavaOptions = Seq.empty - ) - - private val spark40Snapshot = SparkVersionSpec( - fullVersion = "4.0.2-SNAPSHOT", + private val spark40 = SparkVersionSpec( + fullVersion = "4.0.1", targetJvm = "17", - additionalSourceDir = Some("scala-spark-master"), antlr4Version = "4.13.1", additionalJavaOptions = Seq( // Copied from 
SparkBuild.scala to support Java 17 for unit tests (see apache/spark#34153) @@ -240,13 +228,13 @@ object SparkVersionSpec { ) /** Default Spark version */ - val DEFAULT = spark35 + val DEFAULT = spark40 /** Spark master branch version (optional). Release branches should not build against master */ - val MASTER: Option[SparkVersionSpec] = Some(spark40Snapshot) + val MASTER: Option[SparkVersionSpec] = None /** All supported Spark versions - internal use only */ - val ALL_SPECS = Seq(spark35, spark40Snapshot) + val ALL_SPECS = Seq(spark40) } /** See docs on top of this file */ @@ -263,6 +251,7 @@ object CrossSparkVersions extends AutoPlugin { // Resolve aliases first val resolvedInput = input match { case "default" => SparkVersionSpec.DEFAULT.fullVersion + /* case "master" => SparkVersionSpec.MASTER match { case Some(masterSpec) => masterSpec.fullVersion case None => throw new IllegalArgumentException( @@ -270,6 +259,7 @@ object CrossSparkVersions extends AutoPlugin { SparkVersionSpec.ALL_SPECS.map(_.fullVersion).mkString(", ") ) } + */ case other => other } diff --git a/project/SparkMimaExcludes.scala b/project/SparkMimaExcludes.scala index 20af16c8423..21d850b8023 100644 --- a/project/SparkMimaExcludes.scala +++ b/project/SparkMimaExcludes.scala @@ -89,7 +89,11 @@ object SparkMimaExcludes { // Changes in 4.0.0 ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaTable.improveUnsupportedOpError"), ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaMergeBuilder.improveUnsupportedOpError"), - ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaMergeBuilder.execute") + ProblemFilters.exclude[IncompatibleResultTypeProblem]("io.delta.tables.DeltaMergeBuilder.execute"), + + // Changes in 4.1.0 + // TODO: change in type hierarchy due to removal of DeltaThrowableConditionShim + ProblemFilters.exclude[MissingTypesProblem]("io.delta.exceptions.*") // scalastyle:on line.size.limit ) diff --git a/project/tests/test_cross_spark_publish.py b/project/tests/test_cross_spark_publish.py index 9a317b8c207..47de42c8009 100755 --- a/project/tests/test_cross_spark_publish.py +++ b/project/tests/test_cross_spark_publish.py @@ -32,9 +32,9 @@ "delta-connect-common{suffix}_2.13-{version}.jar", "delta-connect-client{suffix}_2.13-{version}.jar", "delta-connect-server{suffix}_2.13-{version}.jar", - "delta-sharing-spark{suffix}_2.13-{version}.jar", + # "delta-sharing-spark{suffix}_2.13-{version}.jar", TODO add back after fixing build "delta-contribs{suffix}_2.13-{version}.jar", - "delta-iceberg{suffix}_2.13-{version}.jar" + # "delta-iceberg{suffix}_2.13-{version}.jar" TODO add back after fixing build ] # Non-spark-related modules (built once, same for all Spark versions) @@ -76,12 +76,11 @@ def all_jars(self) -> List[str]: # Spark versions to test (key = full version string, value = spec with suffix) SPARK_VERSIONS: Dict[str, SparkVersionSpec] = { - "3.5.7": SparkVersionSpec(""), # Default Spark version without suffix - "4.0.2-SNAPSHOT": SparkVersionSpec("_4.0") # Other Spark versions with suffix + "4.0.1": SparkVersionSpec("") # Default Spark version without suffix } # The default Spark version (no suffix in artifact names) -DEFAULT_SPARK = "3.5.7" +DEFAULT_SPARK = "4.0.1" def substitute_xversion(jar_templates: List[str], delta_version: str) -> Set[str]: @@ -200,7 +199,7 @@ def test_default_publish(self) -> bool: def test_run_only_for_spark_modules(self) -> bool: """runOnlyForReleasableSparkModules should publish only Spark-dependent modules.""" - 
spark_version = "4.0.2-SNAPSHOT" + spark_version = "4.0.1" spark_spec = SPARK_VERSIONS[spark_version] print("\n" + "="*70) diff --git a/python/delta/pip_utils.py b/python/delta/pip_utils.py index ad95de096e2..52ca1babbdd 100644 --- a/python/delta/pip_utils.py +++ b/python/delta/pip_utils.py @@ -83,7 +83,7 @@ def configure_spark_with_delta_pip( # Determine the artifact name based on Spark version # NOTE: When updating LATEST_RELEASED_SPARK_VERSION in project/CrossSparkVersions.scala, # also update the version check here to match the new latest version. - latest_released_spark_version_prefix = "3.5." + latest_released_spark_version_prefix = "4.0." artifact_name = f"delta-spark_{scala_version}" diff --git a/run-tests.py b/run-tests.py index e8e5f43544f..59778b10dde 100755 --- a/run-tests.py +++ b/run-tests.py @@ -86,76 +86,9 @@ def run_sbt_tests(root_dir, test_group, coverage, scala_version=None, shard=None cmd += ["-J-Xmx6G"] run_cmd(cmd, stream_output=True) -def setup_pyspark_scala213_compatibility(): - """ - Setup PySpark with Scala 2.13 compatibility when SCALA_VERSION is set to 2.13.x. - This downloads Spark with Scala 2.13 and sets up the environment variables. - - Download and setup Spark 3.5.3 with Scala 2.13 for compatibility with Delta Scala 2.13 - Future note for Spark 4.0 upgrade: PySpark 3.5.3 from pip includes Scala 2.12 JARs, but - because of the upgrade to Scala 2.13, it was causing binary incompatibility errors. - For now (before Spark 4.0), we install PySpark without dependencies and use Spark 3.5.3 compiled - for Scala 2.13 to ensure compatibility. Remove the four steps below for Spark 4.0 upgrade. - """ - scala_version = os.getenv("SCALA_VERSION") - if not scala_version or not scala_version.startswith("2.13"): - return False - - print("##### Setting up PySpark Scala 2.13 compatibility #####") - - # Check if Scala 2.13 Spark is already set up - spark_home = os.getenv("SPARK_HOME") - if spark_home and "scala2.13" in spark_home: - print(f"PySpark Scala 2.13 already configured: {spark_home}") - return True - - try: - import subprocess - from pathlib import Path - - # Download Spark 3.5.3 with Scala 2.13 - SPARK_VERSION = "3.5.3" - SCALA_SUFFIX = "2.13" - SPARK_DIR = f"spark-{SPARK_VERSION}-bin-hadoop3-scala{SCALA_SUFFIX}" - - spark_url = f"https://archive.apache.org/dist/spark/spark-{SPARK_VERSION}/{SPARK_DIR}.tgz" - spark_tgz = f"{SPARK_DIR}.tgz" - - # Download if not already present - if not os.path.exists(SPARK_DIR): - print(f"Downloading Spark with Scala 2.13: {spark_url}") - run_cmd(["curl", "-LO", spark_url], stream_output=True) - print(f"Extracting {spark_tgz}") - run_cmd(["tar", "-xzf", spark_tgz], stream_output=True) - else: - print(f"Using existing Spark directory: {SPARK_DIR}") - - # Set SPARK_HOME environment variable - new_spark_home = os.path.abspath(SPARK_DIR) - os.environ["SPARK_HOME"] = new_spark_home - print(f"Set SPARK_HOME to: {new_spark_home}") - - # Add Spark bin to PATH - spark_bin = os.path.join(new_spark_home, "bin") - current_path = os.environ.get("PATH", "") - if spark_bin not in current_path: - os.environ["PATH"] = f"{spark_bin}:{current_path}" - print(f"Added to PATH: {spark_bin}") - - print("PySpark Scala 2.13 compatibility setup completed successfully") - return True - - except Exception as e: - print(f"Warning: Failed to setup PySpark Scala 2.13 compatibility: {e}") - print("Continuing with existing PySpark installation...") - return False - - def run_python_tests(root_dir): print("##### Running Python tests #####") - # Setup PySpark Scala 2.13 
compatibility if needed - setup_pyspark_scala213_compatibility() python_test_script = path.join(root_dir, path.join("python", "run-tests.py")) print("Calling script %s", python_test_script) run_cmd(["python3", python_test_script], env={'DELTA_TESTING': '1'}, stream_output=True) diff --git a/setup.py b/setup.py index 3541c7c447c..03d2443e01f 100644 --- a/setup.py +++ b/setup.py @@ -13,19 +13,7 @@ def get_version_from_sbt(): version = fp.read().strip() return version.split('"')[1] - VERSION = get_version_from_sbt() -MAJOR_VERSION = int(VERSION.split(".")[0]) - -if MAJOR_VERSION < 4: - packages_arg = ['delta', 'delta.exceptions'] - install_requires_arg = ['pyspark>=3.5.2,<3.6.0', 'importlib_metadata>=1.0.0'] - python_requires_arg = '>=3.6' -else: # MAJOR_VERSION >= 4 - # Delta 4.0+ contains Delta Connect code and uses Spark 4.0+ - packages_arg = ['delta', 'delta.connect', 'delta.connect.proto', 'delta.exceptions'] - install_requires_arg = ['pyspark>=4.0.0', 'importlib_metadata>=1.0.0'] - python_requires_arg = '>=3.9' class VerifyVersionCommand(install): """Custom command to verify that the git tag matches our version""" @@ -44,6 +32,10 @@ def run(self): with open("python/README.md", "r", encoding="utf-8") as fh: long_description = fh.read() +# TODO: once we support multiple Spark versions update this to be compatible with both +install_requires_arg = ['pyspark>=4.0.1', 'importlib_metadata>=1.0.0'] +python_requires_arg = '>=3.9' + setup( name="delta-spark", version=VERSION, @@ -70,7 +62,7 @@ def run(self): ], keywords='delta.io', package_dir={'': 'python'}, - packages=packages_arg, + packages=['delta', 'delta.connect', 'delta.connect.proto', 'delta.exceptions'], package_data={ 'delta': ['py.typed'], }, diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala index 6f26f8d2370..04eff52e8d8 100644 --- a/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaFormatSharingSourceSuite.scala @@ -18,7 +18,7 @@ package io.delta.sharing.spark import java.time.LocalDateTime -import org.apache.spark.sql.delta.{DeltaExcludedBySparkVersionTestMixinShims, DeltaIllegalStateException, DeltaLog} +import org.apache.spark.sql.delta.{DeltaIllegalStateException, DeltaLog} import org.apache.spark.sql.delta.DeltaOptions.{ IGNORE_CHANGES_OPTION, IGNORE_DELETES_OPTION, @@ -49,8 +49,7 @@ class DeltaFormatSharingSourceSuite extends StreamTest with DeltaSQLCommandTest with DeltaSharingTestSparkUtils - with DeltaSharingDataSourceDeltaTestUtils - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaSharingDataSourceDeltaTestUtils { import testImplicits._ @@ -1216,9 +1215,7 @@ class DeltaFormatSharingSourceSuite } } - testSparkMasterOnly( - "streaming variant query works" - ) { + test("streaming variant query works") { withTempDirs { (inputDir, outputDir, checkpointDir) => val deltaTableName = "variant_table" withTable(deltaTableName) { diff --git a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala index 1cd22b47df0..65f20da8b1d 100644 --- a/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala +++ b/sharing/src/test/scala/io/delta/sharing/spark/DeltaSharingDataSourceDeltaSuite.scala @@ -21,7 +21,7 @@ package io.delta.sharing.spark import 
scala.concurrent.duration._ -import org.apache.spark.sql.delta.{DeltaConfigs, DeltaExcludedBySparkVersionTestMixinShims, VariantShreddingPreviewTableFeature, VariantTypePreviewTableFeature, VariantTypeTableFeature} +import org.apache.spark.sql.delta.{DeltaConfigs, VariantShreddingPreviewTableFeature, VariantTypePreviewTableFeature, VariantTypeTableFeature} import org.apache.spark.sql.delta.sources.DeltaSQLConf import org.apache.spark.sql.delta.test.DeltaSQLCommandTest @@ -43,8 +43,7 @@ trait DeltaSharingDataSourceDeltaSuiteBase extends QueryTest with DeltaSQLCommandTest with DeltaSharingTestSparkUtils - with DeltaSharingDataSourceDeltaTestUtils - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaSharingDataSourceDeltaTestUtils { override def beforeEach(): Unit = { spark.sessionState.conf.setConfString( @@ -1519,7 +1518,7 @@ trait DeltaSharingDataSourceDeltaSuiteBase VariantTypeTableFeature, VariantShreddingPreviewTableFeature ).foreach { feature => - testSparkMasterOnly(s"basic variant test - table feature: $feature") { + test(s"basic variant test - table feature: $feature") { withTempDir { tempDir => val extraConfs = feature match { case VariantShreddingPreviewTableFeature => Map( diff --git a/spark/src/main/scala-spark-3.5/shims/ColumnConversionShims.scala b/spark/src/main/scala-spark-3.5/shims/ColumnConversionShims.scala deleted file mode 100644 index 97816828d4e..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/ColumnConversionShims.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.Column -import org.apache.spark.sql.catalyst.expressions.Expression - -/** - * Conversions from a [[org.apache.spark.sql.Column]] to an - * [[org.apache.spark.sql.catalyst.expressions.Expression]], and vice versa. - */ -object ClassicColumnConversions { - def expression(c: Column): Expression = c.expr -} diff --git a/spark/src/main/scala-spark-3.5/shims/ColumnDefinitionShims.scala b/spark/src/main/scala-spark-3.5/shims/ColumnDefinitionShims.scala deleted file mode 100644 index b548a282ca7..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/ColumnDefinitionShims.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.catalyst.plans.logical - -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.types.{StructField, StructType} - -object ColumnDefinitionShims { - - /** - * Helps handle a breaking change in [[org.apache.spark.sql.catalyst.plans.logical.CreateTable]] - * between Spark 3.5 and Spark 4.0: - * - In 3.5, `CreateTable` accepts a `tableSchema: StructType`. - * - In 4.0, `CreateTable` accepts a `columns: Seq[ColumnDefinition]`. - */ - def parseColumns(columns: Seq[StructField], sqlParser: ParserInterface): StructType = { - StructType(columns.toSeq) - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/CreatableRelationProviderShims.scala b/spark/src/main/scala-spark-3.5/shims/CreatableRelationProviderShims.scala deleted file mode 100644 index b5249d349c9..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/CreatableRelationProviderShims.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.sources - -import org.apache.spark.sql.types.DataType - -trait CreatableRelationProviderShim extends CreatableRelationProvider { - - /** - * The `supportsDataType` method is not defined in Spark 3.5 but is overidden by `DeltaDataSource` - * in Spark 4.0. - */ - def supportsDataType(dt: DataType): Boolean = throw new UnsupportedOperationException( - "This method is not defined in Spark 3.5." - ) -} diff --git a/spark/src/main/scala-spark-3.5/shims/DataFrameShims.scala b/spark/src/main/scala-spark-3.5/shims/DataFrameShims.scala deleted file mode 100644 index 53145e36f70..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/DataFrameShims.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.sql.delta - -import org.apache.spark.sql.{Column, DataFrame, Dataset, Encoders, SparkSession} -import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan - -object DataFrameUtils { - def ofRows(spark: SparkSession, plan: LogicalPlan): DataFrame = Dataset.ofRows(spark, plan) - def ofRows(queryExecution: QueryExecution): DataFrame = { - val ds = new Dataset(queryExecution, Encoders.row(queryExecution.analyzed.schema)) - ds.asInstanceOf[DataFrame] - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/DecimalPrecisionTypeCoercionShims.scala b/spark/src/main/scala-spark-3.5/shims/DecimalPrecisionTypeCoercionShims.scala deleted file mode 100644 index 41f050828c1..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/DecimalPrecisionTypeCoercionShims.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.catalyst.analysis.DecimalPrecision -import org.apache.spark.sql.types.DecimalType - -object DecimalPrecisionTypeCoercionShims { - // Returns the wider decimal type that's wider than both of them - def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType = - DecimalPrecision.widerDecimalType(d1, d2) -} diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaInvariantCheckerExecShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaInvariantCheckerExecShims.scala deleted file mode 100644 index 5e40f5a9ea5..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/DeltaInvariantCheckerExecShims.scala +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.delta.constraints - -import org.apache.spark.sql.catalyst.optimizer.ReplaceExpressions -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.RuleExecutor - -trait DeltaInvariantCheckerOptimizerShims { self: RuleExecutor[LogicalPlan] => - val DELTA_INVARIANT_CHECKER_OPTIMIZER_BATCHES = Seq( - Batch("Finish Analysis", Once, ReplaceExpressions) - ) -} diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaSqlParserShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaSqlParserShims.scala deleted file mode 100644 index 2ecc7e5e65e..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/DeltaSqlParserShims.scala +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.delta.sql.parser - -trait DeltaSqlParserShims diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaTableValueFunctionsShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaTableValueFunctionsShims.scala deleted file mode 100644 index 5e91563c8b7..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/DeltaTableValueFunctionsShims.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.catalyst.expressions.Expression - -object DeltaTableValueFunctionsShims { - - /** - * Handles a breaking change between Spark 3.5 and Spark Master (4.0). - * - * In Spark 4.0, SPARK-46331 [https://github.com/apache/spark/pull/44261] removed CodegenFallback - * from a subset of DateTime expressions, making the `now()` expression unevaluable. - */ - def evaluateTimeOption(value: Expression): String = { - value.eval().toString - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaThrowableHelperShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaThrowableHelperShims.scala deleted file mode 100644 index 945141e0907..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/DeltaThrowableHelperShims.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.SparkThrowable -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.errors.QueryCompilationErrors - -object DeltaThrowableHelperShims { - /** - * Handles a breaking change (SPARK-46810) between Spark 3.5 and Spark Master (4.0) where - * `error-classes.json` was renamed to `error-conditions.json`. - */ - val SPARK_ERROR_CLASS_SOURCE_FILE = "error/error-classes.json" - - def showColumnsWithConflictDatabasesError( - db: Seq[String], v1TableName: TableIdentifier): Throwable = { - QueryCompilationErrors.showColumnsWithConflictDatabasesError(db, v1TableName) - } -} - -trait DeltaThrowableConditionShim extends SparkThrowable { - def getCondition(): String = getErrorClass() - override def getErrorClass(): String -} diff --git a/spark/src/main/scala-spark-3.5/shims/DeltaTimeTravelSpecShims.scala b/spark/src/main/scala-spark-3.5/shims/DeltaTimeTravelSpecShims.scala deleted file mode 100644 index 8202cbe2bd8..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/DeltaTimeTravelSpecShims.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.SparkSession - -object DeltaTimeTravelSpecShims { - - /** - * Ensures only a single time travel syntax is used (i.e. not version AND timestamp). - * - * Handles a breaking change between Spark 3.5 and 4.0 which added support for - * DataFrame-based time travel in Spark (https://github.com/apache/spark/pull/43403). - * - * TLDR: Starting in Spark 4.0, we end up with two time travel specifications in DeltaTableV2 if - * options are used to specify the time travel version/timestamp. This breaks an existing check we - * had (against Spark 3.5) which ensures only one time travel specification is used. - * - * The solution to get around this is just to ignore two specs if they are the same. If the user - * did actually provide two different time travel specs, that would have been caught by Spark - * earlier. 
- * - * @param currSpecOpt: The table's current [[DeltaTimeTravelSpec]] - * @param newSpecOpt: The new [[DeltaTimeTravelSpec]] to be applied to the table - */ - def validateTimeTravelSpec( - spark: SparkSession, - currSpecOpt: Option[DeltaTimeTravelSpec], - newSpecOpt: Option[DeltaTimeTravelSpec]): Unit = { - if (currSpecOpt.nonEmpty && newSpecOpt.nonEmpty) { - throw DeltaErrors.multipleTimeTravelSyntaxUsed - } - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/IncrementalExecutionShims.scala b/spark/src/main/scala-spark-3.5/shims/IncrementalExecutionShims.scala deleted file mode 100644 index 9432a5dabcc..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/IncrementalExecutionShims.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.streaming - -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan - -object IncrementalExecutionShims { - - /** - * Handles a breaking change in the [[IncrementalExecution]] constructor between Spark 3.5 and - * 4.0: - * - Spark 3.5: no `isFirstBatch: Boolean` param - * - Spark 4.0: adds `isFirstBatch: Boolean` param - */ - def newInstance( - sparkSession: SparkSession, - logicalPlan: LogicalPlan, - incrementalExecution: IncrementalExecution): IncrementalExecution = new IncrementalExecution( - sparkSession, - logicalPlan, - incrementalExecution.outputMode, - incrementalExecution.checkpointLocation, - incrementalExecution.queryId, - incrementalExecution.runId, - incrementalExecution.currentBatchId, - incrementalExecution.prevOffsetSeqMetadata, - incrementalExecution.offsetSeqMetadata, - incrementalExecution.watermarkPropagator - ) -} diff --git a/spark/src/main/scala-spark-3.5/shims/LogKeyShims.scala b/spark/src/main/scala-spark-3.5/shims/LogKeyShims.scala deleted file mode 100644 index 3ebdd76038a..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/LogKeyShims.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains code from the Apache Spark project (original license above). 
- * It contains modifications, which are licensed as follows: - */ - -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.internal - -// LogKey is part of Spark's Structured Logging API and is not available in Spark 3.5. -trait LogKeyShims { - def name: String = "" -} diff --git a/spark/src/main/scala-spark-3.5/shims/LoggingShims.scala b/spark/src/main/scala-spark-3.5/shims/LoggingShims.scala deleted file mode 100644 index f56a51850f6..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/LoggingShims.scala +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains code from the Apache Spark project (original license above). - * It contains modifications, which are licensed as follows: - */ - -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.internal - -// MDC is part of Spark's Structured Logging API and is not available in Spark 3.5. -case class MDC(key: LogKeyShims, value: Any) { - require(!value.isInstanceOf[MessageWithContext], - "the class of value cannot be MessageWithContext") -} - -object MDC { - def of(key: LogKeyShims, value: Any): MDC = MDC(key, value) -} - -// MessageWithContext is part of Spark's Structured Logging API and is not available in Spark 3.5. 
-case class MessageWithContext(message: String, context: java.util.HashMap[String, String]) { - def +(mdc: MessageWithContext): MessageWithContext = { - MessageWithContext(message + mdc.message, new java.util.HashMap[String, String]()) - } - - def stripMargin: MessageWithContext = copy(message = message.stripMargin) -} - -// LogEntry is part of Spark's Structured Logging API and is not available in Spark 3.5. -class LogEntry(messageWithContext: => MessageWithContext) { - def message: String = messageWithContext.message - - def context: java.util.HashMap[String, String] = messageWithContext.context -} - -object LogEntry { - import scala.language.implicitConversions - - implicit def from(msgWithCtx: => MessageWithContext): LogEntry = - new LogEntry(msgWithCtx) -} - -trait LoggingShims extends Logging { - implicit class LogStringContext(val sc: StringContext) { - def log(args: MDC*): MessageWithContext = { - val processedParts = sc.parts.iterator - val sb = new StringBuilder(processedParts.next()) - - args.foreach { mdc => - val value = if (mdc.value != null) mdc.value.toString else null - sb.append(value) - - if (processedParts.hasNext) { - sb.append(processedParts.next()) - } - } - - MessageWithContext(sb.toString(), new java.util.HashMap[String, String]()) - } - } - - protected def logInfo(entry: LogEntry): Unit = { - if (log.isInfoEnabled) { - log.info(entry.message) - } - } - - protected def logInfo(entry: LogEntry, throwable: Throwable): Unit = { - if (log.isInfoEnabled) { - log.info(entry.message, throwable) - } - } - - protected def logDebug(entry: LogEntry): Unit = { - if (log.isDebugEnabled) { - log.debug(entry.message) - } - } - - protected def logDebug(entry: LogEntry, throwable: Throwable): Unit = { - if (log.isDebugEnabled) { - log.debug(entry.message, throwable) - } - } - - protected def logTrace(entry: LogEntry): Unit = { - if (log.isTraceEnabled) { - log.trace(entry.message) - } - } - - protected def logTrace(entry: LogEntry, throwable: Throwable): Unit = { - if (log.isTraceEnabled) { - log.trace(entry.message, throwable) - } - } - - protected def logWarning(entry: LogEntry): Unit = { - if (log.isWarnEnabled) { - log.warn(entry.message) - } - } - - protected def logWarning(entry: LogEntry, throwable: Throwable): Unit = { - if (log.isWarnEnabled) { - log.warn(entry.message, throwable) - } - } - - protected def logError(entry: LogEntry): Unit = { - if (log.isErrorEnabled) { - log.error(entry.message) - } - } - - protected def logError(entry: LogEntry, throwable: Throwable): Unit = { - if (log.isErrorEnabled) { - log.error(entry.message, throwable) - } - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/LogicalRelationShims.scala b/spark/src/main/scala-spark-3.5/shims/LogicalRelationShims.scala deleted file mode 100644 index 171491c01be..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/LogicalRelationShims.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources - -import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.sources.BaseRelation - -object LogicalRelationShims { - /** - * Handles a breaking change in the [[LogicalRelation]] constructor between Spark 3.5 and - * 4.0: - * - Spark 3.5: no `stream: Option[SparkDataStream]` param - * - Spark 4.0: adds `stream: Option[SparkDataStream]` param - */ - def newInstance( - relation: BaseRelation, - output: Seq[AttributeReference], - catalogTable: Option[CatalogTable], - isStreaming: Boolean): LogicalRelation = { - LogicalRelation(relation, output, catalogTable, isStreaming) - } -} - -// Handles a breaking change between Spark 3.5 and Spark Master (4.0). -// `LogicalRelationWithTable` is a new object in Spark 4.0. - -/** - * Extract the [[BaseRelation]] and [[CatalogTable]] from [[LogicalRelation]]. You can also - * retrieve the instance of LogicalRelation like following: - * - * case l @ LogicalRelationWithTable(relation, catalogTable) => ... - */ -object LogicalRelationWithTable { - def unapply(plan: LogicalRelation): Option[(BaseRelation, Option[CatalogTable])] = { - Some(plan.relation, plan.catalogTable) - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/MergeIntoMaterializeSourceShims.scala b/spark/src/main/scala-spark-3.5/shims/MergeIntoMaterializeSourceShims.scala deleted file mode 100644 index 1e0658a47c0..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/MergeIntoMaterializeSourceShims.scala +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta.commands.merge - -import org.apache.spark.SparkException - -object MergeIntoMaterializeSourceShims { - - /** In Spark 3.5 we can only check for the error message :( */ - def mergeMaterializedSourceRddBlockLostError(e: SparkException, rddId: Int): Boolean = { - e.getMessage.matches(s"(?s).*Checkpoint block rdd_${rddId}_[0-9]+ not found!.*") - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/RelocatedClassesShims.scala b/spark/src/main/scala-spark-3.5/shims/RelocatedClassesShims.scala deleted file mode 100644 index f3ec11d1419..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/RelocatedClassesShims.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.{SparkSession => SparkSessionImpl, DataFrameWriter => DataFrameWriterImpl} - -object Relocated { - type SparkSession = SparkSessionImpl - def setActiveSession(session: SparkSession): Unit = SparkSessionImpl.setActiveSession(session) - val dataFrameWriterClassName = classOf[DataFrameWriterImpl[_]].getCanonicalName -} diff --git a/spark/src/main/scala-spark-3.5/shims/SqlScriptingLogicalOperatorsShims.scala b/spark/src/main/scala-spark-3.5/shims/SqlScriptingLogicalOperatorsShims.scala deleted file mode 100644 index 044235116ab..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/SqlScriptingLogicalOperatorsShims.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.parser - -// Handles a breaking change between Spark 3.5 and Spark Master (4.0). -// `CompoundBody` is a new class in Spark 4.0. -/** - * Trait for all SQL Scripting logical operators that are product of parsing phase. - * These operators will be used by the SQL Scripting interpreter to generate execution nodes. - */ -sealed trait CompoundPlanStatement - -/** - * Logical operator for a compound body. Contains all statements within the compound body. - * @param collection Collection of statements within the compound body. - */ -case class CompoundBody(collection: Seq[CompoundPlanStatement]) extends CompoundPlanStatement diff --git a/spark/src/main/scala-spark-3.5/shims/TableSpecShims.scala b/spark/src/main/scala-spark-3.5/shims/TableSpecShims.scala deleted file mode 100644 index e383ea300ad..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/TableSpecShims.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
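The `Relocated` object above exists because Spark 4.0 moved the concrete `SparkSession` and `DataFrameWriter` implementations to `org.apache.spark.sql.classic` (the 4.0 flavor of this shim, further down, aliases that package instead). A sketch of the kind of shared helper it enables; the method itself is illustrative, not Delta code:

    import org.apache.spark.sql.delta.Relocated

    // The alias resolves to org.apache.spark.sql.SparkSession when built against Spark 3.5
    // and to org.apache.spark.sql.classic.SparkSession when built against Spark 4.0.
    def runWithActiveSession[T](session: Relocated.SparkSession)(body: => T): T = {
      Relocated.setActiveSession(session)
      body
    }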
- */ -package org.apache.spark.sql.delta - -import org.apache.spark.sql.catalyst.plans.logical.TableSpec - -object TableSpecUtils { - def create( - properties: Map[String, String], - provider: Option[String], - location: Option[String], - comment: Option[String]): TableSpec = { - TableSpec( - properties = properties, - provider = provider, - options = Map.empty, - location = location, - comment = comment, - serde = None, - external = false) - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/TypeWideningShims.scala b/spark/src/main/scala-spark-3.5/shims/TypeWideningShims.scala deleted file mode 100644 index ef2bdc32449..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/TypeWideningShims.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.catalyst.expressions.Cast -import org.apache.spark.sql.types._ - -/** - * Type widening only supports a limited set of type changes with Spark 3.5 due to the parquet - * readers lacking the corresponding conversions that were added in Spark 4.0. - * This shim is for Delta on Spark 3.5 which supports: - * - byte -> short -> int - */ -object TypeWideningShims { - - /** - * Returns whether the given type change is eligible for widening. This only checks atomic types. - * It is the responsibility of the caller to recurse into structs, maps and arrays. - */ - def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean = - (fromType, toType) match { - case (from, to) if from == to => true - // All supported type changes below are supposed to be widening, but to be safe, reject any - // non-widening change upfront. - case (from, to) if !Cast.canUpCast(from, to) => false - case (ByteType, ShortType) => true - case (ByteType | ShortType, IntegerType) => true - case _ => false - } - - /** - * Returns whether the given type change can be applied during schema evolution. Only a - * subset of supported type changes are considered for schema evolution. - */ - def isTypeChangeSupportedForSchemaEvolution(fromType: AtomicType, toType: AtomicType): Boolean = { - // All supported type changes are eligible for schema evolution. - isTypeChangeSupported(fromType, toType) - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/UnresolvedTableShims.scala b/spark/src/main/scala-spark-3.5/shims/UnresolvedTableShims.scala deleted file mode 100644 index 85f26843899..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/UnresolvedTableShims.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
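Read concretely, the 3.5 type-widening shim above accepts only the integer widenings that Spark 3.5's Parquet readers can already serve (byte, short and int are all INT32 in Parquet) and rejects everything else, even changes that `Cast.canUpCast` would allow. A small sketch:

    import org.apache.spark.sql.delta.TypeWideningShims
    import org.apache.spark.sql.types._

    object TypeWideningOn35Sketch extends App {
      // byte -> short -> int: the only widenings the Spark 3.5 shim allows.
      assert(TypeWideningShims.isTypeChangeSupported(ByteType, IntegerType))
      // int -> long is a legal upcast, but the 3.5 Parquet readers cannot read the existing
      // INT32 files back as LONG, so the shim rejects it.
      assert(!TypeWideningShims.isTypeChangeSupported(IntegerType, LongType))
    }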
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.analysis - -object UnresolvedTableImplicits { - - /** - * Handles a breaking change in [[UnresolvedTable]] constructor between Spark 3.5 and 4.0: - * - Spark 3.5: requires `relationTypeMismatchHint` param - * - Spark 4.0: gets rid of `relationTypeMismatchHint`param - */ - implicit class UnresolvedTableShim(self: UnresolvedTable.type) { - def apply( - tableNameParts: Seq[String], - commandName: String): UnresolvedTable = { - UnresolvedTable(tableNameParts, commandName, relationTypeMismatchHint = None) - } - } -} diff --git a/spark/src/main/scala-spark-3.5/shims/VariantShims.scala b/spark/src/main/scala-spark-3.5/shims/VariantShims.scala deleted file mode 100644 index 6918c409dc1..00000000000 --- a/spark/src/main/scala-spark-3.5/shims/VariantShims.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.types - -object VariantShims { - - /** - * Spark's variant type is implemented for Spark 4.0 and is not implemented in Spark 3.5. Thus, - * any Spark 3.5 DataType cannot be a variant type. - */ - def isVariantType(dt: DataType): Boolean = false -} diff --git a/spark/src/main/scala-spark-master/shims/ColumnDefinitionShims.scala b/spark/src/main/scala-spark-master/shims/ColumnDefinitionShims.scala deleted file mode 100644 index 9b3b4f55715..00000000000 --- a/spark/src/main/scala-spark-master/shims/ColumnDefinitionShims.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.plans.logical - -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.types.StructField - -object ColumnDefinitionShims { - - /** - * Helps handle a breaking change in [[org.apache.spark.sql.catalyst.plans.logical.CreateTable]] - * between Spark 3.5 and Spark 4.0: - * - In 3.5, `CreateTable` accepts a `tableSchema: StructType`. 
- * - In 4.0, `CreateTable` accepts a `columns: Seq[ColumnDefinition]`. - */ - def parseColumns(columns: Seq[StructField], sqlParser: ParserInterface): Seq[ColumnDefinition] = { - columns.map(ColumnDefinition.fromV1Column(_, sqlParser)).toSeq - } -} diff --git a/spark/src/main/scala-spark-master/shims/CreatableRelationProviderShims.scala b/spark/src/main/scala-spark-master/shims/CreatableRelationProviderShims.scala deleted file mode 100644 index 4720396c6a6..00000000000 --- a/spark/src/main/scala-spark-master/shims/CreatableRelationProviderShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.sources - -/** - * Spark 4.0 added additional methods to `CreatableRelationProvider`, such as `supportsDataType`, - * that can be overridden by child classes and need to be shimmed when compiling with Spark 3.5. - */ -trait CreatableRelationProviderShim extends CreatableRelationProvider diff --git a/spark/src/main/scala-spark-master/shims/DecimalPrecisionTypeCoercionShims.scala b/spark/src/main/scala-spark-master/shims/DecimalPrecisionTypeCoercionShims.scala deleted file mode 100644 index a907179fdd2..00000000000 --- a/spark/src/main/scala-spark-master/shims/DecimalPrecisionTypeCoercionShims.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.catalyst.analysis.DecimalPrecisionTypeCoercion -import org.apache.spark.sql.types.DecimalType - -object DecimalPrecisionTypeCoercionShims { - // Returns the wider decimal type that's wider than both of them - def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType = - DecimalPrecisionTypeCoercion.widerDecimalType(d1, d2) -} diff --git a/spark/src/main/scala-spark-master/shims/DeltaInvariantCheckerExecShims.scala b/spark/src/main/scala-spark-master/shims/DeltaInvariantCheckerExecShims.scala deleted file mode 100644 index 84b13f984f6..00000000000 --- a/spark/src/main/scala-spark-master/shims/DeltaInvariantCheckerExecShims.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
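The decimal shim above is a thin forwarder to Spark 4.0's `DecimalPrecisionTypeCoercion.widerDecimalType` (later in this diff, `TypeWideningMode` is switched to call it directly). The widening rule keeps the larger number of integral digits and the larger scale; for example, decimal(10,2) and decimal(12,0) widen to decimal(14,2), as in this sketch:

    import org.apache.spark.sql.catalyst.analysis.DecimalPrecisionTypeCoercion
    import org.apache.spark.sql.types.DecimalType

    object WiderDecimalSketch extends App {
      // integral digits = max(10 - 2, 12 - 0) = 12, scale = max(2, 0) = 2  =>  decimal(14, 2)
      val wider = DecimalPrecisionTypeCoercion.widerDecimalType(DecimalType(10, 2), DecimalType(12, 0))
      assert(wider == DecimalType(14, 2))
    }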
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta.constraints - -import org.apache.spark.sql.catalyst.optimizer.{ReplaceExpressions, RewriteWithExpression} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.RuleExecutor - -trait DeltaInvariantCheckerOptimizerShims { self: RuleExecutor[LogicalPlan] => - val DELTA_INVARIANT_CHECKER_OPTIMIZER_BATCHES = Seq( - Batch("Finish Analysis", Once, ReplaceExpressions), - Batch("Rewrite With expression", Once, RewriteWithExpression) - ) -} diff --git a/spark/src/main/scala-spark-master/shims/DeltaSqlParserShims.scala b/spark/src/main/scala-spark-master/shims/DeltaSqlParserShims.scala deleted file mode 100644 index 5945fa91eec..00000000000 --- a/spark/src/main/scala-spark-master/shims/DeltaSqlParserShims.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.delta.sql.parser - -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.types.StructType - -trait DeltaSqlParserShims extends ParserInterface { - def delegate: ParserInterface - override def parseRoutineParam(sqlText: String): StructType = delegate.parseRoutineParam(sqlText) -} diff --git a/spark/src/main/scala-spark-master/shims/DeltaTableValueFunctionsShims.scala b/spark/src/main/scala-spark-master/shims/DeltaTableValueFunctionsShims.scala deleted file mode 100644 index f8ad0f668fd..00000000000 --- a/spark/src/main/scala-spark-master/shims/DeltaTableValueFunctionsShims.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.delta.util.AnalysisHelper - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime - -object DeltaTableValueFunctionsShims { - - /** - * Handles a breaking change between Spark 3.5 and Spark Master (4.0). 
- * - * In Spark 4.0, SPARK-46331 [https://github.com/apache/spark/pull/44261] removed CodegenFallback - * from a subset of DateTime expressions, making the `now()` expression unevaluable. - */ - def evaluateTimeOption(value: Expression): String = { - val fakePlan = AnalysisHelper.FakeLogicalPlan(Seq(value), Nil) - val timestampExpression = ComputeCurrentTime(fakePlan).expressions.head - timestampExpression.eval().toString - } -} diff --git a/spark/src/main/scala-spark-master/shims/DeltaThrowableHelperShims.scala b/spark/src/main/scala-spark-master/shims/DeltaThrowableHelperShims.scala deleted file mode 100644 index a8906ff1784..00000000000 --- a/spark/src/main/scala-spark-master/shims/DeltaThrowableHelperShims.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.SparkThrowable -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.errors.QueryCompilationErrors - -object DeltaThrowableHelperShims { - /** - * Handles a breaking change (SPARK-46810) between Spark 3.5 and Spark Master (4.0) where - * `error-classes.json` was renamed to `error-conditions.json`. - */ - val SPARK_ERROR_CLASS_SOURCE_FILE = "error/error-conditions.json" - - def showColumnsWithConflictDatabasesError( - db: Seq[String], v1TableName: TableIdentifier): Throwable = { - QueryCompilationErrors.showColumnsWithConflictNamespacesError( - namespaceA = db, - namespaceB = v1TableName.database.get :: Nil) - } -} - -trait DeltaThrowableConditionShim extends SparkThrowable { - override def getCondition(): String = getErrorClass() - override def getErrorClass(): String -} diff --git a/spark/src/main/scala-spark-master/shims/DeltaTimeTravelSpecShims.scala b/spark/src/main/scala-spark-master/shims/DeltaTimeTravelSpecShims.scala deleted file mode 100644 index 9fcfb0e956a..00000000000 --- a/spark/src/main/scala-spark-master/shims/DeltaTimeTravelSpecShims.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.SparkSession - -object DeltaTimeTravelSpecShims { - - /** - * Ensures only a single time travel syntax is used (i.e. not version AND timestamp). 
- * - * Handles a breaking change between Spark 3.5 and 4.0 which added support for - * DataFrame-based time travel in Spark (https://github.com/apache/spark/pull/43403). - * - * TLDR: Starting in Spark 4.0, we end up with two time travel specifications in DeltaTableV2 if - * options are used to specify the time travel version/timestamp. This breaks an existing check we - * had (against Spark 3.5) which ensures only one time travel specification is used. - * - * The solution to get around this is just to ignore two specs if they are the same. If the user - * did actually provide two different time travel specs, that would have been caught by Spark - * earlier. - * - * @param currSpecOpt: The table's current [[DeltaTimeTravelSpec]] - * @param newSpecOpt: The new [[DeltaTimeTravelSpec]] to be applied to the table - */ - def validateTimeTravelSpec( - spark: SparkSession, - currSpecOpt: Option[DeltaTimeTravelSpec], - newSpecOpt: Option[DeltaTimeTravelSpec]): Unit = (currSpecOpt, newSpecOpt) match { - case (Some(currSpec), Some(newSpec)) - if currSpec.version != newSpec.version || - currSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) != - newSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) => - throw DeltaErrors.multipleTimeTravelSyntaxUsed - case _ => - } -} diff --git a/spark/src/main/scala-spark-master/shims/IncrementalExecutionShims.scala b/spark/src/main/scala-spark-master/shims/IncrementalExecutionShims.scala deleted file mode 100644 index 21f98a6acea..00000000000 --- a/spark/src/main/scala-spark-master/shims/IncrementalExecutionShims.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
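In other words, the Spark 4.0 flavor of the check above only fails when the two specifications actually disagree; receiving the same version (or timestamp) twice, once from the relation and once from the read options, is a no-op. A hedged sketch, assuming the usual `DeltaTimeTravelSpec(timestamp, version, creationSource)` shape:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.delta.{DeltaTimeTravelSpec, DeltaTimeTravelSpecShims}

    def timeTravelSketch(spark: SparkSession): Unit = {
      val byVersion = DeltaTimeTravelSpec(timestamp = None, version = Some(3L), creationSource = None)
      // Duplicate but identical specs (the Spark 4.0 options path): accepted silently.
      DeltaTimeTravelSpecShims.validateTimeTravelSpec(spark, Some(byVersion), Some(byVersion))
      // Genuinely conflicting specs: DeltaErrors.multipleTimeTravelSyntaxUsed is thrown.
      DeltaTimeTravelSpecShims.validateTimeTravelSpec(
        spark, Some(byVersion), Some(byVersion.copy(version = Some(5L))))
    }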
- */ - -package org.apache.spark.sql.execution.streaming - -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.classic.ClassicConversions._ - -object IncrementalExecutionShims { - - /** - * Handles a breaking change in the [[IncrementalExecution]] constructor between Spark 3.5 and - * 4.0: - * - Spark 3.5: no `isFirstBatch: Boolean` param - * - Spark 4.0: adds `isFirstBatch: Boolean` param - */ - def newInstance( - sparkSession: SparkSession, - logicalPlan: LogicalPlan, - incrementalExecution: IncrementalExecution): IncrementalExecution = new IncrementalExecution( - sparkSession, - logicalPlan, - incrementalExecution.outputMode, - incrementalExecution.checkpointLocation, - incrementalExecution.queryId, - incrementalExecution.runId, - incrementalExecution.currentBatchId, - incrementalExecution.prevOffsetSeqMetadata, - incrementalExecution.offsetSeqMetadata, - incrementalExecution.watermarkPropagator, - incrementalExecution.isFirstBatch // Spark 4.0 API - ) -} diff --git a/spark/src/main/scala-spark-master/shims/LogKeyShims.scala b/spark/src/main/scala-spark-master/shims/LogKeyShims.scala deleted file mode 100644 index eab10fbf6d9..00000000000 --- a/spark/src/main/scala-spark-master/shims/LogKeyShims.scala +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains code from the Apache Spark project (original license above). - * It contains modifications, which are licensed as follows: - */ - -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.internal - -/** - * All structured logging `keys` used in `MDC` must be extends `LogKey` - *
- * - * `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs. - * Follow these guidelines when adding a new LogKey: - *
- */ -trait LogKeyShims extends LogKey diff --git a/spark/src/main/scala-spark-master/shims/LoggingShims.scala b/spark/src/main/scala-spark-master/shims/LoggingShims.scala deleted file mode 100644 index c20133d5a04..00000000000 --- a/spark/src/main/scala-spark-master/shims/LoggingShims.scala +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains code from the Apache Spark project (original license above). - * It contains modifications, which are licensed as follows: - */ - -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.internal - -/** - * Guidelines for the Structured Logging Framework - Scala Logging - *
- * - * Use the `org.apache.spark.internal.Logging` trait for logging in Scala code: - * Logging Messages with Variables: - * When logging a message with variables, wrap all the variables with `MDC`s and they will be - * automatically added to the Mapped Diagnostic Context (MDC). - * This allows for structured logging and better log analysis. - *
- * - * logInfo(log"Trying to recover app: ${MDC(DeltaLogKeys.APP_ID, app.id)}") - *
- * - * Constant String Messages: - * If you are logging a constant string message, use the log methods that accept a constant - * string. - *
- * - * logInfo("StateStore stopped") - *
- * - * Exceptions: - * To ensure logs are compatible with Spark SQL and log analysis tools, avoid - * `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from - * the `Logging` trait to log exceptions, maintaining structured and parsable logs. - *
- * - * If you want to output logs in `scala code` through the structured log framework, - * you can define `custom LogKey` and use it in `scala` code as follows: - *
- * - * // To add a `custom LogKey`, implement `LogKey` - * case object CUSTOM_LOG_KEY extends LogKey - * import org.apache.spark.internal.MDC; - * logInfo(log"${MDC(CUSTOM_LOG_KEY, "key")}") - */ -trait LoggingShims extends Logging diff --git a/spark/src/main/scala-spark-master/shims/LogicalRelationShims.scala b/spark/src/main/scala-spark-master/shims/LogicalRelationShims.scala deleted file mode 100644 index 54caef60722..00000000000 --- a/spark/src/main/scala-spark-master/shims/LogicalRelationShims.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources - -import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.sources.BaseRelation - -object LogicalRelationShims { - /** - * Handles a breaking change in the [[LogicalRelation]] constructor between Spark 3.5 and - * 4.0: - * - Spark 3.5: no `stream: Option[SparkDataStream]` param - * - Spark 4.0: adds `stream: Option[SparkDataStream]` param - */ - def newInstance( - relation: BaseRelation, - output: Seq[AttributeReference], - catalogTable: Option[CatalogTable], - isStreaming: Boolean): LogicalRelation = { - LogicalRelation(relation, output, catalogTable, isStreaming, stream = None) - } -} diff --git a/spark/src/main/scala-spark-master/shims/MergeIntoMaterializeSourceShims.scala b/spark/src/main/scala-spark-master/shims/MergeIntoMaterializeSourceShims.scala deleted file mode 100644 index b9452385643..00000000000 --- a/spark/src/main/scala-spark-master/shims/MergeIntoMaterializeSourceShims.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta.commands.merge - -import org.apache.spark.SparkException - -object MergeIntoMaterializeSourceShims { - - /** In Spark 4.0+ we could check on error class, which is more stable. 
*/ - def mergeMaterializedSourceRddBlockLostError(e: SparkException, rddId: Int): Boolean = { - e.getErrorClass == "CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND" && - e.getMessageParameters.get("rddBlockId").contains(s"rdd_${rddId}") - } -} diff --git a/spark/src/main/scala-spark-master/shims/RelocatedClassesShims.scala b/spark/src/main/scala-spark-master/shims/RelocatedClassesShims.scala deleted file mode 100644 index 86b9d549205..00000000000 --- a/spark/src/main/scala-spark-master/shims/RelocatedClassesShims.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.classic.{SparkSession => SparkSessionImpl, DataFrameWriter => DataFrameWriterImpl} - -object Relocated { - type SparkSession = SparkSessionImpl - def setActiveSession(session: SparkSession): Unit = SparkSessionImpl.setActiveSession(session) - val dataFrameWriterClassName = classOf[DataFrameWriterImpl[_]].getCanonicalName -} diff --git a/spark/src/main/scala-spark-master/shims/TableSpecShims.scala b/spark/src/main/scala-spark-master/shims/TableSpecShims.scala deleted file mode 100644 index ded2daa7b8c..00000000000 --- a/spark/src/main/scala-spark-master/shims/TableSpecShims.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.delta - -import org.apache.spark.sql.catalyst.plans.logical.TableSpec - -object TableSpecUtils { - def create( - properties: Map[String, String], - provider: Option[String], - location: Option[String], - comment: Option[String]): TableSpec = { - TableSpec( - properties = properties, - provider = provider, - options = Map.empty, - location = location, - comment = comment, - collation = None, - serde = None, - external = false) - } -} diff --git a/spark/src/main/scala-spark-master/shims/TypeWideningShims.scala b/spark/src/main/scala-spark-master/shims/TypeWideningShims.scala deleted file mode 100644 index 911123e09cb..00000000000 --- a/spark/src/main/scala-spark-master/shims/TypeWideningShims.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
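Both flavors of `mergeMaterializedSourceRddBlockLostError` expose the same predicate, so the retry logic around source materialization stays version-agnostic; only the detection differs (a message regex on 3.5, the error class plus message parameters on 4.0). A sketch of a caller, with the surrounding names being illustrative:

    import org.apache.spark.SparkException
    import org.apache.spark.sql.delta.commands.merge.MergeIntoMaterializeSourceShims

    def isMaterializedSourceRddLost(t: Throwable, materializedRddId: Int): Boolean = t match {
      case e: SparkException =>
        MergeIntoMaterializeSourceShims.mergeMaterializedSourceRddBlockLostError(e, materializedRddId)
      case _ => false
    }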
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.catalyst.expressions.Cast -import org.apache.spark.sql.types._ - -/** - * Type widening only supports a limited set of type changes with Spark 3.5 due to the parquet - * readers lacking the corresponding conversions that were added in Spark 4.0. - * This shim is for Delta on Spark 4.0 which supports: - * - byte -> short -> int -> long. - * - float -> double. - * - date -> timestamp_ntz. - * - {byte, short, int} -> double. - * - decimal -> wider decimal. - * - {byte, short, int} -> decimal(10, 0) and wider. - * - long -> decimal(20, 0) and wider. - */ -object TypeWideningShims { - - /** - * Returns whether the given type change is eligible for widening. This only checks atomic types. - * It is the responsibility of the caller to recurse into structs, maps and arrays. - */ - def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean = - (fromType, toType) match { - case (from, to) if from == to => true - // All supported type changes below are supposed to be widening, but to be safe, reject any - // non-widening change upfront. - case (from, to) if !Cast.canUpCast(from, to) => false - case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize - case (FloatType, DoubleType) => true - case (DateType, TimestampNTZType) => true - case (ByteType | ShortType | IntegerType, DoubleType) => true - case (from: DecimalType, to: DecimalType) => to.isWiderThan(from) - // Byte, Short, Integer are all stored as INT32 in parquet. The parquet readers support - // converting INT32 to Decimal(10, 0) and wider. - case (ByteType | ShortType | IntegerType, d: DecimalType) => d.isWiderThan(IntegerType) - // The parquet readers support converting INT64 to Decimal(20, 0) and wider. - case (LongType, d: DecimalType) => d.isWiderThan(LongType) - case _ => false - } - - /** - * Returns whether the given type change can be applied during schema evolution. Only a - * subset of supported type changes are considered for schema evolution. - */ - def isTypeChangeSupportedForSchemaEvolution(fromType: AtomicType, toType: AtomicType): Boolean = - (fromType, toType) match { - case (from, to) if from == to => true - case (from, to) if !isTypeChangeSupported(from, to) => false - case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize - case (FloatType, DoubleType) => true - case (from: DecimalType, to: DecimalType) => to.isWiderThan(from) - case (DateType, TimestampNTZType) => true - case _ => false - } -} diff --git a/spark/src/main/scala-spark-master/shims/UnresolvedTableShims.scala b/spark/src/main/scala-spark-master/shims/UnresolvedTableShims.scala deleted file mode 100644 index 3521f15c456..00000000000 --- a/spark/src/main/scala-spark-master/shims/UnresolvedTableShims.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
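Compared with the 3.5 shim earlier in this diff, the Spark 4.0 rules above additionally admit floating-point, date and decimal widenings, while `isTypeChangeSupportedForSchemaEvolution` keeps the integer-to-decimal and integer-to-double changes out of automatic schema evolution. A few illustrative checks:

    import org.apache.spark.sql.delta.TypeWideningShims
    import org.apache.spark.sql.types._

    object TypeWideningOn40Sketch extends App {
      assert(TypeWideningShims.isTypeChangeSupported(IntegerType, LongType))
      assert(TypeWideningShims.isTypeChangeSupported(DateType, TimestampNTZType))
      // Supported as a type change...
      assert(TypeWideningShims.isTypeChangeSupported(IntegerType, DecimalType(10, 0)))
      // ...but not applied automatically during schema evolution.
      assert(!TypeWideningShims.isTypeChangeSupportedForSchemaEvolution(IntegerType, DecimalType(10, 0)))
    }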
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.analysis - -object UnresolvedTableImplicits { - - /** - * Handles a breaking change in [[UnresolvedTable]] constructor between Spark 3.5 and 4.0: - * - Spark 3.5: requires `relationTypeMismatchHint` param - * - Spark 4.0: gets rid of `relationTypeMismatchHint`param - */ - implicit class UnresolvedTableShim(self: UnresolvedTable.type) { - def apply( - tableNameParts: Seq[String], - commandName: String): UnresolvedTable = { - UnresolvedTable(tableNameParts, commandName) - } - } -} diff --git a/spark/src/main/scala-spark-master/shims/VariantShims.scala b/spark/src/main/scala-spark-master/shims/VariantShims.scala deleted file mode 100644 index 7ea201fdd20..00000000000 --- a/spark/src/main/scala-spark-master/shims/VariantShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.types - -object VariantShims { - - /** Spark's variant type is only implemented in Spark 4.0 and above. */ - def isVariantType(dt: DataType): Boolean = dt.isInstanceOf[VariantType] -} diff --git a/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala b/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala index 173ab2eaa81..2dcea9e6d36 100644 --- a/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala +++ b/spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala @@ -58,7 +58,6 @@ import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._ import org.apache.spark.sql.catalyst.parser.{ParseErrorListener, ParseException, ParserInterface} import org.apache.spark.sql.catalyst.parser.ParserUtils.{checkDuplicateClauses, string, withOrigin} import org.apache.spark.sql.catalyst.plans.logical.{AlterColumnSyncIdentity, AlterTableAddConstraint, AlterTableDropConstraint, AlterTableDropFeature, CloneTableStatement, LogicalPlan, RestoreTableStatement} @@ -73,8 +72,7 @@ import org.apache.spark.sql.types._ * forward the call to `delegate`. 
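The `UnresolvedTableShim` implicit above is what let call sites such as `VacuumTableCommand` and `DeltaTable` use the two-argument form on both Spark versions; with the 3.5 build gone, the same call binds directly to Spark 4.0's constructor, which is why this diff simply drops the `UnresolvedTableImplicits._` imports. A sketch, with the identifier values being illustrative:

    import org.apache.spark.sql.catalyst.analysis.UnresolvedTable

    object UnresolvedTableSketch {
      // On a Spark 3.5 build the implicit UnresolvedTableShim supplied relationTypeMismatchHint;
      // on Spark 4.0 (and after this change) this is a plain two-argument constructor call.
      val vacuumTarget = UnresolvedTable(Seq("main", "events"), commandName = "VACUUM")
    }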
*/ class DeltaSqlParser(val delegate: ParserInterface) - extends ParserInterface - with DeltaSqlParserShims { + extends ParserInterface { private val builder = new DeltaSqlAstBuilder private val substitution = new VariableSubstitution @@ -157,6 +155,8 @@ class DeltaSqlParser(val delegate: ParserInterface) override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText) override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText) + + override def parseRoutineParam(sqlText: String): StructType = delegate.parseRoutineParam(sqlText) } /** diff --git a/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala b/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala index 3b24fb92897..fd2733a9428 100644 --- a/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala +++ b/spark/src/main/scala/io/delta/tables/DeltaTableBuilder.scala @@ -18,7 +18,7 @@ package io.delta.tables import scala.collection.mutable -import org.apache.spark.sql.delta.{DeltaErrors, DeltaTableUtils, TableSpecUtils} +import org.apache.spark.sql.delta.{DeltaErrors, DeltaTableUtils} import org.apache.spark.sql.delta.DeltaTableUtils.withActiveSession import org.apache.spark.sql.delta.sources.DeltaSQLConf import io.delta.tables.execution._ @@ -26,7 +26,7 @@ import io.delta.tables.execution._ import org.apache.spark.annotation._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.plans.logical.{ColumnDefinitionShims, CreateTable, ReplaceTable} +import org.apache.spark.sql.catalyst.plans.logical.{ColumnDefinition, CreateTable, ReplaceTable} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.SQLExecution @@ -357,19 +357,22 @@ class DeltaTableBuilder private[tables]( DeltaTableUtils.parseColsToClusterByTransform(colNames) }) - val tableSpec = TableSpecUtils.create( + val tableSpec = org.apache.spark.sql.catalyst.plans.logical.TableSpec( properties = properties, provider = Some(FORMAT_NAME), + options = Map.empty, location = location, - comment = tblComment) + comment = tblComment, + collation = None, + serde = None, + external = false) val stmt = builderOption match { case CreateTableOptions(ifNotExists) => val unresolvedTable = org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier(table) CreateTable( unresolvedTable, - // Callout: Spark 3.5 returns StructType, Spark 4.0 returns Seq[ColumnDefinition] - ColumnDefinitionShims.parseColumns(columns.toSeq, spark.sessionState.sqlParser), + columns.map(ColumnDefinition.fromV1Column(_, spark.sessionState.sqlParser)).toSeq, partitioning, tableSpec, ifNotExists) @@ -377,8 +380,7 @@ class DeltaTableBuilder private[tables]( val unresolvedTable = org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier(table) ReplaceTable( unresolvedTable, - // Callout: Spark 3.5 returns StructType, Spark 4.0 returns Seq[ColumnDefinition] - ColumnDefinitionShims.parseColumns(columns.toSeq, spark.sessionState.sqlParser), + columns.map(ColumnDefinition.fromV1Column(_, spark.sessionState.sqlParser)).toSeq, partitioning, tableSpec, orCreate) diff --git a/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala b/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala index d2f9d688e80..e2acc18499d 100644 --- a/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala +++ 
b/spark/src/main/scala/io/delta/tables/execution/VacuumTableCommand.scala @@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedTable -import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._ import org.apache.spark.sql.delta.catalog.DeltaTableV2 import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier, DeltaTableUtils, UnresolvedDeltaPathOrIdentifier} import org.apache.spark.sql.delta.commands.DeltaCommand diff --git a/spark/src/main/scala-spark-master/shims/ColumnConversionShims.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ClassicColumnConversions.scala similarity index 100% rename from spark/src/main/scala-spark-master/shims/ColumnConversionShims.scala rename to spark/src/main/scala/org/apache/spark/sql/delta/ClassicColumnConversions.scala diff --git a/spark/src/main/scala-spark-master/shims/DataFrameShims.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DataFrameUtils.scala similarity index 100% rename from spark/src/main/scala-spark-master/shims/DataFrameShims.scala rename to spark/src/main/scala/org/apache/spark/sql/delta/DataFrameUtils.scala diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala index 324867ce4d1..14a78f295eb 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala @@ -66,7 +66,7 @@ import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTrans import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.CreateTableLikeCommand import org.apache.spark.sql.execution.command.RunnableCommand -import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, LogicalRelationShims, LogicalRelationWithTable} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, LogicalRelationWithTable} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.streaming.StreamingRelation @@ -622,7 +622,7 @@ class DeltaAnalysis(session: SparkSession) val v1TableName = child.identifier.asTableIdentifier namespace.foreach { ns => if (v1TableName.database.exists(!resolver(_, ns.head))) { - throw DeltaThrowableHelperShims.showColumnsWithConflictDatabasesError(ns, v1TableName) + throw DeltaThrowableHelper.showColumnsWithConflictDatabasesError(ns, v1TableName) } } ShowDeltaTableColumnsCommand(child) @@ -1397,7 +1397,7 @@ object DeltaRelation extends DeltaLogging { } else { v2Relation.output } - LogicalRelationShims.newInstance(relation, output, d.ttSafeCatalogTable, isStreaming = false) + LogicalRelation(relation, output, d.ttSafeCatalogTable, isStreaming = false, stream = None) } } } diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala index 2b8a8549735..81e95dfc440 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaLogFileIndex.scala @@ -20,7 +20,7 @@ import 
org.apache.spark.sql.delta.logging.DeltaLogKeys import org.apache.spark.sql.delta.util.FileNames import org.apache.hadoop.fs._ -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.datasources.{FileFormat, FileIndex, PartitionDirectory} @@ -40,7 +40,7 @@ case class DeltaLogFileIndex private ( format: FileFormat, files: Array[FileStatus]) extends FileIndex - with LoggingShims { + with Logging { import DeltaLogFileIndex._ diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala index d2f204435bf..d92a07fba5f 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala @@ -34,7 +34,7 @@ import org.apache.hadoop.mapreduce.Job import org.apache.parquet.hadoop.ParquetOutputFormat import org.apache.parquet.hadoop.util.ContextUtil -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.FileSourceConstantMetadataStructField @@ -63,7 +63,7 @@ abstract class DeltaParquetFileFormatBase( protected val optimizationsEnabled: Boolean = true, protected val tablePath: Option[String] = None, protected val isCDCRead: Boolean = false) - extends ParquetFileFormat with LoggingShims { + extends ParquetFileFormat with Logging { // Validate either we have all arguments for DV enabled read or none of them. 
if (hasTablePath) { diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala index 1811c2fe7ee..f64391ec80b 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala @@ -27,11 +27,10 @@ import org.apache.spark.sql.delta.skipping.clustering.temp.{ClusterByTransform = import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf} import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.sql.{Column, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, UnresolvedLeafNode, UnresolvedTable} -import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._ import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala index 47b0bbbd778..6f210cff1a2 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaTableValueFunctions.scala @@ -111,7 +111,10 @@ trait CDCStatementBase extends DeltaTableValueFunction { protected def getOptions: CaseInsensitiveStringMap = { def toDeltaOption(keyPrefix: String, value: Expression): (String, String) = { val evaluated = try { - DeltaTableValueFunctionsShims.evaluateTimeOption(value) + val fakePlan = util.AnalysisHelper.FakeLogicalPlan(Seq(value), Nil) + val timestampExpression = + org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime(fakePlan).expressions.head + timestampExpression.eval().toString } catch { case _: NullPointerException => throw DeltaErrors.nullRangeBoundaryInCDCRead() } diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala index 020a7f586bb..9e9bc74c9d5 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowable.scala @@ -21,7 +21,10 @@ import org.apache.spark.SparkThrowable /** * The trait for all exceptions of Delta code path. 
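The `DeltaTableValueFunctions` hunk above inlines what `DeltaTableValueFunctionsShims.evaluateTimeOption` used to do: since SPARK-46331 removed `CodegenFallback` from several datetime expressions, `now()` can no longer be `eval()`-ed directly, so the expression is first rewritten by the `ComputeCurrentTime` rule over a throwaway plan and the resulting literal is evaluated. A standalone sketch of the same idea:

    import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentTimestamp}
    import org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime
    import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project}

    object EvaluateNowSketch extends App {
      // Wrap the time-dependent expression in a dummy plan, let ComputeCurrentTime replace it
      // with a literal, then evaluate that literal directly.
      val plan = Project(Seq(Alias(CurrentTimestamp(), "ts")()), OneRowRelation())
      val evaluated = ComputeCurrentTime(plan).expressions.head.eval()
      println(evaluated) // microseconds since the epoch, as a Long
    }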
*/ -trait DeltaThrowable extends SparkThrowable with DeltaThrowableConditionShim { +trait DeltaThrowable extends SparkThrowable { + + override def getCondition(): String = getErrorClass() + // Portable error identifier across SQL engines // If null, error class or SQLSTATE is not set override def getSqlState: String = diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala index d2455f0bbc3..e6fb1d89a1b 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/DeltaThrowableHelper.scala @@ -22,7 +22,9 @@ import java.net.URL import scala.collection.JavaConverters._ -import org.apache.spark.sql.delta.DeltaThrowableHelperShims._ +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.SparkThrowable import org.apache.spark.ErrorClassesJsonReader import org.apache.spark.util.Utils @@ -33,6 +35,19 @@ import org.apache.spark.util.Utils */ object DeltaThrowableHelper { + /** + * Handles a breaking change (SPARK-46810) between Spark 3.5 and Spark Master (4.0) where + * `error-classes.json` was renamed to `error-conditions.json`. + */ + val SPARK_ERROR_CLASS_SOURCE_FILE = "error/error-conditions.json" + + def showColumnsWithConflictDatabasesError( + db: Seq[String], v1TableName: TableIdentifier): Throwable = { + QueryCompilationErrors.showColumnsWithConflictNamespacesError( + namespaceA = db, + namespaceB = v1TableName.database.get :: Nil) + } + /** * Try to find the error class source file and throw exception if it is no found. */ diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala b/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala index 122c47c7b4a..1f6b1c699ea 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/FileMetadataMaterializationTracker.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.delta.logging.DeltaLogKeys import org.apache.spark.sql.delta.metering.DeltaLogging import org.apache.spark.sql.delta.sources.DeltaSQLConf -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.sql.SparkSession /** @@ -43,7 +43,7 @@ import org.apache.spark.sql.SparkSession * Accessed by the thread materializing files and by the thread releasing resources after execution. * */ -class FileMetadataMaterializationTracker extends LoggingShims { +class FileMetadataMaterializationTracker extends Logging { /** The number of permits allocated from the global file materialization semaphore */ @volatile private var numPermitsFromSemaphore: Int = 0 diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala b/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala index fb2bc51b449..17e67cde07f 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/GeneratedColumn.scala @@ -211,7 +211,7 @@ object GeneratedColumn extends DeltaLogging with AnalysisHelper { // Generated columns cannot be variant types because the writer must be able to enforce that // the <=> . Variants are currently not comprable so // this condition is impossible to enforce. 
- if (VariantShims.isVariantType(c.dataType)) { + if (c.dataType.isInstanceOf[VariantType]) { throw DeltaErrors.generatedColumnsUnsupportedType(c.dataType) } } diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala index 47e904a9bfe..f6a4c391700 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.delta import org.apache.spark.sql.delta.actions.{AddFile, Metadata, Protocol, TableFeatureProtocolUtils} import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.functions.{col, lit} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -66,13 +67,38 @@ object TypeWidening { /** * Returns whether the given type change is eligible for widening. This only checks atomic types. * It is the responsibility of the caller to recurse into structs, maps and arrays. + * + * Type widening supports: + * - byte -> short -> int -> long. + * - float -> double. + * - date -> timestamp_ntz. + * - {byte, short, int} -> double. + * - decimal -> wider decimal. + * - {byte, short, int} -> decimal(10, 0) and wider. + * - long -> decimal(20, 0) and wider. */ def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean = - TypeWideningShims.isTypeChangeSupported(fromType = fromType, toType = toType) + (fromType, toType) match { + case (from, to) if from == to => true + // All supported type changes below are supposed to be widening, but to be safe, reject any + // non-widening change upfront. + case (from, to) if !Cast.canUpCast(from, to) => false + case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize + case (FloatType, DoubleType) => true + case (DateType, TimestampNTZType) => true + case (ByteType | ShortType | IntegerType, DoubleType) => true + case (from: DecimalType, to: DecimalType) => to.isWiderThan(from) + // Byte, Short, Integer are all stored as INT32 in parquet. The parquet readers support + // converting INT32 to Decimal(10, 0) and wider. + case (ByteType | ShortType | IntegerType, d: DecimalType) => d.isWiderThan(IntegerType) + // The parquet readers support converting INT64 to Decimal(20, 0) and wider. 
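// Illustrative note (not part of this patch): applying the rules above, for example:
//   isTypeChangeSupported(IntegerType, LongType)            // true  -- integral widening
//   isTypeChangeSupported(IntegerType, DecimalType(10, 0))  // true  -- INT32 to decimal(10, 0) or wider
//   isTypeChangeSupported(LongType, IntegerType)            // false -- narrowing, rejected by Cast.canUpCast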
+ case (LongType, d: DecimalType) => d.isWiderThan(LongType) + case _ => false + } def isTypeChangeSupported( fromType: AtomicType, toType: AtomicType, uniformIcebergCompatibleOnly: Boolean): Boolean = - TypeWideningShims.isTypeChangeSupported(fromType = fromType, toType = toType) && + isTypeChangeSupported(fromType, toType) && (!uniformIcebergCompatibleOnly || isTypeChangeSupportedByIceberg(fromType = fromType, toType = toType)) @@ -83,14 +109,22 @@ object TypeWidening { def isTypeChangeSupportedForSchemaEvolution( fromType: AtomicType, toType: AtomicType, - uniformIcebergCompatibleOnly: Boolean): Boolean = - TypeWideningShims.isTypeChangeSupportedForSchemaEvolution( - fromType = fromType, - toType = toType - ) && ( + uniformIcebergCompatibleOnly: Boolean): Boolean = { + val supportedForSchemaEvolution = (fromType, toType) match { + case (from, to) if from == to => true + case (from, to) if !isTypeChangeSupported(from, to) => false + case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize + case (FloatType, DoubleType) => true + case (from: DecimalType, to: DecimalType) => to.isWiderThan(from) + case (DateType, TimestampNTZType) => true + case _ => false + } + + supportedForSchemaEvolution && ( !uniformIcebergCompatibleOnly || isTypeChangeSupportedByIceberg(fromType = fromType, toType = toType) ) + } /** * Returns whether the given type change is supported by Iceberg, and by extension can be read diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala index 38601df0862..d38dd19e535 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/TypeWideningMode.scala @@ -16,7 +16,7 @@ package org.apache.spark.sql.delta -import org.apache.spark.sql.delta.DecimalPrecisionTypeCoercionShims +import org.apache.spark.sql.catalyst.analysis.DecimalPrecisionTypeCoercion import org.apache.spark.sql.delta.metering.DeltaLogging import org.apache.spark.sql.delta.sources.DeltaSQLConf.AllowAutomaticWideningMode import org.apache.spark.sql.util.ScalaExtensions._ @@ -124,7 +124,7 @@ object TypeWideningMode { case (l, r) if TypeWidening.isTypeChangeSupported(l, r) => Some(r) case (l, r) if TypeWidening.isTypeChangeSupported(r, l) => Some(l) case (l: DecimalType, r: DecimalType) => - val wider = DecimalPrecisionTypeCoercionShims.widerDecimalType(l, r) + val wider = DecimalPrecisionTypeCoercion.widerDecimalType(l, r) Option.when( TypeWidening.isTypeChangeSupported(l, wider) && TypeWidening.isTypeChangeSupported(r, wider))(wider) @@ -149,7 +149,7 @@ object TypeWideningMode { case (l, r) if typeChangeSupported(l, r) => Some(r) case (l, r) if typeChangeSupported(r, l) => Some(l) case (l: DecimalType, r: DecimalType) => - val wider = DecimalPrecisionTypeCoercionShims.widerDecimalType(l, r) + val wider = DecimalPrecisionTypeCoercion.widerDecimalType(l, r) Option.when(typeChangeSupported(l, wider) && typeChangeSupported(r, wider))(wider) case _ => None } diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala index 0f5baea0303..1e04ea7e41d 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala @@ -38,7 +38,6 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.{DataFrame, Dataset, 
SaveMode, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{ResolvedTable, UnresolvedTable} -import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._ import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical.{AnalysisHelper, LogicalPlan, SubqueryAlias} import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes @@ -49,7 +48,7 @@ import org.apache.spark.sql.connector.catalog.V1Table import org.apache.spark.sql.connector.expressions._ import org.apache.spark.sql.connector.write.{LogicalWriteInfo, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1Write, WriteBuilder} import org.apache.spark.sql.errors.QueryCompilationErrors -import org.apache.spark.sql.execution.datasources.{LogicalRelation, LogicalRelationShims} +import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.{BaseRelation, Filter, InsertableRelation} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -328,8 +327,12 @@ class DeltaTableV2 private( /** Creates a [[LogicalRelation]] that represents this table */ lazy val toLogicalRelation: LogicalRelation = { val relation = this.toBaseRelation - LogicalRelationShims.newInstance( - relation, toAttributes(relation.schema), ttSafeCatalogTable, isStreaming = false) + LogicalRelation( + relation, + toAttributes(relation.schema), + ttSafeCatalogTable, + isStreaming = false, + stream = None) } /** Creates a [[DataFrame]] that uses the requested spark session to read from this table */ @@ -351,10 +354,15 @@ class DeltaTableV2 private( val ttSpec = DeltaDataSource.getTimeTravelVersion(newOptions) // Spark 4.0 and 3.5 handle time travel options differently. 
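// Illustrative, self-contained sketch (not part of this patch) of the "single time travel spec"
// check introduced below. `TimeTravelSpecCheck`, `Spec` and `requireSingleSpec` are hypothetical
// names for this example; the real code compares DeltaTimeTravelSpec versions and resolved timestamps.
object TimeTravelSpecCheck {
  final case class Spec(version: Option[Long], timestampMs: Option[Long])

  def requireSingleSpec(current: Option[Spec], fromOptions: Option[Spec]): Unit =
    (current, fromOptions) match {
      case (Some(a), Some(b)) if a != b =>
        // Two different specs were supplied, e.g. a versioned path plus a versionAsOf option.
        throw new IllegalArgumentException("Cannot specify time travel in multiple formats.")
      case _ => // at most one spec, or both identical: nothing to validate
    }
}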
- DeltaTimeTravelSpecShims.validateTimeTravelSpec( - spark, - currSpecOpt = timeTravelOpt, - newSpecOpt = ttSpec) + // Validate that only one time travel spec is being used + (timeTravelOpt, ttSpec) match { + case (Some(currSpec), Some(newSpec)) + if currSpec.version != newSpec.version || + currSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) != + newSpec.getTimestampOpt(spark.sessionState.conf).map(_.getTime) => + throw DeltaErrors.multipleTimeTravelSyntaxUsed + case _ => + } val caseInsensitiveNewOptions = new CaseInsensitiveStringMap(newOptions.asJava) diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala index 753566bf114..7a240587bc7 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.delta.commands import org.apache.spark.sql.delta.{DeltaErrors, Snapshot} -import org.apache.spark.sql.delta.Relocated import org.apache.spark.sql.delta.hooks.{UpdateCatalog, UpdateCatalogFactory} import org.apache.spark.sql.delta.sources.DeltaSQLConf @@ -173,6 +172,6 @@ trait CreateDeltaTableLike extends SQLConfHelper { */ protected def isV1Writer: Boolean = { Thread.currentThread().getStackTrace.exists(_.toString.contains( - Relocated.dataFrameWriterClassName + ".")) + classOf[org.apache.spark.sql.classic.DataFrameWriter[_]].getCanonicalName + ".")) } } diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala index 919d3e9f5de..0c9a86bc62d 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/merge/MergeIntoMaterializeSource.scala @@ -176,9 +176,8 @@ trait MergeIntoMaterializeSource extends DeltaLogging with DeltaSparkPlanUtils { // SparkCoreErrors.checkpointRDDBlockIdNotFoundError from LocalCheckpointRDD.compute. case s: SparkException if materializedSourceRDD.nonEmpty && - MergeIntoMaterializeSourceShims.mergeMaterializedSourceRddBlockLostError( - s, - materializedSourceRDD.get.id) => + s.getErrorClass == "CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND" && + s.getMessageParameters.get("rddBlockId").contains(s"rdd_${materializedSourceRDD.get.id}") => logWarning(log"Materialized ${MDC(DeltaLogKeys.OPERATION, operation)} source RDD block " + log"lost. ${MDC(DeltaLogKeys.OPERATION, operation)} needs to be restarted. " + log"This was attempt number ${MDC(DeltaLogKeys.ATTEMPT, attempt)}.") diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala index df013ea6295..3c905a4a898 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/constraints/DeltaInvariantCheckerExec.scala @@ -119,10 +119,13 @@ object DeltaInvariantCheckerExec extends DeltaLogging { } // Specialized optimizer to run necessary rules so that the check expressions can be evaluated. 
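// Illustrative, self-contained sketch (not part of this patch): a minimal catalyst RuleExecutor
// using the same Batch/Once API as the DeltaInvariantCheckerOptimizer defined below.
// `MiniCheckOptimizer` is a hypothetical name used only for this example.
object MiniCheckOptimizer extends org.apache.spark.sql.catalyst.rules.RuleExecutor[
    org.apache.spark.sql.catalyst.plans.logical.LogicalPlan] {
  import org.apache.spark.sql.catalyst.optimizer.ReplaceExpressions

  // Rewrite RuntimeReplaceable expressions exactly once so that check expressions can
  // subsequently be evaluated; run it with MiniCheckOptimizer.execute(plan).
  override protected def batches = Seq(Batch("Finish Analysis", Once, ReplaceExpressions))
}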
- object DeltaInvariantCheckerOptimizer - extends RuleExecutor[LogicalPlan] - with DeltaInvariantCheckerOptimizerShims { - final override protected def batches = DELTA_INVARIANT_CHECKER_OPTIMIZER_BATCHES + object DeltaInvariantCheckerOptimizer extends RuleExecutor[LogicalPlan] { + import org.apache.spark.sql.catalyst.optimizer.{ReplaceExpressions, RewriteWithExpression} + + final override protected def batches = Seq( + Batch("Finish Analysis", Once, ReplaceExpressions), + Batch("Rewrite With expression", Once, RewriteWithExpression) + ) } /** Build the extractor for a particular column. */ diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala b/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala index 4915497a8c6..54b2fb714ee 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.scala @@ -32,7 +32,7 @@ import io.delta.storage.commit.{CommitCoordinatorClient, CommitFailedException = import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} /** * An abstract [[CommitCoordinatorClient]] which triggers backfills every n commits. @@ -40,7 +40,7 @@ import org.apache.spark.internal.{LoggingShims, MDC} */ trait AbstractBatchBackfillingCommitCoordinatorClient extends CommitCoordinatorClient - with LoggingShims { + with Logging { /** * Size of batch that should be backfilled. So every commit version which satisfies diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala index b138f2a16ce..75c73457864 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/files/DeltaFileFormatWriter.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.spark._ -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.sql.SparkSession @@ -55,7 +55,7 @@ import org.apache.spark.util.{SerializableConfiguration, Utils} * values to data files. 
Specifically L123-126, L132, and L140 where it adds option * WRITE_PARTITION_COLUMNS */ -object DeltaFileFormatWriter extends LoggingShims { +object DeltaFileFormatWriter extends Logging { /** * A variable used in tests to check whether the output ordering of the query matches the diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala b/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala index 5b2f63afd34..1984a2cf4ec 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/logging/DeltaLogKeys.scala @@ -38,106 +38,106 @@ package org.apache.spark.sql.delta.logging -import org.apache.spark.internal.LogKeyShims +import org.apache.spark.internal.LogKey /** * Various keys used for mapped diagnostic contexts(MDC) in logging. All structured logging keys * should be defined here for standardization. */ trait DeltaLogKeysBase { - case object APP_ID extends LogKeyShims - case object ATTEMPT extends LogKeyShims - case object BATCH_ID extends LogKeyShims - case object BATCH_SIZE extends LogKeyShims - case object CATALOG extends LogKeyShims - case object CLONE_SOURCE_DESC extends LogKeyShims - case object CONFIG extends LogKeyShims - case object CONFIG_KEY extends LogKeyShims - case object COORDINATOR_CONF extends LogKeyShims - case object COORDINATOR_NAME extends LogKeyShims - case object COUNT extends LogKeyShims - case object DATA_FILTER extends LogKeyShims - case object DATE extends LogKeyShims - case object DELTA_COMMIT_INFO extends LogKeyShims - case object DELTA_METADATA extends LogKeyShims - case object DIR extends LogKeyShims - case object DURATION extends LogKeyShims - case object ERROR_ID extends LogKeyShims - case object END_INDEX extends LogKeyShims - case object END_OFFSET extends LogKeyShims - case object END_VERSION extends LogKeyShims - case object ERROR extends LogKeyShims - case object EXCEPTION extends LogKeyShims - case object EXECUTOR_ID extends LogKeyShims - case object EXPR extends LogKeyShims - case object FILE_INDEX extends LogKeyShims - case object FILE_NAME extends LogKeyShims - case object FILE_STATUS extends LogKeyShims - case object FILE_SYSTEM_SCHEME extends LogKeyShims - case object FILTER extends LogKeyShims - case object FILTER2 extends LogKeyShims - case object HOOK_NAME extends LogKeyShims - case object INVARIANT_CHECK_INFO extends LogKeyShims - case object ISOLATION_LEVEL extends LogKeyShims - case object IS_DRY_RUN extends LogKeyShims - case object IS_INIT_SNAPSHOT extends LogKeyShims - case object IS_PATH_TABLE extends LogKeyShims - case object JOB_ID extends LogKeyShims - case object LOG_SEGMENT extends LogKeyShims - case object MAX_SIZE extends LogKeyShims - case object METADATA_ID extends LogKeyShims - case object METADATA_NEW extends LogKeyShims - case object METADATA_OLD extends LogKeyShims - case object METRICS extends LogKeyShims - case object METRIC_NAME extends LogKeyShims - case object MIN_SIZE extends LogKeyShims - case object NUM_ACTIONS extends LogKeyShims - case object NUM_ACTIONS2 extends LogKeyShims - case object NUM_ATTEMPT extends LogKeyShims - case object NUM_BYTES extends LogKeyShims - case object NUM_DIRS extends LogKeyShims - case object NUM_FILES extends LogKeyShims - case object NUM_FILES2 extends LogKeyShims - case object NUM_PARTITIONS extends LogKeyShims - case object NUM_PREDICATES extends LogKeyShims - case object NUM_RECORDS extends LogKeyShims - case object NUM_RECORDS2 extends LogKeyShims - 
case object NUM_SKIPPED extends LogKeyShims - case object OFFSET extends LogKeyShims - case object OPERATION extends LogKeyShims - case object OP_NAME extends LogKeyShims - case object PARTITION_FILTER extends LogKeyShims - case object PATH extends LogKeyShims - case object PATH2 extends LogKeyShims - case object PATHS extends LogKeyShims - case object PATHS2 extends LogKeyShims - case object PATHS3 extends LogKeyShims - case object PATHS4 extends LogKeyShims - case object PROTOCOL extends LogKeyShims - case object QUERY_ID extends LogKeyShims - case object SCHEMA extends LogKeyShims - case object SCHEMA_DIFF extends LogKeyShims - case object SNAPSHOT extends LogKeyShims - case object START_INDEX extends LogKeyShims - case object START_VERSION extends LogKeyShims - case object STATS extends LogKeyShims - case object STATUS extends LogKeyShims - case object STATUS_MESSAGE extends LogKeyShims - case object SYSTEM_CLASS_NAME extends LogKeyShims - case object TABLE_FEATURES extends LogKeyShims - case object TABLE_ID extends LogKeyShims - case object TABLE_NAME extends LogKeyShims - case object TBL_PROPERTIES extends LogKeyShims - case object THREAD_NAME extends LogKeyShims - case object TIMESTAMP extends LogKeyShims - case object TIMESTAMP2 extends LogKeyShims - case object TIME_MS extends LogKeyShims - case object TIME_STATS extends LogKeyShims - case object TXN_ID extends LogKeyShims - case object URI extends LogKeyShims - case object VACUUM_STATS extends LogKeyShims - case object VERSION extends LogKeyShims - case object VERSION2 extends LogKeyShims + case object APP_ID extends LogKey + case object ATTEMPT extends LogKey + case object BATCH_ID extends LogKey + case object BATCH_SIZE extends LogKey + case object CATALOG extends LogKey + case object CLONE_SOURCE_DESC extends LogKey + case object CONFIG extends LogKey + case object CONFIG_KEY extends LogKey + case object COORDINATOR_CONF extends LogKey + case object COORDINATOR_NAME extends LogKey + case object COUNT extends LogKey + case object DATA_FILTER extends LogKey + case object DATE extends LogKey + case object DELTA_COMMIT_INFO extends LogKey + case object DELTA_METADATA extends LogKey + case object DIR extends LogKey + case object DURATION extends LogKey + case object ERROR_ID extends LogKey + case object END_INDEX extends LogKey + case object END_OFFSET extends LogKey + case object END_VERSION extends LogKey + case object ERROR extends LogKey + case object EXCEPTION extends LogKey + case object EXECUTOR_ID extends LogKey + case object EXPR extends LogKey + case object FILE_INDEX extends LogKey + case object FILE_NAME extends LogKey + case object FILE_STATUS extends LogKey + case object FILE_SYSTEM_SCHEME extends LogKey + case object FILTER extends LogKey + case object FILTER2 extends LogKey + case object HOOK_NAME extends LogKey + case object INVARIANT_CHECK_INFO extends LogKey + case object ISOLATION_LEVEL extends LogKey + case object IS_DRY_RUN extends LogKey + case object IS_INIT_SNAPSHOT extends LogKey + case object IS_PATH_TABLE extends LogKey + case object JOB_ID extends LogKey + case object LOG_SEGMENT extends LogKey + case object MAX_SIZE extends LogKey + case object METADATA_ID extends LogKey + case object METADATA_NEW extends LogKey + case object METADATA_OLD extends LogKey + case object METRICS extends LogKey + case object METRIC_NAME extends LogKey + case object MIN_SIZE extends LogKey + case object NUM_ACTIONS extends LogKey + case object NUM_ACTIONS2 extends LogKey + case object NUM_ATTEMPT extends LogKey + case object 
NUM_BYTES extends LogKey + case object NUM_DIRS extends LogKey + case object NUM_FILES extends LogKey + case object NUM_FILES2 extends LogKey + case object NUM_PARTITIONS extends LogKey + case object NUM_PREDICATES extends LogKey + case object NUM_RECORDS extends LogKey + case object NUM_RECORDS2 extends LogKey + case object NUM_SKIPPED extends LogKey + case object OFFSET extends LogKey + case object OPERATION extends LogKey + case object OP_NAME extends LogKey + case object PARTITION_FILTER extends LogKey + case object PATH extends LogKey + case object PATH2 extends LogKey + case object PATHS extends LogKey + case object PATHS2 extends LogKey + case object PATHS3 extends LogKey + case object PATHS4 extends LogKey + case object PROTOCOL extends LogKey + case object QUERY_ID extends LogKey + case object SCHEMA extends LogKey + case object SCHEMA_DIFF extends LogKey + case object SNAPSHOT extends LogKey + case object START_INDEX extends LogKey + case object START_VERSION extends LogKey + case object STATS extends LogKey + case object STATUS extends LogKey + case object STATUS_MESSAGE extends LogKey + case object SYSTEM_CLASS_NAME extends LogKey + case object TABLE_FEATURES extends LogKey + case object TABLE_ID extends LogKey + case object TABLE_NAME extends LogKey + case object TBL_PROPERTIES extends LogKey + case object THREAD_NAME extends LogKey + case object TIMESTAMP extends LogKey + case object TIMESTAMP2 extends LogKey + case object TIME_MS extends LogKey + case object TIME_STATS extends LogKey + case object TXN_ID extends LogKey + case object URI extends LogKey + case object VACUUM_STATS extends LogKey + case object VERSION extends LogKey + case object VERSION2 extends LogKey } object DeltaLogKeys extends DeltaLogKeysBase diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala b/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala index ba4c26abbd0..fd458dbc9d3 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/metering/DeltaLogging.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.util.ScalaExtensions._ import org.apache.hadoop.fs.Path import org.apache.spark.SparkThrowable -import org.apache.spark.internal.{LoggingShims, MDC, MessageWithContext} +import org.apache.spark.internal.{Logging, MDC, MessageWithContext} /** * Convenience wrappers for logging that include delta specific options and @@ -233,7 +233,7 @@ object DeltaLogging { class LogThrottler( val bucketSize: Int = 100, val tokenRecoveryInterval: FiniteDuration = 1.second, - val timeSource: NanoTimeTimeSource = SystemNanoTimeSource) extends LoggingShims { + val timeSource: NanoTimeTimeSource = SystemNanoTimeSource) extends Logging { private var remainingTokens = bucketSize private var nextRecovery: DeadlineWithTimeSource = diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala b/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala index 3387ab95f1b..b4de244672c 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala @@ -16,7 +16,7 @@ package org.apache.spark.sql.delta.perf -import org.apache.spark.internal.LoggingShims +import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import 
org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ @@ -45,7 +45,7 @@ import java.util.Locale * - Query has no GROUP BY. * Example of valid query: SELECT COUNT(*), MIN(id), MAX(partition_col) FROM MyDeltaTable */ -trait OptimizeMetadataOnlyDeltaQuery extends LoggingShims { +trait OptimizeMetadataOnlyDeltaQuery extends Logging { def optimizeQueryWithMetadata(plan: LogicalPlan): LogicalPlan = { plan.transformUpWithSubqueries { case agg@MetadataOptimizableAggregate(tahoeLogFileIndex) => diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala index f6a9e6dab82..ded73f7b5e0 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala @@ -1502,7 +1502,7 @@ def normalizeColumnNamesInDataType( * Returns 'true' if any VariantType exists in the table schema. */ def checkForVariantTypeColumnsRecursively(schema: StructType): Boolean = { - SchemaUtils.typeExistsRecursively(schema)(VariantShims.isVariantType(_)) + SchemaUtils.typeExistsRecursively(schema)(_.isInstanceOf[VariantType]) } /** @@ -1537,7 +1537,7 @@ def normalizeColumnNamesInDataType( case DateType => case TimestampType => case TimestampNTZType => - case dt if VariantShims.isVariantType(dt) => + case dt if dt.isInstanceOf[VariantType] => case BinaryType => case _: DecimalType => case a: ArrayType => diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala index 677733fbd46..6ab2408042c 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala @@ -45,8 +45,7 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.sources._ import org.apache.spark.sql.streaming.OutputMode -import org.apache.spark.sql.types.{DataType, VariantShims} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{DataType, StructType, VariantType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -55,7 +54,7 @@ class DeltaDataSource extends RelationProvider with StreamSourceProvider with StreamSinkProvider - with CreatableRelationProviderShim + with CreatableRelationProvider with DataSourceRegister with TableProvider with DeltaLogging { @@ -304,10 +303,9 @@ class DeltaDataSource /** * Extend the default `supportsDataType` to allow VariantType. - * Implemented by `CreatableRelationProviderShim`. 
*/ override def supportsDataType(dt: DataType): Boolean = { - VariantShims.isVariantType(dt) || super.supportsDataType(dt) + dt.isInstanceOf[VariantType] || super.supportsDataType(dt) } override def shortName(): String = { diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala index 787992a56cd..89a2852f5a8 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaStreamUtils.scala @@ -23,8 +23,9 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.delta.DataFrameUtils import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.streaming.{IncrementalExecution, IncrementalExecutionShims, StreamExecution} +import org.apache.spark.sql.execution.streaming.{IncrementalExecution, StreamExecution} object DeltaStreamUtils { @@ -39,10 +40,18 @@ object DeltaStreamUtils { df: DataFrame, cols: Column*): DataFrame = { val newMicroBatch = df.select(cols: _*) - val newIncrementalExecution = IncrementalExecutionShims.newInstance( + val newIncrementalExecution = new IncrementalExecution( newMicroBatch.sparkSession, newMicroBatch.queryExecution.logical, - incrementalExecution) + incrementalExecution.outputMode, + incrementalExecution.checkpointLocation, + incrementalExecution.queryId, + incrementalExecution.runId, + incrementalExecution.currentBatchId, + incrementalExecution.prevOffsetSeqMetadata, + incrementalExecution.offsetSeqMetadata, + incrementalExecution.watermarkPropagator, + incrementalExecution.isFirstBatch) newIncrementalExecution.executedPlan // Force the lazy generation of execution plan DataFrameUtils.ofRows(newIncrementalExecution) } diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala index 2ffaa1da1e1..2120b35e43b 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/stats/StatsCollectionUtils.scala @@ -39,7 +39,7 @@ import org.apache.parquet.io.api.Binary import org.apache.parquet.schema.LogicalTypeAnnotation._ import org.apache.parquet.schema.PrimitiveType -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.DataSourceUtils @@ -49,7 +49,7 @@ import org.apache.spark.util.SerializableConfiguration object StatsCollectionUtils - extends LoggingShims + extends Logging { /** diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala index 7c838ecc09c..61dbacec7d5 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/streaming/SchemaTrackingLog.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.delta.logging.DeltaLogKeys import org.apache.spark.sql.delta.util.JsonUtils import com.fasterxml.jackson.annotation.JsonIgnore -import org.apache.spark.internal.{LoggingShims, MDC} +import 
org.apache.spark.internal.{Logging, MDC} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.streaming.{HDFSMetadataLog, MetadataVersionUtil} import org.apache.spark.sql.types.{DataType, StructType} @@ -103,7 +103,7 @@ class SchemaTrackingLog[T <: PartitionAndDataSchema: ClassTag: Manifest]( sparkSession: SparkSession, path: String, schemaSerializer: SchemaSerializer[T]) - extends HDFSMetadataLog[T](sparkSession, path) with LoggingShims { + extends HDFSMetadataLog[T](sparkSession, path) with Logging { import SchemaTrackingExceptions._ diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala index 82632cfeb85..42d1133b2bb 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/DeltaProgressReporter.scala @@ -19,10 +19,10 @@ package org.apache.spark.sql.delta.util import org.apache.spark.sql.delta.logging.DeltaLogKeys import org.apache.spark.SparkContext -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.sql.SparkSession -trait DeltaProgressReporter extends LoggingShims { +trait DeltaProgressReporter extends Logging { /** * Report a log to indicate some command is running. */ diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala index 09c7984f29a..6bb8d4818ad 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/PartitionUtils.scala @@ -630,7 +630,7 @@ private[delta] object PartitionUtils { partitionColumnsSchema(schema, partitionColumns, caseSensitive).foreach { field => field.dataType match { // Variant types are not orderable and thus cannot be partition columns. - case a: AtomicType if !VariantShims.isVariantType(a) => // OK + case a: AtomicType if !a.isInstanceOf[VariantType] => // OK case _ => throw DeltaErrors.cannotUseDataTypeForPartitionColumnError(field) } } diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala index 109606dab40..6eeaf5a91d2 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/util/threads/SparkThreadLocalForwardingThreadPoolExecutor.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.delta.logging.DeltaLogKeys import org.apache.spark.{SparkContext, TaskContext} -import org.apache.spark.internal.{LoggingShims, MDC} +import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.util.{Utils => SparkUtils} /** @@ -48,7 +48,7 @@ class SparkThreadLocalForwardingThreadPoolExecutor( } -trait SparkThreadLocalCapturingHelper extends LoggingShims { +trait SparkThreadLocalCapturingHelper extends Logging { // At the time of creating this instance we capture the task context and command context. 
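// Illustrative, self-contained sketch (not part of this patch) of the capture-at-construction /
// install-and-restore-around-run pattern this helper implements. `RequestContext` and
// `ContextForwardingRunnable` are hypothetical names used only for this example.
object RequestContext {
  private val holder = new ThreadLocal[String]
  def get(): String = holder.get()
  def set(value: String): Unit = holder.set(value)
}

class ContextForwardingRunnable(body: () => Unit) extends Runnable {
  // Capture the submitting thread's context when the task is created...
  private val captured = RequestContext.get()

  override def run(): Unit = {
    val previous = RequestContext.get()
    RequestContext.set(captured) // ...install it on the pool thread for the duration of the task...
    try body() finally RequestContext.set(previous) // ...and always restore the pool thread's state.
  }
}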
val capturedTaskContext = TaskContext.get() val sparkContext = SparkContext.getActive diff --git a/spark/src/test/resources/log4j2.properties b/spark/src/test/resources/log4j2.properties index 43daec1a285..0a8d5bb856f 100644 --- a/spark/src/test/resources/log4j2.properties +++ b/spark/src/test/resources/log4j2.properties @@ -38,18 +38,18 @@ appender.file.append = true appender.file.layout.type = PatternLayout appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n -# Pattern Logging Appender -appender.pattern.type = File -appender.pattern.name = pattern -appender.pattern.fileName = target/pattern.log -appender.pattern.layout.type = PatternLayout -appender.pattern.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex +# Structured Logging Appender +appender.structured.type = File +appender.structured.name = structured +appender.structured.fileName = target/structured.log +appender.structured.layout.type = JsonTemplateLayout +appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json -# Custom logger for testing structured logging with Spark 3.5 shims -logger.pattern_logging.name = org.apache.spark.sql.delta.logging.DeltaPatternLoggingSuite -logger.pattern_logging.level = trace -logger.pattern_logging.appenderRefs = pattern -logger.pattern_logging.appenderRef.pattern.ref = pattern +# Custom logger for testing structured logging with Spark 4.0+ +logger.structured_logging.name = org.apache.spark.sql.delta.logging.DeltaStructuredLoggingSuite +logger.structured_logging.level = trace +logger.structured_logging.appenderRefs = structured +logger.structured_logging.appenderRef.structured.ref = structured # Tests that launch java subprocesses can set the "test.appender" system property to # "console" to avoid having the child process's logs overwrite the unit test's diff --git a/spark/src/test/resources/log4j2_spark_master.properties b/spark/src/test/resources/log4j2_spark_master.properties deleted file mode 100644 index 95aea7050b7..00000000000 --- a/spark/src/test/resources/log4j2_spark_master.properties +++ /dev/null @@ -1,65 +0,0 @@ -# -# Copyright (2021) The Delta Lake Project Authors. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# Set everything to be logged to the file target/unit-tests.log -rootLogger.level = warn -rootLogger.appenderRef.file.ref = ${sys:test.appender:-File} - -appender.file.type = File -appender.file.name = File -appender.file.fileName = target/unit-tests.log -appender.file.append = true -appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n - -# Structured Logging Appender -appender.structured.type = File -appender.structured.name = structured -appender.structured.fileName = target/structured.log -appender.structured.layout.type = JsonTemplateLayout -appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json - -# Custom logger for testing structured logging with Spark master -logger.structured_logging.name = org.apache.spark.sql.delta.logging.DeltaStructuredLoggingSuite -logger.structured_logging.level = trace -logger.structured_logging.appenderRefs = structured -logger.structured_logging.appenderRef.structured.ref = structured - -# Tests that launch java subprocesses can set the "test.appender" system property to -# "console" to avoid having the child process's logs overwrite the unit test's -# log file. -appender.console.type = Console -appender.console.name = console -appender.console.target = SYSTEM_ERR -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n - -# Ignore messages below warning level from Jetty, because it's a bit verbose -logger.jetty.name = org.sparkproject.jetty -logger.jetty.level = warn diff --git a/spark/src/test/scala-spark-3.5/org/apache/spark/sql/delta/logging/DeltaPatternLoggingSuite.scala b/spark/src/test/scala-spark-3.5/org/apache/spark/sql/delta/logging/DeltaPatternLoggingSuite.scala deleted file mode 100644 index 33f126ac3b8..00000000000 --- a/spark/src/test/scala-spark-3.5/org/apache/spark/sql/delta/logging/DeltaPatternLoggingSuite.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains code from the Apache Spark project (original license above). - * It contains modifications, which are licensed as follows: - */ - -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta.logging - -import java.util.regex.Pattern - -import org.apache.logging.log4j.Level - -class DeltaPatternLoggingSuite extends DeltaStructuredLoggingSuiteBase { - override def className: String = classOf[DeltaPatternLoggingSuite].getSimpleName - override def logFilePath: String = "target/pattern.log" - - override def expectedPatternForBasicMsg(level: Level): String = { - s""".*$level $className: This is a log message\n""" - } - - override def expectedPatternForBasicMsgWithException(level: Level): String = { - s""".*$level $className: This is a log message\n[\\s\\S]*""" - } - - override def expectedPatternForMsgWithMDC(level: Level): String = - s""".*$level $className: Lost executor 1.\n""" - - override def expectedPatternForMsgWithMDCValueIsNull(level: Level): String = - s""".*$level $className: Lost executor null.\n""" - - override def expectedPatternForMsgWithMDCAndException(level: Level): String = - s""".*$level $className: Error in executor 1.\njava.lang.RuntimeException: OOM\n[\\s\\S]*""" - - override def expectedPatternForCustomLogKey(level: Level): String = { - s""".*$level $className: Custom log message.\n""" - } - - override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = { - val pattern = - s""".*$level $className: Min Size: 2, Max Size: 4. Please double check.\n""" - assert(Pattern.compile(pattern).matcher(logOutput).matches()) - } -} diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaExcludedBySparkVersionTestMixinShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaExcludedBySparkVersionTestMixinShims.scala deleted file mode 100644 index 587e6a32cab..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/DeltaExcludedBySparkVersionTestMixinShims.scala +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.QueryTest - -trait DeltaExcludedBySparkVersionTestMixinShims extends QueryTest { - /** - * Tests that are meant for Delta compiled against Spark Latest Release only. Executed since this - * is the Spark Latest Release shim. - */ - protected def testSparkLatestOnly( - testName: String, testTags: org.scalatest.Tag*) - (testFun: => Any) - (implicit pos: org.scalactic.source.Position): Unit = { - test(testName, testTags: _*)(testFun)(pos) - } - - /** - * Tests that are meant for Delta compiled against Spark Master Release only. Ignored since this - * is the Spark Latest Release shim. 
- */ - protected def testSparkMasterOnly( - testName: String, testTags: org.scalatest.Tag*) - (testFun: => Any) - (implicit pos: org.scalactic.source.Position): Unit = { - ignore(testName, testTags: _*)(testFun)(pos) - } -} diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaGenerateSymlinkManifestSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaGenerateSymlinkManifestSuiteShims.scala deleted file mode 100644 index 41a3acc1340..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/DeltaGenerateSymlinkManifestSuiteShims.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaGenerateSymlinkManifestSuiteShims { - val FAILS_ON_TEMP_VIEWS_ERROR_MSG = "v is a temp view. 'GENERATE' expects a table" -} diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaHistoryManagerSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaHistoryManagerSuiteShims.scala deleted file mode 100644 index 98765c9b667..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/DeltaHistoryManagerSuiteShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaHistoryManagerSuiteShims { - type MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_TYPE = java.lang.IllegalArgumentException - - val MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_MSG = "either provide 'timestampAsOf' or 'versionAsOf'" -} diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaInsertIntoTableSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaInsertIntoTableSuiteShims.scala deleted file mode 100644 index 06624c21ea4..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/DeltaInsertIntoTableSuiteShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaInsertIntoTableSuiteShims { - val INSERT_INTO_TMP_VIEW_ERROR_MSG = "Inserting into a view is not allowed" - - val INVALID_COLUMN_DEFAULT_VALUE_ERROR_MSG = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION" -} diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaSuiteShims.scala deleted file mode 100644 index 978638f796b..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/DeltaSuiteShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaSuiteShims { - val THROWS_ON_CORRUPTED_FILE_ERROR_MSG = "is not a Parquet file" - - val THROWS_ON_DELETED_FILE_ERROR_MSG = "FileNotFound" -} diff --git a/spark/src/test/scala-spark-3.5/shims/DeltaVacuumSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DeltaVacuumSuiteShims.scala deleted file mode 100644 index 8b640829f98..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/DeltaVacuumSuiteShims.scala +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaVacuumSuiteShims { - val SQL_COMMAND_ON_TEMP_VIEW_NOT_SUPPORTED_ERROR_MSG = - "v is a temp view. 'VACUUM' expects a table." -} diff --git a/spark/src/test/scala-spark-3.5/shims/DescribeDeltaHistorySuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/DescribeDeltaHistorySuiteShims.scala deleted file mode 100644 index f1c693a000c..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/DescribeDeltaHistorySuiteShims.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.delta - -object DescribeDeltaHistorySuiteShims { - val FAILS_ON_VIEWS_ERROR_MSG = - "spark_catalog.default.delta_view is a view. 'DESCRIBE HISTORY' expects a table" - - val FAILS_ON_TEMP_VIEWS_ERROR_MSG = - "v is a temp view. 'DESCRIBE HISTORY' expects a table" -} diff --git a/spark/src/test/scala-spark-3.5/shims/ImplicitDMLCastingSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/ImplicitDMLCastingSuiteShims.scala deleted file mode 100644 index 5c2ffe237b3..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/ImplicitDMLCastingSuiteShims.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object ImplicitDMLCastingSuiteShims { - /** - * Discrepancy in error message between Spark 3.5 and Master (4.0) due to SPARK-47798 - * (https://github.com/apache/spark/pull/45981) - */ - val NUMERIC_VALUE_OUT_OF_RANGE_ERROR_MSG = "NUMERIC_VALUE_OUT_OF_RANGE" -} diff --git a/spark/src/test/scala-spark-3.5/shims/MergeIntoMetricsShims.scala b/spark/src/test/scala-spark-3.5/shims/MergeIntoMetricsShims.scala deleted file mode 100644 index 8d8bce1bbc7..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/MergeIntoMetricsShims.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object MergeIntoMetricsShims { - val DELETE_WITH_DUPLICATE_NUM_TARGET_FILES_ADDED_NON_PARTITIONED_NO_CDF = 1 -} diff --git a/spark/src/test/scala-spark-3.5/shims/SnapshotManagementSuiteShims.scala b/spark/src/test/scala-spark-3.5/shims/SnapshotManagementSuiteShims.scala deleted file mode 100644 index 6dcea6ba77c..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/SnapshotManagementSuiteShims.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object SnapshotManagementSuiteShims { - val SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG = ".parquet is not a Parquet file" -} diff --git a/spark/src/test/scala-spark-3.5/shims/TypeWideningTestCasesShims.scala b/spark/src/test/scala-spark-3.5/shims/TypeWideningTestCasesShims.scala deleted file mode 100644 index 753ab18fa6a..00000000000 --- a/spark/src/test/scala-spark-3.5/shims/TypeWideningTestCasesShims.scala +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta.typewidening - -import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.sql.types._ - -/** - * The set of type changes supported by type widening is different between Spark 3.5 and Spark 4.0. - * See [[TypeWideningShims]]. This shim splits the test cases into supported and unsupported - * accordingly for delta on Spark 3.5. - */ -trait TypeWideningTestCasesShims { - self: TypeWideningTestCases with SQLTestUtils => - - import testImplicits._ - - // Type changes that are supported by all Parquet readers. Byte, Short, Int are all stored as - // INT32 in parquet so these changes are guaranteed to be supported. - protected val supportedTestCases: Seq[TypeEvolutionTestCase] = Seq( - SupportedTypeEvolutionTestCase(ByteType, ShortType, - Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), - Seq(4, -4, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short])), - SupportedTypeEvolutionTestCase(ByteType, IntegerType, - Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), - Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])), - SupportedTypeEvolutionTestCase(ShortType, IntegerType, - Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), - Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])) - ) - - // Type changes that are only eligible for automatic widening when - // spark.databricks.delta.typeWidening.allowAutomaticWidening = ALWAYS. - protected val restrictedAutomaticWideningTestCases: Seq[TypeEvolutionTestCase] = Seq.empty - - // Test type changes that aren't supported. 
- protected val unsupportedTestCases: Seq[TypeEvolutionTestCase] = Seq( - UnsupportedTypeEvolutionTestCase(IntegerType, ByteType, - Seq(1, 2, Int.MinValue)), - UnsupportedTypeEvolutionTestCase(LongType, IntegerType, - Seq(4, 5, Long.MaxValue)), - UnsupportedTypeEvolutionTestCase(DoubleType, FloatType, - Seq(987654321.987654321d, Double.NaN, Double.NegativeInfinity, - Double.PositiveInfinity, Double.MinPositiveValue, - Double.MinValue, Double.MaxValue)), - UnsupportedTypeEvolutionTestCase(ByteType, DecimalType(2, 0), - Seq(1, -1, Byte.MinValue)), - UnsupportedTypeEvolutionTestCase(ShortType, DecimalType(4, 0), - Seq(1, -1, Short.MinValue)), - UnsupportedTypeEvolutionTestCase(IntegerType, DecimalType(9, 0), - Seq(1, -1, Int.MinValue)), - UnsupportedTypeEvolutionTestCase(LongType, DecimalType(19, 0), - Seq(1, -1, Long.MinValue)), - UnsupportedTypeEvolutionTestCase(TimestampNTZType, DateType, - Seq("2020-03-17 15:23:15", "2023-12-31 23:59:59", "0001-01-01 00:00:00")), - // Reduce scale - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS, 3), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), - // Reduce precision - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS - 1, 2), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), - // Reduce precision & scale - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS - 1, 1), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))), - // Increase scale more than precision - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS + 1, 4), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), - // Smaller scale and larger precision. 
- UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS + 3, 1), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))), - SupportedTypeEvolutionTestCase(IntegerType, DoubleType, - Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), - Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity, - Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue, - null.asInstanceOf[Double])), - SupportedTypeEvolutionTestCase(ByteType, DecimalType(10, 0), - Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), - SupportedTypeEvolutionTestCase(ShortType, DecimalType(10, 0), - Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), - SupportedTypeEvolutionTestCase(IntegerType, DecimalType(10, 0), - Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), - SupportedTypeEvolutionTestCase(LongType, DecimalType(20, 0), - Seq(1L, -1L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Int]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 20), null.asInstanceOf[BigDecimal])), - SupportedTypeEvolutionTestCase(ShortType, LongType, - Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), - Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])), - SupportedTypeEvolutionTestCase(IntegerType, LongType, - Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), - Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])), - SupportedTypeEvolutionTestCase(FloatType, DoubleType, - Seq(1234.56789f, -0f, 0f, Float.NaN, Float.NegativeInfinity, Float.PositiveInfinity, - Float.MinPositiveValue, Float.MinValue, Float.MaxValue, null.asInstanceOf[Float]), - Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity, - Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue, - null.asInstanceOf[Double])), - SupportedTypeEvolutionTestCase(DateType, TimestampNTZType, - Seq("2020-01-01", "2024-02-29", "1312-02-27"), - Seq("2020-03-17 15:23:15.123456", "2058-12-31 23:59:59.999", "0001-01-01 00:00:00")), - // Larger precision. - SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_LONG_DIGITS, 2), - Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"), - null.asInstanceOf[BigDecimal])), - // Larger precision and scale, same physical type. - SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS - 1, 2), - DecimalType(Decimal.MAX_INT_DIGITS, 3), - Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 3) + ".99"), - null.asInstanceOf[BigDecimal])), - // Larger precision and scale, different physical types. 
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_LONG_DIGITS + 1, 3), - Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"), - null.asInstanceOf[BigDecimal])) - ) -} diff --git a/spark/src/test/scala-spark-master/shims/DeltaExcludedBySparkVersionTestMixinShims.scala b/spark/src/test/scala-spark-master/shims/DeltaExcludedBySparkVersionTestMixinShims.scala deleted file mode 100644 index 0dea4b2d536..00000000000 --- a/spark/src/test/scala-spark-master/shims/DeltaExcludedBySparkVersionTestMixinShims.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -import org.apache.spark.sql.QueryTest - -trait DeltaExcludedBySparkVersionTestMixinShims extends QueryTest { - - /** - * Tests that are meant for Delta compiled against Spark Latest Release only. Ignored since this - * is the Spark Master shim. - */ - protected def testSparkLatestOnly( - testName: String, testTags: org.scalatest.Tag*) - (testFun: => Any) - (implicit pos: org.scalactic.source.Position): Unit = { - ignore(testName + " (Spark Latest Release Only)", testTags: _*)(testFun)(pos) - } - - /** - * Tests that are meant for Delta compiled against Spark Master (4.0+). Executed since this is the - * Spark Master shim. - */ - protected def testSparkMasterOnly( - testName: String, testTags: org.scalatest.Tag*) - (testFun: => Any) - (implicit pos: org.scalactic.source.Position): Unit = { - test(testName, testTags: _*)(testFun)(pos) - } - -} diff --git a/spark/src/test/scala-spark-master/shims/DeltaGenerateSymlinkManifestSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaGenerateSymlinkManifestSuiteShims.scala deleted file mode 100644 index 72e879122d8..00000000000 --- a/spark/src/test/scala-spark-master/shims/DeltaGenerateSymlinkManifestSuiteShims.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (2024) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaGenerateSymlinkManifestSuiteShims { - val FAILS_ON_TEMP_VIEWS_ERROR_MSG = "'GENERATE' expects a table but `v` is a view." 
-} diff --git a/spark/src/test/scala-spark-master/shims/DeltaHistoryManagerSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaHistoryManagerSuiteShims.scala deleted file mode 100644 index cdb9377d550..00000000000 --- a/spark/src/test/scala-spark-master/shims/DeltaHistoryManagerSuiteShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaHistoryManagerSuiteShims { - type MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_TYPE = org.apache.spark.sql.AnalysisException - - val MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_MSG = "Cannot specify both version and timestamp" -} diff --git a/spark/src/test/scala-spark-master/shims/DeltaInsertIntoTableSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaInsertIntoTableSuiteShims.scala deleted file mode 100644 index 55b8b09a0d2..00000000000 --- a/spark/src/test/scala-spark-master/shims/DeltaInsertIntoTableSuiteShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaInsertIntoTableSuiteShims { - val INSERT_INTO_TMP_VIEW_ERROR_MSG = "[EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE]" - - val INVALID_COLUMN_DEFAULT_VALUE_ERROR_MSG = "INVALID_DEFAULT_VALUE.NOT_CONSTANT" -} diff --git a/spark/src/test/scala-spark-master/shims/DeltaSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaSuiteShims.scala deleted file mode 100644 index fcc0a769e64..00000000000 --- a/spark/src/test/scala-spark-master/shims/DeltaSuiteShims.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.delta - -object DeltaSuiteShims { - val THROWS_ON_CORRUPTED_FILE_ERROR_MSG = "[FAILED_READ_FILE.NO_HINT]" - - val THROWS_ON_DELETED_FILE_ERROR_MSG = "[FAILED_READ_FILE.FILE_NOT_EXIST]" -} diff --git a/spark/src/test/scala-spark-master/shims/DeltaVacuumSuiteShims.scala b/spark/src/test/scala-spark-master/shims/DeltaVacuumSuiteShims.scala deleted file mode 100644 index bf45efa28de..00000000000 --- a/spark/src/test/scala-spark-master/shims/DeltaVacuumSuiteShims.scala +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DeltaVacuumSuiteShims { - val SQL_COMMAND_ON_TEMP_VIEW_NOT_SUPPORTED_ERROR_MSG = - "'VACUUM' expects a table but `v` is a view" -} diff --git a/spark/src/test/scala-spark-master/shims/DescribeDeltaHistorySuiteShims.scala b/spark/src/test/scala-spark-master/shims/DescribeDeltaHistorySuiteShims.scala deleted file mode 100644 index 81b7aed6b42..00000000000 --- a/spark/src/test/scala-spark-master/shims/DescribeDeltaHistorySuiteShims.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object DescribeDeltaHistorySuiteShims { - val FAILS_ON_VIEWS_ERROR_MSG = - "'DESCRIBE HISTORY' expects a table but `spark_catalog`.`default`.`delta_view` is a view." - - val FAILS_ON_TEMP_VIEWS_ERROR_MSG = - "'DESCRIBE HISTORY' expects a table but `v` is a view." -} diff --git a/spark/src/test/scala-spark-master/shims/ImplicitDMLCastingSuiteShims.scala b/spark/src/test/scala-spark-master/shims/ImplicitDMLCastingSuiteShims.scala deleted file mode 100644 index 11be5107df3..00000000000 --- a/spark/src/test/scala-spark-master/shims/ImplicitDMLCastingSuiteShims.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object ImplicitDMLCastingSuiteShims { - /** - * Discrepancy in error message between Spark 3.5 and Master (4.0) due to SPARK-47798 - * (https://github.com/apache/spark/pull/45981) - */ - val NUMERIC_VALUE_OUT_OF_RANGE_ERROR_MSG = "NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION" -} diff --git a/spark/src/test/scala-spark-master/shims/MergeIntoMetricsShims.scala b/spark/src/test/scala-spark-master/shims/MergeIntoMetricsShims.scala deleted file mode 100644 index 33f66c0470f..00000000000 --- a/spark/src/test/scala-spark-master/shims/MergeIntoMetricsShims.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object MergeIntoMetricsShims { - val DELETE_WITH_DUPLICATE_NUM_TARGET_FILES_ADDED_NON_PARTITIONED_NO_CDF = 1 -} diff --git a/spark/src/test/scala-spark-master/shims/SnapshotManagementSuiteShims.scala b/spark/src/test/scala-spark-master/shims/SnapshotManagementSuiteShims.scala deleted file mode 100644 index fa5ed1bb886..00000000000 --- a/spark/src/test/scala-spark-master/shims/SnapshotManagementSuiteShims.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta - -object SnapshotManagementSuiteShims { - val SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG = "Encountered error while reading file" -} diff --git a/spark/src/test/scala-spark-master/shims/TypeWideningTestCasesShims.scala b/spark/src/test/scala-spark-master/shims/TypeWideningTestCasesShims.scala deleted file mode 100644 index d35f41b6f44..00000000000 --- a/spark/src/test/scala-spark-master/shims/TypeWideningTestCasesShims.scala +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta.typewidening - -import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.sql.types._ - -/** - * The set of type changes supported by type widening is different between Spark 3.5 and Spark 4.0. - * See [[TypeWideningShims]]. This shim splits the test cases into supported and unsupported - * accordingly for delta on Spark 4.0. - */ -trait TypeWideningTestCasesShims { - self: TypeWideningTestCases with SQLTestUtils => - - import testImplicits._ - - // Type changes that are supported by all Parquet readers. Byte, Short, Int are all stored as - // INT32 in parquet so these changes are guaranteed to be supported. - protected val supportedTestCases: Seq[TypeEvolutionTestCase] = Seq( - SupportedTypeEvolutionTestCase(ByteType, ShortType, - Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), - Seq(4, -4, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short])), - SupportedTypeEvolutionTestCase(ByteType, IntegerType, - Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), - Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])), - SupportedTypeEvolutionTestCase(ShortType, IntegerType, - Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), - Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])), - SupportedTypeEvolutionTestCase(ShortType, LongType, - Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), - Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])), - SupportedTypeEvolutionTestCase(IntegerType, LongType, - Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), - Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])), - SupportedTypeEvolutionTestCase(FloatType, DoubleType, - Seq(1234.56789f, -0f, 0f, Float.NaN, Float.NegativeInfinity, Float.PositiveInfinity, - Float.MinPositiveValue, Float.MinValue, Float.MaxValue, null.asInstanceOf[Float]), - Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity, - Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue, - null.asInstanceOf[Double])), - SupportedTypeEvolutionTestCase(DateType, TimestampNTZType, - Seq("2020-01-01", "2024-02-29", "1312-02-27"), - Seq("2020-03-17 15:23:15.123456", "2058-12-31 23:59:59.999", "0001-01-01 00:00:00")), - // Larger precision. - SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_LONG_DIGITS, 2), - Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"), - null.asInstanceOf[BigDecimal])), - // Larger precision and scale, same physical type. - SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS - 1, 2), - DecimalType(Decimal.MAX_INT_DIGITS, 3), - Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 3) + ".99"), - null.asInstanceOf[BigDecimal])), - // Larger precision and scale, different physical types. 
- SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_LONG_DIGITS + 1, 3), - Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"), - null.asInstanceOf[BigDecimal])) - ) - - // Type changes that are only eligible for automatic widening when - // spark.databricks.delta.typeWidening.allowAutomaticWidening = ALWAYS. - protected val restrictedAutomaticWideningTestCases: Seq[TypeEvolutionTestCase] = Seq( - SupportedTypeEvolutionTestCase(IntegerType, DoubleType, - Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), - Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity, - Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue, - null.asInstanceOf[Double])), - SupportedTypeEvolutionTestCase(ByteType, DecimalType(10, 0), - Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), - SupportedTypeEvolutionTestCase(ShortType, DecimalType(10, 0), - Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), - SupportedTypeEvolutionTestCase(IntegerType, DecimalType(10, 0), - Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), - SupportedTypeEvolutionTestCase(LongType, DecimalType(20, 0), - Seq(1L, -1L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Int]), - Seq(BigDecimal("1.23"), BigDecimal("9" * 20), null.asInstanceOf[BigDecimal])) - ) - - // Test type changes that aren't supported. - protected val unsupportedTestCases: Seq[TypeEvolutionTestCase] = Seq( - UnsupportedTypeEvolutionTestCase(IntegerType, ByteType, - Seq(1, 2, Int.MinValue)), - UnsupportedTypeEvolutionTestCase(LongType, IntegerType, - Seq(4, 5, Long.MaxValue)), - UnsupportedTypeEvolutionTestCase(DoubleType, FloatType, - Seq(987654321.987654321d, Double.NaN, Double.NegativeInfinity, - Double.PositiveInfinity, Double.MinPositiveValue, - Double.MinValue, Double.MaxValue)), - UnsupportedTypeEvolutionTestCase(ByteType, DecimalType(2, 0), - Seq(1, -1, Byte.MinValue)), - UnsupportedTypeEvolutionTestCase(ShortType, DecimalType(4, 0), - Seq(1, -1, Short.MinValue)), - UnsupportedTypeEvolutionTestCase(IntegerType, DecimalType(9, 0), - Seq(1, -1, Int.MinValue)), - UnsupportedTypeEvolutionTestCase(LongType, DecimalType(19, 0), - Seq(1, -1, Long.MinValue)), - UnsupportedTypeEvolutionTestCase(TimestampNTZType, DateType, - Seq("2020-03-17 15:23:15", "2023-12-31 23:59:59", "0001-01-01 00:00:00")), - // Reduce scale - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS, 3), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), - // Reduce precision - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS - 1, 2), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), - // Reduce precision & scale - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS - 1, 1), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))), - // Increase scale more than precision - 
UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS + 1, 4), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), - // Smaller scale and larger precision. - UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2), - DecimalType(Decimal.MAX_INT_DIGITS + 3, 1), - Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))) - ) -} diff --git a/spark/src/test/scala-spark-master/shims/logging/DeltaStructuredLoggingSuite.scala b/spark/src/test/scala-spark-master/shims/logging/DeltaStructuredLoggingSuite.scala deleted file mode 100644 index 852147ec017..00000000000 --- a/spark/src/test/scala-spark-master/shims/logging/DeltaStructuredLoggingSuite.scala +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains code from the Apache Spark project (original license above). - * It contains modifications, which are licensed as follows: - */ - -/* - * Copyright (2021) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.delta.logging - -import java.util.regex.Pattern - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import org.apache.logging.log4j.Level - -import org.apache.spark.internal.Logging - -class DeltaStructuredLoggingSuite extends DeltaStructuredLoggingSuiteBase { - override def className: String = classOf[DeltaStructuredLoggingSuite].getSimpleName - override def logFilePath: String = "target/structured.log" - - override def beforeAll(): Unit = { - super.beforeAll() - Logging.enableStructuredLogging() - } - - override def afterAll(): Unit = { - Logging.disableStructuredLogging() - super.afterAll() - } - - private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule) - private def compactAndToRegexPattern(json: String): String = { - jsonMapper.readTree(json).toString. - replace("", """[^"]+"""). - replace("""""""", """.*"""). 
- replace("{", """\{""") + "\n" - } - - override def expectedPatternForBasicMsg(level: Level): String = { - compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "This is a log message", - "logger": "$className" - }""") - } - - override def expectedPatternForBasicMsgWithException(level: Level): String = { - compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "This is a log message", - "exception": { - "class": "java.lang.RuntimeException", - "msg": "OOM", - "stacktrace": "" - }, - "logger": "$className" - }""") - } - - override def expectedPatternForMsgWithMDC(level: Level): String = { - compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "Lost executor 1.", - "context": { - "executor_id": "1" - }, - "logger": "$className" - }""") - } - - def expectedPatternForMsgWithMDCValueIsNull(level: Level): String = { - compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "Lost executor null.", - "context": { - "executor_id": null - }, - "logger": "$className" - }""") - } - - override def expectedPatternForMsgWithMDCAndException(level: Level): String = { - compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "Error in executor 1.", - "context": { - "executor_id": "1" - }, - "exception": { - "class": "java.lang.RuntimeException", - "msg": "OOM", - "stacktrace": "" - }, - "logger": "$className" - }""") - } - - override def expectedPatternForCustomLogKey(level: Level): String = { - compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "Custom log message.", - "logger": "$className" - }""" - ) - } - - override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = { - val pattern1 = compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "Min Size: 2, Max Size: 4. Please double check.", - "context": { - "min_size": "2", - "max_size": "4" - }, - "logger": "$className" - }""") - - val pattern2 = compactAndToRegexPattern( - s""" - { - "ts": "", - "level": "$level", - "msg": "Min Size: 2, Max Size: 4. 
Please double check.", - "context": { - "max_size": "4", - "min_size": "2" - }, - "logger": "$className" - }""") - assert(Pattern.compile(pattern1).matcher(logOutput).matches() || - Pattern.compile(pattern2).matcher(logOutput).matches()) - } -} diff --git a/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala b/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala index f22e33e0d7b..aa69ae445f4 100644 --- a/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala +++ b/spark/src/test/scala/io/delta/sql/parser/DeltaSqlParserSuite.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.delta.commands.{DeltaOptimizeContext, DescribeDeltaD import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.{TableIdentifier, TimeTravel} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedTable} -import org.apache.spark.sql.catalyst.analysis.UnresolvedTableImplicits._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.SQLHelper diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala index 77d8448ca7d..ed9ca3ed997 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/AutoCompactSuite.scala @@ -21,7 +21,6 @@ import java.io.File // scalastyle:off import.ordering.noEmptyLine import com.databricks.spark.util.{Log4jUsageLogger, UsageRecord} -import org.apache.spark.sql.delta.DeltaExcludedBySparkVersionTestMixinShims import org.apache.spark.sql.delta.actions.AddFile import org.apache.spark.sql.delta.commands.optimize._ import org.apache.spark.sql.delta.hooks.{AutoCompact, AutoCompactType} @@ -62,8 +61,7 @@ class AutoCompactConfigurationSuite extends CompactionTestHelperForAutoCompaction with DeltaSQLCommandTest with SharedSparkSession - with AutoCompactTestUtils - with DeltaExcludedBySparkVersionTestMixinShims { + with AutoCompactTestUtils { private def setTableProperty(log: DeltaLog, key: String, value: String): Unit = { spark.sql(s"ALTER TABLE delta.`${log.dataPath}` SET TBLPROPERTIES " + @@ -124,8 +122,7 @@ class AutoCompactExecutionSuite extends CompactionTestHelperForAutoCompaction with DeltaSQLCommandTest with SharedSparkSession - with AutoCompactTestUtils - with DeltaExcludedBySparkVersionTestMixinShims { + with AutoCompactTestUtils { private def testBothModesViaProperty(testName: String)(f: String => Unit): Unit = { def runTest(autoCompactConfValue: String): Unit = { withTempDir { dir => @@ -242,7 +239,7 @@ class AutoCompactExecutionSuite extends checkAutoCompactionWorks(dir, spark.range(10).toDF("id")) } - testSparkMasterOnly("variant auto compact kicks in when enabled - table config") { + test("variant auto compact kicks in when enabled - table config") { withTempDir { dir => withSQLConf( "spark.databricks.delta.properties.defaults.autoOptimize.autoCompact" -> "true", @@ -254,7 +251,7 @@ class AutoCompactExecutionSuite extends } } - testSparkMasterOnly("variant auto compact kicks in when enabled - session config") { + test("variant auto compact kicks in when enabled - session config") { withTempDir { dir => withSQLConf( DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED.key -> "true", diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala 
b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala index 0ed8cad08d2..88b7188f70f 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeleteSuiteBase.scala @@ -30,8 +30,7 @@ trait DeleteBaseMixin extends QueryTest with SharedSparkSession with DeltaDMLTestUtils - with DeltaTestUtilsForTempViews - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaTestUtilsForTempViews { import testImplicits._ @@ -539,7 +538,7 @@ trait DeleteBaseTests extends DeleteBaseMixin { Some(".*More than one row returned by a subquery used as an expression(?s).*") ) - testSparkMasterOnly("Variant type") { + test("Variant type") { val dstDf = sql( """SELECT parse_json(cast(id as string)) v, id i FROM range(3)""") diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala index 3b79f226d3e..5f8e0196f69 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala @@ -20,7 +20,6 @@ import java.io.File import java.net.URI // scalastyle:off import.ordering.noEmptyLine -import org.apache.spark.sql.delta.DeltaGenerateSymlinkManifestSuiteShims._ import org.apache.spark.sql.delta.DeltaOperations.Delete import org.apache.spark.sql.delta.commands.DeltaGenerateCommand import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest @@ -123,7 +122,7 @@ trait DeltaGenerateSymlinkManifestSuiteBase val e = intercept[AnalysisException] { spark.sql(s"GENERATE symlink_format_manifest FOR TABLE v") } - assert(e.getMessage.contains(FAILS_ON_TEMP_VIEWS_ERROR_MSG)) + assert(e.getMessage.contains("'GENERATE' expects a table but `v` is a view.")) } } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala index 91931ae0738..3ad3f0a841d 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaHistoryManagerSuite.scala @@ -28,7 +28,6 @@ import scala.language.implicitConversions import com.databricks.spark.util.Log4jUsageLogger import org.apache.spark.sql.delta.DeltaConfigs.IN_COMMIT_TIMESTAMPS_ENABLED -import org.apache.spark.sql.delta.DeltaHistoryManagerSuiteShims._ import org.apache.spark.sql.delta.DeltaTestUtils.{createTestAddFile, modifyCommitTimestamp} import org.apache.spark.sql.delta.catalog.DeltaTableV2 import org.apache.spark.sql.delta.coordinatedcommits.CatalogOwnedTestBaseSuite @@ -541,8 +540,7 @@ trait DeltaTimeTravelTests extends QueryTest } } -abstract class DeltaHistoryManagerBase extends DeltaTimeTravelTests - { +abstract class DeltaHistoryManagerBase extends DeltaTimeTravelTests { test("cannot time travel target tables of insert/delete/update/merge") { val tblName = "delta_table" withTable(tblName) { @@ -615,14 +613,14 @@ abstract class DeltaHistoryManagerBase extends DeltaTimeTravelTests } assert(e1.getMessage.contains("[0, 2]")) - val e2 = intercept[MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_TYPE] { + val e2 = intercept[org.apache.spark.sql.AnalysisException] { spark.read.format("delta") .option("versionAsOf", 3) .option("timestampAsOf", "2020-10-22 23:20:11") .table(tblName).collect() } - 
assert(e2.getMessage.contains(MULTIPLE_TIME_TRAVEL_FORMATS_ERROR_MSG)) + assert(e2.getMessage.contains("Cannot specify both version and timestamp")) } } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala index 5c9ae2f2e4b..862f7eed616 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala @@ -22,7 +22,6 @@ import java.util.TimeZone import scala.collection.JavaConverters._ -import org.apache.spark.sql.delta.DeltaInsertIntoTableSuiteShims._ import org.apache.spark.sql.delta.schema.InvariantViolationException import org.apache.spark.sql.delta.schema.SchemaUtils import org.apache.spark.sql.delta.sources.DeltaSQLConf @@ -45,8 +44,7 @@ class DeltaInsertIntoSQLSuite extends DeltaInsertIntoTestsWithTempViews( supportsDynamicOverwrite = true, includeSQLOnlyTests = true) - with DeltaSQLCommandTest - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaSQLCommandTest { import testImplicits._ @@ -59,7 +57,7 @@ class DeltaInsertIntoSQLSuite } } - testSparkMasterOnly("Variant type") { + test("Variant type") { withTable("t") { sql("CREATE TABLE t (id LONG, v VARIANT) USING delta") sql("INSERT INTO t (id, v) VALUES (1, parse_json('{\"a\": 1}'))") @@ -692,7 +690,7 @@ abstract class DeltaInsertIntoTestsWithTempViews( } catch { case e: AnalysisException => assert( - e.getMessage.contains(INSERT_INTO_TMP_VIEW_ERROR_MSG) || + e.getMessage.contains("[EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE]") || e.getMessage.contains("Inserting into an RDD-based table is not allowed") || e.getMessage.contains("Table default.v not found") || e.getMessage.contains("Table or view 'v' not found in database 'default'") || @@ -872,7 +870,7 @@ class DeltaColumnDefaultsInsertSuite extends InsertIntoSQLOnlyTests with DeltaSQ sql(s"create table t4 (s int default badvalue) using $v2Format " + s"$tblPropertiesAllowDefaults") }, - INVALID_COLUMN_DEFAULT_VALUE_ERROR_MSG, + "INVALID_DEFAULT_VALUE.NOT_CONSTANT", parameters = Map( "statement" -> "CREATE TABLE", "colName" -> "`s`", diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala index 8ee62106c27..b8d9f3baa54 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSourceSuite.scala @@ -51,8 +51,7 @@ import org.apache.spark.util.{ManualClock, Utils} class DeltaSourceSuite extends DeltaSourceSuiteBase with DeltaColumnMappingTestUtils - with DeltaSQLCommandTest - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaSQLCommandTest { import testImplicits._ diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala index 804c38a5b1e..675b68e2bbc 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala @@ -20,7 +20,6 @@ import java.io.{File, FileNotFoundException} import java.util.concurrent.atomic.AtomicInteger // scalastyle:off import.ordering.noEmptyLine -import org.apache.spark.sql.delta.DeltaSuiteShims._ import org.apache.spark.sql.delta.actions.{Action, TableFeatureProtocolUtils} import org.apache.spark.sql.delta.commands.cdc.CDCReader import 
org.apache.spark.sql.delta.coordinatedcommits.{CatalogOwnedTableUtils, CatalogOwnedTestBaseSuite} @@ -1530,7 +1529,7 @@ class DeltaSuite extends QueryTest val thrown = intercept[SparkException] { data.toDF().collect() } - assert(thrown.getMessage.contains(THROWS_ON_CORRUPTED_FILE_ERROR_MSG)) + assert(thrown.getMessage.contains("[FAILED_READ_FILE.NO_HINT]")) } } } @@ -1582,7 +1581,7 @@ class DeltaSuite extends QueryTest val thrown = intercept[SparkException] { data.toDF().collect() } - assert(thrown.getMessage.contains(THROWS_ON_DELETED_FILE_ERROR_MSG)) + assert(thrown.getMessage.contains("[FAILED_READ_FILE.FILE_NOT_EXIST]")) } } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala index 0dfd8fbad5c..648f7692b96 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVacuumSuite.scala @@ -26,7 +26,6 @@ import scala.language.implicitConversions import org.apache.spark.sql.delta.{CatalogOwnedTableFeature, DeltaUnsupportedOperationException} import org.apache.spark.sql.delta.DeltaOperations.{Delete, Write} import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile -import org.apache.spark.sql.delta.DeltaVacuumSuiteShims._ import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, Metadata, RemoveFile} import org.apache.spark.sql.delta.catalog.DeltaTableV2 import org.apache.spark.sql.delta.commands.VacuumCommand @@ -550,7 +549,7 @@ class DeltaVacuumSuite extends DeltaVacuumSuiteBase with DeltaSQLCommandTest { val e = intercept[AnalysisException] { vacuumSQLTest(table, viewName) } - assert(e.getMessage.contains(SQL_COMMAND_ON_TEMP_VIEW_NOT_SUPPORTED_ERROR_MSG)) + assert(e.getMessage.contains("'VACUUM' expects a table but `v` is a view")) } } } diff --git a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala similarity index 84% rename from spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala rename to spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala index 27426a4265f..6287119d821 100644 --- a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantShreddingSuite.scala @@ -161,36 +161,36 @@ class DeltaVariantShreddingSuite // Table property not present or false Seq("", s"TBLPROPERTIES ('${DeltaConfigs.ENABLE_VARIANT_SHREDDING.key}' = 'false') ") .foreach { tblProperties => - withTable("tbl") { - withTempDir { dir => - sql("CREATE TABLE tbl (i long, v variant) USING DELTA " + tblProperties + - s"LOCATION '${dir.getAbsolutePath}'") - withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, - SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString, - SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { - - val e = intercept[DeltaSparkException] { - df.write.format("delta").mode("append").saveAsTable("tbl") + withTable("tbl") { + withTempDir { dir => + sql("CREATE TABLE tbl (i long, v variant) USING DELTA " + tblProperties + + s"LOCATION '${dir.getAbsolutePath}'") + withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, + SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString, + SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { + + 
val e = intercept[DeltaSparkException] { + df.write.format("delta").mode("append").saveAsTable("tbl") + } + checkError(e, "DELTA_SHREDDING_TABLE_PROPERTY_DISABLED", parameters = Map()) + assert(e.getMessage.contains( + "Attempted to write shredded Variants but the table does not support shredded " + + "writes. Consider setting the table property enableVariantShredding to true.")) + assert(numShreddedFiles(dir.getAbsolutePath, validation = { field: GroupType => + field.getName == "v" && (field.getType("typed_value") match { + case t: GroupType => + t.getFields.asScala.map(_.getName).toSet == Set("a", "b", "c") + case _ => false + }) + }) == 0) + checkAnswer( + spark.read.format("delta").load(dir.getAbsolutePath).selectExpr("i", "to_json(v)"), + Seq() + ) } - checkError(e, "DELTA_SHREDDING_TABLE_PROPERTY_DISABLED", parameters = Map()) - assert(e.getMessage.contains( - "Attempted to write shredded Variants but the table does not support shredded " + - "writes. Consider setting the table property enableVariantShredding to true.")) - assert(numShreddedFiles(dir.getAbsolutePath, validation = { field: GroupType => - field.getName == "v" && (field.getType("typed_value") match { - case t: GroupType => - t.getFields.asScala.map(_.getName).toSet == Set("a", "b", "c") - case _ => false - }) - }) == 0) - checkAnswer( - spark.read.format("delta").load(dir.getAbsolutePath).selectExpr("i", "to_json(v)"), - Seq() - ) } } } - } } test("Set table property to invalid value") { diff --git a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantSuite.scala similarity index 97% rename from spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantSuite.scala rename to spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantSuite.scala index ea5cea81803..fb313b0b393 100644 --- a/spark/src/test/scala-spark-master/org/apache/spark/sql/delta/DeltaVariantSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaVariantSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType class DeltaVariantSuite - extends QueryTest + extends QueryTest with SharedSparkSession with DeltaSQLCommandTest with DeltaSQLTestUtils @@ -44,9 +44,9 @@ class DeltaVariantSuite import testImplicits._ private def assertVariantTypeTableFeatures( - tableName: String, - expectPreviewFeature: Boolean, - expectStableFeature: Boolean): Unit = { + tableName: String, + expectPreviewFeature: Boolean, + expectStableFeature: Boolean): Unit = { val features = getProtocolForTable("tbl").readerAndWriterFeatures if (expectPreviewFeature) { assert(features.contains(VariantTypePreviewTableFeature)) @@ -79,8 +79,8 @@ class DeltaVariantSuite assert( !deltaLog.unsafeVolatileSnapshot.protocol.isFeatureSupported( VariantTypePreviewTableFeature) && - !deltaLog.unsafeVolatileSnapshot.protocol.isFeatureSupported( - VariantTypeTableFeature), + !deltaLog.unsafeVolatileSnapshot.protocol.isFeatureSupported( + VariantTypeTableFeature), s"Table tbl contains VariantTypeFeature descriptor when its not supposed to" ) } @@ -104,10 +104,10 @@ class DeltaVariantSuite assert( getProtocolForTable("tbl") == - VariantTypeTableFeature.minProtocolVersion - .withFeature(VariantTypeTableFeature) - .withFeature(InvariantsTableFeature) - .withFeature(AppendOnlyTableFeature) + VariantTypeTableFeature.minProtocolVersion + .withFeature(VariantTypeTableFeature) + .withFeature(InvariantsTableFeature) + 
.withFeature(AppendOnlyTableFeature) ) } } @@ -121,7 +121,7 @@ class DeltaVariantSuite "tbl", expectPreviewFeature = false, expectStableFeature = true) sql( s"ALTER TABLE tbl " + - s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')" + s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')" ) assertVariantTypeTableFeatures( "tbl", expectPreviewFeature = true, expectStableFeature = true) @@ -144,7 +144,7 @@ class DeltaVariantSuite sql("CREATE TABLE tbl(s STRING) USING delta") sql( s"ALTER TABLE tbl " + - s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')" + s"SET TBLPROPERTIES('delta.feature.variantType-preview' = 'supported')" ) sql("ALTER TABLE tbl ADD COLUMN v VARIANT") @@ -214,7 +214,7 @@ class DeltaVariantSuite } test("enabling 'FORCE_USE_PREVIEW_VARIANT_FEATURE' on table with stable feature does not " + - "require adding preview feature") { + "require adding preview feature") { withTable("tbl") { sql("CREATE TABLE tbl(s STRING, v VARIANT) USING DELTA") sql("INSERT INTO tbl (SELECT 'foo', parse_json(cast(id + 99 as string)) FROM range(1))") @@ -456,7 +456,7 @@ class DeltaVariantSuite sql("""select _change_type, v::int from table_changes('tbl', 0) where _change_type = 'update_preimage'"""), Seq(Row("update_preimage", 50)) - ) + ) checkAnswer( sql("""select _change_type, v::int from table_changes('tbl', 0) where _change_type = 'update_postimage'"""), diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala index 5cfb5f55735..a8a122f85a5 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DescribeDeltaHistorySuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.delta // scalastyle:off import.ordering.noEmptyLine import java.io.File -import org.apache.spark.sql.delta.DescribeDeltaHistorySuiteShims._ import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, Metadata, Protocol, RemoveFile} import org.apache.spark.sql.delta.coordinatedcommits.{CatalogOwnedTableUtils, CatalogOwnedTestBaseSuite} import org.apache.spark.sql.delta.sources.DeltaSQLConf @@ -272,7 +271,8 @@ trait DescribeDeltaHistorySuiteBase sql(s"DESCRIBE HISTORY $viewName").collect() } - assert(e.getMessage.contains(FAILS_ON_VIEWS_ERROR_MSG)) + assert(e.getMessage.contains( + "'DESCRIBE HISTORY' expects a table but `spark_catalog`.`default`.`delta_view` is a view.")) } } @@ -286,7 +286,7 @@ trait DescribeDeltaHistorySuiteBase sql(s"DESCRIBE HISTORY $viewName").collect() } - assert(e.getMessage.contains(FAILS_ON_TEMP_VIEWS_ERROR_MSG)) + assert(e.getMessage.contains("'DESCRIBE HISTORY' expects a table but `v` is a view.")) } } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala index 9f7a214e86f..86a928d3b9c 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/GeneratedColumnSuite.scala @@ -42,8 +42,7 @@ import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} import org.apache.spark.sql.types.{ArrayType, DataType, DateType, IntegerType, LongType, MetadataBuilder, ShortType, StringType, StructField, StructType, TimestampType} trait GeneratedColumnSuiteBase - extends GeneratedColumnTest - with DeltaExcludedBySparkVersionTestMixinShims { 
+ extends GeneratedColumnTest { import GeneratedColumn._ import testImplicits._ diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala index bb8b21dfc9c..34333a78eca 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/ImplicitDMLCastingSuite.scala @@ -20,7 +20,6 @@ import scala.annotation.tailrec import scala.collection.JavaConverters._ import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN -import org.apache.spark.sql.delta.ImplicitDMLCastingSuiteShims._ import org.apache.spark.sql.delta.sources.DeltaSQLConf import org.apache.spark.sql.delta.test.{DeltaExceptionTestUtils, DeltaSQLCommandTest} @@ -149,8 +148,11 @@ abstract class ImplicitDMLCastingSuite extends QueryTest assert(failureCause.toString.contains(testConfig.exceptionAnsiCast)) val sparkThrowable = failureCause.asInstanceOf[SparkThrowable] - assert(Seq("CAST_OVERFLOW", NUMERIC_VALUE_OUT_OF_RANGE_ERROR_MSG, "CAST_INVALID_INPUT") - .contains(sparkThrowable.getErrorClass)) + assert(Seq( + "CAST_OVERFLOW", + "NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION", + "CAST_INVALID_INPUT" + ).contains(sparkThrowable.getErrorClass)) case Some(failureCause) if !sqlConfig.followAnsiEnabled => assert(sqlConfig.storeAssignmentPolicy === SQLConf.StoreAssignmentPolicy.ANSI) diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala index d9587b0c27d..b3166142a50 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMaterializeSourceSuite.scala @@ -24,7 +24,6 @@ import scala.util.control.NonFatal import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions, UsageRecord} import org.apache.spark.sql.delta.DeltaTestUtils._ import org.apache.spark.sql.delta.commands.merge.{MergeIntoMaterializeSourceError, MergeIntoMaterializeSourceErrorType, MergeIntoMaterializeSourceReason, MergeStats} -import org.apache.spark.sql.delta.commands.merge.MergeIntoMaterializeSourceShims import org.apache.spark.sql.delta.sources.DeltaSQLConf import org.apache.spark.sql.delta.test.DeltaSQLCommandTest import org.apache.spark.sql.delta.test.DeltaSQLTestUtils @@ -49,8 +48,7 @@ trait MergeIntoMaterializeSourceMixin with SharedSparkSession with DeltaSQLCommandTest with DeltaSQLTestUtils - with DeltaTestUtilsBase - { + with DeltaTestUtilsBase { override def beforeAll(): Unit = { super.beforeAll() @@ -175,10 +173,10 @@ trait MergeIntoMaterializeSourceErrorTests extends MergeIntoMaterializeSourceMix checkpointedDf.collect() } assert(ex.isInstanceOf[SparkException], ex) + val sparkEx = ex.asInstanceOf[SparkException] assert( - MergeIntoMaterializeSourceShims.mergeMaterializedSourceRddBlockLostError( - ex.asInstanceOf[SparkException], - rdd.id)) + sparkEx.getErrorClass == "CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND" && + sparkEx.getMessageParameters.get("rddBlockId").contains(s"rdd_${rdd.id}")) } for { diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala index 225df6f9f0f..36841308ec6 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala +++ 
b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoMetricsBase.scala @@ -16,7 +16,6 @@ package org.apache.spark.sql.delta -import org.apache.spark.sql.delta.MergeIntoMetricsShims._ import org.apache.spark.sql.delta.sources.DeltaSQLConf import org.apache.spark.sql.{DataFrame, QueryTest, Row} @@ -1041,8 +1040,7 @@ trait MergeIntoMetricsBase ((false, true), ("numTargetFilesAdded", 1)), ((false, false), ( "numTargetFilesAdded", - // Depending on the Spark version, for non-partitioned tables we may add 1 or 2 files. - DELETE_WITH_DUPLICATE_NUM_TARGET_FILES_ADDED_NON_PARTITIONED_NO_CDF) + 1) ) ) ) diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala index 2935e2c87f7..9c0ebd40735 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSuiteBase.scala @@ -46,8 +46,7 @@ trait MergeIntoSuiteBaseMixin with DeltaSQLTestUtils with ScanReportHelper with MergeIntoTestUtils - with MergeIntoSchemaEvolutionMixin - with DeltaExcludedBySparkVersionTestMixinShims { + with MergeIntoSchemaEvolutionMixin { import testImplicits._ // Maps expected error classes to actual error classes. Used to handle error classes that are @@ -2687,7 +2686,7 @@ trait MergeIntoSuiteBaseMiscTests extends MergeIntoSuiteBaseMixin { } } - testSparkMasterOnly("Variant type") { + test("Variant type") { withTable("source") { // Insert ("0", 0), ("1", 1) val dstDf = sql( diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala index 6aa4111bb4a..9e91120a0f3 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/SnapshotManagementSuite.scala @@ -24,7 +24,6 @@ import scala.collection.mutable import com.databricks.spark.util.{Log4jUsageLogger, UsageRecord} import org.apache.spark.sql.delta.DeltaConfigs.COORDINATED_COMMITS_COORDINATOR_NAME import org.apache.spark.sql.delta.DeltaTestUtils.{verifyBackfilled, verifyUnbackfilled, BOOLEAN_DOMAIN} -import org.apache.spark.sql.delta.SnapshotManagementSuiteShims._ import org.apache.spark.sql.delta.coordinatedcommits.{CommitCoordinatorBuilder, CommitCoordinatorProvider, CoordinatedCommitsBaseSuite, CoordinatedCommitsUsageLogs, InMemoryCommitCoordinator} import org.apache.spark.sql.delta.sources.DeltaSQLConf import org.apache.spark.sql.delta.storage.LocalLogStore @@ -204,7 +203,7 @@ class SnapshotManagementSuite extends QueryTest with DeltaSQLTestUtils with Shar // Guava cache wraps the root cause assert(e.isInstanceOf[SparkException] && e.getMessage.contains("0001.checkpoint") && - e.getMessage.contains(SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG)) + e.getMessage.contains("Encountered error while reading file")) } } } @@ -261,7 +260,7 @@ class SnapshotManagementSuite extends QueryTest with DeltaSQLTestUtils with Shar val e = intercept[SparkException] { staleLog.update() } val version = if (testEmptyCheckpoint) 0 else 1 assert(e.getMessage.contains(f"$version%020d.checkpoint") && - e.getMessage.contains(SHOULD_NOT_RECOVER_CHECKPOINT_ERROR_MSG)) + e.getMessage.contains("Encountered error while reading file")) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala index a262da6da45..549c7758b44 
100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/UpdateSuiteBase.scala @@ -39,8 +39,7 @@ trait UpdateBaseMixin with SharedSparkSession with DeltaDMLTestUtils with DeltaSQLTestUtils - with DeltaTestUtilsForTempViews - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaTestUtilsForTempViews { import testImplicits._ protected def executeUpdate(target: String, set: Seq[String], where: String): Unit = { @@ -979,7 +978,7 @@ trait UpdateBaseMiscTests extends UpdateBaseMixin { Some(".*ore than one row returned by a subquery used as an expression(?s).*") ) - testSparkMasterOnly("Variant type") { + test("Variant type") { val df = sql( """SELECT parse_json(cast(id as string)) v, id i FROM range(2)""") diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala index df66c12ebed..f570e299681 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.delta.deletionvectors import java.io.{File, FileNotFoundException} import java.net.URISyntaxException -import org.apache.spark.sql.delta.{DeletionVectorsTableFeature, DeletionVectorsTestUtils, DeltaChecksumException, DeltaConfigs, DeltaExcludedBySparkVersionTestMixinShims, DeltaLog, DeltaMetricsUtils, DeltaTestUtilsForTempViews} +import org.apache.spark.sql.delta.{DeletionVectorsTableFeature, DeletionVectorsTestUtils, DeltaChecksumException, DeltaConfigs, DeltaLog, DeltaMetricsUtils, DeltaTestUtilsForTempViews} import org.apache.spark.sql.delta.DeltaTestUtils.createTestAddFile import org.apache.spark.sql.delta.actions.{AddFile, DeletionVectorDescriptor, RemoveFile} import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor.EMPTY @@ -51,8 +51,7 @@ class DeletionVectorsSuite extends QueryTest with DeltaSQLCommandTest with DeletionVectorsTestUtils with DeltaTestUtilsForTempViews - with DeltaExceptionTestUtils - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaExceptionTestUtils { import testImplicits._ override def beforeAll(): Unit = { @@ -300,7 +299,7 @@ class DeletionVectorsSuite extends QueryTest } } - testSparkMasterOnly(s"variant types DELETE with DVs with column mapping mode=$mode") { + test(s"variant types DELETE with DVs with column mapping mode=$mode") { withSQLConf("spark.databricks.delta.properties.defaults.columnMapping.mode" -> mode) { withTempDir { dirName => val path = dirName.getAbsolutePath diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuiteBase.scala b/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuite.scala similarity index 58% rename from spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuiteBase.scala rename to spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuite.scala index 6760eb3ed85..103390d2ae4 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuiteBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/logging/DeltaStructuredLoggingSuite.scala @@ -43,22 +43,40 @@ import java.nio.charset.StandardCharsets import java.nio.file.Files import java.util.regex.Pattern +import com.fasterxml.jackson.databind.ObjectMapper 
+import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.logging.log4j.Level import org.apache.spark.SparkFunSuite -import org.apache.spark.internal.{LogEntry, LoggingShims, LogKeyShims, MDC} +import org.apache.spark.internal.{LogEntry, Logging, LogKey, MDC} -trait DeltaStructuredLoggingSuiteBase - extends SparkFunSuite - with LoggingShims { - def className: String - def logFilePath: String +class DeltaStructuredLoggingSuite extends SparkFunSuite with Logging { + private def className: String = classOf[DeltaStructuredLoggingSuite].getSimpleName + private def logFilePath: String = "target/structured.log" private lazy val logFile: File = { val pwd = new File(".").getCanonicalPath new File(pwd + "/" + logFilePath) } + override def beforeAll(): Unit = { + super.beforeAll() + Logging.enableStructuredLogging() + } + + override def afterAll(): Unit = { + Logging.disableStructuredLogging() + super.afterAll() + } + + private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule) + private def compactAndToRegexPattern(json: String): String = { + jsonMapper.readTree(json).toString. + replace("<timestamp>", """[^"]+"""). + replace(""""<stacktrace>"""", """.*"""). + replace("{", """\{""") + "\n" + } + // Return the newly added log contents in the log file after executing the function `f` private def captureLogOutput(f: () => Unit): String = { val content = if (logFile.exists()) { @@ -72,38 +90,137 @@ trait DeltaStructuredLoggingSuiteBase newContent.substring(content.length) } - def basicMsg: String = "This is a log message" + private def basicMsg: String = "This is a log message" - def msgWithMDC: LogEntry = log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, "1")}." + private def msgWithMDC: LogEntry = log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, "1")}." - def msgWithMDCValueIsNull: LogEntry = log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, null)}." + private def msgWithMDCValueIsNull: LogEntry = + log"Lost executor ${MDC(DeltaLogKeys.EXECUTOR_ID, null)}." - def msgWithMDCAndException: LogEntry = + private def msgWithMDCAndException: LogEntry = log"Error in executor ${MDC(DeltaLogKeys.EXECUTOR_ID, "1")}." - def msgWithConcat: LogEntry = log"Min Size: ${MDC(DeltaLogKeys.MIN_SIZE, "2")}, " + + private def msgWithConcat: LogEntry = log"Min Size: ${MDC(DeltaLogKeys.MIN_SIZE, "2")}, " + log"Max Size: ${MDC(DeltaLogKeys.MAX_SIZE, "4")}. " + log"Please double check."
- // test for basic message (without any mdc) - def expectedPatternForBasicMsg(level: Level): String + private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}" - // test for basic message and exception - def expectedPatternForBasicMsgWithException(level: Level): String + def expectedPatternForBasicMsg(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "This is a log message", + "logger": "$className" + }""") + } - // test for message (with mdc) - def expectedPatternForMsgWithMDC(level: Level): String + def expectedPatternForBasicMsgWithException(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "This is a log message", + "exception": { + "class": "java.lang.RuntimeException", + "msg": "OOM", + "stacktrace": "<stacktrace>" + }, + "logger": "$className" + }""") + } - // test for message (with mdc - the value is null) - def expectedPatternForMsgWithMDCValueIsNull(level: Level): String + def expectedPatternForMsgWithMDC(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "Lost executor 1.", + "context": { + "executor_id": "1" + }, + "logger": "$className" + }""") + } + + def expectedPatternForMsgWithMDCValueIsNull(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "Lost executor null.", + "context": { + "executor_id": null + }, + "logger": "$className" + }""") + } + + def expectedPatternForMsgWithMDCAndException(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "Error in executor 1.", + "context": { + "executor_id": "1" + }, + "exception": { + "class": "java.lang.RuntimeException", + "msg": "OOM", + "stacktrace": "<stacktrace>" + }, + "logger": "$className" + }""") + } - // test for message and exception - def expectedPatternForMsgWithMDCAndException(level: Level): String + def expectedPatternForCustomLogKey(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "Custom log message.", + "logger": "$className" + }""" + ) + } - // test for custom LogKey - def expectedPatternForCustomLogKey(level: Level): String + def verifyMsgWithConcat(level: Level, logOutput: String): Unit = { + val pattern1 = compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "Min Size: 2, Max Size: 4. Please double check.", + "context": { + "min_size": "2", + "max_size": "4" + }, + "logger": "$className" + }""") - def verifyMsgWithConcat(level: Level, logOutput: String): Unit + val pattern2 = compactAndToRegexPattern( + s""" + { + "ts": "<timestamp>", + "level": "$level", + "msg": "Min Size: 2, Max Size: 4. 
Please double check.", + "context": { + "max_size": "4", + "min_size": "2" + }, + "logger": "$className" + }""") + assert(Pattern.compile(pattern1).matcher(logOutput).matches() || + Pattern.compile(pattern2).matcher(logOutput).matches()) + } test("Basic logging") { Seq( @@ -167,7 +284,6 @@ trait DeltaStructuredLoggingSuiteBase } } - private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}" test("Logging with custom LogKey") { Seq( (Level.ERROR, () => logError(customLog)), @@ -192,6 +308,6 @@ trait DeltaStructuredLoggingSuiteBase } object CustomLogKeys { - // Custom `LogKey` must be `extends LogKeyShims` - case object CUSTOM_LOG_KEY extends LogKeyShims + // Custom `LogKey` must extend LogKey + case object CUSTOM_LOG_KEY extends LogKey } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala index 47c99cb0382..98bd8fa65d6 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/schema/SchemaUtilsSuite.scala @@ -22,7 +22,7 @@ import java.util.regex.Pattern import scala.annotation.tailrec -import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaExcludedBySparkVersionTestMixinShims, DeltaLog, DeltaTestUtils, TypeWideningMode} +import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaLog, DeltaTestUtils, TypeWideningMode} import org.apache.spark.sql.delta.RowCommitVersion import org.apache.spark.sql.delta.RowId import org.apache.spark.sql.delta.commands.cdc.CDCReader @@ -49,8 +49,7 @@ class SchemaUtilsSuite extends QueryTest with SharedSparkSession with GivenWhenThen with DeltaSQLTestUtils - with DeltaSQLCommandTest - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaSQLCommandTest { import SchemaUtils._ import TypeWideningMode._ import testImplicits._ @@ -2638,7 +2637,7 @@ class SchemaUtilsSuite extends QueryTest MapType(IntegerType, IntegerType) -> MapType(LongType, LongType), ArrayType(IntegerType) -> ArrayType(LongType) )) - testSparkMasterOnly(s"typeWideningMode ${fromType.sql} -> ${toType.sql}") { + test(s"typeWideningMode ${fromType.sql} -> ${toType.sql}") { val narrow = new StructType().add("a", fromType) val wide = new StructType().add("a", toType) @@ -2683,7 +2682,7 @@ class SchemaUtilsSuite extends QueryTest ShortType -> DoubleType, IntegerType -> DecimalType(10, 0) )) - testSparkMasterOnly( + test( s"typeWideningMode - blocked type evolution ${fromType.sql} -> ${toType.sql}") { val narrow = new StructType().add("a", fromType) val wide = new StructType().add("a", toType) @@ -2716,7 +2715,7 @@ class SchemaUtilsSuite extends QueryTest DateType -> TimestampNTZType, DecimalType(10, 2) -> DecimalType(12, 4) )) - testSparkMasterOnly( + test( s"typeWideningMode - Uniform Iceberg compatibility ${fromType.sql} -> ${toType.sql}") { val narrow = new StructType().add("a", fromType) val wide = new StructType().add("a", toType) @@ -2771,7 +2770,7 @@ class SchemaUtilsSuite extends QueryTest } } - testSparkMasterOnly( + test( s"typeWideningMode - widen to common wider decimal") { val left = new StructType().add("a", DecimalType(10, 2)) val right = new StructType().add("a", DecimalType(5, 4)) @@ -2806,7 +2805,7 @@ class SchemaUtilsSuite extends QueryTest } - testSparkMasterOnly( + test( s"typeWideningMode - widen to common wider decimal exceeds max decimal precision") { // We'd need a DecimalType(40, 19) to fit both types, which exceeds max 
decimal precision of 38. val left = new StructType().add("a", DecimalType(20, 19)) diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala index 3aa8f444a6d..5feeafb1d80 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/skipping/clustering/ClusteredTableDDLSuite.scala @@ -20,7 +20,7 @@ import java.io.File import com.databricks.spark.util.{Log4jUsageLogger, MetricDefinitions} import org.apache.spark.sql.delta.skipping.ClusteredTableTestUtils -import org.apache.spark.sql.delta.{CatalogOwnedTableFeature, DeltaAnalysisException, DeltaColumnMappingEnableIdMode, DeltaColumnMappingEnableNameMode, DeltaConfigs, DeltaExcludedBySparkVersionTestMixinShims, DeltaLog, DeltaUnsupportedOperationException, NoMapping} +import org.apache.spark.sql.delta.{CatalogOwnedTableFeature, DeltaAnalysisException, DeltaColumnMappingEnableIdMode, DeltaColumnMappingEnableNameMode, DeltaConfigs, DeltaLog, DeltaUnsupportedOperationException, NoMapping} import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils import org.apache.spark.sql.delta.clustering.ClusteringMetadataDomain import org.apache.spark.sql.delta.coordinatedcommits.CatalogOwnedTestBaseSuite @@ -649,8 +649,7 @@ trait ClusteredTableCreateOrReplaceDDLSuite trait ClusteredTableDDLSuiteBase extends ClusteredTableCreateOrReplaceDDLSuite - with DeltaSQLCommandTest - with DeltaExcludedBySparkVersionTestMixinShims { + with DeltaSQLCommandTest { import testImplicits._ @@ -1002,7 +1001,7 @@ trait ClusteredTableDDLSuiteBase } } - testSparkMasterOnly("Variant is not supported") { + test("Variant is not supported") { val e = intercept[DeltaAnalysisException] { createOrReplaceClusteredTable("CREATE", testTable, "id long, v variant", "v") } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala b/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala index 16bb8da29ec..38686393762 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/stats/DataSkippingDeltaTests.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -trait DataSkippingDeltaTestsBase extends DeltaExcludedBySparkVersionTestMixinShims +trait DataSkippingDeltaTestsBase extends QueryTest with SharedSparkSession with DeltaSQLCommandTest with DataSkippingDeltaTestsUtils @@ -1812,7 +1812,7 @@ trait DataSkippingDeltaTestsBase extends DeltaExcludedBySparkVersionTestMixinShi } } - testSparkMasterOnly("data skipping by stats - variant type") { + test("data skipping by stats - variant type") { withTable("tbl") { sql("""CREATE TABLE tbl(v VARIANT, v_struct STRUCT, diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala index ed74b41de25..79d6b5f510e 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/DeltaHiveTest.scala @@ -16,13 +16,13 @@ package org.apache.spark.sql.delta.test -import org.apache.spark.sql.delta.Relocated._ import org.apache.spark.sql.delta.catalog.DeltaCatalog import 
org.apache.spark.sql.delta.test.DeltaSQLTestUtils import io.delta.sql.DeltaSparkSessionExtension import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.sql.classic.SparkSession import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} @@ -46,7 +46,7 @@ trait DeltaHiveTest extends SparkFunSuite with BeforeAndAfterAll { self: DeltaSQ _sc = new SparkContext("local", this.getClass.getName, conf) _hiveContext = new TestHiveContext(_sc) _session = _hiveContext.sparkSession - setActiveSession(_session) + SparkSession.setActiveSession(_session) super.beforeAll() } diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala b/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala index f75996762d6..93c7da959e6 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/test/TestsStatistics.scala @@ -16,7 +16,6 @@ package org.apache.spark.sql.delta.test -import org.apache.spark.sql.delta.DeltaExcludedBySparkVersionTestMixinShims import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.delta.test.DeltaSQLTestUtils import org.apache.spark.sql.delta.test.DeltaTestImplicits._ @@ -28,7 +27,7 @@ import org.apache.spark.sql.{Column, DataFrame} /** * Provides utilities for testing StatisticsCollection. */ -trait TestsStatistics extends DeltaExcludedBySparkVersionTestMixinShims { self: DeltaSQLTestUtils => +trait TestsStatistics { self: DeltaSQLTestUtils => /** A function to get the reconciled statistics DataFrame from the DeltaLog */ protected var getStatsDf: (DeltaLog, Seq[Column]) => DataFrame = _ @@ -60,7 +59,7 @@ trait TestsStatistics extends DeltaExcludedBySparkVersionTestMixinShims { self: testTags: org.scalatest.Tag*)(testFun: => Any): Unit = { import testImplicits._ - testSparkMasterOnly(testName, testTags: _*) { + test(testName, testTags: _*) { getStatsDf = (deltaLog, columns) => { val snapshot = deltaLog.snapshot snapshot.allFiles diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala index 40161c7ab70..dbde24b622d 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningAlterTableSuite.scala @@ -32,13 +32,11 @@ import org.apache.spark.sql.types._ * Suite providing core coverage for type widening using ALTER TABLE CHANGE COLUMN TYPE. 
*/ class TypeWideningAlterTableSuite - extends QueryTest + extends TypeWideningAlterTableTests with ParquetTest with TypeWideningTestMixin - with TypeWideningAlterTableTests -trait TypeWideningAlterTableTests - extends DeltaExcludedBySparkVersionTestMixinShims +trait TypeWideningAlterTableTests extends QueryTest with QueryErrorsBase with TypeWideningTestCases { self: QueryTest with ParquetTest with TypeWideningTestMixin => @@ -155,7 +153,7 @@ trait TypeWideningAlterTableTests } } - testSparkMasterOnly( + test( "widening Date -> TimestampNTZ rejected when TimestampNTZ feature isn't supported") { withTimestampNTZDisabled { sql(s"CREATE TABLE delta.`$tempPath` (a date) USING DELTA") diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala index 11ac059b921..3ea819daa4f 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningInsertSchemaEvolutionBasicSuite.scala @@ -53,8 +53,7 @@ class TypeWideningInsertSchemaEvolutionBasicSuite */ trait TypeWideningInsertSchemaEvolutionBasicTests extends DeltaInsertIntoTest - with TypeWideningTestCases - with DeltaExcludedBySparkVersionTestMixinShims { + with TypeWideningTestCases { self: QueryTest with TypeWideningTestMixin with DeltaDMLTestUtils => import testImplicits._ @@ -106,7 +105,7 @@ trait TypeWideningInsertSchemaEvolutionBasicTests } } - testSparkMasterOnly(s"INSERT - logs for missed opportunity for conversion") { + test(s"INSERT - logs for missed opportunity for conversion") { val testCase = restrictedAutomaticWideningTestCases.head append(testCase.initialValuesDF) diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala index 0b47eb8e4ff..1ded769d1e4 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningMergeIntoSchemaEvolutionSuite.scala @@ -30,10 +30,9 @@ import org.apache.spark.sql.types._ * INTO when the type widening table feature is supported. */ class TypeWideningMergeIntoSchemaEvolutionSuite - extends QueryTest + extends TypeWideningMergeIntoSchemaEvolutionTests with DeltaDMLTestUtils - with TypeWideningTestMixin - with TypeWideningMergeIntoSchemaEvolutionTests { + with TypeWideningTestMixin { protected override def sparkConf: SparkConf = { super.sparkConf @@ -44,8 +43,7 @@ class TypeWideningMergeIntoSchemaEvolutionSuite /** * Tests covering type widening during schema evolution in MERGE INTO. 
*/ -trait TypeWideningMergeIntoSchemaEvolutionTests - extends DeltaExcludedBySparkVersionTestMixinShims +trait TypeWideningMergeIntoSchemaEvolutionTests extends QueryTest with MergeIntoSQLTestUtils with MergeIntoSchemaEvolutionMixin with TypeWideningTestCases { @@ -53,7 +51,7 @@ trait TypeWideningMergeIntoSchemaEvolutionTests import testImplicits._ - testSparkMasterOnly(s"MERGE - always automatic type widening TINYINT -> DOUBLE") { + test(s"MERGE - always automatic type widening TINYINT -> DOUBLE") { withTable("source") { sql(s"CREATE TABLE delta.`$tempPath` (a short) USING DELTA") sql("CREATE TABLE source (a double) USING DELTA") diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala index 602c75f148a..11ffa899531 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningStreamingSinkSuite.scala @@ -30,8 +30,7 @@ import org.apache.spark.sql.types._ */ class TypeWideningStreamingSinkSuite extends DeltaSinkImplicitCastSuiteBase - with TypeWideningTestMixin - with DeltaExcludedBySparkVersionTestMixinShims { + with TypeWideningTestMixin { import testImplicits._ @@ -46,7 +45,7 @@ class TypeWideningStreamingSinkSuite spark.conf.set(SQLConf.ANSI_ENABLED.key, "true") } - testSparkMasterOnly("type is widened if automatic widening set to always") { + test("type is widened if automatic widening set to always") { withDeltaStream[Int] { stream => stream.write(17)("CAST(value AS SHORT)") assert(stream.currentSchema("value").dataType === ShortType) diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala index a3bed20d755..c8cc173b88e 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTableFeatureSuite.scala @@ -35,14 +35,11 @@ import org.apache.spark.sql.types._ /** * Test suite covering feature enablement and configuration tests. */ -class TypeWideningTableFeatureEnablementSuite - extends QueryTest +class TypeWideningTableFeatureEnablementSuite extends TypeWideningTableFeatureEnablementTests with TypeWideningTestMixin with TypeWideningDropFeatureTestMixin - with TypeWideningTableFeatureEnablementTests -trait TypeWideningTableFeatureEnablementTests - extends DeltaExcludedBySparkVersionTestMixinShims +trait TypeWideningTableFeatureEnablementTests extends QueryTest with TypeWideningTestCases { self: QueryTest with TypeWideningTestMixin @@ -159,7 +156,6 @@ class TypeWideningTableFeatureDropSuite trait TypeWideningTableFeatureDropTests extends RowTrackingTestUtils - with DeltaExcludedBySparkVersionTestMixinShims with TypeWideningTestCases { self: QueryTest with TypeWideningTestMixin @@ -380,13 +376,11 @@ trait TypeWideningTableFeatureDropTests * Additional tests covering e.g. unsupported type change check, CLONE, RESTORE. 
*/ class TypeWideningTableFeatureAdvancedSuite - extends QueryTest + extends TypeWideningTableFeatureAdvancedTests with TypeWideningTestMixin with TypeWideningDropFeatureTestMixin - with TypeWideningTableFeatureAdvancedTests -trait TypeWideningTableFeatureAdvancedTests - extends DeltaExcludedBySparkVersionTestMixinShims +trait TypeWideningTableFeatureAdvancedTests extends QueryTest with TypeWideningTestCases { self: QueryTest with TypeWideningTestMixin @@ -544,42 +538,6 @@ trait TypeWideningTableFeatureAdvancedTests readDeltaTable(tempPath).collect() } - testSparkLatestOnly( - "helpful error when reading type changes not supported yet during preview") { - sql(s"CREATE TABLE delta.`$tempDir` (a int) USING DELTA") - val metadata = new MetadataBuilder() - .putMetadataArray("delta.typeChanges", Array( - new MetadataBuilder() - .putString("toType", "long") - .putString("fromType", "int") - .build() - )).build() - - // Delta 3.2/3.3 doesn't support changing type from int->long, we manually commit that type - // change to simulate what Delta 4.0 could do. - deltaLog.withNewTransaction { txn => - txn.commit( - Seq(txn.snapshot.metadata.copy( - schemaString = new StructType() - .add("a", LongType, nullable = true, metadata).json - )), - ManualUpdate) - } - - checkError( - exception = intercept[DeltaUnsupportedOperationException] { - readDeltaTable(tempPath).collect() - }, - "DELTA_UNSUPPORTED_TYPE_CHANGE_IN_PREVIEW", - parameters = Map( - "fieldPath" -> "a", - "fromType" -> "INT", - "toType" -> "BIGINT", - "typeWideningFeatureName" -> "typeWidening" - ) - ) - } - test("type widening rewrite metrics") { sql(s"CREATE TABLE delta.`$tempDir` (a byte) USING DELTA") addSingleFile(Seq(1, 2, 3), ByteType) @@ -691,13 +649,11 @@ trait TypeWideningTableFeatureAdvancedTests * Test suite covering preview vs stable feature interactions. */ class TypeWideningTableFeaturePreviewSuite - extends QueryTest + extends TypeWideningTableFeatureVersionTests with TypeWideningTestMixin with TypeWideningDropFeatureTestMixin - with TypeWideningTableFeatureVersionTests -trait TypeWideningTableFeatureVersionTests - extends DeltaExcludedBySparkVersionTestMixinShims +trait TypeWideningTableFeatureVersionTests extends QueryTest with TypeWideningTestCases { self: QueryTest with TypeWideningTestMixin diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala index d9829f1150b..d53b0fa3fa5 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/typewidening/TypeWideningTestCases.scala @@ -17,13 +17,13 @@ package org.apache.spark.sql.delta.typewidening import org.apache.spark.sql.{DataFrame, Encoder, Row} -import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ /** * Trait collecting supported and unsupported type change test cases. 
*/ -trait TypeWideningTestCases extends TypeWideningTestCasesShims { self: SharedSparkSession => +trait TypeWideningTestCases extends SQLTestUtils { self: SharedSparkSession => import testImplicits._ /** @@ -86,4 +86,115 @@ trait TypeWideningTestCases extends TypeWideningTestCasesShims { self: SharedSpa override def expectedResult: DataFrame = initialValuesDF.select($"value".cast(toType)) } + + // Type changes that are supported by all Parquet readers. Byte, Short, Int are all stored as + // INT32 in parquet so these changes are guaranteed to be supported. + protected val supportedTestCases: Seq[TypeEvolutionTestCase] = Seq( + SupportedTypeEvolutionTestCase(ByteType, ShortType, + Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), + Seq(4, -4, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short])), + SupportedTypeEvolutionTestCase(ByteType, IntegerType, + Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), + Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])), + SupportedTypeEvolutionTestCase(ShortType, IntegerType, + Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), + Seq(4, -4, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int])), + SupportedTypeEvolutionTestCase(ShortType, LongType, + Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), + Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])), + SupportedTypeEvolutionTestCase(IntegerType, LongType, + Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), + Seq(4L, -4L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Long])), + SupportedTypeEvolutionTestCase(FloatType, DoubleType, + Seq(1234.56789f, -0f, 0f, Float.NaN, Float.NegativeInfinity, Float.PositiveInfinity, + Float.MinPositiveValue, Float.MinValue, Float.MaxValue, null.asInstanceOf[Float]), + Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity, + Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue, + null.asInstanceOf[Double])), + SupportedTypeEvolutionTestCase(DateType, TimestampNTZType, + Seq("2020-01-01", "2024-02-29", "1312-02-27"), + Seq("2020-03-17 15:23:15.123456", "2058-12-31 23:59:59.999", "0001-01-01 00:00:00")), + // Larger precision. + SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), + DecimalType(Decimal.MAX_LONG_DIGITS, 2), + Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"), + null.asInstanceOf[BigDecimal])), + // Larger precision and scale, same physical type. + SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS - 1, 2), + DecimalType(Decimal.MAX_INT_DIGITS, 3), + Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 3) + ".99"), + null.asInstanceOf[BigDecimal])), + // Larger precision and scale, different physical types. + SupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), + DecimalType(Decimal.MAX_LONG_DIGITS + 1, 3), + Seq(BigDecimal("1.23"), BigDecimal("10.34"), null.asInstanceOf[BigDecimal]), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"), + null.asInstanceOf[BigDecimal])) + ) + + // Type changes that are only eligible for automatic widening when + // spark.databricks.delta.typeWidening.allowAutomaticWidening = ALWAYS. 
+ protected val restrictedAutomaticWideningTestCases: Seq[TypeEvolutionTestCase] = Seq( + SupportedTypeEvolutionTestCase(IntegerType, DoubleType, + Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), + Seq(987654321.987654321d, -0d, 0d, Double.NaN, Double.NegativeInfinity, + Double.PositiveInfinity, Double.MinPositiveValue, Double.MinValue, Double.MaxValue, + null.asInstanceOf[Double])), + SupportedTypeEvolutionTestCase(ByteType, DecimalType(10, 0), + Seq(1, -1, Byte.MinValue, Byte.MaxValue, null.asInstanceOf[Byte]), + Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), + SupportedTypeEvolutionTestCase(ShortType, DecimalType(10, 0), + Seq(1, -1, Short.MinValue, Short.MaxValue, null.asInstanceOf[Short]), + Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), + SupportedTypeEvolutionTestCase(IntegerType, DecimalType(10, 0), + Seq(1, -1, Int.MinValue, Int.MaxValue, null.asInstanceOf[Int]), + Seq(BigDecimal("1.23"), BigDecimal("9" * 10), null.asInstanceOf[BigDecimal])), + SupportedTypeEvolutionTestCase(LongType, DecimalType(20, 0), + Seq(1L, -1L, Long.MinValue, Long.MaxValue, null.asInstanceOf[Int]), + Seq(BigDecimal("1.23"), BigDecimal("9" * 20), null.asInstanceOf[BigDecimal])) + ) + + // Test type changes that aren't supported. + protected val unsupportedTestCases: Seq[TypeEvolutionTestCase] = Seq( + UnsupportedTypeEvolutionTestCase(IntegerType, ByteType, + Seq(1, 2, Int.MinValue)), + UnsupportedTypeEvolutionTestCase(LongType, IntegerType, + Seq(4, 5, Long.MaxValue)), + UnsupportedTypeEvolutionTestCase(DoubleType, FloatType, + Seq(987654321.987654321d, Double.NaN, Double.NegativeInfinity, + Double.PositiveInfinity, Double.MinPositiveValue, + Double.MinValue, Double.MaxValue)), + UnsupportedTypeEvolutionTestCase(ByteType, DecimalType(2, 0), + Seq(1, -1, Byte.MinValue)), + UnsupportedTypeEvolutionTestCase(ShortType, DecimalType(4, 0), + Seq(1, -1, Short.MinValue)), + UnsupportedTypeEvolutionTestCase(IntegerType, DecimalType(9, 0), + Seq(1, -1, Int.MinValue)), + UnsupportedTypeEvolutionTestCase(LongType, DecimalType(19, 0), + Seq(1, -1, Long.MinValue)), + UnsupportedTypeEvolutionTestCase(TimestampNTZType, DateType, + Seq("2020-03-17 15:23:15", "2023-12-31 23:59:59", "0001-01-01 00:00:00")), + // Reduce scale + UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), + DecimalType(Decimal.MAX_INT_DIGITS, 3), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), + // Reduce precision + UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), + DecimalType(Decimal.MAX_INT_DIGITS - 1, 2), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), + // Reduce precision & scale + UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2), + DecimalType(Decimal.MAX_INT_DIGITS - 1, 1), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))), + // Increase scale more than precision + UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_INT_DIGITS, 2), + DecimalType(Decimal.MAX_INT_DIGITS + 1, 4), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_INT_DIGITS - 2) + ".99"))), + // Smaller scale and larger precision. 
+ UnsupportedTypeEvolutionTestCase(DecimalType(Decimal.MAX_LONG_DIGITS, 2), + DecimalType(Decimal.MAX_INT_DIGITS + 3, 1), + Seq(BigDecimal("-67.89"), BigDecimal("9" * (Decimal.MAX_LONG_DIGITS - 2) + ".99"))) + ) } diff --git a/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java b/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java index 198d18ab9b7..b0b9f642f44 100644 --- a/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java +++ b/storage/src/main/java/io/delta/storage/internal/S3LogStoreUtil.java @@ -16,7 +16,7 @@ package io.delta.storage.internal; -import com.amazonaws.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.s3a.*; @@ -63,11 +63,12 @@ private static RemoteIterator<S3AFileStatus> s3ListFrom( // List files lexicographically after resolvedPath inclusive within the same directory return listing.createFileStatusListingIterator(resolvedPath, S3ListRequest.v2( - new ListObjectsV2Request() - .withBucketName(s3afs.getBucket()) - .withMaxKeys(maxKeys) - .withPrefix(s3afs.pathToKey(parentPath)) - .withStartAfter(keyBefore(s3afs.pathToKey(resolvedPath))) + ListObjectsV2Request.builder() + .bucket(s3afs.getBucket()) + .maxKeys(maxKeys) + .prefix(s3afs.pathToKey(parentPath)) + .startAfter(keyBefore(s3afs.pathToKey(resolvedPath))) + .build() ), ACCEPT_ALL, new Listing.AcceptAllButSelfAndS3nDirs(parentPath), s3afs.getActiveAuditSpan()); @@ -94,7 +95,7 @@ public static FileStatus[] s3ListFromArray( "The Hadoop file system used for the S3LogStore must be castable to " + "org.apache.hadoop.fs.s3a.S3AFileSystem.", e); } - return iteratorToStatuses(S3LogStoreUtil.s3ListFrom(s3afs, resolvedPath, parentPath), new HashSet<>()); + return iteratorToStatuses(S3LogStoreUtil.s3ListFrom(s3afs, resolvedPath, parentPath)); } /** diff --git a/version.sbt b/version.sbt index 074f3488404..895ae6915f2 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.4.0-SNAPSHOT" +ThisBuild / version := "4.1.0-SNAPSHOT"
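
For reference, the S3LogStoreUtil hunk above ports the listing request from the AWS SDK v1 mutable setters (withBucketName, withMaxKeys, ...) to the SDK v2 immutable builder. The following is a minimal standalone sketch of the same v2 builder pattern, not part of the diff: it drives a plain software.amazon.awssdk S3Client rather than Hadoop's S3AFileSystem, and the bucket, prefix, and start-after key are placeholder values.

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Object;

public class S3ListAfterSketch {
    public static void main(String[] args) {
        // Placeholder values; S3LogStoreUtil derives these from the S3AFileSystem and the log path.
        String bucket = "example-bucket";
        String prefix = "table/_delta_log/";
        String startAfter = "table/_delta_log/00000000000000000009.json";

        try (S3Client s3 = S3Client.create()) {
            // SDK v2 requests are immutable and assembled via a builder, as in the hunk above.
            ListObjectsV2Request request = ListObjectsV2Request.builder()
                    .bucket(bucket)
                    .maxKeys(1000)
                    .prefix(prefix)
                    .startAfter(startAfter)
                    .build();

            // Keys come back in lexicographic order, strictly after `startAfter`.
            ListObjectsV2Response response = s3.listObjectsV2(request);
            for (S3Object object : response.contents()) {
                System.out.println(object.key() + " (" + object.size() + " bytes)");
            }
        }
    }
}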