From 9103bb7fadab76cc21317db312f23ea6bfe0eb05 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 10 Jun 2026 08:33:07 -0700 Subject: [PATCH] Remove old Spark 3.3 DBR shim sources Signed-off-by: Gera Shegalov --- .../DatabricksShimServiceProvider.scala | 66 --------------- .../CheckOverflowInTableInsertShims.scala | 82 +++++++++++++++++++ .../rapids/shims/Spark321PlusDBShims.scala | 2 +- .../rapids/shims/Spark330PlusDBShims.scala | 19 +---- .../spark/rapids/shims/SparkShims.scala | 11 +-- .../spark330db/SparkShimServiceProvider.scala | 37 --------- .../apache/spark/sql/rapids/arithmetic.scala | 3 +- .../shims/GpuGroupedPythonRunnerFactory.scala | 6 +- .../sql/rapids/shims/OriginContextShim.scala | 33 -------- .../shims/SparkDateTimeExceptionShims.scala | 38 --------- .../shims/SparkUpgradeExceptionShims.scala | 42 ---------- 11 files changed, 96 insertions(+), 243 deletions(-) delete mode 100644 sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/DatabricksShimServiceProvider.scala create mode 100644 sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CheckOverflowInTableInsertShims.scala delete mode 100644 sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/spark330db/SparkShimServiceProvider.scala delete mode 100644 sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/OriginContextShim.scala delete mode 100644 sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala delete mode 100644 sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/DatabricksShimServiceProvider.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/DatabricksShimServiceProvider.scala deleted file mode 100644 index 2aae0326162..00000000000 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/DatabricksShimServiceProvider.scala +++ 
/dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2024-2026, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*** spark-rapids-shim-json-lines -{"spark": "330db"} -{"spark": "332db"} -{"spark": "341db"} -{"spark": "350db143"} -{"spark": "400db173"} -spark-rapids-shim-json-lines ***/ -package com.nvidia.spark.rapids - -object DatabricksShimServiceProvider { - val log = org.slf4j.LoggerFactory.getLogger(getClass().getName().stripSuffix("$")) - - def matchesVersion(dbrVersion: String, - shimMatchEnabled: Boolean = true, - disclaimer: String = "" - ): Boolean = { - var ignoreExceptions = true - try { - val sparkBuildInfo = org.apache.spark.BuildInfo - val databricksBuildInfo = com.databricks.BuildInfo - val matchRes = sparkBuildInfo.dbrVersion.startsWith(dbrVersion) - val matchStatus = if (matchRes) "SUCCESS" else "FAILURE" - val logMessage = - s"""Databricks Runtime Build Info match: $matchStatus - |\tDBR_VERSION: ${sparkBuildInfo.dbrVersion} - |\tspark.BuildInfo.gitHash: ${sparkBuildInfo.gitHash} - |\tdatabricks.BuildInfo.gitHash: ${databricksBuildInfo.gitHash} - |\tdatabricks.BuildInfo.gitTimestamp: ${databricksBuildInfo.gitTimestamp}""" - .stripMargin - if (matchRes) { - log.warn(logMessage) - if (shimMatchEnabled) { - if (disclaimer.nonEmpty) { - log.warn(disclaimer) - } - } else { - ignoreExceptions = false - sys.error(disclaimer) - } - } else { - log.debug(logMessage) - } - matchRes - } catch { - case x: Throwable if 
ignoreExceptions => - log.debug("Databricks detection failed: " + x, x) - false - } - } -} diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CheckOverflowInTableInsertShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CheckOverflowInTableInsertShims.scala new file mode 100644 index 00000000000..137e72913ae --- /dev/null +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CheckOverflowInTableInsertShims.scala @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2026, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "344"} +{"spark": "350"} +{"spark": "350db143"} +{"spark": "351"} +{"spark": "352"} +{"spark": "353"} +{"spark": "354"} +{"spark": "355"} +{"spark": "356"} +{"spark": "357"} +{"spark": "358"} +{"spark": "400"} +{"spark": "400db173"} +{"spark": "401"} +{"spark": "402"} +{"spark": "411"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids.{ + ExprChecks, + ExprRule, + GpuCast, + GpuExpression, + GpuOverrides, + TypeSig, + UnaryExprMeta +} + +import org.apache.spark.sql.catalyst.expressions.{CheckOverflowInTableInsert, Expression} +import org.apache.spark.sql.rapids.GpuCheckOverflowInTableInsert + +object CheckOverflowInTableInsertShims { + val exprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = { + Seq( + // Add the CheckOverflowInTableInsert expression, available starting with Spark 3.3.1. + // Accepts all input types because the child Cast does the type checking and calculations. 
+ GpuOverrides.expr[CheckOverflowInTableInsert]( + "Casting a numeric value as another numeric type in store assignment", + ExprChecks.unaryProjectInputMatchesOutput( + TypeSig.all, + TypeSig.all), + (t, conf, p, r) => new UnaryExprMeta[CheckOverflowInTableInsert](t, conf, p, r) { + override def convertToGpu(child: Expression): GpuExpression = { + child match { + case c: GpuCast => GpuCheckOverflowInTableInsert(c, t.columnName) + case _ => + throw new IllegalStateException("Expression child is not of Type GpuCast") + } + } + }) + ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap + } +} diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark321PlusDBShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark321PlusDBShims.scala index 14e7e8ff610..d11b1c7f7f2 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark321PlusDBShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark321PlusDBShims.scala @@ -124,7 +124,7 @@ trait Spark321PlusDBShims extends SparkShims TypeSig.all, Map("partitionSpec" -> new InputCheck(TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128, - TypeSig.all, Nil))), + TypeSig.all, List.empty))), (runningWindowFunctionExec, conf, p, r) => new GpuRunningWindowExecMeta(runningWindowFunctionExec, conf, p, r) ) diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala index fac3314c709..349368def4f 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/Spark330PlusDBShims.scala @@ -30,29 +30,16 @@ import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.execution.{ColumnarToRowTransition, SparkPlan} import 
org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec import org.apache.spark.sql.execution.exchange.{EXECUTOR_BROADCAST, ShuffleExchangeExec, ShuffleExchangeLike} -import org.apache.spark.sql.rapids.{GpuCheckOverflowInTableInsert, GpuElementAtMeta} +import org.apache.spark.sql.rapids.GpuElementAtMeta import org.apache.spark.sql.rapids.execution.{GpuBroadcastHashJoinExec, GpuBroadcastNestedLoopJoinExec} trait Spark330PlusDBShims extends Spark321PlusDBShims { override def getExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = { val shimExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Seq( - GpuOverrides.expr[CheckOverflowInTableInsert]( - "Casting a numeric value as another numeric type in store assignment", - ExprChecks.unaryProjectInputMatchesOutput( - TypeSig.all, - TypeSig.all), - (t, conf, p, r) => new UnaryExprMeta[CheckOverflowInTableInsert](t, conf, p, r) { - override def convertToGpu(child: Expression): GpuExpression = { - child match { - case c: GpuCast => GpuCheckOverflowInTableInsert(c, t.columnName) - case _ => - throw new IllegalStateException("Expression child is not of Type GpuCast") - } - } - }), GpuElementAtMeta.elementAtRule(true) ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap - super.getExprs ++ shimExprs ++ DayTimeIntervalShims.exprs ++ RoundingShims.exprs + super.getExprs ++ CheckOverflowInTableInsertShims.exprs ++ shimExprs ++ + DayTimeIntervalShims.exprs ++ RoundingShims.exprs } override def getExecs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index 84f5c4e4886..324c7718ab5 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ package com.nvidia.spark.rapids.shims import com.nvidia.spark.rapids._ import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, DataWritingCommand, RunnableCommand} +import org.apache.spark.sql.execution.command.{DataWritingCommand, RunnableCommand} object SparkShimImpl extends Spark330PlusDBShims { // AnsiCast is removed from Spark3.4.0 @@ -30,9 +30,10 @@ object SparkShimImpl extends Spark330PlusDBShims { override def getDataWriteCmds: Map[Class[_ <: DataWritingCommand], DataWritingCommandRule[_ <: DataWritingCommand]] = { - Seq(GpuOverrides.dataWriteCmd[CreateDataSourceTableAsSelectCommand]( - "Create table with select command", - (a, conf, p, r) => new CreateDataSourceTableAsSelectCommandMeta(a, conf, p, r)) + Seq( + GpuOverrides.dataWriteCmdFromShim( + CreateDataSourceTableAsSelectRules.dataWriteCmd, + (a, conf, p, r) => new CreateDataSourceTableAsSelectCommandMeta(a, conf, p, r)) ).map(r => (r.getClassFor.asSubclass(classOf[DataWritingCommand]), r)).toMap } diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/spark330db/SparkShimServiceProvider.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/spark330db/SparkShimServiceProvider.scala deleted file mode 100644 index 0db1385cda5..00000000000 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/spark330db/SparkShimServiceProvider.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2022-2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*** spark-rapids-shim-json-lines -{"spark": "330db"} -spark-rapids-shim-json-lines ***/ -package com.nvidia.spark.rapids.shims.spark330db - -import com.nvidia.spark.rapids._ - -object SparkShimServiceProvider { - // DB version should conform to "major.minor" and has no patch version. - // Refer to VersionUtils.getVersionForJni - val VERSION = DatabricksShimVersion(3, 3, 0, "11.3") -} - -class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { - - override def getShimVersion: ShimVersion = SparkShimServiceProvider.VERSION - - def matchesVersion(version: String): Boolean = { - DatabricksShimServiceProvider.matchesVersion("11.3.x") - } -} diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala index 160f93fc34a..a9d2530a443 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala @@ -49,7 +49,6 @@ import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource} import com.nvidia.spark.rapids.RapidsPluginImplicits._ import com.nvidia.spark.rapids.shims.NullIntolerantShim -import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} @@ -279,7 +278,7 @@ case class GpuDecimalRemainder( left: Expression, 
right: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled) - extends GpuRemainderBase(left, right) with Logging { + extends GpuRemainderBase(left, right) { // scalastyle:off // The formula follows Hive which is based on the SQL standard and MS SQL: diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala index 70837659871..02aba89c418 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2025, NVIDIA CORPORATION. + * Copyright (c) 2023-2026, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,14 +25,14 @@ import org.apache.spark.sql.rapids.shims.ArrowUtilsShim import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch -case class GpuGroupedPythonRunnerFactory( +class GpuGroupedPythonRunnerFactory( conf: org.apache.spark.sql.internal.SQLConf, chainedFunc: Seq[(ChainedPythonFunctions, Long)], argOffsets: Array[Array[Int]], dedupAttrs: StructType, pythonOutputSchema: StructType, evalType: Int, - argNames: Option[Array[Array[Option[String]]]] = None) { + argNames: Option[Array[Array[Option[String]]]]) extends Serializable { // Configs from DB runtime val maxBytes = conf.pandasZeroConfConversionGroupbyApplyMaxBytesPerSlice val zeroConfEnabled = conf.pandasZeroConfConversionGroupbyApplyEnabled diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/OriginContextShim.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/OriginContextShim.scala deleted file mode 100644 index 50d91c1abdb..00000000000 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/OriginContextShim.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2026, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/*** spark-rapids-shim-json-lines -{"spark": "330db"} -{"spark": "332db"} -spark-rapids-shim-json-lines ***/ -package org.apache.spark.sql.rapids.shims - -import org.apache.spark.sql.catalyst.trees.{Origin, SQLQueryContext} - -// Databricks 3.3.x back-ported SPARK-39175 and typed `Origin.context` as -// `SQLQueryContext` directly — same shape as Apache 3.4+. -object OriginContextShim { - def queryContext(origin: Origin): SQLQueryContext = origin.context - def contextSummary(origin: Origin): String = origin.context match { - case null => "" - case ctx => ctx.summary - } -} diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala deleted file mode 100644 index 4f59c12c985..00000000000 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/*** spark-rapids-shim-json-lines -{"spark": "330db"} -spark-rapids-shim-json-lines ***/ -package org.apache.spark.sql.rapids.shims - -import org.apache.spark.{QueryContext, SparkDateTimeException} - -object SparkDateTimeExceptionShims { - - def newSparkDateTimeException( - errorClass: String, - messageParameters: Map[String, String], - context: Array[QueryContext], - summary: String): SparkDateTimeException = { - new SparkDateTimeException( - errorClass, - None, - Array.empty, - context, - summary) - } -} diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala deleted file mode 100644 index 2ae55c73057..00000000000 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/*** spark-rapids-shim-json-lines -{"spark": "330db"} -spark-rapids-shim-json-lines ***/ -package org.apache.spark.sql.rapids.shims - -import org.apache.spark.SparkUpgradeException - -object SparkUpgradeExceptionShims { - - def newSparkUpgradeException( - version: String, - message: String, - cause: Throwable): SparkUpgradeException = { - new SparkUpgradeException( - "INCONSISTENT_BEHAVIOR_CROSS_VERSION", - None, - Array(version, message), - cause) - } - - // Used in tests to compare the class seen in an exception to - // `SparkUpgradeException` which is private in Spark - def getSparkUpgradeExceptionClass: Class[_] = { - classOf[SparkUpgradeException] - } -}