Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,9 @@ final class CudfUnsafeRow(
}
}

// Keep companion line metadata aligned with pre-Spark-4 shims for binary-dedupe.




object CudfUnsafeRow extends CudfUnsafeRowTrait

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,19 @@ class GpuOrcDataReader(
}


// Keep executable line numbers aligned with pre-Spark-4 shims for binary-dedupe.












object GpuOrcDataReader {
// File cache is being used, so we want read ranges that can be cached separately
val shouldMergeDiskRanges: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@ package com.nvidia.spark.rapids.shims
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelationWithTable}

// Keep companion line metadata aligned with pre-Spark-4 shims for binary-dedupe.











object LogicalPlanShims {
def getLocations(plan: LogicalPlan): Seq[FileIndex] = plan.collect {
case LogicalRelationWithTable(t: HadoopFsRelation, _) => t.location
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,24 @@ import org.apache.spark.sql.catalyst.expressions.Expression
trait NullIntolerantShim extends Expression {
override def nullIntolerant: Boolean = true
}

abstract class GpuLiteralShim extends com.nvidia.spark.rapids.GpuLeafExpression {
def value: Any
def dataType: org.apache.spark.sql.types.DataType

override protected def jsonFields: List[org.json4s.JsonAST.JField] = {
val jsonValue = (value, dataType) match {
case (null, _) => org.json4s.JsonAST.JNull
case (i: Int, org.apache.spark.sql.types.DateType) =>
org.json4s.JsonAST.JString(
org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaDate(i).toString)
case (l: Long, org.apache.spark.sql.types.TimestampType) =>
org.json4s.JsonAST.JString(
org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaTimestamp(l).toString)
Comment on lines +42 to +44

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Missing TimestampNTZType case in jsonFields

GpuLiteralShim.jsonFields handles TimestampType (LTZ) via DateTimeUtils.toJavaTimestamp, but TimestampNTZType falls through to the catch-all other branch, which calls .toString on the raw Long microsecond value. Any GpuLiteral with a TIMESTAMP_NTZ type will therefore serialize as a bare integer in explain plans and plan JSON output rather than a human-readable timestamp string. The parallel path for DateType already shows the expected pattern — TIMESTAMP_NTZ would need something like DateTimeUtils.microsToLocalDateTime instead of toJavaTimestamp (which would incorrectly apply a timezone offset).

case (other, _) => org.json4s.JsonAST.JString(other.toString)
}
("value" -> jsonValue) ::
("dataType" -> org.apache.spark.sql.rapids.execution.TrampolineUtil.jsonValue(dataType)
.asInstanceOf[org.json4s.JsonAST.JValue]) :: Nil
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ trait Spark400PlusCommonShims extends Spark350PlusNonDBShims {
"And(GreaterThanOrEqual(ref, lower), LessThanOrEqual(ref, upper); StructToJson is " +
"replaced by Invoke(Literal(StructsToJsonEvaluator), evaluate, string_type, arguments)",
InvokeCheck,
InvokeExprMeta)
(invoke, conf, p, r) => new InvokeExprMeta(invoke, conf, p, r))
.note("The supported types are not deterministic since it's a dynamic expression")
).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap
super.getExprs ++ shimExprs
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.url.ParseUrlEvaluator
import org.apache.spark.sql.rapids.{GpuParseUrl, GpuStructsToJson}
import org.apache.spark.sql.types._

case class InvokeExprMeta(
class InvokeExprMeta(
invoke: Invoke,
override val conf: RapidsConf,
p: Option[RapidsMeta[_, _, _]],
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ trait Spark400PlusDBShims extends Spark341PlusDBShims {
"And(GreaterThanOrEqual(ref, lower), LessThanOrEqual(ref, upper); StructToJson is " +
"replaced by Invoke(Literal(StructsToJsonEvaluator), evaluate, string_type, arguments)",
InvokeCheck,
InvokeExprMeta)
(invoke, conf, p, r) => new InvokeExprMeta(invoke, conf, p, r))
.note("The supported types are not deterministic since it's a dynamic expression")
).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap
super.getExprs ++ shimExprs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,40 @@ package com.nvidia.spark.rapids.shims

import com.nvidia.spark.rapids._

import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.{Expression, TimestampAddInterval}
import org.apache.spark.sql.rapids.shims.GpuTimestampAddInterval
import org.apache.spark.sql.types.{CalendarIntervalType, DayTimeIntervalType}
import org.apache.spark.unsafe.types.CalendarInterval

/**
* Empty TimeAddShims for Spark 4.1.0+ and Databricks 17.3.
* TimeAdd was renamed to TimestampAddInterval and is handled by DayTimeIntervalShims.
* TimestampAddInterval support for Spark 4.1.0+ and Databricks 17.3.
* TimeAdd was renamed to TimestampAddInterval in Spark 4.1.
*/
object TimeAddShims {
val exprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Map.empty
val exprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Seq(
GpuOverrides.expr[TimestampAddInterval](
"Adds interval to timestamp",
ExprChecks.binaryProject(TypeSig.TIMESTAMP, TypeSig.TIMESTAMP,
("start", TypeSig.TIMESTAMP, TypeSig.TIMESTAMP),
("interval", TypeSig.DAYTIME + TypeSig.lit(TypeEnum.CALENDAR)
.withPsNote(TypeEnum.CALENDAR, "month intervals are not supported"),
TypeSig.DAYTIME + TypeSig.CALENDAR)),
Comment on lines +38 to +42

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 TIMESTAMP_NTZ not covered by the new override

ExprChecks.binaryProject is registered with TypeSig.TIMESTAMP only, so TimestampAddInterval on TIMESTAMP_NTZ columns will silently fall back to CPU. In Spark 4.1 / DB 17.3, TimestampAddInterval supports both timestamp variants — the CPU implementation is timezone-ignorant for TIMESTAMP_NTZ, making it straightforward to add TypeSig.TIMESTAMP_NTZ to both the result and the start input sig. Worth confirming whether this omission is intentional (matching old TimeAdd shim behaviour) or an oversight given the new override. Is the omission of TIMESTAMP_NTZ intentional here (preserving prior TimeAdd behaviour) or should TypeSig.TIMESTAMP_NTZ be added to both the result and start signatures?

(timeAdd, conf, p, r) => new BinaryExprMeta[TimestampAddInterval](timeAdd, conf, p, r) {
override def tagExprForGpu(): Unit = {
GpuOverrides.extractLit(timeAdd.interval).foreach { lit =>
lit.dataType match {
case CalendarIntervalType =>
val intvl = lit.value.asInstanceOf[CalendarInterval]
if (intvl.months != 0) {
willNotWorkOnGpu("interval months isn't supported")
}
case _: DayTimeIntervalType =>
}
}
}

override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression =
GpuTimestampAddInterval(lhs, rhs)
})
).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap
}
Loading
Loading