@@ -86,7 +86,8 @@ class CoreDataflowNodeProcessor(rawGraph: DataflowGraph) {
           identifier = table.identifier,
           specifiedSchema = table.specifiedSchema,
           incomingFlowIdentifiers = flowsToTable.map(_.identifier).toSet,
-          availableFlows = resolvedFlowsToTable
+          availableFlows = resolvedFlowsToTable,
+          isStreamingTable = table.isStreamingTable
         )
         resolvedInputs.put(table.identifier, virtualTableInput)
         Seq(table)
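For reference, a hedged sketch of what this change implies for the virtual table input: the node processor now records whether the backing table is a streaming table, so flow analysis can choose a batch or streaming materialization at read time (see the `vt.load(asStreaming = ...)` call in the FlowAnalysis hunks below). The field types and the stub body here are assumptions, not the PR's actual definition.

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.types.StructType

// Hypothetical shape only; the real VirtualTableInput lives elsewhere in this PR.
case class VirtualTableInputSketch(
    identifier: TableIdentifier,
    specifiedSchema: Option[StructType],
    incomingFlowIdentifiers: Set[TableIdentifier],
    isStreamingTable: Boolean) {

  // asStreaming = false against a streaming table is allowed (a batch read of a streaming
  // dataset); the opposite direction is rejected later during graph validation.
  def load(asStreaming: Boolean): DataFrame =
    sys.error("illustrative stub only")
}
```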
@@ -22,8 +22,6 @@ import scala.util.Try
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.classic.DataFrame
-import org.apache.spark.sql.pipelines.AnalysisWarning
-import org.apache.spark.sql.pipelines.util.InputReadOptions
 import org.apache.spark.sql.types.StructType
 
 /**
@@ -99,8 +97,7 @@ case class FlowFunctionResult(
     streamingInputs: Set[ResolvedInput],
     usedExternalInputs: Set[TableIdentifier],
     dataFrame: Try[DataFrame],
-    sqlConf: Map[String, String],
-    analysisWarnings: Seq[AnalysisWarning] = Nil) {
+    sqlConf: Map[String, String]) {
 
   /**
    * Returns the names of all of the [[Input]]s used when resolving this [[Flow]]. If the
@@ -165,7 +162,7 @@ trait ResolvedFlow extends ResolutionCompletedFlow with Input {
 
   /** Returns the schema of the output of this [[Flow]]. */
   def schema: StructType = df.schema
-  override def load(readOptions: InputReadOptions): DataFrame = df
+  override def load: DataFrame = df
   def inputs: Set[TableIdentifier] = funcResult.inputs
 }
 
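A minimal sketch of the interface simplification reflected in the hunks above: `load` no longer takes `InputReadOptions`, so a resolved flow just returns its analyzed DataFrame, and per-read options travel on the reader instead (see the FlowAnalysis changes below). Trait and member names here are illustrative, not the PR's exact definitions.

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.StructType

// Before: def load(readOptions: InputReadOptions): DataFrame
// After:  def load: DataFrame
trait InputSketch {
  def load: DataFrame
}

trait ResolvedFlowSketch extends InputSketch {
  def df: DataFrame
  def schema: StructType = df.schema  // schema of the flow's output
  override def load: DataFrame = df   // a resolved flow simply hands back its DataFrame
}
```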
@@ -23,10 +23,8 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{CTESubstitution, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
-import org.apache.spark.sql.classic.{DataFrame, Dataset, DataStreamReader, SparkSession}
-import org.apache.spark.sql.pipelines.AnalysisWarning
+import org.apache.spark.sql.classic.{DataFrame, DataFrameReader, Dataset, DataStreamReader, SparkSession}
 import org.apache.spark.sql.pipelines.graph.GraphIdentifierManager.{ExternalDatasetIdentifier, InternalDatasetIdentifier}
-import org.apache.spark.sql.pipelines.util.{BatchReadOptions, InputReadOptions, StreamingReadOptions}
 
 
 object FlowAnalysis {
@@ -67,8 +65,7 @@
         streamingInputs = ctx.streamingInputs.toSet,
         usedExternalInputs = ctx.externalInputs.toSet,
         dataFrame = df,
-        sqlConf = confs,
-        analysisWarnings = ctx.analysisWarnings.toList
+        sqlConf = confs
       )
     }
   }
@@ -116,8 +113,7 @@
           val resolved = readStreamInput(
             context,
             name = IdentifierHelper.toQuotedString(u.multipartIdentifier),
-            spark.readStream,
-            streamingReadOptions = StreamingReadOptions()
+            streamReader = spark.readStream.options(u.options)
           ).queryExecution.analyzed
           // Spark Connect requires the PLAN_ID_TAG to be propagated to the resolved plan
           // to allow correct analysis of the parent plan that contains this subquery
@@ -128,7 +124,7 @@
           val resolved = readBatchInput(
             context,
             name = IdentifierHelper.toQuotedString(u.multipartIdentifier),
-            batchReadOptions = BatchReadOptions()
+            batchReader = spark.read.options(u.options)
           ).queryExecution.analyzed
           // Spark Connect requires the PLAN_ID_TAG to be propagated to the resolved plan
           // to allow correct analysis of the parent plan that contains this subquery
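Both hunks above attach the user's read options directly to the reader (`spark.read.options(u.options)` / `spark.readStream.options(u.options)`) rather than carrying them in a separate options object. A small, self-contained illustration of that pattern; the option keys and path are made up for the example:

```scala
import org.apache.spark.sql.SparkSession

object ReaderOptionsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("reader-options").getOrCreate()

    // Options are attached to the reader up front and only take effect when a read is
    // executed, so the configured reader can be passed around like the old options object.
    val userOptions = Map("header" -> "true", "inferSchema" -> "true") // hypothetical options
    val reader = spark.read.options(userOptions)
    // e.g. reader.csv("/tmp/example.csv") or reader.table("some_db.some_table")

    spark.stop()
  }
}
```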
@@ -147,23 +143,25 @@
    * All the public APIs that read from a dataset should call this function to read the dataset.
    *
    * @param name the name of the Dataset to be read.
-   * @param batchReadOptions Options for this batch read
+   * @param batchReader the batch dataframe reader, possibly with options, to execute the read
+   *                    with.
    * @return batch DataFrame that represents data from the specified Dataset.
    */
   final private def readBatchInput(
       context: FlowAnalysisContext,
       name: String,
-      batchReadOptions: BatchReadOptions
+      batchReader: DataFrameReader
   ): DataFrame = {
     GraphIdentifierManager.parseAndQualifyInputIdentifier(context, name) match {
       case inputIdentifier: InternalDatasetIdentifier =>
-        readGraphInput(context, inputIdentifier, batchReadOptions)
+        readGraphInput(context, inputIdentifier, isStreamingRead = false)
 
       case inputIdentifier: ExternalDatasetIdentifier =>
         readExternalBatchInput(
           context,
           inputIdentifier = inputIdentifier,
-          name = name
+          name = name,
+          batchReader = batchReader
         )
     }
   }
@@ -177,21 +175,19 @@
    *
    * @param name the name of the Dataset to be read.
    * @param streamReader The [[DataStreamReader]] that may hold read options specified by the user.
-   * @param streamingReadOptions Options for this streaming read.
    * @return streaming DataFrame that represents data from the specified Dataset.
    */
   final private def readStreamInput(
       context: FlowAnalysisContext,
       name: String,
-      streamReader: DataStreamReader,
-      streamingReadOptions: StreamingReadOptions
+      streamReader: DataStreamReader
   ): DataFrame = {
     GraphIdentifierManager.parseAndQualifyInputIdentifier(context, name) match {
       case inputIdentifier: InternalDatasetIdentifier =>
         readGraphInput(
           context,
           inputIdentifier,
-          streamingReadOptions
+          isStreamingRead = true
         )
 
       case inputIdentifier: ExternalDatasetIdentifier =>
@@ -208,13 +204,13 @@
    * Internal helper to reference dataset defined in the same [[DataflowGraph]].
    *
    * @param inputIdentifier The identifier of the Dataset to be read.
-   * @param readOptions Options for this read (may be either streaming or batch options)
+   * @param isStreamingRead Whether this is a streaming read or batch read.
    * @return streaming or batch DataFrame that represents data from the specified Dataset.
    */
   final private def readGraphInput(
       ctx: FlowAnalysisContext,
       inputIdentifier: InternalDatasetIdentifier,
-      readOptions: InputReadOptions
+      isStreamingRead: Boolean
   ): DataFrame = {
     val datasetIdentifier = inputIdentifier.identifier
 
@@ -230,8 +226,15 @@
         // Dataset is resolved, so we can read from it
         ctx.availableInput(datasetIdentifier)
       }
+    val inputDF = i match {
+      case vt: VirtualTableInput =>
+        // Unlike temporary views (which would have been substituted into flows by this point), we
+        // allow tables to batch read a streaming dataset. We do not allow the opposite however,
+        // which is checked on the resolved graph during graph validation.
+        vt.load(asStreaming = isStreamingRead)
+      case _ => i.load
+    }
 
-    val inputDF = i.load(readOptions)
     i match {
       // If the referenced input is a [[Flow]], because the query plans will be fused
       // together, we also need to fuse their confs.
@@ -252,30 +255,22 @@
       qualifier = Seq(datasetIdentifier.catalog, datasetIdentifier.database).flatten
     )
 
-    readOptions match {
-      case sro: StreamingReadOptions =>
-        if (!inputDF.isStreaming && incompatibleViewReadCheck) {
-          throw new AnalysisException(
-            "INCOMPATIBLE_BATCH_VIEW_READ",
-            Map("datasetIdentifier" -> datasetIdentifier.toString)
-          )
-        }
-
-        if (sro.droppedUserOptions.nonEmpty) {
-          ctx.analysisWarnings += AnalysisWarning.StreamingReaderOptionsDropped(
-            sourceName = datasetIdentifier.unquotedString,
-            droppedOptions = sro.droppedUserOptions.keys.toSeq
-          )
-        }
-        ctx.streamingInputs += ResolvedInput(i, aliasIdentifier)
-      case _ =>
-        if (inputDF.isStreaming && incompatibleViewReadCheck) {
-          throw new AnalysisException(
-            "INCOMPATIBLE_STREAMING_VIEW_READ",
-            Map("datasetIdentifier" -> datasetIdentifier.toString)
-          )
-        }
-        ctx.batchInputs += ResolvedInput(i, aliasIdentifier)
+    if (isStreamingRead) {
+      if (!inputDF.isStreaming && incompatibleViewReadCheck) {
+        throw new AnalysisException(
+          "INCOMPATIBLE_BATCH_VIEW_READ",
+          Map("datasetIdentifier" -> datasetIdentifier.toString)
+        )
+      }
+      ctx.streamingInputs += ResolvedInput(i, aliasIdentifier)
+    } else {
+      if (inputDF.isStreaming && incompatibleViewReadCheck) {
+        throw new AnalysisException(
+          "INCOMPATIBLE_STREAMING_VIEW_READ",
+          Map("datasetIdentifier" -> datasetIdentifier.toString)
+        )
+      }
+      ctx.batchInputs += ResolvedInput(i, aliasIdentifier)
     }
     Dataset.ofRows(
       ctx.spark,
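The `if (isStreamingRead)` branch above boils down to a small compatibility rule. A standalone restatement in plain Scala (names are hypothetical, and the `incompatibleViewReadCheck` flag that can disable the check is ignored here):

```scala
object ReadCompatibilitySketch {
  // Streaming read over a non-streaming input fails with INCOMPATIBLE_BATCH_VIEW_READ;
  // batch read over a streaming input fails with INCOMPATIBLE_STREAMING_VIEW_READ;
  // matching modes pass.
  def readCompatibilityError(isStreamingRead: Boolean, inputIsStreaming: Boolean): Option[String] =
    (isStreamingRead, inputIsStreaming) match {
      case (true, false) => Some("INCOMPATIBLE_BATCH_VIEW_READ")
      case (false, true) => Some("INCOMPATIBLE_STREAMING_VIEW_READ")
      case _             => None
    }

  def main(args: Array[String]): Unit = {
    assert(readCompatibilityError(isStreamingRead = true, inputIsStreaming = false)
      .contains("INCOMPATIBLE_BATCH_VIEW_READ"))
    assert(readCompatibilityError(isStreamingRead = false, inputIsStreaming = true)
      .contains("INCOMPATIBLE_STREAMING_VIEW_READ"))
    assert(readCompatibilityError(isStreamingRead = true, inputIsStreaming = true).isEmpty)
  }
}
```

Presumably a batch read of a streaming table avoids this check because `vt.load(asStreaming = false)` already yields a batch DataFrame; the opposite direction is deferred to graph validation, per the comment in the earlier hunk.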
@@ -293,11 +288,11 @@
   final private def readExternalBatchInput(
       context: FlowAnalysisContext,
       inputIdentifier: ExternalDatasetIdentifier,
-      name: String): DataFrame = {
+      name: String,
+      batchReader: DataFrameReader): DataFrame = {
 
-    val spark = context.spark
     context.externalInputs += inputIdentifier.identifier
-    spark.read.table(inputIdentifier.identifier.quotedString)
+    batchReader.table(inputIdentifier.identifier.quotedString)
   }
 
   /**
@@ -18,11 +18,9 @@
 package org.apache.spark.sql.pipelines.graph
 
 import scala.collection.mutable
-import scala.collection.mutable.ListBuffer
 
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.classic.SparkSession
-import org.apache.spark.sql.pipelines.AnalysisWarning
 
 /**
  * A context used when evaluating a `Flow`'s query into a concrete DataFrame.
@@ -44,7 +42,6 @@ private[pipelines] case class FlowAnalysisContext(
     streamingInputs: mutable.HashSet[ResolvedInput] = mutable.HashSet.empty,
     requestedInputs: mutable.HashSet[TableIdentifier] = mutable.HashSet.empty,
     shouldLowerCaseNames: Boolean = false,
-    analysisWarnings: mutable.Buffer[AnalysisWarning] = new ListBuffer[AnalysisWarning],
     spark: SparkSession,
     externalInputs: mutable.HashSet[TableIdentifier] = mutable.HashSet.empty
 ) {
@@ -212,7 +212,7 @@ trait GraphValidations extends Logging {
   }
 
   protected def validateUserSpecifiedSchemas(): Unit = {
-    flows.flatMap(f => table.get(f.identifier)).foreach { t: TableInput =>
+    flows.flatMap(f => table.get(f.identifier)).foreach { t: TableElement =>
       // The output inferred schema of a table is the declared schema merged with the
       // schema of all incoming flows. This must be equivalent to the declared schema.
       val inferredSchema = SchemaInferenceUtils
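The comment in this hunk states the invariant being validated: merging the declared schema with the schemas of all incoming flows must leave the declared schema unchanged. A toy illustration of that invariant with plain `StructType`s; this is not the PR's `SchemaInferenceUtils`, and the merge here is a simplified name-based union:

```scala
import org.apache.spark.sql.types._

object DeclaredSchemaInvariantSketch {
  // Simplified "merge": keep declared fields and append any flow fields not already declared.
  def merge(declared: StructType, fromFlow: StructType): StructType =
    StructType(declared.fields ++ fromFlow.fields.filterNot(f => declared.fieldNames.contains(f.name)))

  def main(args: Array[String]): Unit = {
    val declared = StructType(Seq(StructField("id", LongType), StructField("name", StringType)))
    val flowOk   = StructType(Seq(StructField("id", LongType)))
    val flowBad  = StructType(Seq(StructField("id", LongType), StructField("extra", IntegerType)))

    assert(merge(declared, flowOk) == declared)  // flow adds nothing undeclared: passes validation
    assert(merge(declared, flowBad) != declared) // flow introduces "extra": would be rejected
  }
}
```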
@@ -29,7 +29,9 @@ object State extends Logging {
    * @param graph The graph to reset.
    * @param env The current update context.
    */
-  private def findElementsToReset(graph: DataflowGraph, env: PipelineUpdateContext): Seq[Input] = {
+  private def findElementsToReset(
+      graph: DataflowGraph,
+      env: PipelineUpdateContext): Seq[GraphElement] = {
     // If tableFilter is an instance of SomeTables, this is a refresh selection and all tables
     // to reset should be resettable; Otherwise, this is a full graph update, and we reset all
     // tables that are resettable.
@@ -71,8 +73,8 @@
    * - Clearing checkpoint data
    * - Truncating table data
    */
-  def reset(resolvedGraph: DataflowGraph, env: PipelineUpdateContext): Seq[Input] = {
-    val elementsToReset: Seq[Input] = findElementsToReset(resolvedGraph, env)
+  def reset(resolvedGraph: DataflowGraph, env: PipelineUpdateContext): Seq[GraphElement] = {
+    val elementsToReset: Seq[GraphElement] = findElementsToReset(resolvedGraph, env)
 
     elementsToReset.foreach {
       case f: ResolvedFlow => reset(f, env, resolvedGraph)
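The two hunks above widen the element type from `Input` to `GraphElement`, so a single reset pass can cover both flows (checkpoint state) and tables (stored data), as the doc comment describes. A self-contained toy of that dispatch pattern, using local stand-in types rather than Spark's:

```scala
object ResetDispatchToy {
  sealed trait GraphElementToy { def name: String }
  final case class FlowToy(name: String) extends GraphElementToy
  final case class TableToy(name: String) extends GraphElementToy

  // Mirrors the shape of `elementsToReset.foreach { case f: ResolvedFlow => ... }` above.
  def describeResets(elements: Seq[GraphElementToy]): Seq[String] = elements.map {
    case FlowToy(n)  => s"cleared checkpoint data for flow $n"
    case TableToy(n) => s"truncated data for table $n"
  }

  def main(args: Array[String]): Unit =
    describeResets(Seq(FlowToy("events_flow"), TableToy("events"))).foreach(println)
}
```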