NVIDIA · wjxiz1992 · Jun 17, 2026 · Jun 17, 2026 · Jun 25, 2026 · Jun 26, 2026
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2025, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -652,19 +652,24 @@ case class GpuOrcMultiFilePartitionReaderFactory(
 
     metrics.getOrElse(FILTER_TIME, NoopMetric).ns {
       metrics.getOrElse(SCAN_TIME, NoopMetric).ns {
-        files.map { file =>
-          val orcPartitionReaderContext = filterHandler.filterStripes(file, dataSchema,
-            readDataSchema, partitionSchema)
-          compressionAndStripes.getOrElseUpdate(orcPartitionReaderContext.compressionKind,
-            new ArrayBuffer[OrcSingleStripeMeta]) ++=
-            orcPartitionReaderContext.blockIterator.map(block =>
-              OrcSingleStripeMeta(
-                orcPartitionReaderContext.filePath,
-                OrcDataStripe(OrcStripeWithMeta(block, orcPartitionReaderContext)),
-                file.partitionValues,
-                OrcSchemaWrapper(orcPartitionReaderContext.updatedReadSchema),
-                readDataSchema,
-                OrcExtraInfo(orcPartitionReaderContext.requestedMapping)))
+        files.foreach { file =>
+          try {
+            val orcPartitionReaderContext = filterHandler.filterStripes(file, dataSchema,
+              readDataSchema, partitionSchema)
+            compressionAndStripes.getOrElseUpdate(orcPartitionReaderContext.compressionKind,
+              new ArrayBuffer[OrcSingleStripeMeta]) ++=
+              orcPartitionReaderContext.blockIterator.map(block =>
+                OrcSingleStripeMeta(
+                  orcPartitionReaderContext.filePath,
+                  OrcDataStripe(OrcStripeWithMeta(block, orcPartitionReaderContext)),
+                  file.partitionValues,
+                  OrcSchemaWrapper(orcPartitionReaderContext.updatedReadSchema),
+                  readDataSchema,
+                  OrcExtraInfo(orcPartitionReaderContext.requestedMapping)))
+          } catch {
+            case e: FileNotFoundException if ignoreMissingFiles =>
+              logWarning(s"Skipped missing file: ${file.filePath}", e)
+          }
         }
       }
     }

diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/OrcScanSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/OrcScanSuite.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2025, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,8 +16,12 @@
 
 package com.nvidia.spark.rapids
 
+import org.apache.hadoop.fs.Path
+
 import org.apache.spark.SparkConf
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.functions.col
+import org.apache.spark.sql.rapids.GpuFileSourceScanExec
 import org.apache.spark.sql.rapids.shims.TrampolineConnectShims.SparkSession
 import org.apache.spark.sql.types.{DateType, IntegerType, LongType, StringType, StructField, StructType}
 
@@ -76,6 +80,60 @@ class OrcScanSuite extends SparkQueryCompareTestSuite {
           StructField("_col2", StringType),
           StructField("_col1", LongType))))) { frame => frame }
 
+  test("ORC coalescing reader honors ignoreMissingFiles") {
+    def collectAfterDeletingPlannedFiles(spark: SparkSession, checkGpu: Boolean): Seq[String] = {
+      import spark.implicits._
+
+      withTempPath { base =>
+        val basePath = base.getCanonicalPath
+
+        Seq("0").toDF("a").write.mode("overwrite").format("orc")
+          .save(new Path(basePath, "second").toString)
+        Seq("1").toDF("a").write.mode("overwrite").format("orc")
+          .save(new Path(basePath, "fourth").toString)
+
+        val firstPath = new Path(basePath, "first")
+        val thirdPath = new Path(basePath, "third")
+        val fs = thirdPath.getFileSystem(spark.sessionState.newHadoopConf())
+
+        Seq("2").toDF("a").write.mode("overwrite").format("orc").save(firstPath.toString)
+        Seq("3").toDF("a").write.mode("overwrite").format("orc").save(thirdPath.toString)
+
+        val filesToDelete = Seq(firstPath, thirdPath).flatMap { path =>
+          fs.listStatus(path).filter(_.isFile).map(_.getPath)
+        }
+        val df = spark.read.format("orc").load(
+          firstPath.toString,
+          new Path(basePath, "second").toString,
+          thirdPath.toString,
+          new Path(basePath, "fourth").toString)
+
+        if (checkGpu) {
+          val gpuScans = df.queryExecution.executedPlan.collect {
+            case _: GpuFileSourceScanExec => true
+          }
+          assert(gpuScans.nonEmpty, "ORC read is not running on GPU")
+        }
+
+        filesToDelete.foreach(file => fs.delete(file, false))
+        assert(fs.delete(thirdPath, true))
+
+        df.collect().map(_.getString(0)).sorted.toSeq
+      }
+    }
+
+    val conf = new SparkConf()
+      .set(SQLConf.USE_V1_SOURCE_LIST.key, "orc")
+      .set(SQLConf.IGNORE_MISSING_FILES.key, "true")
+      .set(RapidsConf.ORC_READER_TYPE.key, RapidsReaderType.COALESCING.toString)
+
+    val cpuResult = withCpuSparkSession(collectAfterDeletingPlannedFiles(_, checkGpu = false), conf)
+    val gpuResult = withGpuSparkSession(collectAfterDeletingPlannedFiles(_, checkGpu = true), conf)
+
+    assertResult(Seq("0", "1"))(cpuResult)
+    assertResult(cpuResult)(gpuResult)
+  }
+
   /**
    *
    * The calendar of hybrid-Julian-calendar.orc file is hybrid Julian Gregorian