NVIDIA · gerashegalov · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/...a-33x-41x/scala/com/nvidia/spark/rapids/delta/common/GpuDeltaParquetFileFormatBase2.scala b/...a-33x-41x/scala/com/nvidia/spark/rapids/delta/common/GpuDeltaParquetFileFormatBase2.scala
@@ -502,7 +502,7 @@ class GpuDeltaParquetFileFormatBase2(
       queryUsesInputFile: Boolean)
     extends AbstractGpuParquetMultiFilePartitionReaderFactory(sqlConf, broadcastedConf,
       dataSchema, readDataSchema, partitionSchema, filters, rapidsConf, poolConfBuilder,
-      metrics, queryUsesInputFile) with Logging {
+      metrics, queryUsesInputFile) {
 
     logDebug("Using GpuDeltaParquetMultiFilePartitionReaderFactory for multi-threaded Parquet " +
       "reading with deletion vectors")
@@ -584,11 +584,11 @@ class GpuDeltaParquetFileFormatBase2(
             val (rowGroupOffsets, rowGroupNumRows) =
               RapidsDeletionVectors.getRowGroupMetadata(singleFileInfo.blocks)
             clippedBlocks ++= singleFileInfo.blocks.zipWithIndex.map { case (block, i) =>
-              ParquetSingleDataBlockMeta(
+              new ParquetSingleDataBlockMeta(
                 singleFileInfo.filePath,
-                ParquetDataBlock(block, compressCfg),
+                new ParquetDataBlock(block, compressCfg),
                 metaAndFile.file.partitionValues,
-                ParquetSchemaWrapper(singleFileInfo.schema),
+                new ParquetSchemaWrapper(singleFileInfo.schema),
                 singleFileInfo.readSchema,
                 new DeltaParquetExtraInfo(
                   singleFileInfo.dateRebaseMode,

diff --git a/...b173/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormatNativeDV.scala b/...b173/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormatNativeDV.scala
@@ -546,7 +546,7 @@ case class GpuDeltaParquetFileFormatNativeDV(
       tablePathOpt: Option[String])
     extends AbstractGpuParquetMultiFilePartitionReaderFactory(sqlConf, broadcastedConf,
       dataSchema, readDataSchema, partitionSchema, filters, rapidsConf, poolConfBuilder,
-      metrics, queryUsesInputFile) with Logging {
+      metrics, queryUsesInputFile) {
 
     logDebug("Using GpuDeltaParquetMultiFilePartitionReaderFactory for multi-threaded Parquet " +
       "reading with deletion vectors")
@@ -636,11 +636,11 @@ case class GpuDeltaParquetFileFormatNativeDV(
             val (rowGroupOffsets, rowGroupNumRows) =
               RapidsDeletionVectors.getRowGroupMetadata(singleFileInfo.blocks)
             clippedBlocks ++= singleFileInfo.blocks.zipWithIndex.map { case (block, i) =>
-              ParquetSingleDataBlockMeta(
+              new ParquetSingleDataBlockMeta(
                 singleFileInfo.filePath,
-                ParquetDataBlock(block, compressCfg),
+                new ParquetDataBlock(block, compressCfg),
                 metaAndFile.file.partitionValues,
-                ParquetSchemaWrapper(singleFileInfo.schema),
+                new ParquetSchemaWrapper(singleFileInfo.schema),
                 singleFileInfo.readSchema,
                 new DeltaParquetExtraInfo(
                   singleFileInfo.dateRebaseMode,

diff --git a/...ain/scala/com/nvidia/spark/rapids/iceberg/parquet/GpuCoalescingIcebergParquetReader.scala b/...ain/scala/com/nvidia/spark/rapids/iceberg/parquet/GpuCoalescingIcebergParquetReader.scala
@@ -68,11 +68,11 @@ class GpuCoalescingIcebergParquetReader(
             conf.metrics)
 
           info.blocks.map { block =>
-            ParquetSingleDataBlockMeta(
+            new ParquetSingleDataBlockMeta(
               info.filePath,
-              ParquetDataBlock(block, CpuCompressionConfig.disabled()),
+              new ParquetDataBlock(block, CpuCompressionConfig.disabled()),
               InternalRow.empty,
-              ParquetSchemaWrapper(info.schema),
+              new ParquetSchemaWrapper(info.schema),
               info.readSchema,
               IcebergParquetExtraInfo(
                 info.dateRebaseMode,

diff --git a/sql-plugin-fileio/src/main/java/com/nvidia/spark/rapids/FileUtils.java b/sql-plugin-fileio/src/main/java/com/nvidia/spark/rapids/FileUtils.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019-2026, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Static helpers for creating files through the Hadoop {@link FileSystem} API.
+ */
+public final class FileUtils {
+  /** Not instantiable: this class only exposes static members. */
+  private FileUtils() {}
+
+  /**
+   * Pairs a newly created file's output stream with the path it was created at.
+   */
+  public static final class TempFile {
+    private final Path path;
+    private final FSDataOutputStream outputStream;
+
+    TempFile(FSDataOutputStream outputStream, Path path) {
+      this.path = path;
+      this.outputStream = outputStream;
+    }
+
+    /** @return the stream opened on the created file */
+    public FSDataOutputStream getOutputStream() {
+      return this.outputStream;
+    }
+
+    /** @return the path the file was created at */
+    public Path getPath() {
+      return this.path;
+    }
+  }
+
+  /**
+   * Creates a new file named {@code pathPrefix + <random non-negative int> + pathSuffix} on the
+   * file system that {@code pathPrefix} resolves to.
+   *
+   * @param conf Hadoop configuration used to look up the file system
+   * @param pathPrefix leading part of the generated path
+   * @param pathSuffix trailing part of the generated path; {@code null} is treated as empty
+   * @return the output stream of the created file together with its path
+   * @throws IOException if the file system lookup, existence check, or creation fails for a
+   *     reason other than the candidate file already existing
+   */
+  public static TempFile createTempFile(
+      Configuration conf, String pathPrefix, String pathSuffix) throws IOException {
+    final FileSystem fileSystem = new Path(pathPrefix).getFileSystem(conf);
+    final Random random = new Random();
+    final String tail = (pathSuffix == null) ? "" : pathSuffix;
+    for (;;) {
+      final Path candidate = new Path(pathPrefix + random.nextInt(Integer.MAX_VALUE) + tail);
+      if (fileSystem.exists(candidate)) {
+        // Name already taken: draw another random number.
+        continue;
+      }
+      try {
+        // overwrite=false, so an existing file is reported instead of being overwritten.
+        return new TempFile(fileSystem.create(candidate, false), candidate);
+      } catch (FileAlreadyExistsException raced) {
+        // Another writer created the file between exists() and create(); try a new name.
+      }
+    }
+  }
+}
diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/SpillableKudoTable.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/SpillableKudoTable.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids;
+
+import ai.rapids.cudf.HostMemoryBuffer;
+import com.nvidia.spark.rapids.jni.kudo.KudoTable;
+import com.nvidia.spark.rapids.jni.kudo.KudoTableHeader;
+
+/**
+ * A kudo table header paired with its host-side data held in a {@link SpillableHostBuffer}.
+ * The handle is {@code null} when the table was created without a data buffer.
+ */
+public class SpillableKudoTable implements AutoCloseable {
+  public final KudoTableHeader header;
+  public final long length;
+  private final SpillableHostBuffer shb;
+
+  /**
+   * @param header header describing the table
+   * @param length value exposed through {@link #length}
+   * @param shb spillable handle for the table data, or {@code null} when there is none
+   */
+  public SpillableKudoTable(KudoTableHeader header, long length, SpillableHostBuffer shb) {
+    this.header = header;
+    this.length = length;
+    this.shb = shb;
+  }
+
+  /**
+   * Builds an instance from a header and an optional host buffer.
+   *
+   * @param header header describing the table
+   * @param buffer host buffer with the table data; {@code null} yields a zero-length instance
+   *     with no spillable handle
+   * @return the new instance
+   */
+  public static SpillableKudoTable from(KudoTableHeader header, HostMemoryBuffer buffer) {
+    if (buffer == null) {
+      return new SpillableKudoTable(header, 0, null);
+    }
+    final long bufferLength = buffer.getLength();
+    final SpillableHostBuffer handle = SpillableHostBuffer.apply(
+        buffer, bufferLength, SpillPriorities.ACTIVE_BATCHING_PRIORITY);
+    return new SpillableKudoTable(header, bufferLength, handle);
+  }
+
+  /**
+   * Creates a {@link KudoTable} over this table's header and the host buffer obtained from the
+   * spillable handle, or over a {@code null} buffer when there is no handle.
+   */
+  public KudoTable makeKudoTable() {
+    return new KudoTable(header, shb == null ? null : shb.getHostBuffer());
+  }
+
+  @Override
+  public String toString() {
+    return new StringBuilder("SpillableKudoTable{header=")
+        .append(header)
+        .append(", shb=")
+        .append(shb)
+        .append('}')
+        .toString();
+  }
+
+  /** Closes the spillable handle, if there is one. */
+  @Override
+  public void close() {
+    if (shb == null) {
+      return;
+    }
+    shb.close();
+  }
+}
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/AvroDataFileReader.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/AvroDataFileReader.scala
@@ -33,7 +33,7 @@ import org.apache.commons.io.output.CountingOutputStream
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 
-import org.apache.spark.sql.rapids.shims.TrampolineConnectShims
+import org.apache.spark.sql.rapids.execution.TrampolineUtil
 
 private[rapids] class AvroSeekableInputStream(in: SeekableInput) extends InputStream
     with SeekableInput {
@@ -82,7 +82,7 @@ case class Header(
   @transient
   lazy val schema: Schema = {
     getMetaString(SCHEMA)
-      .map(s => TrampolineConnectShims.createSchemaParser().parse(s))
+      .map(s => TrampolineUtil.createSchemaParser().parse(s))
       .orNull
   }
 
@@ -127,26 +127,6 @@ object Header {
   }
 }
 
-/**
- * The each Avro block information
- *
- * @param blockStart the start of block
- * @param blockSize  the whole block size = the size between two sync buffers + sync buffer
- * @param dataSize   the block data size
- * @param count      how many entries in this block
- */
-case class BlockInfo(blockStart: Long, blockSize: Long, dataSize: Long, count: Long)
-
-/**
- * The mutable version of the BlockInfo without block start.
- * This is for reusing an existing instance when accessing data in the iterator pattern.
- *
- * @param blockSize the whole block size (the size between two sync buffers + sync buffer size)
- * @param dataSize  the data size in this block
- * @param count   how many entries in this block
- */
-case class MutableBlockInfo(var blockSize: Long, var dataSize: Long, var count: Long)
-
 /** The parent of the Rapids Avro file readers */
 abstract class AvroFileReader(si: SeekableInput) extends AutoCloseable {
   // Children should update this pointer accordingly.
@@ -328,7 +308,7 @@ class AvroMetaFileReader(si: SeekableInput) extends AvroFileReader(si) {
       val dataSizeLongLen = BinaryData.encodeLong(blockDataSize, buf, 0)
       // (len of entries) + (len of block size) + (block size) + (sync size)
       val blockLength = countLongLen + dataSizeLongLen + blockDataSize + SYNC_SIZE
-      blocks += BlockInfo(curBlockStart, blockLength, blockDataSize, blockCount)
+      blocks += new BlockInfo(curBlockStart, blockLength, blockDataSize, blockCount)
 
       // Do we need to check the SYNC BUFFER, or just let cudf do it?
       curBlockStart += blockLength
@@ -405,11 +385,11 @@ class AvroDataFileReader(si: SeekableInput) extends AvroFileReader(si) {
       throw new NoSuchElementException
     }
     if (reuse == null) {
-      MutableBlockInfo(curBlockSize, curDataSize, curCount)
+      new MutableBlockInfo(curBlockSize, curDataSize, curCount)
     } else {
-      reuse.blockSize = curBlockSize
-      reuse.dataSize = curDataSize
-      reuse.count = curCount
+      reuse.setBlockSize(curBlockSize)
+      reuse.setDataSize(curDataSize)
+      reuse.setCount(curCount)
       reuse
     }
   }

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/DumpUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/DumpUtils.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2025, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,10 +28,15 @@ import com.nvidia.spark.rapids.jni.kudo.KudoSerializer
 import org.apache.commons.io.IOUtils
 import org.apache.hadoop.conf.Configuration
 
-import org.apache.spark.internal.Logging
 import org.apache.spark.sql.vectorized.ColumnarBatch
 
-object DumpUtils extends Logging {
+object DumpUtils {
+  private val log = org.slf4j.LoggerFactory.getLogger(getClass.getName.stripSuffix("$"))
+
+  // Logs at WARN; the level guard keeps the by-name `msg` unevaluated when WARN is off.
+  private def logWarning(msg: => String): Unit =
+    if (log.isWarnEnabled) log.warn(msg)
+
   /**
    * Debug utility to dump a host memory buffer to a file.
    *
@@ -51,15 +56,16 @@ object DumpUtils extends Logging {
       prefix: String,
       suffix: String): String = {
     try {
-      val (out, path) = FileUtils.createTempFile(conf, prefix, suffix)
+      val tempFile = FileUtils.createTempFile(conf, prefix, suffix)
+      val out = tempFile.getOutputStream
       withResource(out) { _ =>
         withResource(data.slice(offset, len)) { hmb =>
           withResource(new HostMemoryInputStream(hmb, hmb.getLength)) { in =>
             IOUtils.copy(in, out)
           }
         }
       }
-      path.toString
+      tempFile.getPath.toString
     } catch {
       case e: Exception =>
         log.error(s"Error attempting to dump data", e)
@@ -73,15 +79,16 @@ object DumpUtils extends Logging {
       prefix: String,
       suffix: String): String = {
     try {
-      val (out, path) = FileUtils.createTempFile(conf, prefix, suffix)
+      val tempFile = FileUtils.createTempFile(conf, prefix, suffix)
+      val out = tempFile.getOutputStream
       withResource(out) { _ =>
         data.foreach { hmb =>
           withResource(new HostMemoryInputStream(hmb, hmb.getLength)) { in =>
             IOUtils.copy(in, out)
           }
         }
       }
-      path.toString
+      tempFile.getPath.toString
     } catch {
       case e: Exception =>
         log.error(s"Error attempting to dump data", e)
@@ -324,7 +331,15 @@ private class ColumnIndex() {
   }
 }
 
-object ParquetDumper extends Logging {
+object ParquetDumper {
+  private val log = org.slf4j.LoggerFactory.getLogger(getClass.getName.stripSuffix("$"))
+
+  // Logs at DEBUG; the by-name `msg` is only evaluated when DEBUG is enabled.
+  private def logDebug(msg: => String): Unit = {
+    val debugOn = log.isDebugEnabled
+    if (debugOn) log.debug(msg)
+  }
+
   val COMPRESS_TYPE = CompressionType.SNAPPY
 
   def parquetWriterOptionsFromTable[T <: NestedBuilder[_, _], V <: ColumnWriterOptions](

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/FileUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/FileUtils.scala