NVIDIA · eordentlich · Jun 25, 2026 · Jan 22, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/.claude/skills/update-rapids-version/SKILL.md b/.claude/skills/update-rapids-version/SKILL.md
@@ -0,0 +1,25 @@
+---
+name: update-rapids-version
+description: Updates python code (e.g. internal API calls) so that tests pass when run in a conda environment with an updated RAPIDS version.
+---
+
+You will be running in an already activated conda environment with the updated RAPIDS dependencies.
+
+Make necessary code changes in the `python` directory tree to get the following test script to complete without error:
+
+```bash
+cd python && CUDA_VISIBLE_DEVICES=0 bash run_test.sh
+```
+
+1.  Fix any formatting errors reported by the script.
+2.  Fix any type-checking errors reported.
+3.  Fix all other pytest errors reported.   
+    - Note that the pytest phase runs through all tests before reporting any errors.   This can take a while.
+    - Most failures will be due to changes to internal apis in cuML that we rely on.
+
+
+Iterate on 1., 2., and 3. until the script succeeds.   The script can take a while to complete.
+
+For 3., when working on individual tests, especially if only a few are failing, it is faster to run only these tests via pytest directly, followed by a final full run.
+
+You may search the source code in the directory `../cuml` for relevant internal api changes.  The branch for the desired version is checked out.
diff --git a/ci/Dockerfile b/ci/Dockerfile
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -47,6 +47,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     && conda config --set solver libmamba
 
 # install cuML
-ARG RAPIDS_VERSION=25.12
-RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.10 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
+ARG RAPIDS_VERSION=26.06
+RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.11 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
     && conda clean --all -f -y
diff --git a/ci/test.sh b/ci/test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -47,15 +47,11 @@ pip install -r requirements_dev.txt && pip install -e .
 # plugin tests
 ./run_plugin_test.sh
 
-# check compatibility with Spark 3.3 in nightly run
-# also push draft release docs to gh-pages
+# push draft release docs to gh-pages in nightly run
 if [[ $type == "nightly" ]]; then
-    pip uninstall pyspark -y
-    pip install pyspark~=3.3.0
-    ./run_test.sh
-    ./run_benchmark.sh $bench_args
     # if everything passed till now update draft release docs in gh-pages
     # need to invoke docs.sh from top level of repo
     cd .. # top level of repo
     ci/docs.sh nightly
 fi
+
diff --git a/docker/Dockerfile.pip b/docker/Dockerfile.pip
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,8 +17,8 @@
 ARG CUDA_VERSION=12.2.2
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 
-ARG PYSPARK_VERSION=3.3.1
-ARG RAPIDS_VERSION=25.12.0
+ARG PYSPARK_VERSION=3.4.4
+ARG RAPIDS_VERSION=26.06.0
 ARG ARCH=amd64
 #ARG ARCH=arm64
 
@@ -35,13 +35,27 @@ RUN apt-get update -y \
     && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y openjdk-17-jdk \
     && rm -rf /var/lib/apt/lists
 
+# build toolchain and dev headers needed for python, installed from source below, to have full functionality; apt lists are removed in this same layer to keep it small
 RUN apt-get update -y \
-    && apt install -y git numactl python3.10-venv python3-pip python-is-python3 software-properties-common wget zip \
-    && python -m pip install --upgrade pip \
-    && rm -rf /var/lib/apt/lists
+    && apt install -y git numactl software-properties-common wget zip build-essential zlib1g-dev \
+    libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev libbz2-dev && rm -rf /var/lib/apt/lists
+
+# install python 3.11 from source as base image has python 3.10; the tarball and build tree are deleted in the same layer so they do not persist in the image
+RUN bash -c "wget https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz && \
+    tar xzf Python-3.11.9.tgz && cd Python-3.11.9 && \
+    ./configure --enable-optimizations && make altinstall && cd .. && rm -rf Python-3.11.9 Python-3.11.9.tgz"
+
+# 1. Define the venv path and update system PATH
+ENV VIRTUAL_ENV=/opt/venv
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# 2. Create the virtual environment
+RUN python3.11 -m venv $VIRTUAL_ENV 
+
+RUN python -m pip install --upgrade pip
 
 RUN apt-get update -y \
-    && apt install -y python3.10-dev cmake curl \
+    && apt install -y cmake curl \
     && rm -rf /var/lib/apt/lists
 
 # install RAPIDS
@@ -55,13 +69,9 @@ RUN pip install --no-cache-dir \
     numpy~=1.0 \
     --extra-index-url=https://pypi.nvidia.com
 
-# install python dependencies
-RUN pip install --no-cache-dir pyspark==${PYSPARK_VERSION} "scikit-learn>=1.2.1" \
-    && pip install --no-cache-dir "black>=23.1.0" "build>=0.10.0" "isort>=5.12.0" "mypy>=1.0.0" \
-    numpydoc pydata-sphinx-theme pylint pytest "sphinx<6.0" "twine>=4.0.0"
 
 # Config JAVA_HOME
-ENV JAVA_HOME /usr/lib/jvm/java-1.17.0-openjdk-$ARCH
+ENV JAVA_HOME=/usr/lib/jvm/java-1.17.0-openjdk-$ARCH
 
 ### END OF CACHE ###
 

diff --git a/docker/Dockerfile.python b/docker/Dockerfile.python
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 ARG CUDA_VERSION=12.2.2
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 
-ARG RAPIDS_VERSION=25.12
+ARG RAPIDS_VERSION=26.06
 
 # ubuntu22
 RUN sed -i -e 's|http://archive.ubuntu.com/ubuntu|https://archive.ubuntu.com/ubuntu|g' \
@@ -34,28 +34,24 @@ RUN apt update -y \
     && rm -rf /var/lib/apt/lists
 
 # Config JAVA_HOME
-ENV JAVA_HOME /usr/lib/jvm/java-1.17.0-openjdk-amd64
+ENV JAVA_HOME=/usr/lib/jvm/java-1.17.0-openjdk-amd64
 
 # Install conda
 ENV PATH="/root/miniconda3/bin:${PATH}"
-RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh \
+ENV CONDA_PLUGINS_AUTO_ACCEPT_TOS="yes"
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
     && mkdir /root/.conda \
-    && bash Miniconda3-py38_4.10.3-Linux-x86_64.sh -b \
-    && rm -f Miniconda3-py38_4.10.3-Linux-x86_64.sh \
-    && conda tos accept --override-channels -c conda-forge -c defaults \
-    && conda init
+    && bash Miniconda3-latest-Linux-x86_64.sh -b \
+    && rm -f Miniconda3-latest-Linux-x86_64.sh \
+    && conda init && conda update -n base conda \
+    && conda install -n base conda-libmamba-solver \
+    && conda config --set solver libmamba
 
 # install cuML
 
-RUN conda install -y -c rapidsai -c conda-forge -c nvidia python=3.10 cuda-version=12.2 cuml=$RAPIDS_VERSION cudf=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION numpy~=1.0 \
+RUN conda install -y -c rapidsai -c conda-forge -c nvidia python=3.11 cuda-version=12.2 cuml=$RAPIDS_VERSION cudf=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION numpy~=1.0 \
     && conda clean --all -f -y
 
-# install python dependencies
-RUN pip install --no-cache-dir "pyspark>=3.2.1" "scikit-learn>=1.2.1" \
-    && pip install --no-cache-dir "black>=23.1.0" "build>=0.10.0" "isort>=5.12.0" "mypy>=1.0.0" \
-    numpydoc pydata-sphinx-theme pylint pytest "sphinx<6.0" "twine>=4.0.0"
-
-### END OF CACHE ###
 
 #ARG RAPIDS_ML_VER=main
 #RUN git clone -b branch-$RAPIDS_ML_VER https://github.com/NVIDIA/spark-rapids-ml.git

diff --git a/docs/site/FAQ.md b/docs/site/FAQ.md
@@ -9,11 +9,11 @@ nav_order: 4
 
 ### What versions of Apache Spark are supported?
 
-Apache Spark version 3.3.1 or higher.
+Apache Spark version 3.4 or higher.
 
 ### What versions of Python are supported
 
-Python 3.10 or higher.
+Python 3.11 or higher.
 
 ### How do I fix the "java.lang.IllegalArgumentException: valueCount must be >= 0" error?
 

diff --git a/docs/site/compatibility.md b/docs/site/compatibility.md
@@ -31,7 +31,7 @@ The following table shows the currently supported algorithms.  The goal is to ex
 
 | Spark Rapids ML | CUDA  | Spark  | Python |
 | :-------------- | :---- | :----- | :----- |
-| 1.0.0           | 12.0+ | 3.3+   | 3.10+  |
+| 26.6.0          | 12.2+ | 3.4+   | 3.11+  |
 
 
 ## Single vs Double precision inputs

diff --git a/docs/site/performance.md b/docs/site/performance.md
@@ -10,7 +10,7 @@ nav_order: 6
 ## Stage-level scheduling
 
 Starting from spark-rapids-ml `23.10.0`, stage-level scheduling is automatically enabled.
-Therefore, if you are using Spark **standalone** cluster version **`3.4.0`** or higher, we strongly recommend
+Therefore, if you are using Spark **standalone** cluster version **`3.4`** or higher, we strongly recommend
 configuring the `"spark.task.resource.gpu.amount"` as a fractional value. This will
 enable running multiple tasks in parallel during the ETL phase to help the performance. An example configuration
 would be `"spark.task.resource.gpu.amount=1/spark.executor.cores"`. For example,
@@ -30,7 +30,7 @@ a total of 12 tasks per executor will be executed concurrently during the ETL ph
 is then used internally to the library to automatically carry out the ML training phases using the required 1 gpu per task.
 
 However, if you are using a spark-rapids-ml version earlier than 23.10.0 or a Spark
-standalone cluster version below 3.4.0, you need to make sure there will be only 1 task running at any time per executor.
+standalone cluster version below 3.4, you need to make sure there will be only 1 task running at any time per executor.
 You can set `spark.task.cpus` equal to `spark.executor.cores`, or `"spark.task.resource.gpu.amount"=1`. For example,
 
 ``` bash

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
 project = 'spark-rapids-ml'
-copyright = '2025, NVIDIA'
+copyright = '2025-2026, NVIDIA'
 author = 'NVIDIA'
-release = '25.12.0'
+release = '26.06.0'
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

diff --git a/jvm/README.md b/jvm/README.md
@@ -31,7 +31,7 @@ JDK 17, Spark 4.0
 
     ```shell
     # Create a new conda environment for the client
-    conda create -n pyspark-client python==3.10
+    conda create -n pyspark-client python==3.11
     conda activate pyspark-client
 
     # Install the PySpark client package
@@ -50,10 +50,10 @@ including setting up the server and running client-side tests.
 To start the Spark Connect server with Spark Rapids ML support, follow these steps:
 
 ```shell
-conda activate rapids-25.12  # from spark-rapids-ml installation
+conda activate rapids-26.06  # from spark-rapids-ml installation
 export SPARK_HOME=<directory where spark was installed above>
 export PYSPARK_PYTHON=$(which python)
-export PLUGIN_JAR=$(pip show spark-rapids-ml | grep Location: | cut -d ' ' -f 2 )/spark_rapids_ml/jars/com.nvidia.rapids.ml-25.12.0.jar
+export PLUGIN_JAR=$(pip show spark-rapids-ml | grep Location: | cut -d ' ' -f 2 )/spark_rapids_ml/jars/com.nvidia.rapids.ml-26.06.0.jar
 $SPARK_HOME/sbin/start-connect-server.sh --master local[*] \
   --jars $PLUGIN_JAR \
   --conf spark.driver.memory=20G
@@ -107,7 +107,7 @@ mvn clean package -DskipTests
 if you would like to compile the plugin and run the unit tests, install `spark-rapids-ml` python package and its dependencies per the above instructions and run the following command:
 
 ``` shell
-conda activate rapids-25.12
+conda activate rapids-26.06
 export PYSPARK_PYTHON=$(which python)
 mvn clean package
 ```

diff --git a/jvm/pom.xml b/jvm/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  Copyright (c) 2025, NVIDIA CORPORATION.
+  Copyright (c) 2025-2026, NVIDIA CORPORATION.
 
   Licensed under the Apache License, Version 2.0 (the "License");
   You may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@
 
     <groupId>com.nvidia.rapids</groupId>
     <artifactId>ml</artifactId>
-    <version>25.12.0</version>
+    <version>26.06.0</version>
     <packaging>jar</packaging>
 
     <properties>

diff --git a/notebooks/aws-emr/init-bootstrap-action.sh b/notebooks/aws-emr/init-bootstrap-action.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,27 +23,27 @@ sudo chmod a+rwx -R /spark-rapids-cgroup
 sudo yum update -y
 sudo yum install -y gcc bzip2-devel libffi-devel tar gzip wget make 
 sudo yum install -y mysql-devel --skip-broken
-sudo bash -c "wget https://www.python.org/ftp/python/3.10.9/Python-3.10.9.tgz && \
-tar xzf Python-3.10.9.tgz && cd Python-3.10.9 && \
+sudo bash -c "wget https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz && \
+tar xzf Python-3.11.9.tgz && cd Python-3.11.9 && \
 ./configure --enable-optimizations && make altinstall"
 
-RAPIDS_VERSION=25.12.0
+RAPIDS_VERSION=26.6.0
 
-sudo /usr/local/bin/pip3.10 install --upgrade pip
+sudo /usr/local/bin/pip3.11 install --upgrade pip
 
 # install scikit-learn 
-sudo /usr/local/bin/pip3.10 install scikit-learn
+sudo /usr/local/bin/pip3.11 install scikit-learn
 
 # install cudf and cuml
-sudo /usr/local/bin/pip3.10 install --no-cache-dir \
+sudo /usr/local/bin/pip3.11 install --no-cache-dir \
          cudf-cu12~=${RAPIDS_VERSION} \
          cuml-cu12~=${RAPIDS_VERSION} \
          cuvs-cu12~=${RAPIDS_VERSION} \
          pylibraft-cu12~=${RAPIDS_VERSION} \
          raft-dask-cu12~=${RAPIDS_VERSION} \
          --extra-index-url=https://pypi.nvidia.com --verbose
-sudo /usr/local/bin/pip3.10 install spark-rapids-ml
-sudo /usr/local/bin/pip3.10 list
+sudo /usr/local/bin/pip3.11 install spark-rapids-ml
+sudo /usr/local/bin/pip3.11 list
 
 # set up no-import-change for cluster if enabled
 if [[ $1 == "--no-import-enabled" && $2 == 1 ]]; then
@@ -55,7 +55,7 @@ if [[ $1 == "--no-import-enabled" && $2 == 1 ]]; then
     sudo rm fake_shell.py
 fi 
 
-# ensure notebook comes up in python 3.10 by using a background script that waits for an 
+# ensure notebook comes up in python 3.11 by using a background script that waits for an
 # application file to be installed before modifying.
 cat <<EOF >/tmp/mod_start_kernel.sh
 #!/bin/bash
@@ -66,7 +66,7 @@ sleep 10
 done
 echo "done waiting"
 sleep 10
-sudo sed -i /mnt/notebook-env/bin/start_kernel_as_emr_notebook.sh -e 's#"spark.pyspark.python": "python3"#"spark.pyspark.python": "/usr/local/bin/python3.10"#g'
+sudo sed -i /mnt/notebook-env/bin/start_kernel_as_emr_notebook.sh -e 's#"spark.pyspark.python": "python3"#"spark.pyspark.python": "/usr/local/bin/python3.11"#g'
 sudo sed -i /mnt/notebook-env/bin/start_kernel_as_emr_notebook.sh -e 's#"spark.pyspark.virtualenv.enabled": "true"#"spark.pyspark.virtualenv.enabled": "false"#g'
 exit 0
 EOF

diff --git a/notebooks/aws-emr/init-configurations.json b/notebooks/aws-emr/init-configurations.json
@@ -67,10 +67,10 @@
             "spark.sql.execution.arrow.pyspark.enabled":"true",
             "spark.sql.execution.arrow.maxRecordsPerBatch":"100000",
             "spark.sql.cache.serializer":"com.nvidia.spark.ParquetCachedBatchSerializer",
-            "spark.pyspark.python":"/usr/local/bin/python3.10",
-            "spark.pyspark.driver.python":"/usr/local/bin/python3.10",
+            "spark.pyspark.python":"/usr/local/bin/python3.11",
+            "spark.pyspark.driver.python":"/usr/local/bin/python3.11",
             "spark.pyspark.virtualenv.enabled":"false",
-            "spark.yarn.appMasterEnv.PYSPARK_PYTHON":"/usr/local/bin/python3.10",
+            "spark.yarn.appMasterEnv.PYSPARK_PYTHON":"/usr/local/bin/python3.11",
             "spark.dynamicAllocation.enabled":"false",
             "spark.driver.memory":"20g",
             "spark.rpc.message.maxSize":"512",

diff --git a/notebooks/databricks/README.md b/notebooks/databricks/README.md
@@ -26,7 +26,7 @@ If you already have a Databricks account, you can run the example notebooks on a
       spark.task.resource.gpu.amount 0.125
       spark.databricks.delta.preview.enabled true
       spark.python.worker.reuse true
-      spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.13-26.04.2.jar:/databricks/spark/python
+      spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.13-26.06.0.jar:/databricks/spark/python
       spark.sql.execution.arrow.maxRecordsPerBatch 100000
       spark.plugins com.nvidia.spark.SQLPlugin
       spark.locality.wait 0s

diff --git a/notebooks/databricks/init-pip-cuda-12.sh b/notebooks/databricks/init-pip-cuda-12.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,8 +22,8 @@ set -ex
 # Note that the SPARK_RAPIDS_VERSION will not necessarily match the RAPIDS_VERSION. Check https://nvidia.github.io/spark-rapids/docs/download.html for the latest compatible version of 
 # spark-rapids version that verifies compatibility with your Databricks Runtime. (In this case, Databricks 17.3 ML LTS.) The available versions for RAPIDS_VERSION can be
 # found by executing "pip index versions spark-rapids-ml".   
-RAPIDS_VERSION=25.12.0
-SPARK_RAPIDS_VERSION=26.04.2
+RAPIDS_VERSION=26.6.0
+SPARK_RAPIDS_VERSION=26.06.0
 
 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.13-${SPARK_RAPIDS_VERSION}-cuda12.jar -o /databricks/jars/rapids-4-spark_2.13-${SPARK_RAPIDS_VERSION}.jar