From 7e845dfefca7353dcd0aeaa14fcc06bdb65e2985 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Thu, 22 Jan 2026 13:50:03 -0800
Subject: [PATCH 01/13] preliminary rapids 26.02 updates to pass tests

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/Dockerfile                            | 4 ++--
 python/README.md                         | 6 +++---
 python/pyproject.toml                    | 2 +-
 python/src/spark_rapids_ml/regression.py | 2 --
 python/src/spark_rapids_ml/umap.py       | 1 +
 python/src/spark_rapids_ml/utils.py      | 4 ++--
 python/tests/test_linear_model.py        | 6 +-----
 7 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/ci/Dockerfile b/ci/Dockerfile
index 13d119e9..cdec57d4 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -47,6 +47,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     && conda config --set solver libmamba
 
 # install cuML
-ARG RAPIDS_VERSION=25.12
-RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.10 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
+ARG RAPIDS_VERSION=26.02
+RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.10 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
     && conda clean --all -f -y
diff --git a/python/README.md b/python/README.md
index 266e8fcd..d1ec68c7 100644
--- a/python/README.md
+++ b/python/README.md
@@ -20,9 +20,9 @@ For simplicity, the following instructions just use Spark local mode, assuming a
 
 First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html).   Example for CUDA Toolkit 12.2:
 ```bash
-conda create -n rapids-25.12 \
+conda create -n rapids-26.02 \
     -c rapidsai -c conda-forge -c nvidia \
-    python=3.10 cuml=25.12 cuvs=25.12 pylibraft=25.12 raft-dask=25.12 cuda-version=12.2 numpy~=1.0
+    python=3.10 cuml=26.02 cuvs=26.02 pylibraft=26.02 raft-dask=26.02 cuda-version=12.2 numpy~=1.0
 ```
 
 **Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting.  Once you have a working environment, you can then try installing directly, if necessary.
@@ -31,7 +31,7 @@ conda create -n rapids-25.12 \
 
 Once you have the conda environment, activate it and install the required packages.
 ```bash
-conda activate rapids-25.12
+conda activate rapids-26.02
 
 ## for development access to notebooks, tests, and benchmarks
 git clone --branch main https://github.com/NVIDIA/spark-rapids-ml.git
diff --git a/python/pyproject.toml b/python/pyproject.toml
index d201a20a..d75c51a3 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -14,7 +14,7 @@
 
 [project]
 name = "spark-rapids-ml"
-version = "25.12.0"
+version = "26.2.0"
 authors = [
   { name="Jinfeng Li", email="jinfeng@nvidia.com" },
   { name="Bobby Wang", email="bobwang@nvidia.com" },
diff --git a/python/src/spark_rapids_ml/regression.py b/python/src/spark_rapids_ml/regression.py
index b630bb36..04165722 100644
--- a/python/src/spark_rapids_ml/regression.py
+++ b/python/src/spark_rapids_ml/regression.py
@@ -191,7 +191,6 @@ def _param_mapping(cls) -> Dict[str, Optional[str]]:
             "maxIter": "max_iter",
             "regParam": "alpha",
             "solver": "solver",
-            "standardization": "normalize",  # TODO: standardization is carried out in cupy not cuml so need a new type of param mapped value to indicate that.
             "tol": "tol",
             "weightCol": None,
         }
@@ -219,7 +218,6 @@ def _get_cuml_params_default(self) -> Dict[str, Any]:
             "algorithm": "auto",
             "fit_intercept": True,
             "copy_X": True,
-            "normalize": False,
             "verbose": False,
             "alpha": 0.0001,
             "solver": "auto",  # in cuml 25.04 default was changed to auto which is mapped to eig internally in cuml
diff --git a/python/src/spark_rapids_ml/umap.py b/python/src/spark_rapids_ml/umap.py
index d0e8cb15..016fba67 100644
--- a/python/src/spark_rapids_ml/umap.py
+++ b/python/src/spark_rapids_ml/umap.py
@@ -134,6 +134,7 @@ def _get_cuml_params_default(self) -> Dict[str, Any]:
             "verbose": False,
             "build_algo": "auto",
             "build_kwds": None,
+            "device_ids": None,
         }
 
     def _pyspark_class(self) -> Optional[ABCMeta]:
diff --git a/python/src/spark_rapids_ml/utils.py b/python/src/spark_rapids_ml/utils.py
index f8ac301a..e19070ed 100644
--- a/python/src/spark_rapids_ml/utils.py
+++ b/python/src/spark_rapids_ml/utils.py
@@ -222,8 +222,8 @@ def _configure_memory_resource(
         ) == type(rmm.mr.SamHeadroomMemoryResource(headroom=sam_headroom)):
             _old_memory_resources.append(rmm.mr.get_current_device_resource())
             _last_sam_headroom_size = sam_headroom
-            mr = rmm.mr.SamHeadroomMemoryResource(headroom=sam_headroom)
-            rmm.mr.set_current_device_resource(mr)
+            _mr = rmm.mr.SamHeadroomMemoryResource(headroom=sam_headroom)
+            rmm.mr.set_current_device_resource(_mr)
 
     if uvm_enabled:
         if not type(rmm.mr.get_current_device_resource()) == type(
diff --git a/python/tests/test_linear_model.py b/python/tests/test_linear_model.py
index eddfbfd6..c06832fd 100644
--- a/python/tests/test_linear_model.py
+++ b/python/tests/test_linear_model.py
@@ -115,7 +115,7 @@ def test_params(default_params: bool) -> None:
 
     cuml_params = get_default_cuml_parameters(
         cuml_classes=[CumlLinearRegression, Ridge, CD],
-        excludes=["handle", "output_type"],
+        excludes=["handle", "output_type", "normalize"],
     )
 
     # Ensure internal cuml defaults match actual cuml defaults
@@ -126,7 +126,6 @@ def test_params(default_params: bool) -> None:
         "alpha": spark_params["regParam"],
         "l1_ratio": spark_params["elasticNetParam"],
         "max_iter": spark_params["maxIter"],
-        "normalize": spark_params["standardization"],
         "tol": spark_params["tol"],
     }
 
@@ -175,7 +174,6 @@ def test_linear_regression_params(
         "fit_intercept": True,
         "l1_ratio": 0.0,
         "max_iter": 100,
-        "normalize": True,
         "solver": "auto",
     }
     default_lr = LinearRegression()
@@ -196,7 +194,6 @@ def test_linear_regression_params(
         {
             "alpha": reg,
             "fit_intercept": False,
-            "normalize": False,
             "solver": "eig",
         }
     )
@@ -234,7 +231,6 @@ def test_linear_regression_copy() -> None:
         ({"regParam": 0.12}, {"alpha": 0.12}),
         ({"elasticNetParam": 0.23}, {"l1_ratio": 0.23}),
         ({"fitIntercept": False}, {"fit_intercept": False}),
-        ({"standardization": False}, {"normalize": False}),
         ({"tol": 0.0132}, {"tol": 0.0132}),
         ({"verbose": True}, {"verbose": True}),
     ]

From 96077ce3620620a6847dd42a58e016484e616811 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Mon, 30 Mar 2026 09:35:31 -0700
Subject: [PATCH 02/13] updates for 26.04 + claude skill for this update

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 .claude/skills/update-rapids-version/SKILL.md | 25 +++++++++++++++++++
 python/benchmark/test_gen_data.py             |  2 +-
 python/requirements.txt                       |  2 +-
 python/src/spark_rapids_ml/classification.py  |  3 ++-
 python/src/spark_rapids_ml/feature.py         |  3 ++-
 python/src/spark_rapids_ml/knn.py             |  8 +++++-
 python/src/spark_rapids_ml/regression.py      |  5 +++-
 python/src/spark_rapids_ml/tree.py            |  2 ++
 python/src/spark_rapids_ml/umap.py            |  1 +
 python/tests/test_metrics.py                  |  2 +-
 python/tests/test_pipeline.py                 |  2 +-
 11 files changed, 47 insertions(+), 8 deletions(-)
 create mode 100644 .claude/skills/update-rapids-version/SKILL.md

diff --git a/.claude/skills/update-rapids-version/SKILL.md b/.claude/skills/update-rapids-version/SKILL.md
new file mode 100644
index 00000000..580e0be5
--- /dev/null
+++ b/.claude/skills/update-rapids-version/SKILL.md
@@ -0,0 +1,25 @@
+---
+name: update-rapids-version
+description: Updates python code (e.g. internal api calls) so that tests pass after running in conda environment with updated rapids version.  
+---
+
+You will be running in an already activated conda environment with the updated rapids dependencies.
+
+Make necessary code changes in the `python` directory tree to get the following test script to complete without error:
+
+```bash
+cd python && CUDA_VISIBLE_DEVICES=0 bash run_test.sh
+```
+
+1.  Fix any formatting errors reported by the script.
+2.  Fix any type-checking errors reported.
+3.  Fix all other pytest errors reported.   
+    - Note that pytest phase runs through all tests before reporting any errors.   This can take a while.
+    - Most failures will be due to changes to internal apis in cuML that we rely on.
+
+
+Iterate on 1., 2., and 3. until the script succeeds.   The script can take a while to complete.
+
+For 3., when working on individual tests, especially if only a few are failing, it is faster to run only these tests via pytest directly, followed by a final full run.
+
+You may search the source code in the directory `../cuml` for relevant internal api changes.  The branch for the desired version is checked out.
diff --git a/python/benchmark/test_gen_data.py b/python/benchmark/test_gen_data.py
index 9d29be69..8b3a8124 100644
--- a/python/benchmark/test_gen_data.py
+++ b/python/benchmark/test_gen_data.py
@@ -396,7 +396,7 @@ def _func_test_make_sparse_regression(
 
             for i in range(len(chunk_boundary)):
                 start = 0 if i == 0 else chunk_boundary[i - 1]
-                dense_count = np.count_nonzero(X_np[:, start : chunk_boundary[i]])
+                dense_count = int(np.count_nonzero(X_np[:, start : chunk_boundary[i]]))
 
                 col_density = density_values[i]
                 chunk_size = col_per_chunk[i]
diff --git a/python/requirements.txt b/python/requirements.txt
index ba2a9b48..9aa2c1a6 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -14,6 +14,6 @@
 
 numpy_allocator
 psutil
-pyspark>=3.2.1,<3.5
+pyspark>=3.2.1,<4.0
 scikit-learn>=1.2.1
 cryptography==46.0.6
diff --git a/python/src/spark_rapids_ml/classification.py b/python/src/spark_rapids_ml/classification.py
index 8e6ebcd6..b2a26bf2 100644
--- a/python/src/spark_rapids_ml/classification.py
+++ b/python/src/spark_rapids_ml/classification.py
@@ -1481,6 +1481,7 @@ def _construct_lr() -> CumlT:
             import cupy as cp
             import numpy as np
             from cuml.linear_model.logistic_regression_mg import LogisticRegressionMG
+            from pylibraft.common import Handle
 
             from .utils import cudf_to_cuml_array
 
@@ -1490,7 +1491,7 @@ def _construct_lr() -> CumlT:
             lrs = []
 
             for i in range(num_models):
-                lr = LogisticRegressionMG(output_type="cupy")
+                lr = LogisticRegressionMG(handle=Handle(), output_type="cupy")
 
                 lr.n_features_in_ = n_cols
                 lr.n_cols = n_cols
diff --git a/python/src/spark_rapids_ml/feature.py b/python/src/spark_rapids_ml/feature.py
index 595a9e85..efb620ce 100644
--- a/python/src/spark_rapids_ml/feature.py
+++ b/python/src/spark_rapids_ml/feature.py
@@ -411,8 +411,9 @@ def _construct_pca() -> CumlT:
 
             """
             from cuml.decomposition.pca_mg import PCAMG as CumlPCAMG
+            from pylibraft.common import Handle
 
-            pca = CumlPCAMG(output_type="numpy", **cuml_alg_params)
+            pca = CumlPCAMG(handle=Handle(), output_type="numpy", **cuml_alg_params)
 
             pca.n_features_in_ = n_cols
 
diff --git a/python/src/spark_rapids_ml/knn.py b/python/src/spark_rapids_ml/knn.py
index 347de9c1..041bfd87 100644
--- a/python/src/spark_rapids_ml/knn.py
+++ b/python/src/spark_rapids_ml/knn.py
@@ -79,7 +79,12 @@ def _param_mapping(cls) -> Dict[str, Optional[str]]:
         return {"k": "n_neighbors"}
 
     def _get_cuml_params_default(self) -> Dict[str, Any]:
-        return {"n_neighbors": 5, "verbose": False, "batch_size": 2000000}
+        return {
+            "n_neighbors": 5,
+            "verbose": False,
+            "batch_size": 2000000,
+            "radius": 1.0,
+        }
 
     def _pyspark_class(self) -> Optional[ABCMeta]:
         return None
@@ -853,6 +858,7 @@ def _get_cuml_params_default(self) -> Dict[str, Any]:
             "algorithm": "ivfflat",
             "metric": "euclidean",
             "algo_params": None,
+            "radius": 1.0,
         }
 
     def _pyspark_class(self) -> Optional[ABCMeta]:
diff --git a/python/src/spark_rapids_ml/regression.py b/python/src/spark_rapids_ml/regression.py
index 04165722..5dc5e130 100644
--- a/python/src/spark_rapids_ml/regression.py
+++ b/python/src/spark_rapids_ml/regression.py
@@ -787,6 +787,7 @@ def _get_cuml_transform_func(
 
         def _construct_lr() -> CumlT:
             from cuml.linear_model.linear_regression_mg import LinearRegressionMG
+            from pylibraft.common import Handle
 
             from .utils import cudf_to_cuml_array
 
@@ -796,7 +797,9 @@ def _construct_lr() -> CumlT:
             intercepts = intercept_ if isinstance(intercept_, list) else [intercept_]
 
             for i in range(len(coefs)):
-                lr = LinearRegressionMG(output_type="numpy", copy_X=False)
+                lr = LinearRegressionMG(
+                    handle=Handle(), output_type="numpy", copy_X=False
+                )
                 # need this to revert a change in cuML targeting sklearn compat.
                 lr.n_features_in_ = n_cols
                 lr.n_cols = n_cols
diff --git a/python/src/spark_rapids_ml/tree.py b/python/src/spark_rapids_ml/tree.py
index bb418ce7..529d8177 100644
--- a/python/src/spark_rapids_ml/tree.py
+++ b/python/src/spark_rapids_ml/tree.py
@@ -678,6 +678,7 @@ def _get_cuml_transform_func(
         is_classification = self._is_classification()
         dtype = self.dtype
         num_classes = self._num_classes
+        n_cols = self.n_cols
 
         def _construct_rf() -> CumlT:
             if is_classification:
@@ -698,6 +699,7 @@ def _construct_rf() -> CumlT:
                 rf = cuRf()
                 rf.n_classes_ = num_classes
                 rf.classes_ = np.arange(num_classes, dtype=np.int32)
+                rf.n_features_in_ = n_cols
                 rf._treelite_model_bytes = treelite.Model.deserialize_bytes(model)
 
                 rfs.append(rf)
diff --git a/python/src/spark_rapids_ml/umap.py b/python/src/spark_rapids_ml/umap.py
index 016fba67..a7482a06 100644
--- a/python/src/spark_rapids_ml/umap.py
+++ b/python/src/spark_rapids_ml/umap.py
@@ -135,6 +135,7 @@ def _get_cuml_params_default(self) -> Dict[str, Any]:
             "build_algo": "auto",
             "build_kwds": None,
             "device_ids": None,
+            "force_serial_epochs": False,
         }
 
     def _pyspark_class(self) -> Optional[ABCMeta]:
diff --git a/python/tests/test_metrics.py b/python/tests/test_metrics.py
index ecaadae0..6c36359d 100644
--- a/python/tests/test_metrics.py
+++ b/python/tests/test_metrics.py
@@ -96,7 +96,7 @@ def test_multi_class_metrics(
     ).astype(np.float64)
 
     probabilities = np.random.rand(1000, num_classes)
-    probabilities[range(1000), list(pdf["label"].astype(np.integer))] = 2.0
+    probabilities[range(1000), list(pdf["label"].astype(int))] = 2.0
     probabilities = probabilities / np.sum(probabilities, axis=1).reshape(-1, 1)
 
     pdf["probabilities"] = list(probabilities)
diff --git a/python/tests/test_pipeline.py b/python/tests/test_pipeline.py
index 15fe446c..1caeb94b 100644
--- a/python/tests/test_pipeline.py
+++ b/python/tests/test_pipeline.py
@@ -420,7 +420,7 @@ def test_compat_random_forest(
             Pipeline,
             UMAP,
             UMAPModel,
-            {"n_components": 1},
+            {"n_components": 2},
         ),
         (
             Pipeline,

From 6d573b99ff6e7f386f63f8e43b2745c7e03fa3f2 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Mon, 30 Mar 2026 09:42:53 -0700
Subject: [PATCH 03/13] more 26.04 updates

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/Dockerfile                                |  2 +-
 python/README.md                             |  4 ++--
 python/benchmark/databricks/run_benchmark.sh |  8 ++++----
 python/pyproject.toml                        | 10 ++++------
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/ci/Dockerfile b/ci/Dockerfile
index cdec57d4..2b2594b8 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -47,6 +47,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     && conda config --set solver libmamba
 
 # install cuML
-ARG RAPIDS_VERSION=26.02
+ARG RAPIDS_VERSION=26.04
 RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.10 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
     && conda clean --all -f -y
diff --git a/python/README.md b/python/README.md
index d1ec68c7..9e967fcb 100644
--- a/python/README.md
+++ b/python/README.md
@@ -20,9 +20,9 @@ For simplicity, the following instructions just use Spark local mode, assuming a
 
 First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html).   Example for CUDA Toolkit 12.2:
 ```bash
-conda create -n rapids-26.02 \
+conda create -n rapids-26.04 \
     -c rapidsai -c conda-forge -c nvidia \
-    python=3.10 cuml=26.02 cuvs=26.02 pylibraft=26.02 raft-dask=26.02 cuda-version=12.2 numpy~=1.0
+    python=3.11 cuml=26.04 cuvs=26.04 pylibraft=26.04 raft-dask=26.04 cuda-version=12.2 numpy~=1.0
 ```
 
 **Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting.  Once you have a working environment, you can then try installing directly, if necessary.
diff --git a/python/benchmark/databricks/run_benchmark.sh b/python/benchmark/databricks/run_benchmark.sh
index 7920d511..c7eee3f9 100755
--- a/python/benchmark/databricks/run_benchmark.sh
+++ b/python/benchmark/databricks/run_benchmark.sh
@@ -15,7 +15,7 @@
 
 
 cluster_type=${1:-gpu_etl}
-db_version=${2:-13.3}
+db_version=${2:-15.4}
 
 if [[ $cluster_type == "gpu" || $cluster_type == "gpu_etl" ]]; then
     num_cpus=0
@@ -25,13 +25,13 @@ elif [[ $cluster_type == "cpu" ]]; then
     num_gpus=0
 else
     echo "unknown cluster type $cluster_type"
-    echo "usage: $0 cpu|gpu|gpu_etl [12.2|13.3|14.3|15.4]" 
+    echo "usage: $0 cpu|gpu|gpu_etl [15.4]" 
     exit 1
 fi
 
-if [[ $db_version > 13.3 && $cluster_type == "gpu_etl" ]]; then
+if [[ $db_version > 16.4 && $cluster_type == "gpu_etl" ]]; then
     echo "spark rapids etl plugin is not supported on databricks ${db_version}"
-    echo "please specify db_version 12.2 or 13.3 for cluster type gpu_etl"
+    echo "please specify db_version 15.4 or 16.4 for cluster type gpu_etl"
     exit 1
 fi
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
index d75c51a3..1fdcd8e4 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,4 +1,4 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
 
 [project]
 name = "spark-rapids-ml"
-version = "26.2.0"
+version = "26.4.0"
 authors = [
   { name="Jinfeng Li", email="jinfeng@nvidia.com" },
   { name="Bobby Wang", email="bobwang@nvidia.com" },
@@ -23,16 +23,14 @@ authors = [
 ]
 description = "Apache Spark integration with RAPIDS and cuML"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 classifiers = [
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "License :: OSI Approved :: Apache Software License",
     "Operating System :: OS Independent",
     "Environment :: GPU :: NVIDIA CUDA :: 12",
-    "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.0",
-    "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.1",
     "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.2",
     "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.3",
     "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.4",

From dc518ac58eb562a97d37575af96996819c778b2e Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Thu, 28 May 2026 15:24:37 -0700
Subject: [PATCH 04/13] updates for rapids 26.06

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/Dockerfile                          | 2 +-
 docker/Dockerfile.pip                  | 2 +-
 docker/Dockerfile.python               | 2 +-
 python/README.md                       | 6 +++---
 python/pyproject.toml                  | 2 +-
 python/src/spark_rapids_ml/__init__.py | 2 +-
 python/src/spark_rapids_ml/tree.py     | 5 ++---
 python/src/spark_rapids_ml/umap.py     | 7 ++++---
 python/src/spark_rapids_ml/utils.py    | 2 +-
 python/tests/test_random_forest.py     | 6 +++++-
 python/tests/test_umap.py              | 9 +++++++--
 11 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/ci/Dockerfile b/ci/Dockerfile
index 2b2594b8..306fca74 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -47,6 +47,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     && conda config --set solver libmamba
 
 # install cuML
-ARG RAPIDS_VERSION=26.04
+ARG RAPIDS_VERSION=26.06
 RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.10 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
     && conda clean --all -f -y
diff --git a/docker/Dockerfile.pip b/docker/Dockerfile.pip
index ac9cd292..58c6e5de 100644
--- a/docker/Dockerfile.pip
+++ b/docker/Dockerfile.pip
@@ -18,7 +18,7 @@ ARG CUDA_VERSION=12.2.2
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 
 ARG PYSPARK_VERSION=3.3.1
-ARG RAPIDS_VERSION=25.12.0
+ARG RAPIDS_VERSION=26.06.0
 ARG ARCH=amd64
 #ARG ARCH=arm64
 
diff --git a/docker/Dockerfile.python b/docker/Dockerfile.python
index 0306459a..44bf5d40 100644
--- a/docker/Dockerfile.python
+++ b/docker/Dockerfile.python
@@ -17,7 +17,7 @@
 ARG CUDA_VERSION=12.2.2
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 
-ARG RAPIDS_VERSION=25.12
+ARG RAPIDS_VERSION=26.06
 
 # ubuntu22
 RUN sed -i -e 's|http://archive.ubuntu.com/ubuntu|https://archive.ubuntu.com/ubuntu|g' \
diff --git a/python/README.md b/python/README.md
index 9e967fcb..b2fe8e34 100644
--- a/python/README.md
+++ b/python/README.md
@@ -20,9 +20,9 @@ For simplicity, the following instructions just use Spark local mode, assuming a
 
 First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html).   Example for CUDA Toolkit 12.2:
 ```bash
-conda create -n rapids-26.04 \
+conda create -n rapids-26.06 \
     -c rapidsai -c conda-forge -c nvidia \
-    python=3.11 cuml=26.04 cuvs=26.04 pylibraft=26.04 raft-dask=26.04 cuda-version=12.2 numpy~=1.0
+    python=3.11 cuml=26.06 cuvs=26.06 pylibraft=26.06 raft-dask=26.06 cuda-version=12.2 numpy~=1.0
 ```
 
 **Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting.  Once you have a working environment, you can then try installing directly, if necessary.
@@ -31,7 +31,7 @@ conda create -n rapids-26.04 \
 
 Once you have the conda environment, activate it and install the required packages.
 ```bash
-conda activate rapids-26.02
+conda activate rapids-26.06
 
 ## for development access to notebooks, tests, and benchmarks
 git clone --branch main https://github.com/NVIDIA/spark-rapids-ml.git
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 1fdcd8e4..3c418d85 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -14,7 +14,7 @@
 
 [project]
 name = "spark-rapids-ml"
-version = "26.4.0"
+version = "26.6.0"
 authors = [
   { name="Jinfeng Li", email="jinfeng@nvidia.com" },
   { name="Bobby Wang", email="bobwang@nvidia.com" },
diff --git a/python/src/spark_rapids_ml/__init__.py b/python/src/spark_rapids_ml/__init__.py
index ea4c268b..e9b6bf10 100644
--- a/python/src/spark_rapids_ml/__init__.py
+++ b/python/src/spark_rapids_ml/__init__.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "25.12.0"
+__version__ = "26.6.0"
 
 import pandas as pd
 import pyspark
diff --git a/python/src/spark_rapids_ml/tree.py b/python/src/spark_rapids_ml/tree.py
index 529d8177..d4693918 100644
--- a/python/src/spark_rapids_ml/tree.py
+++ b/python/src/spark_rapids_ml/tree.py
@@ -138,7 +138,7 @@ def _get_cuml_params_default(self) -> Dict[str, Any]:
         return {
             "n_streams": 4,
             "n_estimators": 100,
-            "max_depth": 16,
+            "max_depth": "deprecated",
             "max_features": "sqrt",  # for classification, should be 1.0 for regressor, cuml is a little broken here
             "n_bins": 128,
             "bootstrap": True,
@@ -688,7 +688,6 @@ def _construct_rf() -> CumlT:
 
             import cupy as cp
             import numpy as np
-            import treelite
 
             rfs = []
             treelite_models = (
@@ -700,7 +699,7 @@ def _construct_rf() -> CumlT:
                 rf.n_classes_ = num_classes
                 rf.classes_ = np.arange(num_classes, dtype=np.int32)
                 rf.n_features_in_ = n_cols
-                rf._treelite_model_bytes = treelite.Model.deserialize_bytes(model)
+                rf._treelite_model_bytes = model
 
                 rfs.append(rf)
 
diff --git a/python/src/spark_rapids_ml/umap.py b/python/src/spark_rapids_ml/umap.py
index a7482a06..d20b550e 100644
--- a/python/src/spark_rapids_ml/umap.py
+++ b/python/src/spark_rapids_ml/umap.py
@@ -29,6 +29,7 @@
     Tuple,
     Type,
     Union,
+    cast,
 )
 
 import numpy as np
@@ -135,7 +136,7 @@ def _get_cuml_params_default(self) -> Dict[str, Any]:
             "build_algo": "auto",
             "build_kwds": None,
             "device_ids": None,
-            "force_serial_epochs": False,
+            "force_serial_epochs": None,
         }
 
     def _pyspark_class(self) -> Optional[ABCMeta]:
@@ -1702,9 +1703,9 @@ def read_sparse_array(
 
         def read_dense_array(df_path: str) -> np.ndarray:
             data_df = spark.read.parquet(df_path).orderBy("row_id")
-            pdf = data_df.toPandas()
+            pdf = cast(PandasDataFrame, data_df.toPandas())
             assert type(pdf) == pd.DataFrame
-            return np.array(list(pdf.data), dtype=np.float32)
+            return np.array(list(pdf["data"]), dtype=np.float32)
 
         metadata = DefaultParamsReader.loadMetadata(path, self.sc)
         data_path = os.path.join(path, "data")
diff --git a/python/src/spark_rapids_ml/utils.py b/python/src/spark_rapids_ml/utils.py
index e19070ed..945b3d2b 100644
--- a/python/src/spark_rapids_ml/utils.py
+++ b/python/src/spark_rapids_ml/utils.py
@@ -175,7 +175,7 @@ def _get_gpu_id(task_context: TaskContext) -> int:
 # invoke the corresponding deallocate methods.  They will get cleaned up only when
 # the process exits.  This avoids a segfault in the case of creating a new
 # SAM resource with a smaller headroom.
-_old_memory_resources = []
+_old_memory_resources: List[Any] = []
 
 # keep track of last headroom to check if new sam mr is needed.
 _last_sam_headroom_size = None
diff --git a/python/tests/test_random_forest.py b/python/tests/test_random_forest.py
index ff4c877e..5ab2f45d 100644
--- a/python/tests/test_random_forest.py
+++ b/python/tests/test_random_forest.py
@@ -331,7 +331,10 @@ def test_random_forest_basic(
         est.setLabelCol(label_col)
         assert est.getLabelCol() == label_col
 
-        def assert_model(lhs: RandomForestModel, rhs: RandomForestModel) -> None:
+        def assert_model(
+            lhs: Union[RandomForestClassificationModel, RandomForestRegressionModel],
+            rhs: Union[RandomForestClassificationModel, RandomForestRegressionModel],
+        ) -> None:
             assert lhs.cuml_params == rhs.cuml_params
 
             # Vector and array(double) type will be cast to array(float) by default
@@ -342,6 +345,7 @@ def assert_model(lhs: RandomForestModel, rhs: RandomForestModel) -> None:
             assert lhs.n_cols == data_shape[1]
 
             if isinstance(lhs, RandomForestClassificationModel):
+                assert isinstance(rhs, RandomForestClassificationModel)
                 assert lhs.numClasses == rhs.numClasses
                 assert lhs.numClasses == n_classes
 
diff --git a/python/tests/test_umap.py b/python/tests/test_umap.py
index cc447d99..fdf0c84c 100644
--- a/python/tests/test_umap.py
+++ b/python/tests/test_umap.py
@@ -67,10 +67,15 @@ def _load_sparse_data(
 
     if normalize:
         row_sums = np.array(csr_mat.sum(axis=1)).flatten()
-        row_sums[row_sums == 0] = 1.0
+        zero_rows = np.flatnonzero(row_sums == 0)
+        if len(zero_rows) > 0:
+            csr_mat = csr_mat.tolil()
+            csr_mat[zero_rows, 0] = 1.0
+            csr_mat = csr_mat.tocsr()
+            row_sums = np.array(csr_mat.sum(axis=1)).flatten()
         row_sum_diag = scipy.sparse.diags(1.0 / row_sums)
         csr_mat = row_sum_diag @ csr_mat
-        assert np.allclose(np.array(csr_mat.sum(axis=1)).flatten(), 1.0)
+        assert np.allclose(np.array(csr_mat.sum(axis=1)).flatten(), 1.0, atol=1e-6)
 
     # Convert CSR matrix to SparseVectors
     data = []

From ba7f67eefb577e1b6c61bb17fed0b8baea7f97ee Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Thu, 28 May 2026 15:34:51 -0700
Subject: [PATCH 05/13] drop spark 3.3 test, as rapids minimum python is 3.11
 which is not compatible with pyspark 3.3

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/test.sh | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/ci/test.sh b/ci/test.sh
index 05c80a88..94d46f1b 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -47,15 +47,3 @@ pip install -r requirements_dev.txt && pip install -e .
 # plugin tests
 ./run_plugin_test.sh
 
-# check compatibility with Spark 3.3 in nightly run
-# also push draft release docs to gh-pages
-if [[ $type == "nightly" ]]; then
-    pip uninstall pyspark -y
-    pip install pyspark~=3.3.0
-    ./run_test.sh
-    ./run_benchmark.sh $bench_args
-    # if everything passed till now update draft release docs in gh-pages
-    # need to invoke docs.sh from top level of repo
-    cd .. # top level of repo
-    ci/docs.sh nightly
-fi

From 3bed382d327042081ee6c633206442ab8d322242 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Sat, 30 May 2026 10:12:52 -0700
Subject: [PATCH 06/13] update databricks benchmark scripts

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 .../databricks/gpu_etl_cluster_spec.sh        |  2 +-
 .../benchmark/databricks/init-pip-cuda-12.sh  | 23 ++++++++++++-------
 python/benchmark/databricks/run_benchmark.sh  | 13 ++++++++---
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/python/benchmark/databricks/gpu_etl_cluster_spec.sh b/python/benchmark/databricks/gpu_etl_cluster_spec.sh
index ab31b70e..4a5d7b40 100644
--- a/python/benchmark/databricks/gpu_etl_cluster_spec.sh
+++ b/python/benchmark/databricks/gpu_etl_cluster_spec.sh
@@ -24,7 +24,7 @@ cat <<EOF
         "spark.task.cpus": "1",
         "spark.databricks.delta.preview.enabled": "true",
         "spark.python.worker.reuse": "true",
-        "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_2.12-25.12.0.jar:/databricks/spark/python",
+        "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_${SCALA_VERSION}-${SPARK_RAPIDS_VERSION}.jar:/databricks/spark/python",
         "spark.sql.files.minPartitionNum": "2",
         "spark.sql.execution.arrow.maxRecordsPerBatch": "10000",
         "spark.executor.cores": "8",
diff --git a/python/benchmark/databricks/init-pip-cuda-12.sh b/python/benchmark/databricks/init-pip-cuda-12.sh
index dbc842ac..8996f595 100644
--- a/python/benchmark/databricks/init-pip-cuda-12.sh
+++ b/python/benchmark/databricks/init-pip-cuda-12.sh
@@ -20,18 +20,25 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip
 # also, in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
 RAPIDS_VERSION=25.12.0
-SPARK_RAPIDS_VERSION=25.12.0
+SPARK_RAPIDS_VERSION=26.04.2
 
-curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda12.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
+if [[ $DATABRICKS_RUNTIME_VERSION < "17.3" ]]; then
+    SCALA_VERSION=2.12
+else
+    SCALA_VERSION=2.13
+fi
 
-# install cudatoolkit 12.2 via runfile approach
-wget https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda_12.2.2_535.104.05_linux.run
-sh cuda_12.2.2_535.104.05_linux.run --silent --toolkit
+curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_${SCALA_VERSION}/${SPARK_RAPIDS_VERSION}/rapids-4-spark_${SCALA_VERSION}-${SPARK_RAPIDS_VERSION}-cuda12.jar -o /databricks/jars/rapids-4-spark_${SCALA_VERSION}-${SPARK_RAPIDS_VERSION}.jar
 
+if [[ $DATABRICKS_RUNTIME_VERSION < "16.4" ]]; then
+    # install cudatoolkit 12.2 via runfile approach on DB < 16.4
+    wget https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda_12.2.2_535.104.05_linux.run
+    sh cuda_12.2.2_535.104.05_linux.run --silent --toolkit
 
-# reset symlink 
-rm /usr/local/cuda
-ln -s /usr/local/cuda-12.2 /usr/local/cuda
+    # reset symlink 
+    rm /usr/local/cuda
+    ln -s /usr/local/cuda-12.2 /usr/local/cuda
+fi
 
 # upgrade pip
 /databricks/python/bin/pip install --upgrade pip
diff --git a/python/benchmark/databricks/run_benchmark.sh b/python/benchmark/databricks/run_benchmark.sh
index c7eee3f9..737c3acf 100755
--- a/python/benchmark/databricks/run_benchmark.sh
+++ b/python/benchmark/databricks/run_benchmark.sh
@@ -16,6 +16,8 @@
 
 cluster_type=${1:-gpu_etl}
 db_version=${2:-15.4}
+SPARK_RAPIDS_VERSION=26.04.2
+SCALA_VERSION=2.12
 
 if [[ $cluster_type == "gpu" || $cluster_type == "gpu_etl" ]]; then
     num_cpus=0
@@ -25,16 +27,21 @@ elif [[ $cluster_type == "cpu" ]]; then
     num_gpus=0
 else
     echo "unknown cluster type $cluster_type"
-    echo "usage: $0 cpu|gpu|gpu_etl [15.4]" 
+    echo "usage: $0 cpu|gpu|gpu_etl [15.4|16.4|17.3]" 
     exit 1
 fi
 
-if [[ $db_version > 16.4 && $cluster_type == "gpu_etl" ]]; then
+if [[ $db_version != 17.3 && $cluster_type == "gpu_etl" ]]; then
     echo "spark rapids etl plugin is not supported on databricks ${db_version}"
-    echo "please specify db_version 15.4 or 16.4 for cluster type gpu_etl"
+    echo "please specify db_version 17.3 for cluster type gpu_etl"
     exit 1
 fi
 
+if [[ $db_version > 16.4 ]]; then
+    SCALA_VERSION=2.13
+fi
+
+
 source benchmark_utils.sh
 
 BENCHMARK_DATA_HOME=s3a://spark-rapids-ml-bm-datasets-public

From b2982ae5ad5478a72717cbfc55a2a6235b914e52 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Wed, 3 Jun 2026 13:29:14 -0700
Subject: [PATCH 07/13] update copyright years

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/Dockerfile                                       | 2 +-
 ci/test.sh                                          | 2 +-
 docker/Dockerfile.pip                               | 2 +-
 docker/Dockerfile.python                            | 2 +-
 python/benchmark/databricks/gpu_etl_cluster_spec.sh | 2 +-
 python/benchmark/databricks/init-pip-cuda-12.sh     | 2 +-
 python/benchmark/databricks/run_benchmark.sh        | 2 +-
 python/src/spark_rapids_ml/__init__.py              | 2 +-
 python/src/spark_rapids_ml/classification.py        | 2 +-
 python/src/spark_rapids_ml/feature.py               | 2 +-
 python/src/spark_rapids_ml/knn.py                   | 2 +-
 python/src/spark_rapids_ml/regression.py            | 2 +-
 python/src/spark_rapids_ml/tree.py                  | 2 +-
 python/src/spark_rapids_ml/umap.py                  | 2 +-
 python/src/spark_rapids_ml/utils.py                 | 2 +-
 python/tests/test_linear_model.py                   | 2 +-
 python/tests/test_metrics.py                        | 2 +-
 python/tests/test_pipeline.py                       | 2 +-
 python/tests/test_random_forest.py                  | 2 +-
 python/tests/test_umap.py                           | 2 +-
 20 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/ci/Dockerfile b/ci/Dockerfile
index 306fca74..48f3ee82 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/ci/test.sh b/ci/test.sh
index 94d46f1b..862f81f4 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/docker/Dockerfile.pip b/docker/Dockerfile.pip
index 58c6e5de..509b81eb 100644
--- a/docker/Dockerfile.pip
+++ b/docker/Dockerfile.pip
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/docker/Dockerfile.python b/docker/Dockerfile.python
index 44bf5d40..c46809d6 100644
--- a/docker/Dockerfile.python
+++ b/docker/Dockerfile.python
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/benchmark/databricks/gpu_etl_cluster_spec.sh b/python/benchmark/databricks/gpu_etl_cluster_spec.sh
index 4a5d7b40..56e52de6 100644
--- a/python/benchmark/databricks/gpu_etl_cluster_spec.sh
+++ b/python/benchmark/databricks/gpu_etl_cluster_spec.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/benchmark/databricks/init-pip-cuda-12.sh b/python/benchmark/databricks/init-pip-cuda-12.sh
index 8996f595..b8778e69 100644
--- a/python/benchmark/databricks/init-pip-cuda-12.sh
+++ b/python/benchmark/databricks/init-pip-cuda-12.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/benchmark/databricks/run_benchmark.sh b/python/benchmark/databricks/run_benchmark.sh
index 737c3acf..8d55490b 100755
--- a/python/benchmark/databricks/run_benchmark.sh
+++ b/python/benchmark/databricks/run_benchmark.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/__init__.py b/python/src/spark_rapids_ml/__init__.py
index e9b6bf10..ca5d2f8a 100644
--- a/python/src/spark_rapids_ml/__init__.py
+++ b/python/src/spark_rapids_ml/__init__.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/classification.py b/python/src/spark_rapids_ml/classification.py
index b2a26bf2..eed109cb 100644
--- a/python/src/spark_rapids_ml/classification.py
+++ b/python/src/spark_rapids_ml/classification.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/feature.py b/python/src/spark_rapids_ml/feature.py
index efb620ce..71ef0481 100644
--- a/python/src/spark_rapids_ml/feature.py
+++ b/python/src/spark_rapids_ml/feature.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/knn.py b/python/src/spark_rapids_ml/knn.py
index 041bfd87..adc2e32b 100644
--- a/python/src/spark_rapids_ml/knn.py
+++ b/python/src/spark_rapids_ml/knn.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/regression.py b/python/src/spark_rapids_ml/regression.py
index 5dc5e130..da5e5394 100644
--- a/python/src/spark_rapids_ml/regression.py
+++ b/python/src/spark_rapids_ml/regression.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/tree.py b/python/src/spark_rapids_ml/tree.py
index d4693918..13f5830a 100644
--- a/python/src/spark_rapids_ml/tree.py
+++ b/python/src/spark_rapids_ml/tree.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/umap.py b/python/src/spark_rapids_ml/umap.py
index d20b550e..03cce339 100644
--- a/python/src/spark_rapids_ml/umap.py
+++ b/python/src/spark_rapids_ml/umap.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/src/spark_rapids_ml/utils.py b/python/src/spark_rapids_ml/utils.py
index 945b3d2b..a236f9ba 100644
--- a/python/src/spark_rapids_ml/utils.py
+++ b/python/src/spark_rapids_ml/utils.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/tests/test_linear_model.py b/python/tests/test_linear_model.py
index c06832fd..6a51890d 100644
--- a/python/tests/test_linear_model.py
+++ b/python/tests/test_linear_model.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/tests/test_metrics.py b/python/tests/test_metrics.py
index 6c36359d..0ac2f8e9 100644
--- a/python/tests/test_metrics.py
+++ b/python/tests/test_metrics.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/tests/test_pipeline.py b/python/tests/test_pipeline.py
index 1caeb94b..215c6311 100644
--- a/python/tests/test_pipeline.py
+++ b/python/tests/test_pipeline.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/tests/test_random_forest.py b/python/tests/test_random_forest.py
index 5ab2f45d..088d3b90 100644
--- a/python/tests/test_random_forest.py
+++ b/python/tests/test_random_forest.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/tests/test_umap.py b/python/tests/test_umap.py
index fdf0c84c..e07ca1d7 100644
--- a/python/tests/test_umap.py
+++ b/python/tests/test_umap.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From ab5a9cb31920de121a2a244386e39bef95836172 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Wed, 3 Jun 2026 14:28:18 -0700
Subject: [PATCH 08/13] bump python version in ci Docker

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/Dockerfile b/ci/Dockerfile
index 48f3ee82..1de8ca46 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -48,5 +48,5 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
 
 # install cuML
 ARG RAPIDS_VERSION=26.06
-RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.10 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
+RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.11 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
     && conda clean --all -f -y

From 624ebcbad7e10bd6d509d6d16d5f22907a2ac8ac Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Wed, 3 Jun 2026 14:32:07 -0700
Subject: [PATCH 09/13] add some TODOs to track official 26.06 rapids release

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/Dockerfile                                   | 1 +
 notebooks/databricks/init-pip-cuda-12.sh        | 1 +
 python/benchmark/databricks/init-pip-cuda-12.sh | 1 +
 3 files changed, 3 insertions(+)

diff --git a/ci/Dockerfile b/ci/Dockerfile
index 1de8ca46..f49dc25b 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -48,5 +48,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
 
 # install cuML
 ARG RAPIDS_VERSION=26.06
+# TODO change to rapidsai after rapids 26.06 is released
 RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.11 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
     && conda clean --all -f -y
diff --git a/notebooks/databricks/init-pip-cuda-12.sh b/notebooks/databricks/init-pip-cuda-12.sh
index f53bb590..1c053dfd 100644
--- a/notebooks/databricks/init-pip-cuda-12.sh
+++ b/notebooks/databricks/init-pip-cuda-12.sh
@@ -22,6 +22,7 @@ set -ex
 # Note that the SPARK_RAPIDS_VERSION will not necessarily match the RAPIDS_VERSION. Check https://nvidia.github.io/spark-rapids/docs/download.html for the latest compatible version of 
 # spark-rapids version that verifies compatibility with your Databricks Runtime. (In this case, Databricks 17.3 ML LTS.) The available versions for RAPIDS_VERSION can be
 # found by executing "pip index versions spark-rapids-ml".   
+# TODO change RAPIDS_VERSION to 26.6.0 after rapids 26.06 is released
 RAPIDS_VERSION=25.12.0
 SPARK_RAPIDS_VERSION=26.04.2
 
diff --git a/python/benchmark/databricks/init-pip-cuda-12.sh b/python/benchmark/databricks/init-pip-cuda-12.sh
index b8778e69..850fe7f9 100644
--- a/python/benchmark/databricks/init-pip-cuda-12.sh
+++ b/python/benchmark/databricks/init-pip-cuda-12.sh
@@ -19,6 +19,7 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip
 # IMPORTANT: specify rapids fully 23.10.0 and not 23.10
 # also, in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
+# TODO change RAPIDS_VERSION to 26.6.0 after rapids 26.06 is released
 RAPIDS_VERSION=25.12.0
 SPARK_RAPIDS_VERSION=26.04.2
 

From 71c8197c26a3e5e0572758c2021169be9c40b172 Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Fri, 12 Jun 2026 23:36:38 -0700
Subject: [PATCH 10/13] updates to align with official 26.06 rapids release. 
 update emr and dataproc

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 ci/Dockerfile                                 |  3 +--
 ci/test.sh                                    |  8 +++++++
 docs/source/conf.py                           |  6 ++---
 jvm/README.md                                 |  8 +++----
 jvm/pom.xml                                   |  4 ++--
 notebooks/aws-emr/init-bootstrap-action.sh    | 22 +++++++++----------
 notebooks/aws-emr/init-configurations.json    |  6 ++---
 notebooks/databricks/init-pip-cuda-12.sh      |  3 +--
 notebooks/dataproc/README.md                  |  2 +-
 notebooks/dataproc/spark_rapids_ml.sh         |  4 ++--
 python/benchmark/aws-emr/run_benchmark.sh     | 12 +++++-----
 .../benchmark/databricks/init-pip-cuda-12.sh  |  3 +--
 python/benchmark/dataproc/init_benchmark.sh   |  4 ++--
 python/benchmark/dataproc/run_benchmark.sh    | 16 ++++++++------
 python/run_benchmark.sh                       |  4 ++--
 15 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/ci/Dockerfile b/ci/Dockerfile
index f49dc25b..f838721a 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -48,6 +48,5 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
 
 # install cuML
 ARG RAPIDS_VERSION=26.06
-# TODO change to rapidsai after rapids 26.06 is released
-RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.11 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
+RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION python=3.11 pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION cuda-version=12.2 numpy~=1.0 \
     && conda clean --all -f -y
diff --git a/ci/test.sh b/ci/test.sh
index 862f81f4..a2816091 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -47,3 +47,11 @@ pip install -r requirements_dev.txt && pip install -e .
 # plugin tests
 ./run_plugin_test.sh
 
+# push draft release docs to gh-pages in nightly run
+if [[ $type == "nightly" ]]; then
+    # if everything passed till now update draft release docs in gh-pages
+    # need to invoke docs.sh from top level of repo
+    cd .. # top level of repo
+    ci/docs.sh nightly
+fi
+
diff --git a/docs/source/conf.py b/docs/source/conf.py
index c7db1bf1..b284ea8a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
 project = 'spark-rapids-ml'
-copyright = '2025, NVIDIA'
+copyright = '2026, NVIDIA'
 author = 'NVIDIA'
-release = '25.12.0'
+release = '26.06.0'
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/jvm/README.md b/jvm/README.md
index 1eb7d92f..ad9581e7 100644
--- a/jvm/README.md
+++ b/jvm/README.md
@@ -31,7 +31,7 @@ JDK 17, Spark 4.0
 
     ```shell
     # Create a new conda environment for the client
-    conda create -n pyspark-client python==3.10
+    conda create -n pyspark-client python==3.11
     conda activate pyspark-client
 
     # Install the PySpark client package
@@ -50,10 +50,10 @@ including setting up the server and running client-side tests.
 To start the Spark Connect server with Spark Rapids ML support, follow these steps:
 
 ```shell
-conda activate rapids-25.12  # from spark-rapids-ml installation
+conda activate rapids-26.06  # from spark-rapids-ml installation
 export SPARK_HOME=<directory where spark was installed above>
 export PYSPARK_PYTHON=$(which python)
-export PLUGIN_JAR=$(pip show spark-rapids-ml | grep Location: | cut -d ' ' -f 2 )/spark_rapids_ml/jars/com.nvidia.rapids.ml-25.12.0.jar
+export PLUGIN_JAR=$(pip show spark-rapids-ml | grep Location: | cut -d ' ' -f 2 )/spark_rapids_ml/jars/com.nvidia.rapids.ml-26.06.0.jar
 $SPARK_HOME/sbin/start-connect-server.sh --master local[*] \
   --jars $PLUGIN_JAR \
   --conf spark.driver.memory=20G
@@ -107,7 +107,7 @@ mvn clean package -DskipTests
 if you would like to compile the plugin and run the unit tests, install `spark-rapids-ml` python package and its dependencies per the above instructions and run the following command:
 
 ``` shell
-conda activate rapids-25.12
+conda activate rapids-26.06
 export PYSPARK_PYTHON=$(which python)
 mvn clean package
 ```
diff --git a/jvm/pom.xml b/jvm/pom.xml
index 09aae8bf..9b1e45a6 100644
--- a/jvm/pom.xml
+++ b/jvm/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  Copyright (c) 2025, NVIDIA CORPORATION.
+  Copyright (c) 2026, NVIDIA CORPORATION.
 
   Licensed under the Apache License, Version 2.0 (the "License");
   You may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@
 
     <groupId>com.nvidia.rapids</groupId>
     <artifactId>ml</artifactId>
-    <version>25.12.0</version>
+    <version>26.06.0</version>
     <packaging>jar</packaging>
 
     <properties>
diff --git a/notebooks/aws-emr/init-bootstrap-action.sh b/notebooks/aws-emr/init-bootstrap-action.sh
index f45deb06..af9aa5c9 100755
--- a/notebooks/aws-emr/init-bootstrap-action.sh
+++ b/notebooks/aws-emr/init-bootstrap-action.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,27 +23,27 @@ sudo chmod a+rwx -R /spark-rapids-cgroup
 sudo yum update -y
 sudo yum install -y gcc bzip2-devel libffi-devel tar gzip wget make 
 sudo yum install -y mysql-devel --skip-broken
-sudo bash -c "wget https://www.python.org/ftp/python/3.10.9/Python-3.10.9.tgz && \
-tar xzf Python-3.10.9.tgz && cd Python-3.10.9 && \
+sudo bash -c "wget https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz && \
+tar xzf Python-3.11.9.tgz && cd Python-3.11.9 && \
 ./configure --enable-optimizations && make altinstall"
 
-RAPIDS_VERSION=25.12.0
+RAPIDS_VERSION=26.6.0
 
-sudo /usr/local/bin/pip3.10 install --upgrade pip
+sudo /usr/local/bin/pip3.11 install --upgrade pip
 
 # install scikit-learn 
-sudo /usr/local/bin/pip3.10 install scikit-learn
+sudo /usr/local/bin/pip3.11 install scikit-learn
 
 # install cudf and cuml
-sudo /usr/local/bin/pip3.10 install --no-cache-dir \
+sudo /usr/local/bin/pip3.11 install --no-cache-dir \
          cudf-cu12~=${RAPIDS_VERSION} \
          cuml-cu12~=${RAPIDS_VERSION} \
          cuvs-cu12~=${RAPIDS_VERSION} \
          pylibraft-cu12~=${RAPIDS_VERSION} \
          raft-dask-cu12~=${RAPIDS_VERSION} \
          --extra-index-url=https://pypi.nvidia.com --verbose
-sudo /usr/local/bin/pip3.10 install spark-rapids-ml
-sudo /usr/local/bin/pip3.10 list
+sudo /usr/local/bin/pip3.11 install spark-rapids-ml
+sudo /usr/local/bin/pip3.11 list
 
 # set up no-import-change for cluster if enabled
 if [[ $1 == "--no-import-enabled" && $2 == 1 ]]; then
@@ -55,7 +55,7 @@ if [[ $1 == "--no-import-enabled" && $2 == 1 ]]; then
     sudo rm fake_shell.py
 fi 
 
-# ensure notebook comes up in python 3.10 by using a background script that waits for an 
+# ensure notebook comes up in python 3.11 by using a background script that waits for an
 # application file to be installed before modifying.
 cat <<EOF >/tmp/mod_start_kernel.sh
 #!/bin/bash
@@ -66,7 +66,7 @@ sleep 10
 done
 echo "done waiting"
 sleep 10
-sudo sed -i /mnt/notebook-env/bin/start_kernel_as_emr_notebook.sh -e 's#"spark.pyspark.python": "python3"#"spark.pyspark.python": "/usr/local/bin/python3.10"#g'
+sudo sed -i /mnt/notebook-env/bin/start_kernel_as_emr_notebook.sh -e 's#"spark.pyspark.python": "python3"#"spark.pyspark.python": "/usr/local/bin/python3.11"#g'
 sudo sed -i /mnt/notebook-env/bin/start_kernel_as_emr_notebook.sh -e 's#"spark.pyspark.virtualenv.enabled": "true"#"spark.pyspark.virtualenv.enabled": "false"#g'
 exit 0
 EOF
diff --git a/notebooks/aws-emr/init-configurations.json b/notebooks/aws-emr/init-configurations.json
index 6efdd5ae..fa0be20a 100644
--- a/notebooks/aws-emr/init-configurations.json
+++ b/notebooks/aws-emr/init-configurations.json
@@ -67,10 +67,10 @@
             "spark.sql.execution.arrow.pyspark.enabled":"true",
             "spark.sql.execution.arrow.maxRecordsPerBatch":"100000",
             "spark.sql.cache.serializer":"com.nvidia.spark.ParquetCachedBatchSerializer",
-            "spark.pyspark.python":"/usr/local/bin/python3.10",
-            "spark.pyspark.driver.python":"/usr/local/bin/python3.10",
+            "spark.pyspark.python":"/usr/local/bin/python3.11",
+            "spark.pyspark.driver.python":"/usr/local/bin/python3.11",
             "spark.pyspark.virtualenv.enabled":"false",
-            "spark.yarn.appMasterEnv.PYSPARK_PYTHON":"/usr/local/bin/python3.10",
+            "spark.yarn.appMasterEnv.PYSPARK_PYTHON":"/usr/local/bin/python3.11",
             "spark.dynamicAllocation.enabled":"false",
             "spark.driver.memory":"20g",
             "spark.rpc.message.maxSize":"512",
diff --git a/notebooks/databricks/init-pip-cuda-12.sh b/notebooks/databricks/init-pip-cuda-12.sh
index 1c053dfd..c32a8ab9 100644
--- a/notebooks/databricks/init-pip-cuda-12.sh
+++ b/notebooks/databricks/init-pip-cuda-12.sh
@@ -22,8 +22,7 @@ set -ex
 # Note that the SPARK_RAPIDS_VERSION will not necessarily match the RAPIDS_VERSION. Check https://nvidia.github.io/spark-rapids/docs/download.html for the latest compatible version of 
 # spark-rapids version that verifies compatibility with your Databricks Runtime. (In this case, Databricks 17.3 ML LTS.) The available versions for RAPIDS_VERSION can be
 # found by executing "pip index versions spark-rapids-ml".   
-# TODO change RAPIDS_VERSION to 26.6.0 after rapids 26.06 is released
-RAPIDS_VERSION=25.12.0
+RAPIDS_VERSION=26.6.0
 SPARK_RAPIDS_VERSION=26.04.2
 
 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.13-${SPARK_RAPIDS_VERSION}-cuda12.jar -o /databricks/jars/rapids-4-spark_2.13-${SPARK_RAPIDS_VERSION}.jar
diff --git a/notebooks/dataproc/README.md b/notebooks/dataproc/README.md
index 6a32c72b..e2dc12f5 100644
--- a/notebooks/dataproc/README.md
+++ b/notebooks/dataproc/README.md
@@ -31,7 +31,7 @@ If you already have a Dataproc account, you can run the example notebooks on a D
   If you wish to enable [no-import-change](../README.md#no-import-change) UX for the cluster, change the `spark-rapids-ml-no-import-enabled` metadata value to `1` in the command.  The initialization script `spark_rapids_ml.sh` checks this metadata value and modifies the run time accordingly.
 
   ```
-  export RAPIDS_VERSION=25.12.0
+  export RAPIDS_VERSION=26.6.0
 
   gcloud dataproc clusters create $USER-spark-rapids-ml \
   --image-version=2.2-ubuntu22 \
diff --git a/notebooks/dataproc/spark_rapids_ml.sh b/notebooks/dataproc/spark_rapids_ml.sh
index b00102d1..957ed3ea 100644
--- a/notebooks/dataproc/spark_rapids_ml.sh
+++ b/notebooks/dataproc/spark_rapids_ml.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
 
 set -ex
 
-RAPIDS_VERSION=25.12.0
+RAPIDS_VERSION=26.6.0
 
 
 # install cudf and cuml
diff --git a/python/benchmark/aws-emr/run_benchmark.sh b/python/benchmark/aws-emr/run_benchmark.sh
index 81142d4b..15a364e2 100755
--- a/python/benchmark/aws-emr/run_benchmark.sh
+++ b/python/benchmark/aws-emr/run_benchmark.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -96,11 +96,13 @@ else
     exit 1
 fi
 
+if [[ -z ${CLUSTER_ID} ]]; then
 # start benchmark cluster
-CLUSTER_ID=$(./start_cluster.sh ${cluster_type})
-if [[ $? != 0 ]]; then
-    echo "Failed to start cluster."
-    exit 1
+    CLUSTER_ID=$(./start_cluster.sh ${cluster_type})
+    if [[ $? != 0 ]]; then
+        echo "Failed to start cluster."
+        exit 1
+    fi
 fi
 
 ssh_command () {
diff --git a/python/benchmark/databricks/init-pip-cuda-12.sh b/python/benchmark/databricks/init-pip-cuda-12.sh
index 850fe7f9..247f96b3 100644
--- a/python/benchmark/databricks/init-pip-cuda-12.sh
+++ b/python/benchmark/databricks/init-pip-cuda-12.sh
@@ -19,8 +19,7 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip
 # IMPORTANT: specify rapids fully 23.10.0 and not 23.10
 # also, in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
-# TODO change RAPIDS_VERSION to 26.6.0 after rapids 26.06 is released
-RAPIDS_VERSION=25.12.0
+RAPIDS_VERSION=26.6.0
 SPARK_RAPIDS_VERSION=26.04.2
 
 if [[ $DATABRICKS_RUNTIME_VERSION < "17.3" ]]; then
diff --git a/python/benchmark/dataproc/init_benchmark.sh b/python/benchmark/dataproc/init_benchmark.sh
index 07babbdf..68622024 100755
--- a/python/benchmark/dataproc/init_benchmark.sh
+++ b/python/benchmark/dataproc/init_benchmark.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ function get_metadata_attribute() {
   /usr/share/google/get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}"
 }
 
-RAPIDS_VERSION=$(get_metadata_attribute rapids-version 25.12.0)
+RAPIDS_VERSION=$(get_metadata_attribute rapids-version 26.6.0)
 
 
 # install cudf and cuml
diff --git a/python/benchmark/dataproc/run_benchmark.sh b/python/benchmark/dataproc/run_benchmark.sh
index 7c25db81..9ad8cc69 100755
--- a/python/benchmark/dataproc/run_benchmark.sh
+++ b/python/benchmark/dataproc/run_benchmark.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,7 +35,7 @@ gpu_args=$(cat <<EOF
 --num_gpus=2 \
 --spark_confs spark.executor.resource.gpu.amount=1 \
 --spark_confs spark.task.resource.gpu.amount=1 \
---spark_confs spark.rapids.memory.gpu.pooling.enabled=false
+--spark_confs spark.rapids.memory.gpu.pool=NONE
 EOF
 )
 
@@ -67,11 +67,13 @@ fi
 
 BENCHMARK_DATA_HOME=gs://spark-rapids-ml-benchmarking/datasets
 
-# start benchmark cluster
-./start_cluster.sh $cluster_type
-if [[ $? != 0 ]]; then
-    echo "Failed to start cluster."
-    exit 1
+if [[ -z ${CLUSTER_NAME} ]]; then
+    # start benchmark cluster
+    ./start_cluster.sh $cluster_type
+    if [[ $? != 0 ]]; then
+        echo "Failed to start cluster."
+        exit 1
+    fi
 fi
 
 cluster_name=${CLUSTER_NAME:-"${USER}-spark-rapids-ml-${cluster_type}"}
diff --git a/python/run_benchmark.sh b/python/run_benchmark.sh
index 39a3f86f..b2e3e699 100755
--- a/python/run_benchmark.sh
+++ b/python/run_benchmark.sh
@@ -1,5 +1,5 @@
 #! /bin/bash
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -163,7 +163,7 @@ fi
 spark_rapids_confs=""
 if [[ $cluster_type == "gpu_etl" ]]
 then
-SPARK_RAPIDS_VERSION=25.12.0
+SPARK_RAPIDS_VERSION=26.04.2
 rapids_jar=${rapids_jar:-rapids-4-spark_2.12-$SPARK_RAPIDS_VERSION.jar}
 if [ ! -f $rapids_jar ]; then
     echo "downloading spark rapids jar"

From 67d2f01ee9559fdedf7dcb737f4866cdc9b3f3df Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Tue, 23 Jun 2026 16:11:04 -0700
Subject: [PATCH 11/13] update more pyspark 3.3 drop related code,docs,tests,
 update Docker files to use python > 3.10, fix databricks 17.3 with plugin,
 update plugin to 26.06

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 docker/Dockerfile.pip                         | 28 ++++---
 docker/Dockerfile.python                      | 22 +++---
 docs/site/FAQ.md                              |  4 +-
 docs/site/compatibility.md                    |  2 +-
 docs/site/performance.md                      |  4 +-
 notebooks/databricks/README.md                |  2 +-
 notebooks/databricks/init-pip-cuda-12.sh      |  2 +-
 .../benchmark/databricks/cpu_cluster_spec.sh  |  2 +-
 .../benchmark/databricks/gpu_cluster_spec.sh  |  4 +-
 .../databricks/gpu_etl_cluster_spec.sh        |  2 +-
 .../benchmark/databricks/init-pip-cuda-12.sh  |  2 +-
 python/benchmark/databricks/run_benchmark.sh  |  2 +-
 python/requirements.txt                       |  2 +-
 python/run_benchmark.sh                       | 79 +++++++++----------
 python/src/spark_rapids_ml/__init__.py        |  9 ---
 python/src/spark_rapids_ml/clustering.py      |  4 -
 python/src/spark_rapids_ml/core.py            |  6 --
 python/tests/test_common_estimator.py         |  6 +-
 python/tests/test_dbscan.py                   |  9 +--
 python/tests/test_kmeans.py                   | 19 +----
 python/tests/test_linear_model.py             |  7 +-
 python/tests/test_logistic_regression.py      | 68 +---------------
 python/tests/test_pca.py                      | 11 +--
 python/tests/test_random_forest.py            | 11 +--
 python/tests/test_umap.py                     | 34 +-------
 25 files changed, 90 insertions(+), 251 deletions(-)

diff --git a/docker/Dockerfile.pip b/docker/Dockerfile.pip
index 509b81eb..f1b418e9 100644
--- a/docker/Dockerfile.pip
+++ b/docker/Dockerfile.pip
@@ -14,10 +14,11 @@
 # limitations under the License.
 #
 
-ARG CUDA_VERSION=12.2.2
-FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
+# for simplicity, use oldest ubuntu with python > 3.10, and corresponding available cuda version
+ARG CUDA_VERSION=12.6.0
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu24.04
 
-ARG PYSPARK_VERSION=3.3.1
+ARG PYSPARK_VERSION=3.4.4
 ARG RAPIDS_VERSION=26.06.0
 ARG ARCH=amd64
 #ARG ARCH=arm64
@@ -36,12 +37,19 @@ RUN apt-get update -y \
     && rm -rf /var/lib/apt/lists
 
 RUN apt-get update -y \
-    && apt install -y git numactl python3.10-venv python3-pip python-is-python3 software-properties-common wget zip \
-    && python -m pip install --upgrade pip \
-    && rm -rf /var/lib/apt/lists
+    && apt install -y git numactl python3.12-venv python3-pip python-is-python3 software-properties-common wget zip
+
+# 1. Define the venv path and update system PATH
+ENV VIRTUAL_ENV=/opt/venv
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# 2. Create the virtual environment
+RUN python3 -m venv $VIRTUAL_ENV 
+
+RUN python -m pip install --upgrade pip
 
 RUN apt-get update -y \
-    && apt install -y python3.10-dev cmake curl \
+    && apt install -y cmake curl \
     && rm -rf /var/lib/apt/lists
 
 # install RAPIDS
@@ -55,13 +63,9 @@ RUN pip install --no-cache-dir \
     numpy~=1.0 \
     --extra-index-url=https://pypi.nvidia.com
 
-# install python dependencies
-RUN pip install --no-cache-dir pyspark==${PYSPARK_VERSION} "scikit-learn>=1.2.1" \
-    && pip install --no-cache-dir "black>=23.1.0" "build>=0.10.0" "isort>=5.12.0" "mypy>=1.0.0" \
-    numpydoc pydata-sphinx-theme pylint pytest "sphinx<6.0" "twine>=4.0.0"
 
 # Config JAVA_HOME
-ENV JAVA_HOME /usr/lib/jvm/java-1.17.0-openjdk-$ARCH
+ENV JAVA_HOME=/usr/lib/jvm/java-1.17.0-openjdk-$ARCH
 
 ### END OF CACHE ###
 
diff --git a/docker/Dockerfile.python b/docker/Dockerfile.python
index c46809d6..b84fef35 100644
--- a/docker/Dockerfile.python
+++ b/docker/Dockerfile.python
@@ -34,28 +34,24 @@ RUN apt update -y \
     && rm -rf /var/lib/apt/lists
 
 # Config JAVA_HOME
-ENV JAVA_HOME /usr/lib/jvm/java-1.17.0-openjdk-amd64
+ENV JAVA_HOME=/usr/lib/jvm/java-1.17.0-openjdk-amd64
 
 # Install conda
 ENV PATH="/root/miniconda3/bin:${PATH}"
-RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh \
+ENV CONDA_PLUGINS_AUTO_ACCEPT_TOS="yes"
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
     && mkdir /root/.conda \
-    && bash Miniconda3-py38_4.10.3-Linux-x86_64.sh -b \
-    && rm -f Miniconda3-py38_4.10.3-Linux-x86_64.sh \
-    && conda tos accept --override-channels -c conda-forge -c defaults \
-    && conda init
+    && bash Miniconda3-latest-Linux-x86_64.sh -b \
+    && rm -f Miniconda3-latest-Linux-x86_64.sh \
+    && conda init && conda update -n base conda \
+    && conda install -n base conda-libmamba-solver \
+    && conda config --set solver libmamba
 
 # install cuML
 
-RUN conda install -y -c rapidsai -c conda-forge -c nvidia python=3.10 cuda-version=12.2 cuml=$RAPIDS_VERSION cudf=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION numpy~=1.0 \
+RUN conda install -y -c rapidsai -c conda-forge -c nvidia python=3.11 cuda-version=12.2 cuml=$RAPIDS_VERSION cudf=$RAPIDS_VERSION cuvs=$RAPIDS_VERSION pylibraft=$RAPIDS_VERSION raft-dask=$RAPIDS_VERSION numpy~=1.0 \
     && conda clean --all -f -y
 
-# install python dependencies
-RUN pip install --no-cache-dir "pyspark>=3.2.1" "scikit-learn>=1.2.1" \
-    && pip install --no-cache-dir "black>=23.1.0" "build>=0.10.0" "isort>=5.12.0" "mypy>=1.0.0" \
-    numpydoc pydata-sphinx-theme pylint pytest "sphinx<6.0" "twine>=4.0.0"
-
-### END OF CACHE ###
 
 #ARG RAPIDS_ML_VER=main
 #RUN git clone -b branch-$RAPIDS_ML_VER https://github.com/NVIDIA/spark-rapids-ml.git
diff --git a/docs/site/FAQ.md b/docs/site/FAQ.md
index 53b2dd31..b83dbeb6 100644
--- a/docs/site/FAQ.md
+++ b/docs/site/FAQ.md
@@ -9,11 +9,11 @@ nav_order: 4
 
 ### What versions of Apache Spark are supported?
 
-Apache Spark version 3.3.1 or higher.
+Apache Spark version 3.4 or higher.
 
 ### What versions of Python are supported
 
-Python 3.10 or higher.
+Python 3.11 or higher.
 
 ### How do I fix the "java.lang.IllegalArgumentException: valueCount must be >= 0" error?
 
diff --git a/docs/site/compatibility.md b/docs/site/compatibility.md
index 3432d4d7..c114d0ac 100644
--- a/docs/site/compatibility.md
+++ b/docs/site/compatibility.md
@@ -31,7 +31,7 @@ The following table shows the currently supported algorithms.  The goal is to ex
 
 | Spark Rapids ML | CUDA  | Spark  | Python |
 | :-------------- | :---- | :----- | :----- |
-| 1.0.0           | 12.0+ | 3.3+   | 3.10+  |
+| 26.06.0         | 12.2+ | 3.4+   | 3.11+  |
 
 
 ## Single vs Double precision inputs
diff --git a/docs/site/performance.md b/docs/site/performance.md
index 4804f8c6..41494c7f 100644
--- a/docs/site/performance.md
+++ b/docs/site/performance.md
@@ -10,7 +10,7 @@ nav_order: 6
 ## Stage-level scheduling
 
 Starting from spark-rapids-ml `23.10.0`, stage-level scheduling is automatically enabled.
-Therefore, if you are using Spark **standalone** cluster version **`3.4.0`** or higher, we strongly recommend
+Therefore, if you are using Spark **standalone** cluster version **`3.4`** or higher, we strongly recommend
 configuring the `"spark.task.resource.gpu.amount"` as a fractional value. This will
 enable running multiple tasks in parallel during the ETL phase to help the performance. An example configuration
 would be `"spark.task.resource.gpu.amount=1/spark.executor.cores"`. For example,
@@ -30,7 +30,7 @@ a total of 12 tasks per executor will be executed concurrently during the ETL ph
 is then used internally to the library to automatically carry out the ML training phases using the required 1 gpu per task.
 
 However, if you are using a spark-rapids-ml version earlier than 23.10.0 or a Spark
-standalone cluster version below 3.4.0, you need to make sure there will be only 1 task running at any time per executor.
+standalone cluster version below 3.4, you need to make sure there will be only 1 task running at any time per executor.
 You can set `spark.task.cpus` equal to `spark.executor.cores`, or `"spark.task.resource.gpu.amount"=1`. For example,
 
 ``` bash
diff --git a/notebooks/databricks/README.md b/notebooks/databricks/README.md
index 40735ca5..abc232ca 100644
--- a/notebooks/databricks/README.md
+++ b/notebooks/databricks/README.md
@@ -26,7 +26,7 @@ If you already have a Databricks account, you can run the example notebooks on a
       spark.task.resource.gpu.amount 0.125
       spark.databricks.delta.preview.enabled true
       spark.python.worker.reuse true
-      spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.13-26.04.2.jar:/databricks/spark/python
+      spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.13-26.06.0.jar:/databricks/spark/python
       spark.sql.execution.arrow.maxRecordsPerBatch 100000
       spark.plugins com.nvidia.spark.SQLPlugin
       spark.locality.wait 0s
diff --git a/notebooks/databricks/init-pip-cuda-12.sh b/notebooks/databricks/init-pip-cuda-12.sh
index c32a8ab9..714c555f 100644
--- a/notebooks/databricks/init-pip-cuda-12.sh
+++ b/notebooks/databricks/init-pip-cuda-12.sh
@@ -23,7 +23,7 @@ set -ex
 # spark-rapids version that verifies compatibility with your Databricks Runtime. (In this case, Databricks 17.3 ML LTS.) The available versions for RAPIDS_VERSION can be
 # found by executing "pip index versions spark-rapids-ml".   
 RAPIDS_VERSION=26.6.0
-SPARK_RAPIDS_VERSION=26.04.2
+SPARK_RAPIDS_VERSION=26.06.0
 
 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.13-${SPARK_RAPIDS_VERSION}-cuda12.jar -o /databricks/jars/rapids-4-spark_2.13-${SPARK_RAPIDS_VERSION}.jar
 
diff --git a/python/benchmark/databricks/cpu_cluster_spec.sh b/python/benchmark/databricks/cpu_cluster_spec.sh
index 79997e97..42284c15 100644
--- a/python/benchmark/databricks/cpu_cluster_spec.sh
+++ b/python/benchmark/databricks/cpu_cluster_spec.sh
@@ -18,7 +18,7 @@ cat <<EOF
 {
     "num_workers": $(( num_cpus / 8)),
     "cluster_name": "$cluster_name",
-    "spark_version": "${db_version}.x-cpu-ml-scala2.12",
+    "spark_version": "${db_version}.x-cpu-ml-scala${SCALA_VERSION}",
     "spark_conf": {},
     "aws_attributes": {
         "first_on_demand": 1,
diff --git a/python/benchmark/databricks/gpu_cluster_spec.sh b/python/benchmark/databricks/gpu_cluster_spec.sh
index 7578e351..147efa9f 100644
--- a/python/benchmark/databricks/gpu_cluster_spec.sh
+++ b/python/benchmark/databricks/gpu_cluster_spec.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Copyright (c) 2024-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@ cat <<EOF
 {
     "num_workers": $num_gpus,
     "cluster_name": "$cluster_name",
-    "spark_version": "${db_version}.x-gpu-ml-scala2.12",
+    "spark_version": "${db_version}.x-gpu-ml-scala${SCALA_VERSION}",
     "spark_conf": {
         "spark.task.resource.gpu.amount": "0.25",
         "spark.task.cpus": "1",
diff --git a/python/benchmark/databricks/gpu_etl_cluster_spec.sh b/python/benchmark/databricks/gpu_etl_cluster_spec.sh
index 56e52de6..ffbf068f 100644
--- a/python/benchmark/databricks/gpu_etl_cluster_spec.sh
+++ b/python/benchmark/databricks/gpu_etl_cluster_spec.sh
@@ -18,7 +18,7 @@ cat <<EOF
 {
     "num_workers": $num_gpus,
     "cluster_name": "$cluster_name",
-    "spark_version": "${db_version}.x-gpu-ml-scala2.12",
+    "spark_version": "${db_version}.x-gpu-ml-scala${SCALA_VERSION}",
     "spark_conf": {
         "spark.task.resource.gpu.amount": "0.25",
         "spark.task.cpus": "1",
diff --git a/python/benchmark/databricks/init-pip-cuda-12.sh b/python/benchmark/databricks/init-pip-cuda-12.sh
index 247f96b3..637b37b9 100644
--- a/python/benchmark/databricks/init-pip-cuda-12.sh
+++ b/python/benchmark/databricks/init-pip-cuda-12.sh
@@ -20,7 +20,7 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip
 # also, in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
 RAPIDS_VERSION=26.6.0
-SPARK_RAPIDS_VERSION=26.04.2
+SPARK_RAPIDS_VERSION=26.06.0
 
 if [[ $DATABRICKS_RUNTIME_VERSION < "17.3" ]]; then
     SCALA_VERSION=2.12
diff --git a/python/benchmark/databricks/run_benchmark.sh b/python/benchmark/databricks/run_benchmark.sh
index 8d55490b..db253b96 100755
--- a/python/benchmark/databricks/run_benchmark.sh
+++ b/python/benchmark/databricks/run_benchmark.sh
@@ -16,7 +16,7 @@
 
 cluster_type=${1:-gpu_etl}
 db_version=${2:-15.4}
-SPARK_RAPIDS_VERSION=26.04.2
+SPARK_RAPIDS_VERSION=26.06.0
 SCALA_VERSION=2.12
 
 if [[ $cluster_type == "gpu" || $cluster_type == "gpu_etl" ]]; then
diff --git a/python/requirements.txt b/python/requirements.txt
index 9aa2c1a6..473b773d 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -14,6 +14,6 @@
 
 numpy_allocator
 psutil
-pyspark>=3.2.1,<4.0
+pyspark>=3.4.1,<4.0
 scikit-learn>=1.2.1
 cryptography==46.0.6
diff --git a/python/run_benchmark.sh b/python/run_benchmark.sh
index b2e3e699..4c3ed34d 100755
--- a/python/run_benchmark.sh
+++ b/python/run_benchmark.sh
@@ -163,7 +163,7 @@ fi
 spark_rapids_confs=""
 if [[ $cluster_type == "gpu_etl" ]]
 then
-SPARK_RAPIDS_VERSION=26.04.2
+SPARK_RAPIDS_VERSION=26.06.0
 rapids_jar=${rapids_jar:-rapids-4-spark_2.12-$SPARK_RAPIDS_VERSION.jar}
 if [ ! -f $rapids_jar ]; then
     echo "downloading spark rapids jar"
@@ -561,48 +561,43 @@ if [[ "${MODE}" =~ "logistic_regression" ]] || [[ "${MODE}" == "all" ]]; then
     done
     
     # Logistic Regression with sparse vector dataset
-    PYSPARK_4_below=$(python -c "import pyspark; from packaging import version; cmp = version.parse(pyspark.__version__) < version.parse('3.4.0'); print(cmp);")
-    if [ $PYSPARK_4_below = "True" ]; then
-        echo "Skip benchmarking logistic regression on sparse vectors. Spark 3.4 and above is required."
-    else
-        for num_classes in ${num_classes_list}; do
-            data_path=${gen_data_root}/sparse_logistic_regression/r${num_rows}_c${num_sparse_cols}_float64_ncls${num_classes}.parquet
+    for num_classes in ${num_classes_list}; do
+        data_path=${gen_data_root}/sparse_logistic_regression/r${num_rows}_c${num_sparse_cols}_float64_ncls${num_classes}.parquet
 
-            if [[ $gen_data == "true" && ! -d ${data_path} ]]; then
-                python $gen_data_script sparse_regression \
-                --n_informative $(( num_cols / 3 ))  \
-                --num_rows $num_rows \
-                --num_cols $num_sparse_cols \
-                --output_num_files $output_num_files \
-                --dtype "float64" \
-                --feature_type "vector" \
-                --output_dir ${data_path} \
-                --density $density \
-                --logistic_regression "True" \
-                --n_classes ${num_classes} \
-                --use_gpu ${use_gpu} \
-                $common_confs
-            fi
-
-            family="Binomial"
-                
-            echo "$sep algo: sparse ${family} logistic regression - elasticnet regularization $sep"
-            python ./benchmark/benchmark_runner.py logistic_regression \
-                --standardization False \
-                --maxIter 200 \
-                --tol 1e-30 \
-                --regParam 0.00001 \
-                --elasticNetParam 0.2 \
-                --num_gpus $num_gpus \
-                --num_cpus $num_cpus \
-                --num_runs $num_runs \
-                --train_path ${data_path} \
-                --transform_path ${data_path} \
-                --report_path "report_sparse_logistic_regression_${cluster_type}.csv" \
-                $common_confs $spark_rapids_confs \
-                "${EXTRA_ARGS[@]}"
-        done
-    fi
+        if [[ $gen_data == "true" && ! -d ${data_path} ]]; then
+            python $gen_data_script sparse_regression \
+            --n_informative $(( num_cols / 3 ))  \
+            --num_rows $num_rows \
+            --num_cols $num_sparse_cols \
+            --output_num_files $output_num_files \
+            --dtype "float64" \
+            --feature_type "vector" \
+            --output_dir ${data_path} \
+            --density $density \
+            --logistic_regression "True" \
+            --n_classes ${num_classes} \
+            --use_gpu ${use_gpu} \
+            $common_confs
+        fi
+
+        family="Binomial"
+            
+        echo "$sep algo: sparse ${family} logistic regression - elasticnet regularization $sep"
+        python ./benchmark/benchmark_runner.py logistic_regression \
+            --standardization False \
+            --maxIter 200 \
+            --tol 1e-30 \
+            --regParam 0.00001 \
+            --elasticNetParam 0.2 \
+            --num_gpus $num_gpus \
+            --num_cpus $num_cpus \
+            --num_runs $num_runs \
+            --train_path ${data_path} \
+            --transform_path ${data_path} \
+            --report_path "report_sparse_logistic_regression_${cluster_type}.csv" \
+            $common_confs $spark_rapids_confs \
+            "${EXTRA_ARGS[@]}"
+    done
 fi
 
 # UMAP
diff --git a/python/src/spark_rapids_ml/__init__.py b/python/src/spark_rapids_ml/__init__.py
index ca5d2f8a..81472555 100644
--- a/python/src/spark_rapids_ml/__init__.py
+++ b/python/src/spark_rapids_ml/__init__.py
@@ -17,12 +17,3 @@
 
 import pandas as pd
 import pyspark
-
-# patch pandas 2.0+ for backward compatibility with psypark < 3.4
-from packaging import version
-
-if version.parse(pyspark.__version__) < version.parse("3.4.0") and version.parse(
-    pd.__version__
-) >= version.parse("2.0.0"):
-    pd.DataFrame.iteritems = pd.DataFrame.items
-    pd.Series.iteritems = pd.Series.items
diff --git a/python/src/spark_rapids_ml/clustering.py b/python/src/spark_rapids_ml/clustering.py
index 7760abbf..b0d9315b 100644
--- a/python/src/spark_rapids_ml/clustering.py
+++ b/python/src/spark_rapids_ml/clustering.py
@@ -100,10 +100,6 @@ def _param_mapping(cls) -> Dict[str, Optional[str]]:
         import pyspark
         from packaging import version
 
-        if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-            param_map.pop("solver")
-            param_map.pop("maxBlockSizeInMB")
-
         return param_map
 
     @classmethod
diff --git a/python/src/spark_rapids_ml/core.py b/python/src/spark_rapids_ml/core.py
index 0ca83f50..db670207 100644
--- a/python/src/spark_rapids_ml/core.py
+++ b/python/src/spark_rapids_ml/core.py
@@ -638,12 +638,6 @@ def _skip_stage_level_scheduling(self, spark_version: str, conf: SparkConf) -> b
         """Check if stage-level scheduling is not needed,
         return true to skip stage-level scheduling"""
 
-        if spark_version < "3.4.0":
-            self.logger.info(
-                "Stage-level scheduling in spark-rapids-ml requires spark version 3.4.0+"
-            )
-            return True
-
         if "3.4.0" <= spark_version < "3.5.1" and not _is_standalone_or_localcluster(
             conf
         ):
diff --git a/python/tests/test_common_estimator.py b/python/tests/test_common_estimator.py
index f576a682..e03fe6ea 100644
--- a/python/tests/test_common_estimator.py
+++ b/python/tests/test_common_estimator.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -634,9 +634,6 @@ def test_stage_level_scheduling() -> None:
     assert not dummy._skip_stage_level_scheduling("3.5.0", standalone_conf)
     assert not dummy._skip_stage_level_scheduling("3.5.1", standalone_conf)
 
-    # spark version < 3.4.0
-    assert dummy._skip_stage_level_scheduling("3.3.0", standalone_conf)
-
     # spark.executor.cores is not set
     bad_conf = (
         SparkConf()
@@ -711,7 +708,6 @@ def test_stage_level_scheduling() -> None:
                 .set("spark.executor.resource.gpu.amount", "1")
                 .set("spark.task.resource.gpu.amount", gpu_amount)
             )
-            assert dummy._skip_stage_level_scheduling("3.3.0", conf)
             assert dummy._skip_stage_level_scheduling("3.4.0", conf)
             assert dummy._skip_stage_level_scheduling("3.4.1", conf)
             assert dummy._skip_stage_level_scheduling("3.5.0", conf)
diff --git a/python/tests/test_dbscan.py b/python/tests/test_dbscan.py
index ca6bc786..b941ca4f 100644
--- a/python/tests/test_dbscan.py
+++ b/python/tests/test_dbscan.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,12 +21,7 @@
 import pytest
 from _pytest.logging import LogCaptureFixture
 from packaging import version
-
-if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-    from pyspark.sql.utils import IllegalArgumentException  # type: ignore
-else:
-    from pyspark.errors import IllegalArgumentException  # type: ignore
-
+from pyspark.errors import IllegalArgumentException  # type: ignore
 from pyspark.ml.functions import array_to_vector
 from pyspark.ml.linalg import DenseVector, Vectors
 from pyspark.sql.dataframe import DataFrame
diff --git a/python/tests/test_kmeans.py b/python/tests/test_kmeans.py
index f0a98235..0ad5b480 100644
--- a/python/tests/test_kmeans.py
+++ b/python/tests/test_kmeans.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,16 +17,9 @@
 from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
 
 import numpy as np
-import pyspark
 import pytest
 from _pytest.logging import LogCaptureFixture
-from packaging import version
-
-if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-    from pyspark.sql.utils import IllegalArgumentException  # type: ignore
-else:
-    from pyspark.errors import IllegalArgumentException  # type: ignore
-
+from pyspark.errors import IllegalArgumentException  # type: ignore
 from pyspark.ml.clustering import KMeans as SparkKMeans
 from pyspark.ml.clustering import KMeansModel as SparkKMeansModel
 from pyspark.ml.functions import array_to_vector
@@ -443,13 +436,7 @@ def test_kmeans_spark_compat(
         ]
         df = spark.createDataFrame(data, ["features", "weighCol"])
 
-        import pyspark
-        from packaging import version
-
-        if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-            kmeans = _KMeans(k=2)
-        else:
-            kmeans = _KMeans(k=2, solver="auto", maxBlockSizeInMB=0)  # type: ignore # only spark >= 3.4 supports solver and maxblockSize
+        kmeans = _KMeans(k=2, solver="auto", maxBlockSizeInMB=0)  # type: ignore # only spark >= 3.4 supports solver and maxblockSize
 
         kmeans.setSeed(1)
         kmeans.setMaxIter(10)
diff --git a/python/tests/test_linear_model.py b/python/tests/test_linear_model.py
index 6a51890d..82942050 100644
--- a/python/tests/test_linear_model.py
+++ b/python/tests/test_linear_model.py
@@ -21,12 +21,7 @@
 import pytest
 from _pytest.logging import LogCaptureFixture
 from packaging import version
-
-if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-    from pyspark.sql.utils import IllegalArgumentException  # type: ignore
-else:
-    from pyspark.errors import IllegalArgumentException  # type: ignore
-
+from pyspark.errors import IllegalArgumentException  # type: ignore
 from pyspark.ml import Model
 from pyspark.ml.evaluation import RegressionEvaluator
 from pyspark.ml.feature import VectorAssembler
diff --git a/python/tests/test_logistic_regression.py b/python/tests/test_logistic_regression.py
index c5fecbcd..297c8960 100644
--- a/python/tests/test_logistic_regression.py
+++ b/python/tests/test_logistic_regression.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Copyright (c) 2024-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,18 +19,12 @@
 import cupyx.scipy.sparse
 import numpy as np
 import pandas as pd
-import pyspark
 import pytest
 from _pytest.logging import LogCaptureFixture
 from gen_data_distributed import SparseRegressionDataGen
 from packaging import version
 from py4j.protocol import Py4JJavaError
-
-if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-    from pyspark.sql.utils import IllegalArgumentException  # type: ignore
-else:
-    from pyspark.errors import IllegalArgumentException  # type: ignore
-
+from pyspark.errors import IllegalArgumentException  # type: ignore
 from pyspark.ml.classification import LogisticRegression as SparkLogisticRegression
 from pyspark.ml.classification import (
     LogisticRegressionModel as SparkLogisticRegressionModel,
@@ -1247,14 +1241,6 @@ def test_crossvalidator_logistic_regression(
     if convert_to_sparse:
         assert feature_type == feature_types.vector
 
-        if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-            import logging
-
-            err_msg = "pyspark < 3.4 is detected. Cannot import pyspark `unwrap_udt` function. "
-            "The test case will be skipped. Please install pyspark>=3.4."
-            logging.info(err_msg)
-        return
-
     # Train a toy model
 
     n_classes = 2 if metric_name == "areaUnderROC" else 10
@@ -1625,13 +1611,6 @@ def test_compat_sparse_binomial(
         assert gpu_lr.hasParam("enable_sparse_data_optim") is True
         assert gpu_lr.getOrDefault("enable_sparse_data_optim") == None
 
-        if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-            err_msg = "Cannot import pyspark `unwrap_udt` function. Please install pyspark>=3.4 "
-            "or run on Databricks Runtime."
-            with pytest.raises(RuntimeError, match=err_msg):
-                gpu_lr.fit(bdf)
-            return
-
         check_sparse_estimator_preprocess(gpu_lr, bdf, dimension=3)
 
         gpu_model = gpu_lr.fit(bdf)
@@ -1677,13 +1656,6 @@ def test_compat_sparse_multinomial(
         assert gpu_lr.hasParam("enable_sparse_data_optim") is True
         assert gpu_lr.getOrDefault("enable_sparse_data_optim") == None
 
-        if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-            err_msg = "Cannot import pyspark `unwrap_udt` function. Please install pyspark>=3.4 "
-            "or run on Databricks Runtime."
-            with pytest.raises(RuntimeError, match=err_msg):
-                gpu_lr.fit(mdf)
-            return
-
         gpu_model = gpu_lr.fit(mdf)
 
         cpu_lr = SparkLogisticRegression(**params)
@@ -1705,16 +1677,6 @@ def test_sparse_nlp20news(
     standardization: bool,
     caplog: LogCaptureFixture,
 ) -> None:
-    if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-        import logging
-
-        err_msg = (
-            "pyspark < 3.4 is detected. Cannot import pyspark `unwrap_udt` function. "
-        )
-        "The test case will be skipped. Please install pyspark>=3.4."
-        logging.info(err_msg)
-        return
-
     tolerance = 0.001
     reg_param = 1e-2
 
@@ -1813,16 +1775,6 @@ def test_quick_sparse(
     gpu_number: int,
     float32_inputs: bool = True,
 ) -> None:
-    if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-        import logging
-
-        err_msg = (
-            "pyspark < 3.4 is detected. Cannot import pyspark `unwrap_udt` function. "
-        )
-        "The test case will be skipped. Please install pyspark>=3.4."
-        logging.info(err_msg)
-        return
-
     convert_to_sparse = True
     tolerance = 0.005
     reg_param = reg_factors[0]
@@ -2069,16 +2021,6 @@ def test_standardization_sparse_example(
 ) -> None:
     _convert_index = "int32" if random.choice([True, False]) is True else "int64"
 
-    if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-        import logging
-
-        err_msg = (
-            "pyspark < 3.4 is detected. Cannot import pyspark `unwrap_udt` function. "
-        )
-        "The test case will be skipped. Please install pyspark>=3.4."
-        logging.info(err_msg)
-        return
-
     tolerance = 0.001
     # Compare accuracy and probability only when regularizaiton is disabled.
     # It is observed that no regularization leads to large absolute values of coefficients, and
@@ -2347,9 +2289,6 @@ def test_sparse_all_zeroes(
             "labelCol": "label",
         }
 
-        if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-            return
-
         gpu_lr = LogisticRegression(enable_sparse_data_optim=True, **params)
         gpu_model = gpu_lr.fit(bdf)
         check_sparse_model_preprocess(gpu_model, bdf)
@@ -2390,9 +2329,6 @@ def test_sparse_one_gpu_all_zeroes(
             "labelCol": "label",
         }
 
-        if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-            return
-
         gpu_lr = LogisticRegression(
             enable_sparse_data_optim=True, verbose=True, **params
         )
diff --git a/python/tests/test_pca.py b/python/tests/test_pca.py
index fdbe5fe6..de08045e 100644
--- a/python/tests/test_pca.py
+++ b/python/tests/test_pca.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Copyright (c) 2022-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,16 +17,9 @@
 from typing import Any, Dict, Tuple, Type, TypeVar
 
 import numpy as np
-import pyspark
 import pytest
 from _pytest.logging import LogCaptureFixture
-from packaging import version
-
-if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-    from pyspark.sql.utils import IllegalArgumentException  # type: ignore
-else:
-    from pyspark.errors import IllegalArgumentException  # type: ignore
-
+from pyspark.errors import IllegalArgumentException  # type: ignore
 from pyspark.ml.feature import PCA as SparkPCA
 from pyspark.ml.feature import PCAModel as SparkPCAModel
 from pyspark.ml.functions import array_to_vector
diff --git a/python/tests/test_random_forest.py b/python/tests/test_random_forest.py
index 088d3b90..a5ea86c8 100644
--- a/python/tests/test_random_forest.py
+++ b/python/tests/test_random_forest.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025-2026, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,17 +18,10 @@
 from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union, cast
 
 import numpy as np
-import pyspark
 import pytest
 from _pytest.logging import LogCaptureFixture
 from cuml import accuracy_score
-from packaging import version
-
-if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-    from pyspark.sql.utils import IllegalArgumentException  # type: ignore
-else:
-    from pyspark.errors import IllegalArgumentException  # type: ignore
-
+from pyspark.errors import IllegalArgumentException  # type: ignore
 from pyspark.ml.classification import (
     RandomForestClassificationModel as SparkRFClassificationModel,
 )
diff --git a/python/tests/test_umap.py b/python/tests/test_umap.py
index e07ca1d7..538f9bc6 100644
--- a/python/tests/test_umap.py
+++ b/python/tests/test_umap.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2025-2026, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -442,23 +442,12 @@ def test_umap_model_persistence(
     import os
     import re
 
-    import pyspark
-    from packaging import version
-
     with CleanSparkSession() as spark:
 
         n_rows = 5000
         n_cols = 200
 
         if sparse_fit:
-            if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-                import logging
-
-                err_msg = "pyspark < 3.4 is detected. Cannot import pyspark `unwrap_udt` function for SparseVector. "
-                "The test case will be skipped. Please install pyspark>=3.4."
-                logging.info(err_msg)
-                return
-
             sparse_vec_data, input_raw_data = _load_sparse_data(n_rows, n_cols, 30)
             df = spark.createDataFrame(sparse_vec_data, ["features"])
         else:
@@ -547,17 +536,6 @@ def test_umap_chunking(
         )
 
         if sparse_fit:
-            import pyspark
-            from packaging import version
-
-            if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-                import logging
-
-                err_msg = "pyspark < 3.4 is detected. Cannot import pyspark `unwrap_udt` function for SparseVector. "
-                "The test case will be skipped. Please install pyspark>=3.4."
-                logging.info(err_msg)
-                return
-
             sparse_vec_data, input_raw_data = _load_sparse_data(n_rows, n_cols, 30)
             df = spark.createDataFrame(sparse_vec_data, ["features"])
             nbytes = input_raw_data.data.nbytes
@@ -742,17 +720,7 @@ def test_umap_build_algo(gpu_number: int, metric: str) -> None:
 def test_umap_sparse_vector(
     n_rows: int, n_cols: int, nnz: int, metric: str, gpu_number: int, tmp_path: str
 ) -> None:
-    import pyspark
     from cuml.manifold import UMAP as cumlUMAP
-    from packaging import version
-
-    if version.parse(pyspark.__version__) < version.parse("3.4.0"):
-        import logging
-
-        err_msg = "pyspark < 3.4 is detected. Cannot import pyspark `unwrap_udt` function for SparseVector. "
-        "The test case will be skipped. Please install pyspark>=3.4."
-        logging.info(err_msg)
-        return
 
     # Hellinger measures similarity between probability distributions; normalize to prevent distances from collapsing to zero
     normalize = metric == "hellinger"

From 4d7490b150930b04146edf14747dcdfb63b4785e Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Tue, 23 Jun 2026 16:16:16 -0700
Subject: [PATCH 12/13] license

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 python/benchmark/databricks/cpu_cluster_spec.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/benchmark/databricks/cpu_cluster_spec.sh b/python/benchmark/databricks/cpu_cluster_spec.sh
index 42284c15..7efda20b 100644
--- a/python/benchmark/databricks/cpu_cluster_spec.sh
+++ b/python/benchmark/databricks/cpu_cluster_spec.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# Copyright (c) 2024-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 7492b96625c50fde48dacf4292d2f15618c7e81b Mon Sep 17 00:00:00 2001
From: Erik Ordentlich <eordentlich@gmail.com>
Date: Wed, 24 Jun 2026 18:14:18 -0700
Subject: [PATCH 13/13] make copyrights more consistent, make example
 Dockerfile.pip use python 3.11 for compatibility with spark < 4

Signed-off-by: Erik Ordentlich <eordentlich@gmail.com>
---
 docker/Dockerfile.pip                        | 16 +++++++++++-----
 docs/site/compatibility.md                   |  2 +-
 docs/source/conf.py                          |  4 ++--
 jvm/pom.xml                                  |  2 +-
 notebooks/aws-emr/init-bootstrap-action.sh   |  2 +-
 notebooks/databricks/init-pip-cuda-12.sh     |  2 +-
 notebooks/dataproc/spark_rapids_ml.sh        |  2 +-
 python/benchmark/aws-emr/run_benchmark.sh    |  2 +-
 python/benchmark/databricks/README.md        |  2 +-
 python/benchmark/databricks/run_benchmark.sh |  2 +-
 python/benchmark/dataproc/init_benchmark.sh  |  2 +-
 python/benchmark/dataproc/run_benchmark.sh   |  2 +-
 python/pyproject.toml                        |  2 +-
 python/run_benchmark.sh                      |  2 +-
 python/tests/test_random_forest.py           |  2 +-
 python/tests/test_umap.py                    |  2 +-
 16 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/docker/Dockerfile.pip b/docker/Dockerfile.pip
index f1b418e9..f435ae33 100644
--- a/docker/Dockerfile.pip
+++ b/docker/Dockerfile.pip
@@ -14,9 +14,8 @@
 # limitations under the License.
 #
 
-# for simplicity, use oldest ubuntu with python > 3.10, and corresponding available cuda version
-ARG CUDA_VERSION=12.6.0
-FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu24.04
+ARG CUDA_VERSION=12.2.2
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 
 ARG PYSPARK_VERSION=3.4.4
 ARG RAPIDS_VERSION=26.06.0
@@ -36,15 +35,22 @@ RUN apt-get update -y \
     && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y openjdk-17-jdk \
     && rm -rf /var/lib/apt/lists
 
+# some of the packages below are needed so that python, which is built from source further down, has full functionality
 RUN apt-get update -y \
-    && apt install -y git numactl python3.12-venv python3-pip python-is-python3 software-properties-common wget zip
+    && apt install -y git numactl software-properties-common wget zip build-essential zlib1g-dev \
+    libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev libbz2-dev
+
+# build and install python 3.11 from source, since the base image only provides python 3.10
+RUN bash -c "wget https://www.python.org/ftp/python/3.11.9/Python-3.11.9.tgz && \
+    tar xzf Python-3.11.9.tgz && cd Python-3.11.9 && \
+    ./configure --enable-optimizations && make altinstall"
 
 # 1. Define the venv path and update system PATH
 ENV VIRTUAL_ENV=/opt/venv
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 # 2. Create the virtual environment
-RUN python3 -m venv $VIRTUAL_ENV 
+RUN python3.11 -m venv $VIRTUAL_ENV 
 
 RUN python -m pip install --upgrade pip
 
diff --git a/docs/site/compatibility.md b/docs/site/compatibility.md
index c114d0ac..f640b1b1 100644
--- a/docs/site/compatibility.md
+++ b/docs/site/compatibility.md
@@ -31,7 +31,7 @@ The following table shows the currently supported algorithms.  The goal is to ex
 
 | Spark Rapids ML | CUDA  | Spark  | Python |
 | :-------------- | :---- | :----- | :----- |
-| 26.0.6          | 12.2+ | 3.4+   | 3.11+  |
+| 26.6.0          | 12.2+ | 3.4+   | 3.11+  |
 
 
 ## Single vs Double precision inputs
diff --git a/docs/source/conf.py b/docs/source/conf.py
index b284ea8a..e7fd2fe0 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
 project = 'spark-rapids-ml'
-copyright = '2026, NVIDIA'
+copyright = '2025-2026, NVIDIA'
 author = 'NVIDIA'
 release = '26.06.0'
 
diff --git a/jvm/pom.xml b/jvm/pom.xml
index 9b1e45a6..0f2b1fc5 100644
--- a/jvm/pom.xml
+++ b/jvm/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  Copyright (c) 2026, NVIDIA CORPORATION.
+  Copyright (c) 2025-2026, NVIDIA CORPORATION.
 
   Licensed under the Apache License, Version 2.0 (the "License");
   You may not use this file except in compliance with the License.
diff --git a/notebooks/aws-emr/init-bootstrap-action.sh b/notebooks/aws-emr/init-bootstrap-action.sh
index af9aa5c9..de6da8d7 100755
--- a/notebooks/aws-emr/init-bootstrap-action.sh
+++ b/notebooks/aws-emr/init-bootstrap-action.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/notebooks/databricks/init-pip-cuda-12.sh b/notebooks/databricks/init-pip-cuda-12.sh
index 714c555f..aa26cda8 100644
--- a/notebooks/databricks/init-pip-cuda-12.sh
+++ b/notebooks/databricks/init-pip-cuda-12.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/notebooks/dataproc/spark_rapids_ml.sh b/notebooks/dataproc/spark_rapids_ml.sh
index 957ed3ea..c16b6c5f 100644
--- a/notebooks/dataproc/spark_rapids_ml.sh
+++ b/notebooks/dataproc/spark_rapids_ml.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/benchmark/aws-emr/run_benchmark.sh b/python/benchmark/aws-emr/run_benchmark.sh
index 15a364e2..168e5706 100755
--- a/python/benchmark/aws-emr/run_benchmark.sh
+++ b/python/benchmark/aws-emr/run_benchmark.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/benchmark/databricks/README.md b/python/benchmark/databricks/README.md
index ba11fcd7..e1e64f41 100644
--- a/python/benchmark/databricks/README.md
+++ b/python/benchmark/databricks/README.md
@@ -41,7 +41,7 @@ This directory contains shell scripts for running larger scale benchmarks on Dat
 
 2. The benchmarks can be run as
     ```bash
-    ./run_benchmark.sh [cpu|gpu|gpu_etl] [[12.2|13.3|14.3]] >> benchmark_log
+    ./run_benchmark.sh [cpu|gpu|gpu_etl] [[15.4|16.4|17.3]] >> benchmark_log
     ```
 
     The script creates a cpu or gpu cluster, respectively using the cluster specs in [cpu_cluster_spec](./cpu_cluster_spec.sh), [gpu_cluster_spec](./gpu_cluster_spec.sh), [gpu_etl_cluster_spec](./gpu_etl_cluster_spec.sh), depending on the supplied argument.  In gpu and gpu_etl mode each algorithm benchmark is run 3 times, and similarly in cpu mode, except for kmeans and random forest classifier and regressor which are each run 1 time due to their long running times.  gpu_etl mode also uses the [spark-rapids](https://github.com/NVIDIA/spark-rapids) gpu accelerated plugin.
diff --git a/python/benchmark/databricks/run_benchmark.sh b/python/benchmark/databricks/run_benchmark.sh
index db253b96..8a4bd385 100755
--- a/python/benchmark/databricks/run_benchmark.sh
+++ b/python/benchmark/databricks/run_benchmark.sh
@@ -15,7 +15,7 @@
 
 
 cluster_type=${1:-gpu_etl}
-db_version=${2:-15.4}
+db_version=${2:-17.3}
 SPARK_RAPIDS_VERSION=26.06.0
 SCALA_VERSION=2.12
 
diff --git a/python/benchmark/dataproc/init_benchmark.sh b/python/benchmark/dataproc/init_benchmark.sh
index 68622024..f6a2250b 100755
--- a/python/benchmark/dataproc/init_benchmark.sh
+++ b/python/benchmark/dataproc/init_benchmark.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/benchmark/dataproc/run_benchmark.sh b/python/benchmark/dataproc/run_benchmark.sh
index 9ad8cc69..9da84b05 100755
--- a/python/benchmark/dataproc/run_benchmark.sh
+++ b/python/benchmark/dataproc/run_benchmark.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 3c418d85..b85cc554 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,4 +1,4 @@
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/run_benchmark.sh b/python/run_benchmark.sh
index 4c3ed34d..4bc7ddbc 100755
--- a/python/run_benchmark.sh
+++ b/python/run_benchmark.sh
@@ -1,5 +1,5 @@
 #! /bin/bash
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/tests/test_random_forest.py b/python/tests/test_random_forest.py
index a5ea86c8..db4d437d 100644
--- a/python/tests/test_random_forest.py
+++ b/python/tests/test_random_forest.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/tests/test_umap.py b/python/tests/test_umap.py
index 538f9bc6..450591f1 100644
--- a/python/tests/test_umap.py
+++ b/python/tests/test_umap.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2026, NVIDIA CORPORATION.
+# Copyright (c) 2025-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.