kedro-org · MigQ2 · Oct 1, 2024 · Oct 1, 2024
diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md
@@ -20,13 +20,16 @@
 * Fixed deprecated load and save approaches of GBQTableDataset and GBQQueryDataset by invoking save and load directly over `pandas-gbq` lib
 
 ## Breaking Changes
+* `_get_spark()` no longer uses `databricks-connect` by default when run in a Databricks notebook.
+
 ## Community contributions
 Many thanks to the following Kedroids for contributing PRs to this release:
 * [Brandon Meek](https://github.com/bpmeek)
 * [yury-fedotov](https://github.com/yury-fedotov)
 * [gitgud5000](https://github.com/gitgud5000)
 * [janickspirig](https://github.com/janickspirig)
 * [Galen Seilis](https://github.com/galenseilis)
+* [MigQ2](https://github.com/MigQ2)
 
 
 # Release 4.1.0

diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
@@ -38,7 +38,12 @@ def _get_spark() -> Any:
     extended configuration mechanisms and notebook compatibility,
     otherwise we use classic pyspark.
     """
-    try:
+    MIN_DBCONNECT_V2_VERSION = 13
+    if (
+        "DATABRICKS_RUNTIME_VERSION" in os.environ
+        and int(os.environ["DATABRICKS_RUNTIME_VERSION"].split(".")[0])
+        >= MIN_DBCONNECT_V2_VERSION
+    ):
         # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
         # the remote session is instantiated using the databricks module
         # If the databricks-connect module is installed, we use a remote session
@@ -47,9 +52,9 @@ def _get_spark() -> Any:
         # We can't test this as there's no Databricks test env available
         spark = DatabricksSession.builder.getOrCreate()  # pragma: no cover
 
-    except ImportError:
+    else:
         # For "normal" spark sessions that don't use databricks-connect
-        # we get spark normally
+        # or that use databricks-connect<13, we get spark "normally"
         spark = SparkSession.builder.getOrCreate()
 
     return spark