From 38d80427bf8007482a84d503cfa2e266083d66a3 Mon Sep 17 00:00:00 2001
From: Miguel Rodriguez Gutierrez
Date: Tue, 1 Oct 2024 14:35:08 +0000
Subject: [PATCH 1/2] Removed databricks-connect by default in notebooks

Signed-off-by: Miguel Rodriguez Gutierrez
---
 kedro-datasets/RELEASE.md                            | 3 +++
 kedro-datasets/kedro_datasets/spark/spark_dataset.py | 9 ++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md
index 52ba9fe51..eeb094ba5 100755
--- a/kedro-datasets/RELEASE.md
+++ b/kedro-datasets/RELEASE.md
@@ -20,6 +20,8 @@
 * Fixed deprecated load and save approaches of GBQTableDataset and GBQQueryDataset by invoking save and load directly over `pandas-gbq` lib

 ## Breaking Changes
+* Now `_get_spark()` does not use `databricks-connect` by default when run in a Databricks notebook
+
 ## Community contributions
 Many thanks to the following Kedroids for contributing PRs to this release:
 * [Brandon Meek](https://github.com/bpmeek)
@@ -27,6 +29,7 @@ Many thanks to the following Kedroids for contributing PRs to this release:
 * [gitgud5000](https://github.com/gitgud5000)
 * [janickspirig](https://github.com/janickspirig)
 * [Galen Seilis](https://github.com/galenseilis)
+* [MigQ2](https://github.com/MigQ2)

 # Release 4.1.0

diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
index e077d6390..74f20e1b8 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
@@ -38,7 +38,10 @@ def _get_spark() -> Any:
     extended configuration mechanisms and notebook compatibility,
     otherwise we use classic pyspark.
     """
-    try:
+    if (
+        "DATABRICKS_RUNTIME_VERSION" in os.environ
+        and int(os.environ["DATABRICKS_RUNTIME_VERSION"].split(".")[0]) >= 13
+    ):
         # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
         # the remote session is instantiated using the databricks module
         # If the databricks-connect module is installed, we use a remote session
@@ -47,9 +50,9 @@
         # We can't test this as there's no Databricks test env available
         spark = DatabricksSession.builder.getOrCreate()  # pragma: no cover

-    except ImportError:
+    else:
         # For "normal" spark sessions that don't use databricks-connect
-        # we get spark normally
+        # or for databricks-connect<13 we get spark "normally"
         spark = SparkSession.builder.getOrCreate()

     return spark

From 8a1a7047d1b5632b5e9d35ef861ebba5db77f395 Mon Sep 17 00:00:00 2001
From: Miguel Rodriguez Gutierrez
Date: Tue, 1 Oct 2024 14:58:52 +0000
Subject: [PATCH 2/2] Fixed DBR version constant

Signed-off-by: Miguel Rodriguez Gutierrez
---
 kedro-datasets/kedro_datasets/spark/spark_dataset.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
index 74f20e1b8..b73ab4398 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
@@ -38,9 +38,11 @@ def _get_spark() -> Any:
     extended configuration mechanisms and notebook compatibility,
     otherwise we use classic pyspark.
""" + MIN_DBCONNECT_V2_VERSION = 13 if ( "DATABRICKS_RUNTIME_VERSION" in os.environ - and int(os.environ["DATABRICKS_RUNTIME_VERSION"].split(".")[0]) >= 13 + and int(os.environ["DATABRICKS_RUNTIME_VERSION"].split(".")[0]) + >= MIN_DBCONNECT_V2_VERSION ): # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2) # the remote session is instantiated using the databricks module