diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md
index 52ba9fe51..eeb094ba5 100755
--- a/kedro-datasets/RELEASE.md
+++ b/kedro-datasets/RELEASE.md
@@ -20,6 +20,8 @@
 * Fixed deprecated load and save approaches of GBQTableDataset and GBQQueryDataset by invoking save and load directly over `pandas-gbq` lib
 
 ## Breaking Changes
+* Now `_get_spark()` does not use `databricks-connect` by default when run in a Databricks notebook
+
 ## Community contributions
 Many thanks to the following Kedroids for contributing PRs to this release:
 * [Brandon Meek](https://github.com/bpmeek)
@@ -27,6 +29,7 @@ Many thanks to the following Kedroids for contributing PRs to this release:
 * [gitgud5000](https://github.com/gitgud5000)
 * [janickspirig](https://github.com/janickspirig)
 * [Galen Seilis](https://github.com/galenseilis)
+* [MigQ2](https://github.com/MigQ2)
 
 
 # Release 4.1.0
diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
index e077d6390..b73ab4398 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
@@ -38,7 +38,12 @@ def _get_spark() -> Any:
     extended configuration mechanisms and notebook compatibility, otherwise
     we use classic pyspark.
     """
-    try:
+    MIN_DBCONNECT_V2_VERSION = 13
+    if (
+        "DATABRICKS_RUNTIME_VERSION" in os.environ
+        and int(os.environ["DATABRICKS_RUNTIME_VERSION"].split(".")[0])
+        >= MIN_DBCONNECT_V2_VERSION
+    ):
         # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
         # the remote session is instantiated using the databricks module
         # If the databricks-connect module is installed, we use a remote session
@@ -47,9 +52,9 @@
         # We can't test this as there's no Databricks test env available
         spark = DatabricksSession.builder.getOrCreate()  # pragma: no cover
 
-    except ImportError:
+    else:
         # For "normal" spark sessions that don't use databricks-connect
-        # we get spark normally
+        # or for databricks-connect<13 we get spark "normally"
         spark = SparkSession.builder.getOrCreate()
 
     return spark
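
While the `DatabricksSession` branch itself still can't run in CI (no Databricks test env, per the inline comment), the new environment-variable gate can be exercised locally. Below is a minimal sketch of how that might look with pytest's `monkeypatch` fixture; the test module and test names are illustrative, not part of this patch:

```python
# Sketch only: assumes pyspark and pytest are installed; the test names
# are hypothetical and not part of this PR.
from pyspark.sql import SparkSession

from kedro_datasets.spark.spark_dataset import _get_spark


def test_get_spark_outside_databricks(monkeypatch):
    # With no DATABRICKS_RUNTIME_VERSION set, the else branch should
    # build a classic SparkSession.
    monkeypatch.delenv("DATABRICKS_RUNTIME_VERSION", raising=False)
    assert isinstance(_get_spark(), SparkSession)


def test_get_spark_on_pre_v2_runtime(monkeypatch):
    # Runtimes below 13 (databricks-connect v1) should also fall back
    # to a classic SparkSession.
    monkeypatch.setenv("DATABRICKS_RUNTIME_VERSION", "12.2")
    assert isinstance(_get_spark(), SparkSession)
```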