From 1348138c3a91d91c452cae52d825de9dc62a09da Mon Sep 17 00:00:00 2001
From: Subham Sangwan
Date: Fri, 26 Jun 2026 18:22:49 +0530
Subject: [PATCH] Fix DatabricksSqlHook sqlalchemy_url and get_uri to include
 http_path from connection extra

Move the http_path lookup out of get_conn into a new _resolve_http_path
helper and call it from sqlalchemy_url as well, so the URL is built from
the resolved value (explicit argument, then SQL endpoint name, then the
"http_path" key of the connection extra) instead of the raw
self._http_path attribute.

Note for reviewers: when no http_path can be resolved, the error is now
raised as ValueError; get_conn previously raised AirflowException with
the same message.

---
 .../databricks/hooks/databricks_sql.py        | 32 ++++++++------
 .../databricks/hooks/test_databricks_sql.py   | 43 +++++++++++++++++++
 2 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/providers/databricks/src/airflow/providers/databricks/hooks/databricks_sql.py b/providers/databricks/src/airflow/providers/databricks/hooks/databricks_sql.py
index ffc089607c33a..23227431cfded 100644
--- a/providers/databricks/src/airflow/providers/databricks/hooks/databricks_sql.py
+++ b/providers/databricks/src/airflow/providers/databricks/hooks/databricks_sql.py
@@ -184,20 +184,26 @@ def _get_sql_endpoint_by_name(self, endpoint_name) -> dict[str, Any]:
         else:
             return endpoint
 
+    def _resolve_http_path(self) -> str:
+        """Resolve http_path from explicit parameter, endpoint name, or connection extra."""
+        if self._http_path:
+            return self._http_path
+        if self._sql_endpoint_name:
+            endpoint = self._get_sql_endpoint_by_name(self._sql_endpoint_name)
+            self._http_path = endpoint["odbc_params"]["path"]
+            return self._http_path
+        if "http_path" in self.databricks_conn.extra_dejson:
+            self._http_path = self.databricks_conn.extra_dejson["http_path"]
+            return self._http_path
+        raise ValueError(
+            "http_path should be provided either explicitly, "
+            "or in extra parameter of Databricks connection, "
+            "or sql_endpoint_name should be specified"
+        )
+
     def get_conn(self) -> AirflowConnection:
         """Return a Databricks SQL connection object."""
-        if not self._http_path:
-            if self._sql_endpoint_name:
-                endpoint = self._get_sql_endpoint_by_name(self._sql_endpoint_name)
-                self._http_path = endpoint["odbc_params"]["path"]
-            elif "http_path" in self.databricks_conn.extra_dejson:
-                self._http_path = self.databricks_conn.extra_dejson["http_path"]
-            else:
-                raise AirflowException(
-                    "http_path should be provided either explicitly, "
-                    "or in extra parameter of Databricks connection, "
-                    "or sql_endpoint_name should be specified"
-                )
+        self._resolve_http_path()
 
         prev_token = self._token
         new_token = self._get_token(raise_error=True)
@@ -255,7 +261,7 @@ def sqlalchemy_url(self) -> URL:
         )
 
         url_query = {
-            "http_path": self._http_path,
+            "http_path": self._resolve_http_path(),
             "catalog": self.catalog,
             "schema": self.schema,
         }
diff --git a/providers/databricks/tests/unit/databricks/hooks/test_databricks_sql.py b/providers/databricks/tests/unit/databricks/hooks/test_databricks_sql.py
index cd3c00e2839b1..02034d865f398 100644
--- a/providers/databricks/tests/unit/databricks/hooks/test_databricks_sql.py
+++ b/providers/databricks/tests/unit/databricks/hooks/test_databricks_sql.py
@@ -150,6 +150,49 @@ def test_get_uri():
     assert uri == expected_uri
 
 
+CONN_ID_WITH_HTTP_PATH_EXTRA = "databricks_with_http_path_extra"
+
+
+@pytest.fixture
+def create_connection_with_http_path_extra(create_connection_without_db):
+    create_connection_without_db(
+        Connection(
+            conn_id=CONN_ID_WITH_HTTP_PATH_EXTRA,
+            conn_type="databricks",
+            host=HOST,
+            login=None,
+            password=TOKEN,
+            extra={"http_path": HTTP_PATH},
+        )
+    )
+
+
+def test_sqlalchemy_url_with_http_path_from_connection_extra(create_connection_with_http_path_extra):
+    hook = DatabricksSqlHook(databricks_conn_id=CONN_ID_WITH_HTTP_PATH_EXTRA, catalog=CATALOG, schema=SCHEMA)
+    url = hook.sqlalchemy_url.render_as_string(hide_password=False)
+    expected_url = (
+        f"databricks://token:{TOKEN}@{HOST}?"
+        f"catalog={CATALOG}&http_path={quote_plus(HTTP_PATH)}&schema={SCHEMA}"
+    )
+    assert url == expected_url
+
+
+def test_get_uri_with_http_path_from_connection_extra(create_connection_with_http_path_extra):
+    hook = DatabricksSqlHook(databricks_conn_id=CONN_ID_WITH_HTTP_PATH_EXTRA, catalog=CATALOG, schema=SCHEMA)
+    uri = hook.get_uri()
+    expected_uri = (
+        f"databricks://token:{TOKEN}@{HOST}?"
+        f"catalog={CATALOG}&http_path={quote_plus(HTTP_PATH)}&schema={SCHEMA}"
+    )
+    assert uri == expected_uri
+
+
+def test_resolve_http_path_raises_when_not_provided():
+    hook = DatabricksSqlHook(databricks_conn_id=DEFAULT_CONN_ID)
+    with pytest.raises(ValueError, match="http_path should be provided"):
+        hook._resolve_http_path()
+
+
 def get_cursor_descriptions(fields: list[str]) -> list[tuple[str]]:
     return [(field,) for field in fields]
 