From 159e0a3e45ac81e6465c6bb010492f33f7e98064 Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Tue, 7 Jan 2025 18:48:56 -0600 Subject: [PATCH 1/4] chore(datasets): Remove tracking datasets which are used in Kedro Viz Experiment Tracking (#969) * remove et related kedro datasets * update release note and static json schema * temporary doc fix --- kedro-datasets/RELEASE.md | 4 + .../docs/source/api/kedro_datasets.rst | 2 - kedro-datasets/kedro_datasets/_typing.py | 5 - .../kedro_datasets/dask/csv_dataset.py | 4 +- .../kedro_datasets/dask/parquet_dataset.py | 4 +- .../kedro_datasets/tracking/__init__.py | 26 --- .../kedro_datasets/tracking/json_dataset.py | 56 ----- .../tracking/metrics_dataset.py | 76 ------- kedro-datasets/pyproject.toml | 4 - .../static/jsonschema/kedro-catalog-0.18.json | 72 ------- .../static/jsonschema/kedro-catalog-0.19.json | 72 ------- kedro-datasets/tests/tracking/__init__.py | 0 .../tests/tracking/test_json_dataset.py | 195 ----------------- .../tests/tracking/test_metrics_dataset.py | 204 ------------------ 14 files changed, 8 insertions(+), 716 deletions(-) delete mode 100644 kedro-datasets/kedro_datasets/tracking/__init__.py delete mode 100644 kedro-datasets/kedro_datasets/tracking/json_dataset.py delete mode 100644 kedro-datasets/kedro_datasets/tracking/metrics_dataset.py delete mode 100644 kedro-datasets/tests/tracking/__init__.py delete mode 100644 kedro-datasets/tests/tracking/test_json_dataset.py delete mode 100644 kedro-datasets/tests/tracking/test_metrics_dataset.py diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index a477dca5e..16fa5b18a 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,7 +1,11 @@ # Upcoming Release ## Major features and improvements ## Bug fixes and other changes + ## Breaking Changes + +- Removed `tracking.MetricsDataset` and `tracking.JSONDataset` + ## Community contributions # Release 6.0.0 diff --git a/kedro-datasets/docs/source/api/kedro_datasets.rst b/kedro-datasets/docs/source/api/kedro_datasets.rst index 0cbd3bc4e..63142220a 100644 --- a/kedro-datasets/docs/source/api/kedro_datasets.rst +++ b/kedro-datasets/docs/source/api/kedro_datasets.rst @@ -62,6 +62,4 @@ kedro_datasets svmlight.SVMLightDataset tensorflow.TensorFlowModelDataset text.TextDataset - tracking.JSONDataset - tracking.MetricsDataset yaml.YAMLDataset diff --git a/kedro-datasets/kedro_datasets/_typing.py b/kedro-datasets/kedro_datasets/_typing.py index feb6d91b7..aa083f514 100644 --- a/kedro-datasets/kedro_datasets/_typing.py +++ b/kedro-datasets/kedro_datasets/_typing.py @@ -9,8 +9,3 @@ ImagePreview = NewType("ImagePreview", str) PlotlyPreview = NewType("PlotlyPreview", dict) JSONPreview = NewType("JSONPreview", str) - - -# experiment tracking datasets types -MetricsTrackingPreview = NewType("MetricsTrackingPreview", dict) -JSONTrackingPreview = NewType("JSONTrackingPreview", dict) diff --git a/kedro-datasets/kedro_datasets/dask/csv_dataset.py b/kedro-datasets/kedro_datasets/dask/csv_dataset.py index 053da6b00..bc5b5764b 100644 --- a/kedro-datasets/kedro_datasets/dask/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/csv_dataset.py @@ -67,9 +67,9 @@ def __init__( # noqa: PLR0913 filepath: Filepath in POSIX format to a CSV file CSV collection or the directory of a multipart CSV. 
load_args: Additional loading options `dask.dataframe.read_csv`: - https://docs.dask.org/en/latest/generated/dask.dataframe.read_csv.html + https://docs.dask.org/en/stable/generated/dask.dataframe.read_csv.html save_args: Additional saving options for `dask.dataframe.to_csv`: - https://docs.dask.org/en/latest/generated/dask.dataframe.to_csv.html + https://docs.dask.org/en/stable/generated/dask.dataframe.to_csv.html credentials: Credentials required to get access to the underlying filesystem. E.g. for ``GCSFileSystem`` it should look like `{"token": None}`. fs_args: Optional parameters to the backend file system driver: diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index 1acfe7cda..3b2dff73e 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -97,9 +97,9 @@ def __init__( # noqa: PLR0913 filepath: Filepath in POSIX format to a parquet file parquet collection or the directory of a multipart parquet. load_args: Additional loading options `dask.dataframe.read_parquet`: - https://docs.dask.org/en/latest/generated/dask.dataframe.read_parquet.html + https://docs.dask.org/en/stable/generated/dask.dataframe.read_parquet.html save_args: Additional saving options for `dask.dataframe.to_parquet`: - https://docs.dask.org/en/latest/generated/dask.dataframe.to_parquet.html + https://docs.dask.org/en/stable/generated/dask.dataframe.to_parquet.html credentials: Credentials required to get access to the underlying filesystem. E.g. for ``GCSFileSystem`` it should look like `{"token": None}`. fs_args: Optional parameters to the backend file system driver: diff --git a/kedro-datasets/kedro_datasets/tracking/__init__.py b/kedro-datasets/kedro_datasets/tracking/__init__.py deleted file mode 100644 index 1b1a5c70d..000000000 --- a/kedro-datasets/kedro_datasets/tracking/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Dataset implementations to save data for Kedro Experiment Tracking.""" - -import warnings -from typing import Any - -import lazy_loader as lazy - -from kedro_datasets import KedroDeprecationWarning - -# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901 -JSONDataset: Any -MetricsDataset: Any - -__getattr__, __dir__, __all__ = lazy.attach( - __name__, - submod_attrs={ - "json_dataset": ["JSONDataset"], - "metrics_dataset": ["MetricsDataset"], - }, -) - -warnings.warn( - "`tracking.JSONDataset` and `tracking.MetricsDataset` are deprecated. These datasets will be removed in kedro-datasets 7.0.0", - KedroDeprecationWarning, - stacklevel=2, -) diff --git a/kedro-datasets/kedro_datasets/tracking/json_dataset.py b/kedro-datasets/kedro_datasets/tracking/json_dataset.py deleted file mode 100644 index d73df1b10..000000000 --- a/kedro-datasets/kedro_datasets/tracking/json_dataset.py +++ /dev/null @@ -1,56 +0,0 @@ -"""``JSONDataset`` saves data to a JSON file using an underlying -filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file. -The ``JSONDataset`` is part of Kedro Experiment Tracking. The dataset is versioned by default. -""" - -import json -from typing import NoReturn - -from kedro.io.core import DatasetError, get_filepath_str - -from kedro_datasets._typing import JSONTrackingPreview -from kedro_datasets.json import json_dataset - - -class JSONDataset(json_dataset.JSONDataset): - """``JSONDataset`` saves data to a JSON file using an underlying - filesystem (e.g.: local, S3, GCS). 
It uses native json to handle the JSON file. - The ``JSONDataset`` is part of Kedro Experiment Tracking. - The dataset is write-only and it is versioned by default. - - Example usage for the - `YAML API `_: - - .. code-block:: yaml - - cars: - type: tracking.JSONDataset - filepath: data/09_tracking/cars.json - - Example usage for the - `Python API `_: - - .. code-block:: pycon - - >>> from kedro_datasets.tracking import JSONDataset - >>> - >>> data = {"col1": 1, "col2": 0.23, "col3": 0.002} - >>> - >>> dataset = JSONDataset(filepath=tmp_path / "test.json") - >>> dataset.save(data) - - """ - - versioned = True - - def load(self) -> NoReturn: - raise DatasetError(f"Loading not supported for '{self.__class__.__name__}'") - - def preview(self) -> JSONTrackingPreview: # type: ignore[override] - "Load the JSON tracking dataset used in Kedro-viz experiment tracking." - load_path = get_filepath_str(self._get_load_path(), self._protocol) - - with self._fs.open(load_path, **self._fs_open_args_load) as fs_file: - return JSONTrackingPreview(json.load(fs_file)) diff --git a/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py b/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py deleted file mode 100644 index 6202acf34..000000000 --- a/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py +++ /dev/null @@ -1,76 +0,0 @@ -"""``MetricsDataset`` saves data to a JSON file using an underlying -filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file. -The ``MetricsDataset`` is part of Kedro Experiment Tracking. The dataset is versioned by default -and only takes metrics of numeric values. -""" - -import json -from typing import NoReturn - -from kedro.io.core import DatasetError, get_filepath_str - -from kedro_datasets._typing import MetricsTrackingPreview -from kedro_datasets.json import json_dataset - - -class MetricsDataset(json_dataset.JSONDataset): - """``MetricsDataset`` saves data to a JSON file using an underlying - filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file. The - ``MetricsDataset`` is part of Kedro Experiment Tracking. The dataset is write-only, - it is versioned by default and only takes metrics of numeric values. - - Example usage for the - `YAML API `_: - - .. code-block:: yaml - - cars: - type: tracking.MetricsDataset - filepath: data/09_tracking/cars.json - - Example usage for the - `Python API `_: - - .. code-block:: pycon - - >>> from kedro_datasets.tracking import MetricsDataset - >>> - >>> data = {"col1": 1, "col2": 0.23, "col3": 0.002} - >>> - >>> dataset = MetricsDataset(filepath=tmp_path / "test.json") - >>> dataset.save(data) - - """ - - versioned = True - - def load(self) -> NoReturn: - raise DatasetError(f"Loading not supported for '{self.__class__.__name__}'") - - def save(self, data: dict[str, float]) -> None: - """Converts all values in the data from a ``MetricsDataset`` to float to make sure - they are numeric values which can be displayed in Kedro Viz and then saves the dataset. - """ - try: - for key, value in data.items(): - data[key] = float(value) - except ValueError as exc: - raise DatasetError( - f"The MetricsDataset expects only numeric values. 
{exc}" - ) from exc - - save_path = get_filepath_str(self._get_save_path(), self._protocol) - - with self._fs.open(save_path, **self._fs_open_args_save) as fs_file: - json.dump(data, fs_file, **self._save_args) - - self._invalidate_cache() - - def preview(self) -> MetricsTrackingPreview: # type: ignore[override] - "Load the Metrics tracking dataset used in Kedro-viz experiment tracking" - load_path = get_filepath_str(self._get_load_path(), self._protocol) - - with self._fs.open(load_path, **self._fs_open_args_load) as fs_file: - return json.load(fs_file) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 91b938c19..3ee8eb9e9 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -163,10 +163,6 @@ tensorflow = ["kedro-datasets[tensorflow-tensorflowmodeldataset]"] text-textdataset = [] text = ["kedro-datasets[text-textdataset]"] -tracking-jsondataset = [] -tracking-metricsdataset = [] -tracking = ["kedro-datasets[tracking-jsondataset, tracking-metricsdataset]"] - yaml-yamldataset = ["kedro-datasets[pandas-base]", "PyYAML>=4.2, <7.0"] yaml = ["kedro-datasets[yaml-yamldataset]"] diff --git a/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json b/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json index 195f0234a..b9fa61d14 100644 --- a/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json +++ b/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json @@ -42,8 +42,6 @@ "spark.SparkJDBCDataSet", "tensorflow.TensorFlowModelDataset", "text.TextDataSet", - "tracking.JSONDataSet", - "tracking.MetricsDataSet", "yaml.YAMLDataSet" ] } @@ -1312,76 +1310,6 @@ } } }, - { - "if": { - "properties": { - "type": { - "const": "tracking.JSONDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." 
- } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.MetricsDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, { "if": { "properties": { diff --git a/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json b/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json index f19266812..087725710 100644 --- a/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json +++ b/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json @@ -41,8 +41,6 @@ "spark.SparkJDBCDataset", "tensorflow.TensorFlowModelDataset", "text.TextDataset", - "tracking.JSONDataset", - "tracking.MetricsDataset", "yaml.YAMLDataset" ] } @@ -1277,76 +1275,6 @@ } } }, - { - "if": { - "properties": { - "type": { - "const": "tracking.JSONDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.MetricsDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." 
- } - } - } - }, { "if": { "properties": { diff --git a/kedro-datasets/tests/tracking/__init__.py b/kedro-datasets/tests/tracking/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/kedro-datasets/tests/tracking/test_json_dataset.py b/kedro-datasets/tests/tracking/test_json_dataset.py deleted file mode 100644 index de24ba9b9..000000000 --- a/kedro-datasets/tests/tracking/test_json_dataset.py +++ /dev/null @@ -1,195 +0,0 @@ -import inspect -import json -from pathlib import Path, PurePosixPath - -import pytest -from fsspec.implementations.local import LocalFileSystem -from gcsfs import GCSFileSystem -from kedro.io.core import PROTOCOL_DELIMITER, DatasetError, Version -from s3fs.core import S3FileSystem - -from kedro_datasets.tracking import JSONDataset - - -@pytest.fixture -def filepath_json(tmp_path): - return (tmp_path / "test.json").as_posix() - - -@pytest.fixture -def json_dataset(filepath_json, save_args, fs_args): - return JSONDataset(filepath=filepath_json, save_args=save_args, fs_args=fs_args) - - -@pytest.fixture -def explicit_versioned_json_dataset(filepath_json, load_version, save_version): - return JSONDataset( - filepath=filepath_json, version=Version(load_version, save_version) - ) - - -@pytest.fixture -def dummy_data(): - return {"col1": 1, "col2": 2, "col3": "mystring"} - - -class TestJSONDataset: - def test_save(self, filepath_json, dummy_data, tmp_path, save_version): - """Test saving and reloading the dataset.""" - json_dataset = JSONDataset( - filepath=filepath_json, version=Version(None, save_version) - ) - json_dataset.save(dummy_data) - - actual_filepath = Path(json_dataset._filepath.as_posix()) - test_filepath = tmp_path / "locally_saved.json" - - test_filepath.parent.mkdir(parents=True, exist_ok=True) - with open(test_filepath, "w", encoding="utf-8") as file: - json.dump(dummy_data, file) - - with open(test_filepath, encoding="utf-8") as file: - test_data = json.load(file) - - with open( - (actual_filepath / save_version / "test.json"), encoding="utf-8" - ) as actual_file: - actual_data = json.load(actual_file) - - assert actual_data == test_data - assert json_dataset._fs_open_args_load == {} - assert json_dataset._fs_open_args_save == {"mode": "w"} - - def test_load_fail(self, json_dataset, dummy_data): - json_dataset.save(dummy_data) - pattern = r"Loading not supported for 'JSONDataset'" - with pytest.raises(DatasetError, match=pattern): - json_dataset.load() - - def test_exists(self, json_dataset, dummy_data): - """Test `exists` method invocation for both existing and - nonexistent dataset.""" - assert not json_dataset.exists() - json_dataset.save(dummy_data) - assert json_dataset.exists() - - @pytest.mark.parametrize( - "save_args", [{"k1": "v1", "index": "value"}], indirect=True - ) - def test_save_extra_params(self, json_dataset, save_args): - """Test overriding the default save arguments.""" - for key, value in save_args.items(): - assert json_dataset._save_args[key] == value - - @pytest.mark.parametrize( - "fs_args", - [{"open_args_load": {"mode": "rb", "compression": "gzip"}}], - indirect=True, - ) - def test_open_extra_args(self, json_dataset, fs_args): - assert json_dataset._fs_open_args_load == fs_args["open_args_load"] - assert json_dataset._fs_open_args_save == {"mode": "w"} # default unchanged - - @pytest.mark.parametrize( - "filepath,instance_type", - [ - ("s3://bucket/file.json", S3FileSystem), - ("file:///tmp/test.json", LocalFileSystem), - ("/tmp/test.json", LocalFileSystem), - ("gcs://bucket/file.json", GCSFileSystem), - 
], - ) - def test_protocol_usage(self, filepath, instance_type): - dataset = JSONDataset(filepath=filepath) - assert isinstance(dataset._fs, instance_type) - - path = filepath.split(PROTOCOL_DELIMITER, 1)[-1] - - assert str(dataset._filepath) == path - assert isinstance(dataset._filepath, PurePosixPath) - - def test_catalog_release(self, mocker): - fs_mock = mocker.patch("fsspec.filesystem").return_value - filepath = "test.json" - dataset = JSONDataset(filepath=filepath) - dataset.release() - fs_mock.invalidate_cache.assert_called_once_with(filepath) - - def test_not_version_str_repr(self): - """Test that version is not in string representation of the class instance.""" - filepath = "test.json" - ds = JSONDataset(filepath=filepath) - - assert filepath in str(ds) - assert "version" not in str(ds) - assert "JSONDataset" in str(ds) - assert "protocol" in str(ds) - # Default save_args - assert "save_args={'indent': 2}" in str(ds) - - def test_version_str_repr(self, load_version, save_version): - """Test that version is in string representation of the class instance.""" - filepath = "test.json" - ds_versioned = JSONDataset( - filepath=filepath, version=Version(load_version, save_version) - ) - - assert filepath in str(ds_versioned) - ver_str = f"version=Version(load={load_version}, save='{save_version}')" - assert ver_str in str(ds_versioned) - assert "JSONDataset" in str(ds_versioned) - assert "protocol" in str(ds_versioned) - # Default save_args - assert "save_args={'indent': 2}" in str(ds_versioned) - - def test_prevent_overwrite(self, explicit_versioned_json_dataset, dummy_data): - """Check the error when attempting to override the dataset if the - corresponding json file for a given save version already exists.""" - explicit_versioned_json_dataset.save(dummy_data) - pattern = ( - r"Save path \'.+\' for JSONDataset\(.+\) must " - r"not exist if versioning is enabled\." - ) - with pytest.raises(DatasetError, match=pattern): - explicit_versioned_json_dataset.save(dummy_data) - - @pytest.mark.parametrize( - "load_version", ["2019-01-01T23.59.59.999Z"], indirect=True - ) - @pytest.mark.parametrize( - "save_version", ["2019-01-02T00.00.00.000Z"], indirect=True - ) - def test_save_version_warning( - self, - explicit_versioned_json_dataset, - load_version, - save_version, - dummy_data, - ): - """Check the warning when saving to the path that differs from - the subsequent load path.""" - pattern = ( - f"Save version '{save_version}' did not match " - f"load version '{load_version}' for " - r"JSONDataset\(.+\)" - ) - with pytest.warns(UserWarning, match=pattern): - explicit_versioned_json_dataset.save(dummy_data) - - def test_http_filesystem_no_versioning(self): - pattern = "Versioning is not supported for HTTP protocols." 
- - with pytest.raises(DatasetError, match=pattern): - JSONDataset( - filepath="https://example.com/file.json", version=Version(None, None) - ) - - def test_preview(self, json_dataset, dummy_data): - expected_preview = {"col1": 1, "col2": 2, "col3": "mystring"} - json_dataset.save(dummy_data) - preview = json_dataset.preview() - assert preview == expected_preview - assert ( - inspect.signature(json_dataset.preview).return_annotation.__name__ - == "JSONTrackingPreview" - ) diff --git a/kedro-datasets/tests/tracking/test_metrics_dataset.py b/kedro-datasets/tests/tracking/test_metrics_dataset.py deleted file mode 100644 index b638fcdfd..000000000 --- a/kedro-datasets/tests/tracking/test_metrics_dataset.py +++ /dev/null @@ -1,204 +0,0 @@ -import inspect -import json -from pathlib import Path, PurePosixPath - -import pytest -from fsspec.implementations.local import LocalFileSystem -from gcsfs import GCSFileSystem -from kedro.io.core import PROTOCOL_DELIMITER, DatasetError, Version -from s3fs.core import S3FileSystem - -from kedro_datasets.tracking import MetricsDataset - - -@pytest.fixture -def filepath_json(tmp_path): - return (tmp_path / "test.json").as_posix() - - -@pytest.fixture -def metrics_dataset(filepath_json, save_args, fs_args): - return MetricsDataset(filepath=filepath_json, save_args=save_args, fs_args=fs_args) - - -@pytest.fixture -def explicit_versioned_metrics_dataset(filepath_json, load_version, save_version): - return MetricsDataset( - filepath=filepath_json, version=Version(load_version, save_version) - ) - - -@pytest.fixture -def dummy_data(): - return {"col1": 1, "col2": 2, "col3": 3} - - -class TestMetricsDataset: - def test_save_data( - self, - dummy_data, - tmp_path, - filepath_json, - save_version, - ): - """Test saving and reloading the dataset.""" - metrics_dataset = MetricsDataset( - filepath=filepath_json, version=Version(None, save_version) - ) - metrics_dataset.save(dummy_data) - - actual_filepath = Path(metrics_dataset._filepath.as_posix()) - test_filepath = tmp_path / "locally_saved.json" - - test_filepath.parent.mkdir(parents=True, exist_ok=True) - with open(test_filepath, "w", encoding="utf-8") as file: - json.dump(dummy_data, file) - - with open(test_filepath, encoding="utf-8") as file: - test_data = json.load(file) - - with open( - (actual_filepath / save_version / "test.json"), encoding="utf-8" - ) as actual_file: - actual_data = json.load(actual_file) - - assert actual_data == test_data - assert metrics_dataset._fs_open_args_load == {} - assert metrics_dataset._fs_open_args_save == {"mode": "w"} - - def test_load_fail(self, metrics_dataset, dummy_data): - metrics_dataset.save(dummy_data) - pattern = r"Loading not supported for 'MetricsDataset'" - with pytest.raises(DatasetError, match=pattern): - metrics_dataset.load() - - def test_exists(self, metrics_dataset, dummy_data): - """Test `exists` method invocation for both existing and - nonexistent dataset.""" - assert not metrics_dataset.exists() - metrics_dataset.save(dummy_data) - assert metrics_dataset.exists() - - @pytest.mark.parametrize( - "save_args", [{"k1": "v1", "index": "value"}], indirect=True - ) - def test_save_extra_params(self, metrics_dataset, save_args): - """Test overriding the default save arguments.""" - for key, value in save_args.items(): - assert metrics_dataset._save_args[key] == value - - @pytest.mark.parametrize( - "fs_args", - [{"open_args_load": {"mode": "rb", "compression": "gzip"}}], - indirect=True, - ) - def test_open_extra_args(self, metrics_dataset, fs_args): - assert 
metrics_dataset._fs_open_args_load == fs_args["open_args_load"] - assert metrics_dataset._fs_open_args_save == {"mode": "w"} # default unchanged - - @pytest.mark.parametrize( - "filepath,instance_type", - [ - ("s3://bucket/file.json", S3FileSystem), - ("file:///tmp/test.json", LocalFileSystem), - ("/tmp/test.json", LocalFileSystem), - ("gcs://bucket/file.json", GCSFileSystem), - ], - ) - def test_protocol_usage(self, filepath, instance_type): - dataset = MetricsDataset(filepath=filepath) - assert isinstance(dataset._fs, instance_type) - - path = filepath.split(PROTOCOL_DELIMITER, 1)[-1] - - assert str(dataset._filepath) == path - assert isinstance(dataset._filepath, PurePosixPath) - - def test_catalog_release(self, mocker): - fs_mock = mocker.patch("fsspec.filesystem").return_value - filepath = "test.json" - dataset = MetricsDataset(filepath=filepath) - dataset.release() - fs_mock.invalidate_cache.assert_called_once_with(filepath) - - def test_fail_on_saving_non_numeric_value(self, metrics_dataset): - data = {"col1": 1, "col2": 2, "col3": "hello"} - - pattern = "The MetricsDataset expects only numeric values." - with pytest.raises(DatasetError, match=pattern): - metrics_dataset.save(data) - - def test_not_version_str_repr(self): - """Test that version is not in string representation of the class instance.""" - filepath = "test.json" - ds = MetricsDataset(filepath=filepath) - - assert filepath in str(ds) - assert "version" not in str(ds) - assert "MetricsDataset" in str(ds) - assert "protocol" in str(ds) - # Default save_args - assert "save_args={'indent': 2}" in str(ds) - - def test_version_str_repr(self, load_version, save_version): - """Test that version is in string representation of the class instance.""" - filepath = "test.json" - ds_versioned = MetricsDataset( - filepath=filepath, version=Version(load_version, save_version) - ) - - assert filepath in str(ds_versioned) - ver_str = f"version=Version(load={load_version}, save='{save_version}')" - assert ver_str in str(ds_versioned) - assert "MetricsDataset" in str(ds_versioned) - assert "protocol" in str(ds_versioned) - # Default save_args - assert "save_args={'indent': 2}" in str(ds_versioned) - - def test_prevent_overwrite(self, explicit_versioned_metrics_dataset, dummy_data): - """Check the error when attempting to override the dataset if the - corresponding json file for a given save version already exists.""" - explicit_versioned_metrics_dataset.save(dummy_data) - pattern = ( - r"Save path \'.+\' for MetricsDataset\(.+\) must " - r"not exist if versioning is enabled\." - ) - with pytest.raises(DatasetError, match=pattern): - explicit_versioned_metrics_dataset.save(dummy_data) - - @pytest.mark.parametrize( - "load_version", ["2019-01-01T23.59.59.999Z"], indirect=True - ) - @pytest.mark.parametrize( - "save_version", ["2019-01-02T00.00.00.000Z"], indirect=True - ) - def test_save_version_warning( - self, explicit_versioned_metrics_dataset, load_version, save_version, dummy_data - ): - """Check the warning when saving to the path that differs from - the subsequent load path.""" - pattern = ( - f"Save version '{save_version}' did not match " - f"load version '{load_version}' for " - r"MetricsDataset\(.+\)" - ) - with pytest.warns(UserWarning, match=pattern): - explicit_versioned_metrics_dataset.save(dummy_data) - - def test_http_filesystem_no_versioning(self): - pattern = "Versioning is not supported for HTTP protocols." 
- - with pytest.raises(DatasetError, match=pattern): - MetricsDataset( - filepath="https://example.com/file.json", version=Version(None, None) - ) - - def test_preview(self, metrics_dataset, dummy_data): - expected_preview = {"col1": 1, "col2": 2, "col3": 3} - metrics_dataset.save(dummy_data) - preview = metrics_dataset.preview() - assert preview == expected_preview - assert ( - inspect.signature(metrics_dataset.preview).return_annotation.__name__ - == "MetricsTrackingPreview" - ) From 6f0ffa9e7f7ca95058aca9993a7c16aa2ebbbad5 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:15:32 +0000 Subject: [PATCH 2/4] docs(datasets): Move to linkcode extension (#985) Move to linkcode extension Signed-off-by: Ankita Katiyar --- kedro-datasets/docs/source/conf.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/kedro-datasets/docs/source/conf.py b/kedro-datasets/docs/source/conf.py index f62e80104..039658936 100644 --- a/kedro-datasets/docs/source/conf.py +++ b/kedro-datasets/docs/source/conf.py @@ -14,6 +14,8 @@ from __future__ import annotations import importlib +import inspect +import os import re import sys from inspect import getmembers, isclass, isfunction @@ -22,6 +24,8 @@ from click import secho, style from kedro import __version__ as release +import kedro_datasets + # -- Project information ----------------------------------------------------- project = "kedro-datasets" @@ -47,7 +51,7 @@ "sphinx_autodoc_typehints", "sphinx.ext.doctest", "sphinx.ext.ifconfig", - "sphinx.ext.viewcode", + "sphinx.ext.linkcode", "sphinxcontrib.jquery", "sphinx_copybutton", "myst_parser", @@ -452,3 +456,25 @@ def setup(app): user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0" myst_heading_anchors = 5 + +def linkcode_resolve(domain, info): + """Resolve a GitHub URL corresponding to a Python object.""" + if domain != 'py': + return None + + try: + mod = sys.modules[info['module']] + obj = mod + for attr in info['fullname'].split('.'): + obj = getattr(obj, attr) + obj = inspect.unwrap(obj) + + filename = inspect.getsourcefile(obj) + source, lineno = inspect.getsourcelines(obj) + relpath = os.path.relpath(filename, start=os.path.dirname( + kedro_datasets.__file__)) + + return f'https://github.com/kedro-org/kedro-plugins/blob/main/kedro-datasets/kedro_datasets/{relpath}#L{lineno}#L{lineno + len(source) - 1}' + + except (KeyError, ImportError, AttributeError, TypeError, OSError, ValueError): + return None From 630f4ea3f032ea62b7b5834762579975a2b44ebc Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Mon, 13 Jan 2025 09:48:50 -0600 Subject: [PATCH 3/4] fix(datasets): Fix polars.CSVDataset `save` on Windows (#979) * test csv win Signed-off-by: ravi_kumar_pilla * change ci yaml for testing Signed-off-by: ravi_kumar_pilla * change ci yaml for testing Signed-off-by: ravi_kumar_pilla * add default encoding when opening file * revert workflow tests Signed-off-by: ravi_kumar_pilla * fix lint Signed-off-by: ravi_kumar_pilla * update release note * update release note --------- Signed-off-by: ravi_kumar_pilla --- kedro-datasets/RELEASE.md | 5 ++++- kedro-datasets/kedro_datasets/polars/csv_dataset.py | 4 +++- kedro-datasets/tests/polars/test_csv_dataset.py | 10 ---------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 16fa5b18a..27df63f78 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md 
@@ -1,10 +1,13 @@ # Upcoming Release ## Major features and improvements + ## Bug fixes and other changes +- Fix polars.CSVDataset `save` method on Windows using `utf-8` as default encoding. + ## Breaking Changes -- Removed `tracking.MetricsDataset` and `tracking.JSONDataset` +- Removed `tracking.MetricsDataset` and `tracking.JSONDataset`. ## Community contributions diff --git a/kedro-datasets/kedro_datasets/polars/csv_dataset.py b/kedro-datasets/kedro_datasets/polars/csv_dataset.py index 6d8a988a5..9e6f35846 100644 --- a/kedro-datasets/kedro_datasets/polars/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/csv_dataset.py @@ -72,7 +72,9 @@ class CSVDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]): DEFAULT_LOAD_ARGS: dict[str, Any] = {"rechunk": True} DEFAULT_SAVE_ARGS: dict[str, Any] = {} - DEFAULT_FS_ARGS: dict[str, Any] = {"open_args_save": {"mode": "w"}} + DEFAULT_FS_ARGS: dict[str, Any] = { + "open_args_save": {"mode": "w", "encoding": "utf-8"} + } def __init__( # noqa: PLR0913 self, diff --git a/kedro-datasets/tests/polars/test_csv_dataset.py b/kedro-datasets/tests/polars/test_csv_dataset.py index e03f192cc..5312e9b48 100644 --- a/kedro-datasets/tests/polars/test_csv_dataset.py +++ b/kedro-datasets/tests/polars/test_csv_dataset.py @@ -88,14 +88,12 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe: pl.DataFrame): class TestCSVDataset: - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_save_and_load(self, csv_dataset, dummy_dataframe): """Test saving and reloading the dataset.""" csv_dataset.save(dummy_dataframe) reloaded = csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_exists(self, csv_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and nonexistent dataset.""" @@ -204,7 +202,6 @@ def test_version_str_repr(self, load_version, save_version): assert "load_args={'rechunk': True}" in str(ds) assert "load_args={'rechunk': True}" in str(ds_versioned) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for the versioned dataset.""" @@ -212,7 +209,6 @@ def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_csv): """Test that if a new version is created mid-run, by an external system, it won't be loaded in the current run.""" @@ -236,7 +232,6 @@ def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_c ds_new.resolve_load_version() == v_new ) # new version is discoverable by a new instance - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_multiple_saves(self, dummy_dataframe, filepath_csv): """Test multiple cycles of save followed by load for the same dataset""" ds_versioned = CSVDataset(filepath=filepath_csv, version=Version(None, None)) @@ -259,7 +254,6 @@ def test_multiple_saves(self, dummy_dataframe, filepath_csv): ds_new = CSVDataset(filepath=filepath_csv, version=Version(None, None)) assert ds_new.resolve_load_version() == second_load_version - @pytest.mark.xfail(sys.platform == "win32", 
reason="file encoding is not UTF-8") def test_release_instance_cache(self, dummy_dataframe, filepath_csv): """Test that cache invalidation does not affect other instances""" ds_a = CSVDataset(filepath=filepath_csv, version=Version(None, None)) @@ -288,14 +282,12 @@ def test_no_versions(self, versioned_csv_dataset): with pytest.raises(DatasetError, match=pattern): versioned_csv_dataset.load() - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_exists(self, versioned_csv_dataset, dummy_dataframe): """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): """Check the error when attempting to override the dataset if the corresponding CSV file for a given save version already exists.""" @@ -307,7 +299,6 @@ def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): with pytest.raises(DatasetError, match=pattern): versioned_csv_dataset.save(dummy_dataframe) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") @pytest.mark.parametrize( "load_version", ["2019-01-01T23.59.59.999Z"], indirect=True ) @@ -334,7 +325,6 @@ def test_http_filesystem_no_versioning(self): filepath="https://example.com/file.csv", version=Version(None, None) ) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_versioning_existing_dataset( self, csv_dataset, versioned_csv_dataset, dummy_dataframe ): From bf0c407edb237b1a9ce7993c7cf90796246209a6 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:55:09 +0000 Subject: [PATCH 4/4] feat(all): Replace trufflehog with detect-secrets (#983) * Removed trufflehog Signed-off-by: Elena Khaustova * Updated github actions per plugin Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova * Updated validate-pr check scopes Signed-off-by: Elena Khaustova * Updated lint command Signed-off-by: Elena Khaustova * Added key to trigger check Signed-off-by: Elena Khaustova * Updated GH action to track per plugin Signed-off-by: Elena Khaustova * Removed secret Signed-off-by: Elena Khaustova * Updated GH for kedro-datasets Signed-off-by: Elena Khaustova * Updated secrets baseline Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- .github/workflows/detect-secrets.yml | 46 +++ .github/workflows/kedro-airflow.yml | 7 + .github/workflows/kedro-datasets.yml | 7 + .github/workflows/kedro-docker.yml | 7 + .github/workflows/kedro-telemetry.yml | 7 + .github/workflows/validate-pr-title.yaml | 1 + .pre-commit-config.yaml | 12 +- .secrets.baseline | 494 +++++++++++++++++++++++ Makefile | 5 +- kedro-airflow/RELEASE.md | 1 + kedro-airflow/pyproject.toml | 2 +- kedro-datasets/RELEASE.md | 2 + kedro-datasets/pyproject.toml | 2 +- kedro-docker/RELEASE.md | 1 + kedro-docker/pyproject.toml | 2 +- kedro-telemetry/RELEASE.md | 1 + kedro-telemetry/pyproject.toml | 2 +- trufflehog-ignore.txt | 3 - 18 files changed, 585 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/detect-secrets.yml create mode 100644 .secrets.baseline delete mode 100644 trufflehog-ignore.txt diff --git a/.github/workflows/detect-secrets.yml b/.github/workflows/detect-secrets.yml new file mode 100644 index 
000000000..bd360b52b --- /dev/null +++ b/.github/workflows/detect-secrets.yml @@ -0,0 +1,46 @@ +name: Detect secrets on plugins + +on: + workflow_call: + inputs: + plugin: + type: string + os: + type: string + python-version: + type: string + +jobs: + detect-secrets: + defaults: + run: + shell: bash + runs-on: ${{ inputs.os }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + - name: Cache python packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{inputs.plugin}}-${{inputs.os}}-python-${{inputs.python-version}} + restore-keys: ${{inputs.plugin}} + - name: Install uv + run: | + python -m pip install "uv==0.2.21" + - name: Install dependencies + run: | + cd ${{ inputs.plugin }} + uv pip install --system "kedro @ git+https://github.com/kedro-org/kedro@main" + uv pip install --system "${{inputs.plugin}}[lint] @ ." + uv pip freeze --system + - name: Install pre-commit hooks + run: | + pre-commit install --install-hooks + pre-commit install --hook-type pre-push + - name: Scan all tracked files + run: git ls-files ":(glob)*" ${{ inputs.plugin }} -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline diff --git a/.github/workflows/kedro-airflow.yml b/.github/workflows/kedro-airflow.yml index 85e7ca62d..92c269ea2 100644 --- a/.github/workflows/kedro-airflow.yml +++ b/.github/workflows/kedro-airflow.yml @@ -46,3 +46,10 @@ jobs: plugin: kedro-airflow os: ${{ matrix.os }} python-version: ${{ matrix.python-version }} + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-airflow + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/kedro-datasets.yml b/.github/workflows/kedro-datasets.yml index d5aae0282..010115b73 100644 --- a/.github/workflows/kedro-datasets.yml +++ b/.github/workflows/kedro-datasets.yml @@ -61,3 +61,10 @@ jobs: - name: Documentation check for kedro-datasets run: | make check-datasets-docs + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-datasets + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/kedro-docker.yml b/.github/workflows/kedro-docker.yml index 66783b3b5..16ffcbafe 100644 --- a/.github/workflows/kedro-docker.yml +++ b/.github/workflows/kedro-docker.yml @@ -46,3 +46,10 @@ jobs: plugin: kedro-docker os: ${{ matrix.os }} python-version: ${{ matrix.python-version }} + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-docker + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/kedro-telemetry.yml b/.github/workflows/kedro-telemetry.yml index 5584ac775..aac47914e 100644 --- a/.github/workflows/kedro-telemetry.yml +++ b/.github/workflows/kedro-telemetry.yml @@ -35,3 +35,10 @@ jobs: plugin: kedro-telemetry os: ubuntu-latest python-version: "3.11" + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-telemetry + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/validate-pr-title.yaml b/.github/workflows/validate-pr-title.yaml index b6e6fc808..cb1e65327 100644 --- a/.github/workflows/validate-pr-title.yaml +++ b/.github/workflows/validate-pr-title.yaml @@ -19,5 +19,6 @@ jobs: datasets docker telemetry + all env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9f9706a34..9d2eb8de3 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -24,6 +24,12 @@ repos: additional_dependencies: - black==22.12.0 + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + args: [ '--baseline', '.secrets.baseline' ] + - repo: local hooks: - id: ruff-kedro-datasets @@ -86,12 +92,6 @@ repos: pass_filenames: false entry: black kedro-telemetry/kedro_telemetry kedro-telemetry/tests - - id: secret_scan - name: "Secret scan" - language: system - pass_filenames: false - entry: make secret-scan - - id: bandit name: "Bandit security check" language: system diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 000000000..ce3799e06 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,494 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": { + "kedro-datasets/kedro_datasets/dask/parquet_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets/dask/parquet_dataset.py", + "hashed_secret": "6e1d66a1596528c308e601c10aa0b92d53606ab9", + "is_verified": false, + "line_number": 71 + } + ], + "kedro-datasets/kedro_datasets/pandas/sql_dataset.py": [ + { + "type": "Basic Auth Credentials", + "filename": "kedro-datasets/kedro_datasets/pandas/sql_dataset.py", + "hashed_secret": "46e3d772a1888eadff26c7ada47fd7502d796e07", + "is_verified": false, + "line_number": 130 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets/pandas/sql_dataset.py", + "hashed_secret": "e026e197bb77b12d16ab6986e068751f016d0ea5", + "is_verified": false, + 
"line_number": 382 + } + ], + "kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py", + "hashed_secret": "a761ce3a45d97e41840a788495e85a70d1bb3815", + "is_verified": false, + "line_number": 83 + } + ], + "kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py", + "hashed_secret": "46e3d772a1888eadff26c7ada47fd7502d796e07", + "is_verified": false, + "line_number": 57 + } + ], + "kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py", + "hashed_secret": "b60d121b438a380c343d5ec3c2037564b82ffef3", + "is_verified": false, + "line_number": 44 + } + ], + "kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py", + "hashed_secret": "b60d121b438a380c343d5ec3c2037564b82ffef3", + "is_verified": false, + "line_number": 45 + } + ], + "kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 17 + } + ], + "kedro-datasets/kedro_datasets_experimental/tests/video/test_video_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/tests/video/test_video_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 16 + } + ], + "kedro-datasets/tests/dask/test_csv_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_csv_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 14 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_csv_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 27 + } + ], + "kedro-datasets/tests/dask/test_parquet_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_parquet_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 16 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_parquet_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 29 + } + ], + "kedro-datasets/tests/holoviews/test_holoviews_writer.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/holoviews/test_holoviews_writer.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 108 + } + ], + "kedro-datasets/tests/matplotlib/test_matplotlib_writer.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/matplotlib/test_matplotlib_writer.py", + "hashed_secret": "dc724af18fbdd4e59189f5fe768a5f8311527050", + "is_verified": false, + "line_number": 16 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/matplotlib/test_matplotlib_writer.py", + "hashed_secret": 
"727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 59 + } + ], + "kedro-datasets/tests/pandas/test_csv_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_csv_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 66 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_csv_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 213 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_csv_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 405 + } + ], + "kedro-datasets/tests/pandas/test_generic_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_generic_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 126 + } + ], + "kedro-datasets/tests/pandas/test_json_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_json_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 140 + } + ], + "kedro-datasets/tests/pandas/test_sql_dataset.py": [ + { + "type": "Basic Auth Credentials", + "filename": "kedro-datasets/tests/pandas/test_sql_dataset.py", + "hashed_secret": "46e3d772a1888eadff26c7ada47fd7502d796e07", + "is_verified": false, + "line_number": 19 + } + ], + "kedro-datasets/tests/pandas/test_xml_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_xml_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 117 + } + ], + "kedro-datasets/tests/partitions/test_incremental_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_incremental_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 440 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_incremental_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 460 + } + ], + "kedro-datasets/tests/partitions/test_partitioned_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "76f747de912e8682e29a23cb506dd5bf0de080d2", + "is_verified": false, + "line_number": 415 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "9027cc5a2c1321de60a2d71ccde6229d1152d6d3", + "is_verified": false, + "line_number": 416 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "5dcbdf371f181b9b7a41a4be7be70f8cbee67da7", + "is_verified": false, + "line_number": 452 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 503 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + 
"line_number": 523 + } + ], + "kedro-datasets/tests/plotly/test_html_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/plotly/test_html_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 70 + } + ], + "kedro-datasets/tests/plotly/test_json_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/plotly/test_json_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 83 + } + ], + "kedro-datasets/tests/plotly/test_plotly_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/plotly/test_plotly_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 81 + } + ], + "kedro-datasets/tests/polars/test_csv_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_csv_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 65 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_csv_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 159 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_csv_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 351 + } + ], + "kedro-datasets/tests/polars/test_eager_polars_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_eager_polars_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 126 + } + ], + "kedro-datasets/tests/polars/test_lazy_polars_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_lazy_polars_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 93 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_lazy_polars_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 198 + } + ], + "kedro-datasets/tests/snowflake/test_snowpark_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/snowflake/test_snowpark_dataset.py", + "hashed_secret": "1365dbfe676a193420ed7981184720b426ef2b7a", + "is_verified": false, + "line_number": 32 + } + ], + "kedro-datasets/tests/spark/test_spark_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 42 + } + ], + "kedro-datasets/tests/spark/test_spark_jdbc_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_jdbc_dataset.py", + "hashed_secret": "4f4fa638cf19a2919f12e0105085c123ca5c5172", + "is_verified": false, + "line_number": 15 + } + ], + "kedro-datasets/tests/spark/test_spark_streaming_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_streaming_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 17 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_streaming_dataset.py", + 
"hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 64 + } + ] + }, + "generated_at": "2025-01-13T16:27:46Z" +} diff --git a/Makefile b/Makefile index c7946d605..e8c8a4e08 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ install-pip-setuptools: python -m pip install -U pip setuptools wheel lint: - pre-commit run -a --hook-stage manual ruff-$(plugin) && pre-commit run trailing-whitespace --all-files && pre-commit run end-of-file-fixer --all-files && pre-commit run check-yaml --all-files && pre-commit run check-added-large-files --all-files && pre-commit run check-case-conflict --all-files && pre-commit run check-merge-conflict --all-files && pre-commit run debug-statements --all-files && pre-commit run black-$(plugin) --all-files --hook-stage manual && pre-commit run secret_scan --all-files --hook-stage manual && pre-commit run bandit --all-files --hook-stage manual + pre-commit run -a --hook-stage manual ruff-$(plugin) && pre-commit run trailing-whitespace --all-files && pre-commit run end-of-file-fixer --all-files && pre-commit run check-yaml --all-files && pre-commit run check-added-large-files --all-files && pre-commit run check-case-conflict --all-files && pre-commit run check-merge-conflict --all-files && pre-commit run debug-statements --all-files && pre-commit run black-$(plugin) --all-files --hook-stage manual && pre-commit run bandit --all-files --hook-stage manual $(MAKE) mypy mypy: @@ -21,9 +21,6 @@ test: e2e-tests: cd $(plugin) && behave -secret-scan: - trufflehog --max_depth 1 --exclude_paths trufflehog-ignore.txt . - install-test-requirements: cd $(plugin) && uv pip install ".[test]" diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index 6bd0b7163..348945ac9 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,4 +1,5 @@ # Upcoming Release +* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. # Release 0.9.2 * Removed support for Python 3.8 diff --git a/kedro-airflow/pyproject.toml b/kedro-airflow/pyproject.toml index ec7563cdd..6ef8a8b40 100644 --- a/kedro-airflow/pyproject.toml +++ b/kedro-airflow/pyproject.toml @@ -38,9 +38,9 @@ test = [ lint = [ "bandit", "black~=22.0", + "detect-secrets~=1.5.0", "mypy~=1.0", "pre-commit>=2.9.2", - "trufflehog>=2.1.0, <3.0", "ruff~=0.0.290", # mypy requirements "types-PyYAML", diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 27df63f78..15c13da84 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,6 +1,8 @@ # Upcoming Release ## Major features and improvements +- Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. + ## Bug fixes and other changes - Fix polars.CSVDataset `save` method on Windows using `utf-8` as default encoding. 
diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml
index 3ee8eb9e9..1fcde25c6 100644
--- a/kedro-datasets/pyproject.toml
+++ b/kedro-datasets/pyproject.toml
@@ -270,11 +270,11 @@ lint = [
     "bandit>=1.6.2, <2.0",
     "blacken-docs==1.9.2",
     "black~=22.0",
+    "detect-secrets~=1.5.0",
     "import-linter[toml]==1.2.6",
     "mypy~=1.0",
     "pre-commit>=2.9.2",
     "ruff~=0.0.290",
-    "trufflehog~=2.1",
     # mypy related dependencies
     "types-cachetools",
     "types-PyYAML",
diff --git a/kedro-docker/RELEASE.md b/kedro-docker/RELEASE.md
index f81181579..b7bab9313 100644
--- a/kedro-docker/RELEASE.md
+++ b/kedro-docker/RELEASE.md
@@ -1,4 +1,5 @@
 # Upcoming Release
+* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base.

 # Release 0.6.2

diff --git a/kedro-docker/pyproject.toml b/kedro-docker/pyproject.toml
index 15c8d04fc..b669a0e2d 100644
--- a/kedro-docker/pyproject.toml
+++ b/kedro-docker/pyproject.toml
@@ -39,9 +39,9 @@ test = [
 lint = [
     "bandit",
     "black~=22.0",
+    "detect-secrets~=1.5.0",
     "mypy~=1.0",
     "pre-commit>=2.9.2",
-    "trufflehog>=2.1.0, <3.0",
     "ruff~=0.0.290",
 ]

diff --git a/kedro-telemetry/RELEASE.md b/kedro-telemetry/RELEASE.md
index df7bb603a..1b4fce80f 100644
--- a/kedro-telemetry/RELEASE.md
+++ b/kedro-telemetry/RELEASE.md
@@ -1,4 +1,5 @@
 # Upcoming release
+* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base.

 # Release 0.6.2
 * Removed support for Python 3.8
diff --git a/kedro-telemetry/pyproject.toml b/kedro-telemetry/pyproject.toml
index 45f9d995d..1f43f2315 100644
--- a/kedro-telemetry/pyproject.toml
+++ b/kedro-telemetry/pyproject.toml
@@ -35,9 +35,9 @@ test = [
 lint = [
     "bandit>=1.6.2, <2.0",
     "black~=22.0",
+    "detect-secrets~=1.5.0",
     "mypy~=1.0",
     "pre-commit>=2.9.2",
-    "trufflehog>=2.1.0, <3.0",
     "ruff~=0.0.290",
     # mypy requirements
     "types-requests",
diff --git a/trufflehog-ignore.txt b/trufflehog-ignore.txt
deleted file mode 100644
index 1929a2634..000000000
--- a/trufflehog-ignore.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-kedro-telemetry/README.md
-kedro-telemetry/RELEASE.md
-kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py
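To regenerate or re-audit the baseline after a change like this, detect-secrets ships a CLI (`detect-secrets scan > .secrets.baseline` rebuilds it, `detect-secrets audit .secrets.baseline` reviews flagged entries interactively) as well as a Python API. A short sketch of the programmatic route, assuming detect-secrets ~=1.5.0 as pinned above; the target path is illustrative, taken from one of the files flagged in the baseline:

    import json

    from detect_secrets import SecretsCollection
    from detect_secrets.settings import default_settings

    # Scan a single file with the default plugin set and dump the findings
    # in the same JSON shape used by the "results" block of .secrets.baseline.
    secrets = SecretsCollection()
    with default_settings():
        secrets.scan_file("kedro-datasets/tests/pandas/test_csv_dataset.py")

    print(json.dumps(secrets.json(), indent=2))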