From 1b6316d624a421a7160536a768b8b88c9d6a52bd Mon Sep 17 00:00:00 2001 From: Jad <64837518+Jad-yehya@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:02:35 +0100 Subject: [PATCH] Code cleanup for MSL, PSM, SMAP and simulated datasets (#24) --- datasets/msl.py | 3 --- datasets/psm.py | 5 ++--- datasets/simulated.py | 5 +++++ datasets/smap.py | 3 --- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/datasets/msl.py b/datasets/msl.py index 4fb74a4..be585ee 100644 --- a/datasets/msl.py +++ b/datasets/msl.py @@ -24,9 +24,6 @@ class Dataset(BaseDataset): name = "MSL" - install_cmd = "conda" - requirements = ["pandas", "requests"] - parameters = { "debug": [False], } diff --git a/datasets/psm.py b/datasets/psm.py index 84f7d18..bd5e60f 100644 --- a/datasets/psm.py +++ b/datasets/psm.py @@ -20,8 +20,7 @@ class Dataset(BaseDataset): name = "PSM" - install_cmd = "conda" - requirements = ["pandas"] + parameters = { "debug": [False], } @@ -31,8 +30,8 @@ class Dataset(BaseDataset): } def get_data(self): - # Check if the data is already here path = config.get_data_path(key="PSM") + # Check if the data is already here if not path.exists(): path.mkdir(parents=True, exist_ok=True) diff --git a/datasets/simulated.py b/datasets/simulated.py index 6c7925e..7f48524 100644 --- a/datasets/simulated.py +++ b/datasets/simulated.py @@ -26,6 +26,10 @@ class Dataset(BaseDataset): } def get_data(self): + # Simulated dataset created using scikit-learn + # We create a regression dataset with some anomalies + + # Creating normal data X_train, _ = make_regression( n_samples=self.n_samples, n_features=self.n_features, @@ -40,6 +44,7 @@ def get_data(self): assert X_test.shape == (self.n_samples, self.n_features) + # Adding anomalies y_test = np.zeros(self.n_samples) for i in range(self.n_anomaly): idx = np.random.randint(self.n_samples) diff --git a/datasets/smap.py b/datasets/smap.py index df02bac..86dd691 100644 --- a/datasets/smap.py +++ b/datasets/smap.py @@ -22,9 +22,6 @@ class Dataset(BaseDataset): name = "SMAP" - install_cmd = "conda" - requirements = ["pandas", "scikit-learn"] - parameters = { "debug": [False], "n_splits": [5],