From 1b6316d624a421a7160536a768b8b88c9d6a52bd Mon Sep 17 00:00:00 2001
From: Jad <64837518+Jad-yehya@users.noreply.github.com>
Date: Mon, 18 Nov 2024 17:02:35 +0100
Subject: [PATCH] Code cleanup for MSL, PSM, SMAP and simulated datasets (#24)

---
 datasets/msl.py       | 3 ---
 datasets/psm.py       | 5 ++---
 datasets/simulated.py | 5 +++++
 datasets/smap.py      | 3 ---
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/datasets/msl.py b/datasets/msl.py
index 4fb74a4..be585ee 100644
--- a/datasets/msl.py
+++ b/datasets/msl.py
@@ -24,9 +24,6 @@
 class Dataset(BaseDataset):
     name = "MSL"
 
-    install_cmd = "conda"
-    requirements = ["pandas", "requests"]
-
     parameters = {
         "debug": [False],
     }
diff --git a/datasets/psm.py b/datasets/psm.py
index 84f7d18..bd5e60f 100644
--- a/datasets/psm.py
+++ b/datasets/psm.py
@@ -20,8 +20,7 @@
 
 class Dataset(BaseDataset):
     name = "PSM"
-    install_cmd = "conda"
-    requirements = ["pandas"]
+
     parameters = {
         "debug": [False],
     }
@@ -31,8 +30,8 @@ class Dataset(BaseDataset):
     }
 
     def get_data(self):
-        # Check if the data is already here
         path = config.get_data_path(key="PSM")
+
         # Check if the data is already here
         if not path.exists():
             path.mkdir(parents=True, exist_ok=True)
diff --git a/datasets/simulated.py b/datasets/simulated.py
index 6c7925e..7f48524 100644
--- a/datasets/simulated.py
+++ b/datasets/simulated.py
@@ -26,6 +26,10 @@ class Dataset(BaseDataset):
     }
 
     def get_data(self):
+        # Simulated dataset created using scikit-learn
+        # We create a regression dataset with some anomalies
+
+        # Creating normal data
         X_train, _ = make_regression(
             n_samples=self.n_samples,
             n_features=self.n_features,
@@ -40,6 +44,7 @@ def get_data(self):
 
         assert X_test.shape == (self.n_samples, self.n_features)
 
+        # Adding anomalies
         y_test = np.zeros(self.n_samples)
         for i in range(self.n_anomaly):
             idx = np.random.randint(self.n_samples)
diff --git a/datasets/smap.py b/datasets/smap.py
index df02bac..86dd691 100644
--- a/datasets/smap.py
+++ b/datasets/smap.py
@@ -22,9 +22,6 @@
 class Dataset(BaseDataset):
     name = "SMAP"
 
-    install_cmd = "conda"
-    requirements = ["pandas", "scikit-learn"]
-
     parameters = {
         "debug": [False],
         "n_splits": [5],