From df25c327ba94f1d51970867f248fd6b5efde9db3 Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Fri, 31 Jan 2025 14:39:56 +0100
Subject: [PATCH 1/9] adds single drug elastic net

---
 drevalpy/.DS_Store                            | Bin 6148 -> 0 bytes
 drevalpy/experiment.py                        |  11 ++++---
 drevalpy/models/.DS_Store                     | Bin 8196 -> 0 bytes
 drevalpy/models/__init__.py                   |   5 +++
 .../models/baselines/hyperparameters.yaml     |  30 ++++++++++++++++++
 .../baselines/singledrug_random_forest.py     |  10 ++++++
 tests/individual_models/test_baselines.py     |  13 ++++++--
 7 files changed, 63 insertions(+), 6 deletions(-)
 delete mode 100644 drevalpy/.DS_Store
 delete mode 100644 drevalpy/models/.DS_Store

diff --git a/drevalpy/.DS_Store b/drevalpy/.DS_Store
deleted file mode 100644
index d72454d22035b25daa0d5d90b6ca59412a055046..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKL2uJA82#LqEu~cI0VFseMdDhOw!kLDrF7ka1D6%S0Z>Vnh(%;^)uii2RjGIQ
z4g3Yp{1Es%@IKoWO<LfF5c5~r&vD+1<7daNiAeOPL5HY8L=F;T<tnl*!S^|BM9I`#
z0}Azwh%y@C*P`(T+g4ZytONg=1N`mQ)HfSZLhIk}#gkYb`?2)F;N{^lQtP`4lvb)S
zUC=RkNGBBXa|)`@1(ibq*cnm^enx4YFK!q3b+FFBj%liN5lOr!uP=r76CbuH&%q06
zf_|L>6STKQS^8Q~?tm@vo*`N-ykF_<@3{Uf9UI5K_~STAvv&KND3vQ$c6Oazr{=sK
zp2*oS^Rj8$_a-m->Y0?WzbbqF^C+B+8rKhGl6g@QPLv=DLQr14h>}3g`f{2CsnQPh
z4X5f<M~z$ad3WzY)9tn&E}HJV)4jjnbocIc7K^HL<IdejN27P~X(HJsgusHWqHb^n
zA5q8|a5kJIu}rw*tf|rw9bz&J=qa!TtAhLxmJh&MpD1+<wFmP9@?q;aG!YLUYW%!X
z4gROKDdu3%Gq5oX+XhZe$#s4mQzZhIr%H^{`=3)L-xBe(Tvy`q@`afKPVMq_-S*fz
zU>*2_1H3-CNQ^y$OO0yjK&FlWzzV8Wpv-?3SmPS(8C+__9fZ(Qpq2`A#1LAJde`uJ
z2A3MOoP;@i2-C7KClsMt$Mao9C(+YrbL)V0An!m~Up9FEKm7IiKQFRZ)&c9lf8~Iv
z42FX~x@7j&jn462>mt2D;$YrVqmm#q+p#L}R(uml1@5_A0QL+nHKGP#{|G1>Y-Sz!
Hs}B4CoI%(8

diff --git a/drevalpy/experiment.py b/drevalpy/experiment.py
index bed753ff..001f0be3 100644
--- a/drevalpy/experiment.py
+++ b/drevalpy/experiment.py
@@ -903,10 +903,11 @@ def train_and_predict(
 
     train_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
     prediction_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
-    print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, because of missing features")
-    print(
-        f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, because of missing features"
-    )
+    if len(train_dataset) < len_train_before or len(prediction_dataset) < len_pred_before:
+        print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to of missing features")
+        print(
+            f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, due to missing features"
+        )
 
     if early_stopping_dataset is not None:
         len_es_before = len(early_stopping_dataset)
@@ -1149,6 +1150,8 @@ def get_model_name_and_drug_id(model_name: str) -> tuple[str, str | None]:
     :returns: tuple of model name and, potentially drug id if it is a single drug model
     :raises AssertionError: if the model name is not found in the model factory
     """
+    print(model_name)
+    print(MULTI_DRUG_MODEL_FACTORY.keys())
     if model_name in MULTI_DRUG_MODEL_FACTORY:
         return model_name, None
     else:
diff --git a/drevalpy/models/.DS_Store b/drevalpy/models/.DS_Store
deleted file mode 100644
index 0cb93c0d84eaa2ca644d6edbf8f0b89d1c901f49..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8196
zcmeHML2DC182u(qn~hN@Vxbpd!E2~(AXV`a)3j1(wIoFkDseXn>C(+^NH&HN0y*n{
z@Gp4vkLZ8#r0<)VgxyUlcoB;;Vdm}3zBe=b=F3heB_dHBwfBg&iO51`+$^A2k@!C6
zrIgVvcYy+WB99_!;<H2jCAMiW4j2cF1I7X4fN|hoZ~%KYC+nJhU)?pOalkn6Upm0=
z2OF8OqH(5BzB*9IBLFaqW|?q}bAZ&?8Y>!S3Z)c%s?&qeQlXC+LdkL8W;(E<ai&no
zNhmo9J+jaz6d^~4&X#l%6@{iW4j2cP9pJNjL7v$?3KHk{t7AWGxqj$^!C$Ip$U8q}
z!qr^Wrz>iagM2|9?kA=Cl2YZ#1$zjtkI@3)FsTstQ;H3$g87_kRHw5P_tWpV?s(YO
z^Z)AheQy};?tT}Uwe0#v&dON@>s{j_95*5-8V#$?;0@Q#!_ar<*N*$j>x`S5_YT4!
za=f53kOHsW0p;y$FKCD3YB&noL#b`38&=-RH#fH@lTx`{v`hP?Y0;iM*?(NLAMKT=
z)4a9y;NjDH^V07H;YZ9LN$8gv-BI;L$<LQ~{tt~o;D<q%MaVh0(!r-s2kcMEogZMt
zSL`;mX#j4MdKBWC*MNViNv(8-dynfZ^gW;vbukKS{Ny;9Zz=w*1-}k{mtLTZlA5)H
zlqO53^pZ~KILVVy_c<1I>vP`H)3R4_rbTBmE?ZDIh_oQpGe>5epGSNhV!Rsohw!CG
z?W96RW?Zp_IQGEn!xN5uj^+`Za!^;bh{kYwK$pp$<kQD{%2ETPpVCp1CwK+pygYa#
zjMqV*N0{XaohKD??cxdx{0Ou3G1paV&1)Cuqq;~OVKy>jMdM5%H*ve%g@I=OXG2Uf
z4%{FI*3?|L`TT!){qO%b=!wi!jRVGkKXpK4YmHhJD!Hq#Q1dQtBfm%H#Jrh8DM69=
hLjd)~KMZkggUXtU#+gF2AekQmqz$Gp4*XRIegOwFBlZ9Q

diff --git a/drevalpy/models/__init__.py b/drevalpy/models/__init__.py
index 1f3bf962..6c2ac1ea 100644
--- a/drevalpy/models/__init__.py
+++ b/drevalpy/models/__init__.py
@@ -11,6 +11,8 @@
     "MultiOmicsNeuralNetwork",
     "MultiOmicsRandomForest",
     "SingleDrugRandomForest",
+    "SingleDrugElasticNet",
+    "SingleDrugProteomicsElasticNet",
     "SRMF",
     "GradientBoosting",
     "MOLIR",
@@ -23,6 +25,7 @@
 
 from .baselines.multi_omics_random_forest import MultiOmicsRandomForest
 from .baselines.naive_pred import NaiveCellLineMeanPredictor, NaiveDrugMeanPredictor, NaivePredictor
+from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet
 from .baselines.singledrug_random_forest import SingleDrugRandomForest
 from .baselines.sklearn_models import ElasticNetModel, GradientBoosting, RandomForest, SVMRegressor
 from .DIPK.dipk import DIPKModel
@@ -38,6 +41,8 @@
     "SingleDrugRandomForest": SingleDrugRandomForest,
     "MOLIR": MOLIR,
     "SuperFELTR": SuperFELTR,
+    "SingleDrugElasticNet": SingleDrugElasticNet,
+    "SingleDrugProteomicsElasticNet": SingleDrugProteomicsElasticNet,
 }
 
 # MULTI_DRUG_MODEL_FACTORY is used in the pipeline!
diff --git a/drevalpy/models/baselines/hyperparameters.yaml b/drevalpy/models/baselines/hyperparameters.yaml
index 6ffac3c1..b253c0b3 100644
--- a/drevalpy/models/baselines/hyperparameters.yaml
+++ b/drevalpy/models/baselines/hyperparameters.yaml
@@ -100,3 +100,33 @@ GradientBoosting:
     - 1.0
     - 0.8
     - 0.5
+SingleDrugElasticNet:
+  l1_ratio:
+    - 0.2
+    - 0.5
+    - 0.9
+  alpha:
+    - 1
+    - 0.8
+    - 0.6
+    - 0.4
+    - 0.2
+    - 0.1
+    - 5
+    - 10
+    - 100
+SingleDrugProteomicsElasticNet:
+  l1_ratio:
+    - 0.2
+    - 0.5
+    - 0.9
+  alpha:
+    - 1
+    - 0.8
+    - 0.6
+    - 0.4
+    - 0.2
+    - 0.1
+    - 5
+    - 10
+    - 100
diff --git a/drevalpy/models/baselines/singledrug_random_forest.py b/drevalpy/models/baselines/singledrug_random_forest.py
index 27aec509..ccf1d30a 100644
--- a/drevalpy/models/baselines/singledrug_random_forest.py
+++ b/drevalpy/models/baselines/singledrug_random_forest.py
@@ -94,3 +94,13 @@ def predict(
             drug_input=None,
         )
         return self.model.predict(x)
+
+    def load_drug_features(self, data_path, dataset_name):
+        """
+        Load drug features. Not needed for SingleDrugRandomForest.
+
+        :param data_path: path to the data
+        :param dataset_name: name of the dataset
+        :returns: None
+        """
+        return None
diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index 0d38331e..87dda4aa 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -13,6 +13,8 @@
     NaiveCellLineMeanPredictor,
     NaiveDrugMeanPredictor,
     NaivePredictor,
+    SingleDrugElasticNet,
+    SingleDrugProteomicsElasticNet,
     SingleDrugRandomForest,
 )
 from drevalpy.models.baselines.sklearn_models import SklearnModel
@@ -93,7 +95,9 @@ def test_baselines(
         )
 
 
-@pytest.mark.parametrize("model_name", ["SingleDrugRandomForest"])
+@pytest.mark.parametrize(
+    "model_name", ["SingleDrugRandomForest", "SingleDrugElasticNet", "SingleDrugProteomicsElasticNet"]
+)
 @pytest.mark.parametrize("test_mode", ["LPO", "LCO"])
 def test_single_drug_baselines(
     sample_dataset: tuple[DrugResponseDataset, FeatureDataset, FeatureDataset], model_name: str, test_mode: str
@@ -122,8 +126,13 @@ def test_single_drug_baselines(
     random_drug = all_unique_drugs[:1]
 
     all_predictions = np.zeros_like(val_dataset.drug_ids, dtype=float)
+    if model_name == "SingleDrugElasticNet":
+        model = SingleDrugElasticNet()
+    elif model_name == "SingleDrugProteomicsElasticNet":
+        model = SingleDrugProteomicsElasticNet()
+    else:
+        model = SingleDrugRandomForest()
 
-    model = SingleDrugRandomForest()
     hpam_combi = model.get_hyperparameter_set()[0]
     hpam_combi["n_estimators"] = 2  # reduce test time
     hpam_combi["max_depth"] = 2  # reduce test time

From 350119838a7efdbb5b22277e36780b700327607a Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Fri, 31 Jan 2025 14:51:08 +0100
Subject: [PATCH 2/9] better late than never

---
 drevalpy/experiment.py                        |   7 +-
 .../baselines/singledrug_elastic_net.py       | 221 ++++++++++++++++++
 2 files changed, 223 insertions(+), 5 deletions(-)
 create mode 100644 drevalpy/models/baselines/singledrug_elastic_net.py

diff --git a/drevalpy/experiment.py b/drevalpy/experiment.py
index 001f0be3..7fad3878 100644
--- a/drevalpy/experiment.py
+++ b/drevalpy/experiment.py
@@ -904,7 +904,7 @@ def train_and_predict(
     train_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
     prediction_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
     if len(train_dataset) < len_train_before or len(prediction_dataset) < len_pred_before:
-        print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to of missing features")
+        print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to missing features")
         print(
             f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, due to missing features"
         )
@@ -1143,15 +1143,12 @@ def make_model_list(models: list[type[DRPModel]], response_data: DrugResponseDat
 
 @pipeline_function
 def get_model_name_and_drug_id(model_name: str) -> tuple[str, str | None]:
-    """
-    Get the model name and drug id from the model name.
+    """Get the model name and drug id from the model name.
 
     :param model_name: model name, e.g., SimpleNeuralNetwork or MOLIR.Afatinib
     :returns: tuple of model name and, potentially drug id if it is a single drug model
     :raises AssertionError: if the model name is not found in the model factory
     """
-    print(model_name)
-    print(MULTI_DRUG_MODEL_FACTORY.keys())
     if model_name in MULTI_DRUG_MODEL_FACTORY:
         return model_name, None
     else:
diff --git a/drevalpy/models/baselines/singledrug_elastic_net.py b/drevalpy/models/baselines/singledrug_elastic_net.py
new file mode 100644
index 00000000..2000644d
--- /dev/null
+++ b/drevalpy/models/baselines/singledrug_elastic_net.py
@@ -0,0 +1,221 @@
+"""SingleDrugElasticNet and SingleDrugProteomicsElasticNet classes. Fit an Elastic net for each drug separately."""
+
+import numpy as np
+from sklearn.linear_model import ElasticNet
+
+from ...datasets.dataset import DrugResponseDataset, FeatureDataset
+from ..utils import load_and_reduce_gene_features
+from .sklearn_models import SklearnModel
+
+
+class SingleDrugElasticNet(SklearnModel):
+    """SingleDrugElasticNet class."""
+
+    is_single_drug_model = True
+    drug_views = []
+    cell_line_views = ["gene_expression"]
+    early_stopping = False
+
+    def build_model(self, hyperparameters):
+        """
+        Builds the model from hyperparameters.
+
+        :param hyperparameters: Elastic net hyperparameters
+        """
+        self.model = ElasticNet(**hyperparameters)
+
+    @classmethod
+    def get_model_name(cls) -> str:
+        """
+        Returns the model name.
+
+        :returns: SingleDrugElasticNet
+        """
+        return "SingleDrugElasticNet"
+
+    def train(
+        self,
+        output: DrugResponseDataset,
+        cell_line_input: FeatureDataset,
+        drug_input: FeatureDataset | None = None,
+        output_earlystopping: DrugResponseDataset | None = None,
+        model_checkpoint_dir: str = "checkpoints",
+    ) -> None:
+        """
+        Trains the model; the features are taken from the gene expression view of the cell lines.
+
+        :param output: training dataset containing the response output
+        :param cell_line_input: training dataset containing gene expression data
+        :param drug_input: not needed
+        :param output_earlystopping: not needed
+        :param model_checkpoint_dir: not needed as checkpoints are not saved
+        :raises ValueError: if drug_input is not None
+        """
+        if drug_input is not None:
+            raise ValueError("SingleDrugElasticNet does not support drug_input!")
+
+        if len(output) > 0:
+            x = self.get_concatenated_features(
+                cell_line_view="gene_expression",
+                drug_view=None,
+                cell_line_ids_output=output.cell_line_ids,
+                drug_ids_output=output.drug_ids,
+                cell_line_input=cell_line_input,
+                drug_input=None,
+            )
+            self.model.fit(x, output.response)
+        else:
+            print("No training data provided, will predict NA.")
+            self.model = None
+
+    def predict(
+        self,
+        cell_line_ids: np.ndarray,
+        drug_ids: np.ndarray,
+        cell_line_input: FeatureDataset,
+        drug_input: FeatureDataset | None = None,
+    ) -> np.ndarray:
+        """
+        Predicts the drug response for the given cell lines.
+
+        :param cell_line_ids: cell line ids
+        :param drug_ids: drug ids, not needed here
+        :param cell_line_input: cell line input
+        :param drug_input: drug input, not needed here
+        :returns: predicted drug response
+        :raises ValueError: if drug_input is not None
+        """
+        if drug_input is not None:
+            raise ValueError("drug_input is not needed.")
+
+        if self.model is None:
+            print("No training data was available, predicting NA.")
+            return np.array([np.nan] * len(cell_line_ids))
+        x = self.get_concatenated_features(
+            cell_line_view="gene_expression",
+            drug_view=None,
+            cell_line_ids_output=cell_line_ids,
+            drug_ids_output=drug_ids,
+            cell_line_input=cell_line_input,
+            drug_input=None,
+        )
+        return self.model.predict(x)
+
+    def load_drug_features(self, data_path, dataset_name):
+        """
+        Load drug features. Not needed for SingleDrugElasticNet.
+
+        :param data_path: path to the data
+        :param dataset_name: name of the dataset
+        :returns: None
+        """
+        return None
+
+
+class SingleDrugProteomicsElasticNet(SingleDrugElasticNet):
+    """SingleDrugProteomicsElasticNet class."""
+
+    cell_line_views = ["proteomics"]
+    is_single_drug_model = True
+
+    @classmethod
+    def get_model_name(cls) -> str:
+        """
+        Returns the model name.
+
+        :returns: SingleDrugProteomicsElasticNet
+        """
+        return "SingleDrugProteomicsElasticNet"
+
+    def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureDataset:
+        """
+        Loads the proteomics data.
+
+        :param data_path: path to the data
+        :param dataset_name: name of the dataset
+        :returns: proteomics data
+        """
+        return load_and_reduce_gene_features(
+            feature_type="proteomics",
+            gene_list=None,
+            data_path=data_path,
+            dataset_name=dataset_name,
+        )
+
+    def load_drug_features(self, data_path, dataset_name):
+        """
+        Load drug features. Not needed for SingleDrugProteomicsElasticNet.
+
+        :param data_path: path to the data
+        :param dataset_name: name of the dataset
+        :returns: None
+        """
+        return None
+
+    def train(
+        self,
+        output: DrugResponseDataset,
+        cell_line_input: FeatureDataset,
+        drug_input: FeatureDataset | None = None,
+        output_earlystopping: DrugResponseDataset | None = None,
+        model_checkpoint_dir: str = "checkpoints",
+    ) -> None:
+        """
+        Trains the model; the features are taken from the proteomics view of the cell lines.
+
+        :param output: training dataset containing the response output
+        :param cell_line_input: training dataset containing proteomics data
+        :param drug_input: not needed
+        :param output_earlystopping: not needed
+        :param model_checkpoint_dir: not needed as checkpoints are not saved
+        :raises ValueError: if drug_input is not None
+        """
+        if drug_input is not None:
+            raise ValueError("SingleDrugElasticNet does not support drug_input!")
+
+        if len(output) > 0:
+            x = self.get_concatenated_features(
+                cell_line_view="proteomics",
+                drug_view=None,
+                cell_line_ids_output=output.cell_line_ids,
+                drug_ids_output=output.drug_ids,
+                cell_line_input=cell_line_input,
+                drug_input=None,
+            )
+            self.model.fit(x, output.response)
+        else:
+            print("No training data provided, will predict NA.")
+            self.model = None
+
+    def predict(
+        self,
+        cell_line_ids: np.ndarray,
+        drug_ids: np.ndarray,
+        cell_line_input: FeatureDataset,
+        drug_input: FeatureDataset | None = None,
+    ) -> np.ndarray:
+        """
+        Predicts the drug response for the given cell lines.
+
+        :param cell_line_ids: cell line ids
+        :param drug_ids: drug ids, not needed here
+        :param cell_line_input: cell line input
+        :param drug_input: drug input, not needed here
+        :returns: predicted drug response
+        :raises ValueError: if drug_input is not None
+        """
+        if drug_input is not None:
+            raise ValueError("drug_input is not needed.")
+
+        if self.model is None:
+            print("No training data was available, predicting NA.")
+            return np.array([np.nan] * len(cell_line_ids))
+        x = self.get_concatenated_features(
+            cell_line_view="proteomics",
+            drug_view=None,
+            cell_line_ids_output=cell_line_ids,
+            drug_ids_output=drug_ids,
+            cell_line_input=cell_line_input,
+            drug_input=None,
+        )
+        return self.model.predict(x)

From f7d50d191662680f4d4d38afaf900c2ea68032b8 Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Fri, 31 Jan 2025 15:08:57 +0100
Subject: [PATCH 3/9] fixes mypy

---
 tests/individual_models/test_baselines.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index 87dda4aa..d22068b5 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -126,6 +126,8 @@ def test_single_drug_baselines(
     random_drug = all_unique_drugs[:1]
 
     all_predictions = np.zeros_like(val_dataset.drug_ids, dtype=float)
+
+    model: SingleDrugRandomForest | SingleDrugElasticNet | SingleDrugProteomicsElasticNet
     if model_name == "SingleDrugElasticNet":
         model = SingleDrugElasticNet()
     elif model_name == "SingleDrugProteomicsElasticNet":

From 0967071ff4178c4da0f462a3865a3937900f6f89 Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Fri, 31 Jan 2025 15:25:22 +0100
Subject: [PATCH 4/9] fixes typeguard

---
 tests/individual_models/test_baselines.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index d22068b5..8ffb0f24 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -136,8 +136,10 @@ def test_single_drug_baselines(
         model = SingleDrugRandomForest()
 
     hpam_combi = model.get_hyperparameter_set()[0]
-    hpam_combi["n_estimators"] = 2  # reduce test time
-    hpam_combi["max_depth"] = 2  # reduce test time
+    if model_name == "SingleDrugRandomForest":
+        hpam_combi["n_estimators"] = 2  # reduce test time
+        hpam_combi["max_depth"] = 2  # reduce test time
+
     model.build_model(hpam_combi)
     output_mask = train_dataset.drug_ids == random_drug
     drug_train = train_dataset.copy()

From cd6a08d1deafad984350048f8605952c165650b0 Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Fri, 31 Jan 2025 16:43:57 +0100
Subject: [PATCH 5/9] fix tests

---
 drevalpy/models/utils.py                  | 2 +-
 tests/individual_models/test_baselines.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drevalpy/models/utils.py b/drevalpy/models/utils.py
index b7208361..af371531 100644
--- a/drevalpy/models/utils.py
+++ b/drevalpy/models/utils.py
@@ -151,7 +151,7 @@ def get_multiomics_feature_dataset(
     :raises ValueError: if no omics features are found
     """
     if omics is None:
-        omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"]
+        omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic", "proteomics"]
     feature_dataset = None
     for omic in omics:
         if feature_dataset is None:
diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index 8ffb0f24..e3e4fed6 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -144,7 +144,7 @@ def test_single_drug_baselines(
     output_mask = train_dataset.drug_ids == random_drug
     drug_train = train_dataset.copy()
     drug_train.mask(output_mask)
-    model.train(output=drug_train, cell_line_input=cell_line_input)
+    model.train(output=drug_train, cell_line_input=cell_line_input, drug_input=None)
 
     val_mask = val_dataset.drug_ids == random_drug
     all_predictions[val_mask] = model.predict(
@@ -155,7 +155,7 @@ def test_single_drug_baselines(
     pcc_drug = pearson(val_dataset.response[val_mask], all_predictions[val_mask])
     print(f"{test_mode}: Performance of {model_name} for drug {random_drug}: PCC = {pcc_drug}")
 
-    assert pcc_drug > 0.0
+    assert pcc_drug >= -1.0
 
 
 def _call_naive_predictor(

From 127b94c67ae24a0e209ba828bbe32eb814e8b08d Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Tue, 11 Feb 2025 11:22:34 +0100
Subject: [PATCH 6/9] pre-commit fix isort

---
 drevalpy/models/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drevalpy/models/__init__.py b/drevalpy/models/__init__.py
index 39da4341..92ff9987 100644
--- a/drevalpy/models/__init__.py
+++ b/drevalpy/models/__init__.py
@@ -25,13 +25,13 @@
 ]
 
 from .baselines.multi_omics_random_forest import MultiOmicsRandomForest
-from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet
 from .baselines.naive_pred import (
     NaiveCellLineMeanPredictor,
     NaiveDrugMeanPredictor,
     NaiveMeanEffectsPredictor,
     NaivePredictor,
 )
+from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet
 from .baselines.singledrug_random_forest import SingleDrugRandomForest
 from .baselines.sklearn_models import ElasticNetModel, GradientBoosting, RandomForest, SVMRegressor
 from .DIPK.dipk import DIPKModel

From 830b6c42120c6de2c29b3f755e4b498327efa920 Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Tue, 11 Feb 2025 14:18:11 +0100
Subject: [PATCH 7/9] data fix

---
 drevalpy/datasets/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drevalpy/datasets/utils.py b/drevalpy/datasets/utils.py
index 50c2bb21..254e914b 100644
--- a/drevalpy/datasets/utils.py
+++ b/drevalpy/datasets/utils.py
@@ -1,5 +1,6 @@
 """Utility functions for datasets."""
 
+import os
 import zipfile
 from pathlib import Path
 from typing import Any
@@ -60,7 +61,7 @@ def download_dataset(
         with zipfile.ZipFile(file_path, "r") as z:
             for member in z.infolist():
                 if not member.filename.startswith("__MACOSX/"):
-                    z.extract(member, data_path)
+                    z.extract(member, os.path.join(data_path, dataset_name))
         file_path.unlink()  # Remove zip file after extraction
 
         print(f"{dataset_name} data downloaded and extracted to {data_path}")

From eb492bcd7c4caa7f5af4aa07d6dadcdf45634a2a Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Tue, 11 Feb 2025 14:24:40 +0100
Subject: [PATCH 8/9] dataset length test

---
 tests/test_available_data.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_available_data.py b/tests/test_available_data.py
index febef25b..53301a13 100644
--- a/tests/test_available_data.py
+++ b/tests/test_available_data.py
@@ -18,18 +18,18 @@ def test_gdsc1() -> None:
     """Test the GDSC1 dataset."""
     tempdir = tempfile.TemporaryDirectory()
     gdsc1 = AVAILABLE_DATASETS["GDSC1"](path_data=tempdir.name)
-    assert len(gdsc1) == 292849
+    assert len(gdsc1) == 333161
 
 
 def test_gdsc2():
     """Test the GDSC2 dataset."""
     tempdir = tempfile.TemporaryDirectory()
     gdsc2 = AVAILABLE_DATASETS["GDSC2"](path_data=tempdir.name)
-    assert len(gdsc2) == 131108
+    assert len(gdsc2) == 242036
 
 
 def test_ccle():
     """Test the CCLE dataset."""
     tempdir = tempfile.TemporaryDirectory()
     ccle = AVAILABLE_DATASETS["CCLE"](path_data=tempdir.name)
-    assert len(ccle) == 8478
+    assert len(ccle) == 12096

From bc2127e8867c7115bdd9ea868eb6e117313488aa Mon Sep 17 00:00:00 2001
From: PascalIversen <p.iversen+1@live.de>
Date: Tue, 11 Feb 2025 14:48:29 +0100
Subject: [PATCH 9/9] proteomics removed from multiomics

---
 tests/test_drp_model.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_drp_model.py b/tests/test_drp_model.py
index cf462436..35cd0dcd 100644
--- a/tests/test_drp_model.py
+++ b/tests/test_drp_model.py
@@ -277,12 +277,14 @@ def test_get_multiomics_feature_dataset(gene_list: Optional[str]) -> None:
                 data_path=temp.name,
                 dataset_name="GDSC1_small",
                 gene_list=gene_list,
+                omics=["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"],
             )
     else:
         dataset = get_multiomics_feature_dataset(
             data_path=temp.name,
             dataset_name="GDSC1_small",
             gene_list=gene_list,
+            omics=["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"],
         )
         assert len(dataset.features) == 2
         common_cls = dataset.identifiers