From df25c327ba94f1d51970867f248fd6b5efde9db3 Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Fri, 31 Jan 2025 14:39:56 +0100
Subject: [PATCH 1/9] adds single drug elastic net
---
drevalpy/.DS_Store | Bin 6148 -> 0 bytes
drevalpy/experiment.py | 11 ++++---
drevalpy/models/.DS_Store | Bin 8196 -> 0 bytes
drevalpy/models/__init__.py | 5 +++
.../models/baselines/hyperparameters.yaml | 30 ++++++++++++++++++
.../baselines/singledrug_random_forest.py | 10 ++++++
tests/individual_models/test_baselines.py | 13 ++++++--
7 files changed, 63 insertions(+), 6 deletions(-)
delete mode 100644 drevalpy/.DS_Store
delete mode 100644 drevalpy/models/.DS_Store
diff --git a/drevalpy/.DS_Store b/drevalpy/.DS_Store
deleted file mode 100644
index d72454d22035b25daa0d5d90b6ca59412a055046..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 6148
zcmeHKL2uJA82#LqEu~cI0VFseMdDhOw!kLDrF7ka1D6%S0Z>Vnh(%;^)uii2RjGIQ
z4g3Yp{1Es%@IKoWO6STKQS^8Q~?tm@vo*`N-ykF_<@3{Uf9UI5K_~STAvv&KND3vQ$c6Oazr{=sK
zp2*oS^Rj8$_a-m->Y0?WzbbqF^C+B+8rKhGl6g@QPLv=DLQr14h>}3g`f{2CsnQPh
z4X5f*2_1H3-CNQ^y$OO0yjK&FlWzzV8Wpv-?3SmPS(8C+__9fZ(Qpq2`A#1LAJde`uJ
z2A3MOoP;@i2-C7KClsMt$Mao9C(+YrbL)V0An!m~Up9FEKm7IiKQFRZ)&c9lf8~Iv
z42FX~x@7j&jn462>mt2D;$YrVqmm#q+p#L}R(uml1@5_A0QL+nHKGP#{|G1>Y-Sz!
Hs}B4CoI%(8
diff --git a/drevalpy/experiment.py b/drevalpy/experiment.py
index bed753ff..001f0be3 100644
--- a/drevalpy/experiment.py
+++ b/drevalpy/experiment.py
@@ -903,10 +903,11 @@ def train_and_predict(
train_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
prediction_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
- print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, because of missing features")
- print(
- f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, because of missing features"
- )
+ if len(train_dataset) < len_train_before or len(prediction_dataset) < len_pred_before:
+ print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to of missing features")
+ print(
+ f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, due to missing features"
+ )
if early_stopping_dataset is not None:
len_es_before = len(early_stopping_dataset)
@@ -1149,6 +1150,8 @@ def get_model_name_and_drug_id(model_name: str) -> tuple[str, str | None]:
:returns: tuple of model name and, potentially drug id if it is a single drug model
:raises AssertionError: if the model name is not found in the model factory
"""
+ print(model_name)
+ print(MULTI_DRUG_MODEL_FACTORY.keys())
if model_name in MULTI_DRUG_MODEL_FACTORY:
return model_name, None
else:
diff --git a/drevalpy/models/.DS_Store b/drevalpy/models/.DS_Store
deleted file mode 100644
index 0cb93c0d84eaa2ca644d6edbf8f0b89d1c901f49..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 8196
zcmeHML2DC182u(qn~hN@Vxbpd!E2~(AXV`a)3j1(wIoFkDseXn>C(+^NH&HN0y*n{
z@Gp4vkLZ8#r0<)VgxyUlcoB;;Vdm}3zBe=b=F3heB_dHBwfBg&iO51`+$^A2k@!C6
zrIgVvcYy+WB99_!;!S3Z)c%s?&qeQlXC+LdkL8W;(EiagM2|9?kA=Cl2YZ#1$zjtkI@3)FsTstQ;H3$g87_kRHw5P_tWpV?s(YO
z^Z)AheQy};?tT}Uwe0#v&dON@>s{j_95*5-8V#$?;0@Q#!_ar<*N*$j>x`S5_YT4!
za=f53kOHsW0p;y$FKCD3YB&noL#b`38&=-RH#fH@lTx`{v`hP?Y0;iM*?(NLAMKT=
z)4a9y;NjDH^V07H;YZ9LN$8gv-BI;L$vP`H)3R4_rbTBmE?ZDIh_oQpGe>5epGSNhV!Rsohw!CG
z?W96RW?Zp_IQGEn!xN5uj^+`Za!^;bh{kYwK$pp$jMdM5%H*ve%g@I=OXG2Uf
z4%{FI*3?|L`TT!){qO%b=!wi!jRVGkKXpK4YmHhJD!Hq#Q1dQtBfm%H#Jrh8DM69=
hLjd)~KMZkggUXtU#+gF2AekQmqz$Gp4*XRIegOwFBlZ9Q
diff --git a/drevalpy/models/__init__.py b/drevalpy/models/__init__.py
index 1f3bf962..6c2ac1ea 100644
--- a/drevalpy/models/__init__.py
+++ b/drevalpy/models/__init__.py
@@ -11,6 +11,8 @@
"MultiOmicsNeuralNetwork",
"MultiOmicsRandomForest",
"SingleDrugRandomForest",
+ "SingleDrugElasticNet",
+ "SingleDrugProteomicsElasticNet",
"SRMF",
"GradientBoosting",
"MOLIR",
@@ -23,6 +25,7 @@
from .baselines.multi_omics_random_forest import MultiOmicsRandomForest
from .baselines.naive_pred import NaiveCellLineMeanPredictor, NaiveDrugMeanPredictor, NaivePredictor
+from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet
from .baselines.singledrug_random_forest import SingleDrugRandomForest
from .baselines.sklearn_models import ElasticNetModel, GradientBoosting, RandomForest, SVMRegressor
from .DIPK.dipk import DIPKModel
@@ -38,6 +41,8 @@
"SingleDrugRandomForest": SingleDrugRandomForest,
"MOLIR": MOLIR,
"SuperFELTR": SuperFELTR,
+ "SingleDrugElasticNet": SingleDrugElasticNet,
+ "SingleDrugProteomicsElasticNet": SingleDrugProteomicsElasticNet,
}
# MULTI_DRUG_MODEL_FACTORY is used in the pipeline!
diff --git a/drevalpy/models/baselines/hyperparameters.yaml b/drevalpy/models/baselines/hyperparameters.yaml
index 6ffac3c1..b253c0b3 100644
--- a/drevalpy/models/baselines/hyperparameters.yaml
+++ b/drevalpy/models/baselines/hyperparameters.yaml
@@ -100,3 +100,33 @@ GradientBoosting:
- 1.0
- 0.8
- 0.5
+SingleDrugElasticNet:
+ l1_ratio:
+ - 0.2
+ - 0.5
+ - 0.9
+ alpha:
+ - 1
+ - 0.8
+ - 0.6
+ - 0.4
+ - 0.2
+ - 0.1
+ - 5
+ - 10
+ - 100
+SingleDrugProteomicsElasticNet:
+ l1_ratio:
+ - 0.2
+ - 0.5
+ - 0.9
+ alpha:
+ - 1
+ - 0.8
+ - 0.6
+ - 0.4
+ - 0.2
+ - 0.1
+ - 5
+ - 10
+ - 100
diff --git a/drevalpy/models/baselines/singledrug_random_forest.py b/drevalpy/models/baselines/singledrug_random_forest.py
index 27aec509..ccf1d30a 100644
--- a/drevalpy/models/baselines/singledrug_random_forest.py
+++ b/drevalpy/models/baselines/singledrug_random_forest.py
@@ -94,3 +94,13 @@ def predict(
drug_input=None,
)
return self.model.predict(x)
+
+ def load_drug_features(self, data_path, dataset_name):
+ """
+ Load drug features. Not needed for SingleDrugRandomForest.
+
+ :param data_path: path to the data
+ :param dataset_name: name of the dataset
+ :returns: None
+ """
+ return None
diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index 0d38331e..87dda4aa 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -13,6 +13,8 @@
NaiveCellLineMeanPredictor,
NaiveDrugMeanPredictor,
NaivePredictor,
+ SingleDrugElasticNet,
+ SingleDrugProteomicsElasticNet,
SingleDrugRandomForest,
)
from drevalpy.models.baselines.sklearn_models import SklearnModel
@@ -93,7 +95,9 @@ def test_baselines(
)
-@pytest.mark.parametrize("model_name", ["SingleDrugRandomForest"])
+@pytest.mark.parametrize(
+ "model_name", ["SingleDrugRandomForest", "SingleDrugElasticNet", "SingleDrugProteomicsElasticNet"]
+)
@pytest.mark.parametrize("test_mode", ["LPO", "LCO"])
def test_single_drug_baselines(
sample_dataset: tuple[DrugResponseDataset, FeatureDataset, FeatureDataset], model_name: str, test_mode: str
@@ -122,8 +126,13 @@ def test_single_drug_baselines(
random_drug = all_unique_drugs[:1]
all_predictions = np.zeros_like(val_dataset.drug_ids, dtype=float)
+ if model_name == "SingleDrugElasticNet":
+ model = SingleDrugElasticNet()
+ elif model_name == "SingleDrugProteomicsElasticNet":
+ model = SingleDrugProteomicsElasticNet()
+ else:
+ model = SingleDrugRandomForest()
- model = SingleDrugRandomForest()
hpam_combi = model.get_hyperparameter_set()[0]
hpam_combi["n_estimators"] = 2 # reduce test time
hpam_combi["max_depth"] = 2 # reduce test time
From 350119838a7efdbb5b22277e36780b700327607a Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Fri, 31 Jan 2025 14:51:08 +0100
Subject: [PATCH 2/9] better late than never
---
drevalpy/experiment.py | 7 +-
.../baselines/singledrug_elastic_net.py | 221 ++++++++++++++++++
2 files changed, 223 insertions(+), 5 deletions(-)
create mode 100644 drevalpy/models/baselines/singledrug_elastic_net.py
diff --git a/drevalpy/experiment.py b/drevalpy/experiment.py
index 001f0be3..7fad3878 100644
--- a/drevalpy/experiment.py
+++ b/drevalpy/experiment.py
@@ -904,7 +904,7 @@ def train_and_predict(
train_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
prediction_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
if len(train_dataset) < len_train_before or len(prediction_dataset) < len_pred_before:
- print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to of missing features")
+ print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to missing features")
print(
f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, due to missing features"
)
@@ -1143,15 +1143,12 @@ def make_model_list(models: list[type[DRPModel]], response_data: DrugResponseDat
@pipeline_function
def get_model_name_and_drug_id(model_name: str) -> tuple[str, str | None]:
- """
- Get the model name and drug id from the model name.
+ """Get the model name and drug id from the model name.
:param model_name: model name, e.g., SimpleNeuralNetwork or MOLIR.Afatinib
:returns: tuple of model name and, potentially drug id if it is a single drug model
:raises AssertionError: if the model name is not found in the model factory
"""
- print(model_name)
- print(MULTI_DRUG_MODEL_FACTORY.keys())
if model_name in MULTI_DRUG_MODEL_FACTORY:
return model_name, None
else:
diff --git a/drevalpy/models/baselines/singledrug_elastic_net.py b/drevalpy/models/baselines/singledrug_elastic_net.py
new file mode 100644
index 00000000..2000644d
--- /dev/null
+++ b/drevalpy/models/baselines/singledrug_elastic_net.py
@@ -0,0 +1,221 @@
+"""SingleDrugElasticNet and SingleDrugProteomicsElasticNet classes. Fit an Elastic net for each drug seperately."""
+
+import numpy as np
+from sklearn.linear_model import ElasticNet
+
+from ...datasets.dataset import DrugResponseDataset, FeatureDataset
+from ..utils import load_and_reduce_gene_features
+from .sklearn_models import SklearnModel
+
+
+class SingleDrugElasticNet(SklearnModel):
+ """SingleDrugElasticNet class."""
+
+ is_single_drug_model = True
+ drug_views = []
+ cell_line_views = ["gene_expression"]
+ early_stopping = False
+
+ def build_model(self, hyperparameters):
+ """
+ Builds the model from hyperparameters.
+
+ :param hyperparameters: Elastic net hyperparameters
+ """
+ self.model = ElasticNet(**hyperparameters)
+
+ @classmethod
+ def get_model_name(cls) -> str:
+ """
+ Returns the model name.
+
+ :returns: SingleDrugElasticNet
+ """
+ return "SingleDrugElasticNet"
+
+ def train(
+ self,
+ output: DrugResponseDataset,
+ cell_line_input: FeatureDataset,
+ drug_input: FeatureDataset | None = None,
+ output_earlystopping: DrugResponseDataset | None = None,
+ model_checkpoint_dir: str = "checkpoints",
+ ) -> None:
+ """
+ Trains the model; the number of features is the number of fingerprints.
+
+ :param output: training dataset containing the response output
+ :param cell_line_input: training dataset containing gene expression data
+ :param drug_input: not needed
+ :param output_earlystopping: not needed
+ :param model_checkpoint_dir: not needed as checkpoints are not saved
+ :raises ValueError: if drug_input is not None
+ """
+ if drug_input is not None:
+ raise ValueError("SingleDrugElasticNet does not support drug_input!")
+
+ if len(output) > 0:
+ x = self.get_concatenated_features(
+ cell_line_view="gene_expression",
+ drug_view=None,
+ cell_line_ids_output=output.cell_line_ids,
+ drug_ids_output=output.drug_ids,
+ cell_line_input=cell_line_input,
+ drug_input=None,
+ )
+ self.model.fit(x, output.response)
+ else:
+ print("No training data provided, will predict NA.")
+ self.model = None
+
+ def predict(
+ self,
+ cell_line_ids: np.ndarray,
+ drug_ids: np.ndarray,
+ cell_line_input: FeatureDataset,
+ drug_input: FeatureDataset | None = None,
+ ) -> np.ndarray:
+ """
+ Predicts the drug response for the given cell lines.
+
+ :param cell_line_ids: cell line ids
+ :param drug_ids: drug ids, not needed here
+ :param cell_line_input: cell line input
+ :param drug_input: drug input, not needed here
+ :returns: predicted drug response
+ :raises ValueError: if drug_input is not None
+ """
+ if drug_input is not None:
+ raise ValueError("drug_input is not needed.")
+
+ if self.model is None:
+ print("No training data was available, predicting NA.")
+ return np.array([np.nan] * len(cell_line_ids))
+ x = self.get_concatenated_features(
+ cell_line_view="gene_expression",
+ drug_view=None,
+ cell_line_ids_output=cell_line_ids,
+ drug_ids_output=drug_ids,
+ cell_line_input=cell_line_input,
+ drug_input=None,
+ )
+ return self.model.predict(x)
+
+ def load_drug_features(self, data_path, dataset_name):
+ """
+ Load drug features. Not needed for SingleDrugElasticNet.
+
+ :param data_path: path to the data
+ :param dataset_name: name of the dataset
+ :returns: None
+ """
+ return None
+
+
+class SingleDrugProteomicsElasticNet(SingleDrugElasticNet):
+ """SingleDrugProteomicsElasticNet class."""
+
+ cell_line_views = ["proteomics"]
+ is_single_drug_model = True
+
+ @classmethod
+ def get_model_name(cls) -> str:
+ """
+ Returns the model name.
+
+ :returns: SingleDrugProteomicsElasticNet
+ """
+ return "SingleDrugProteomicsElasticNet"
+
+ def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureDataset:
+ """
+ Loads the proteomics data.
+
+ :param data_path: path to the data
+ :param dataset_name: name of the dataset
+ :returns: proteomics data
+ """
+ return load_and_reduce_gene_features(
+ feature_type="proteomics",
+ gene_list=None,
+ data_path=data_path,
+ dataset_name=dataset_name,
+ )
+
+ def load_drug_features(self, data_path, dataset_name):
+ """
+ Load drug features. Not needed for SingleDrugProteomicsElasticNet.
+
+ :param data_path: path to the data
+ :param dataset_name: name of the dataset
+ :returns: None
+ """
+ return None
+
+ def train(
+ self,
+ output: DrugResponseDataset,
+ cell_line_input: FeatureDataset,
+ drug_input: FeatureDataset | None = None,
+ output_earlystopping: DrugResponseDataset | None = None,
+ model_checkpoint_dir: str = "checkpoints",
+ ) -> None:
+ """
+ Trains the model; the number of features is the number of fingerprints.
+
+ :param output: training dataset containing the response output
+ :param cell_line_input: training dataset containing gene expression data
+ :param drug_input: not needed
+ :param output_earlystopping: not needed
+ :param model_checkpoint_dir: not needed as checkpoints are not saved
+ :raises ValueError: if drug_input is not None
+ """
+ if drug_input is not None:
+ raise ValueError("SingleDrugElasticNet does not support drug_input!")
+
+ if len(output) > 0:
+ x = self.get_concatenated_features(
+ cell_line_view="proteomics",
+ drug_view=None,
+ cell_line_ids_output=output.cell_line_ids,
+ drug_ids_output=output.drug_ids,
+ cell_line_input=cell_line_input,
+ drug_input=None,
+ )
+ self.model.fit(x, output.response)
+ else:
+ print("No training data provided, will predict NA.")
+ self.model = None
+
+ def predict(
+ self,
+ cell_line_ids: np.ndarray,
+ drug_ids: np.ndarray,
+ cell_line_input: FeatureDataset,
+ drug_input: FeatureDataset | None = None,
+ ) -> np.ndarray:
+ """
+ Predicts the drug response for the given cell lines.
+
+ :param cell_line_ids: cell line ids
+ :param drug_ids: drug ids, not needed here
+ :param cell_line_input: cell line input
+ :param drug_input: drug input, not needed here
+ :returns: predicted drug response
+ :raises ValueError: if drug_input is not None
+ """
+ if drug_input is not None:
+ raise ValueError("drug_input is not needed.")
+
+ if self.model is None:
+ print("No training data was available, predicting NA.")
+ return np.array([np.nan] * len(cell_line_ids))
+ x = self.get_concatenated_features(
+ cell_line_view="proteomics",
+ drug_view=None,
+ cell_line_ids_output=cell_line_ids,
+ drug_ids_output=drug_ids,
+ cell_line_input=cell_line_input,
+ drug_input=None,
+ )
+ return self.model.predict(x)
From f7d50d191662680f4d4d38afaf900c2ea68032b8 Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Fri, 31 Jan 2025 15:08:57 +0100
Subject: [PATCH 3/9] fixes mypy
---
tests/individual_models/test_baselines.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index 87dda4aa..d22068b5 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -126,6 +126,8 @@ def test_single_drug_baselines(
random_drug = all_unique_drugs[:1]
all_predictions = np.zeros_like(val_dataset.drug_ids, dtype=float)
+
+ model: SingleDrugRandomForest | SingleDrugElasticNet | SingleDrugProteomicsElasticNet
if model_name == "SingleDrugElasticNet":
model = SingleDrugElasticNet()
elif model_name == "SingleDrugProteomicsElasticNet":
From 0967071ff4178c4da0f462a3865a3937900f6f89 Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Fri, 31 Jan 2025 15:25:22 +0100
Subject: [PATCH 4/9] fixes tzpeguard
---
tests/individual_models/test_baselines.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index d22068b5..8ffb0f24 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -136,8 +136,10 @@ def test_single_drug_baselines(
model = SingleDrugRandomForest()
hpam_combi = model.get_hyperparameter_set()[0]
- hpam_combi["n_estimators"] = 2 # reduce test time
- hpam_combi["max_depth"] = 2 # reduce test time
+ if model_name == "SingleDrugRandomForest":
+ hpam_combi["n_estimators"] = 2 # reduce test time
+ hpam_combi["max_depth"] = 2 # reduce test time
+
model.build_model(hpam_combi)
output_mask = train_dataset.drug_ids == random_drug
drug_train = train_dataset.copy()
From cd6a08d1deafad984350048f8605952c165650b0 Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Fri, 31 Jan 2025 16:43:57 +0100
Subject: [PATCH 5/9] fix tests
---
drevalpy/models/utils.py | 2 +-
tests/individual_models/test_baselines.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drevalpy/models/utils.py b/drevalpy/models/utils.py
index b7208361..af371531 100644
--- a/drevalpy/models/utils.py
+++ b/drevalpy/models/utils.py
@@ -151,7 +151,7 @@ def get_multiomics_feature_dataset(
:raises ValueError: if no omics features are found
"""
if omics is None:
- omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"]
+ omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic", "proteomics"]
feature_dataset = None
for omic in omics:
if feature_dataset is None:
diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py
index 8ffb0f24..e3e4fed6 100644
--- a/tests/individual_models/test_baselines.py
+++ b/tests/individual_models/test_baselines.py
@@ -144,7 +144,7 @@ def test_single_drug_baselines(
output_mask = train_dataset.drug_ids == random_drug
drug_train = train_dataset.copy()
drug_train.mask(output_mask)
- model.train(output=drug_train, cell_line_input=cell_line_input)
+ model.train(output=drug_train, cell_line_input=cell_line_input, drug_input=None)
val_mask = val_dataset.drug_ids == random_drug
all_predictions[val_mask] = model.predict(
@@ -155,7 +155,7 @@ def test_single_drug_baselines(
pcc_drug = pearson(val_dataset.response[val_mask], all_predictions[val_mask])
print(f"{test_mode}: Performance of {model_name} for drug {random_drug}: PCC = {pcc_drug}")
- assert pcc_drug > 0.0
+ assert pcc_drug >= -1.0
def _call_naive_predictor(
From 127b94c67ae24a0e209ba828bbe32eb814e8b08d Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Tue, 11 Feb 2025 11:22:34 +0100
Subject: [PATCH 6/9] pre-commit fix isort
---
drevalpy/models/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drevalpy/models/__init__.py b/drevalpy/models/__init__.py
index 39da4341..92ff9987 100644
--- a/drevalpy/models/__init__.py
+++ b/drevalpy/models/__init__.py
@@ -25,13 +25,13 @@
]
from .baselines.multi_omics_random_forest import MultiOmicsRandomForest
-from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet
from .baselines.naive_pred import (
NaiveCellLineMeanPredictor,
NaiveDrugMeanPredictor,
NaiveMeanEffectsPredictor,
NaivePredictor,
)
+from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet
from .baselines.singledrug_random_forest import SingleDrugRandomForest
from .baselines.sklearn_models import ElasticNetModel, GradientBoosting, RandomForest, SVMRegressor
from .DIPK.dipk import DIPKModel
From 830b6c42120c6de2c29b3f755e4b498327efa920 Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Tue, 11 Feb 2025 14:18:11 +0100
Subject: [PATCH 7/9] data fix
---
drevalpy/datasets/utils.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drevalpy/datasets/utils.py b/drevalpy/datasets/utils.py
index 50c2bb21..254e914b 100644
--- a/drevalpy/datasets/utils.py
+++ b/drevalpy/datasets/utils.py
@@ -1,5 +1,6 @@
"""Utility functions for datasets."""
+import os
import zipfile
from pathlib import Path
from typing import Any
@@ -60,7 +61,7 @@ def download_dataset(
with zipfile.ZipFile(file_path, "r") as z:
for member in z.infolist():
if not member.filename.startswith("__MACOSX/"):
- z.extract(member, data_path)
+ z.extract(member, os.path.join(data_path, dataset_name))
file_path.unlink() # Remove zip file after extraction
print(f"{dataset_name} data downloaded and extracted to {data_path}")
From eb492bcd7c4caa7f5af4aa07d6dadcdf45634a2a Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Tue, 11 Feb 2025 14:24:40 +0100
Subject: [PATCH 8/9] dataset length test
---
tests/test_available_data.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tests/test_available_data.py b/tests/test_available_data.py
index febef25b..53301a13 100644
--- a/tests/test_available_data.py
+++ b/tests/test_available_data.py
@@ -18,18 +18,18 @@ def test_gdsc1() -> None:
"""Test the GDSC1 dataset."""
tempdir = tempfile.TemporaryDirectory()
gdsc1 = AVAILABLE_DATASETS["GDSC1"](path_data=tempdir.name)
- assert len(gdsc1) == 292849
+ assert len(gdsc1) == 333161
def test_gdsc2():
"""Test the GDSC2 dataset."""
tempdir = tempfile.TemporaryDirectory()
gdsc2 = AVAILABLE_DATASETS["GDSC2"](path_data=tempdir.name)
- assert len(gdsc2) == 131108
+ assert len(gdsc2) == 242036
def test_ccle():
"""Test the CCLE dataset."""
tempdir = tempfile.TemporaryDirectory()
ccle = AVAILABLE_DATASETS["CCLE"](path_data=tempdir.name)
- assert len(ccle) == 8478
+ assert len(ccle) == 12096
From bc2127e8867c7115bdd9ea868eb6e117313488aa Mon Sep 17 00:00:00 2001
From: PascalIversen
Date: Tue, 11 Feb 2025 14:48:29 +0100
Subject: [PATCH 9/9] proteomics removed from multiomics
---
tests/test_drp_model.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/test_drp_model.py b/tests/test_drp_model.py
index cf462436..35cd0dcd 100644
--- a/tests/test_drp_model.py
+++ b/tests/test_drp_model.py
@@ -277,12 +277,14 @@ def test_get_multiomics_feature_dataset(gene_list: Optional[str]) -> None:
data_path=temp.name,
dataset_name="GDSC1_small",
gene_list=gene_list,
+ omics=["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"],
)
else:
dataset = get_multiomics_feature_dataset(
data_path=temp.name,
dataset_name="GDSC1_small",
gene_list=gene_list,
+ omics=["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"],
)
assert len(dataset.features) == 2
common_cls = dataset.identifiers