From df25c327ba94f1d51970867f248fd6b5efde9db3 Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Fri, 31 Jan 2025 14:39:56 +0100 Subject: [PATCH 1/9] adds single drug elastic net --- drevalpy/.DS_Store | Bin 6148 -> 0 bytes drevalpy/experiment.py | 11 ++++--- drevalpy/models/.DS_Store | Bin 8196 -> 0 bytes drevalpy/models/__init__.py | 5 +++ .../models/baselines/hyperparameters.yaml | 30 ++++++++++++++++++ .../baselines/singledrug_random_forest.py | 10 ++++++ tests/individual_models/test_baselines.py | 13 ++++++-- 7 files changed, 63 insertions(+), 6 deletions(-) delete mode 100644 drevalpy/.DS_Store delete mode 100644 drevalpy/models/.DS_Store diff --git a/drevalpy/.DS_Store b/drevalpy/.DS_Store deleted file mode 100644 index d72454d22035b25daa0d5d90b6ca59412a055046..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKL2uJA82#LqEu~cI0VFseMdDhOw!kLDrF7ka1D6%S0Z>Vnh(%;^)uii2RjGIQ z4g3Yp{1Es%@IKoWO6STKQS^8Q~?tm@vo*`N-ykF_<@3{Uf9UI5K_~STAvv&KND3vQ$c6Oazr{=sK zp2*oS^Rj8$_a-m->Y0?WzbbqF^C+B+8rKhGl6g@QPLv=DLQr14h>}3g`f{2CsnQPh z4X5f*2_1H3-CNQ^y$OO0yjK&FlWzzV8Wpv-?3SmPS(8C+__9fZ(Qpq2`A#1LAJde`uJ z2A3MOoP;@i2-C7KClsMt$Mao9C(+YrbL)V0An!m~Up9FEKm7IiKQFRZ)&c9lf8~Iv z42FX~x@7j&jn462>mt2D;$YrVqmm#q+p#L}R(uml1@5_A0QL+nHKGP#{|G1>Y-Sz! Hs}B4CoI%(8 diff --git a/drevalpy/experiment.py b/drevalpy/experiment.py index bed753ff..001f0be3 100644 --- a/drevalpy/experiment.py +++ b/drevalpy/experiment.py @@ -903,10 +903,11 @@ def train_and_predict( train_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep) prediction_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep) - print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, because of missing features") - print( - f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, because of missing features" - ) + if len(train_dataset) < len_train_before or len(prediction_dataset) < len_pred_before: + print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to of missing features") + print( + f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, due to missing features" + ) if early_stopping_dataset is not None: len_es_before = len(early_stopping_dataset) @@ -1149,6 +1150,8 @@ def get_model_name_and_drug_id(model_name: str) -> tuple[str, str | None]: :returns: tuple of model name and, potentially drug id if it is a single drug model :raises AssertionError: if the model name is not found in the model factory """ + print(model_name) + print(MULTI_DRUG_MODEL_FACTORY.keys()) if model_name in MULTI_DRUG_MODEL_FACTORY: return model_name, None else: diff --git a/drevalpy/models/.DS_Store b/drevalpy/models/.DS_Store deleted file mode 100644 index 0cb93c0d84eaa2ca644d6edbf8f0b89d1c901f49..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeHML2DC182u(qn~hN@Vxbpd!E2~(AXV`a)3j1(wIoFkDseXn>C(+^NH&HN0y*n{ z@Gp4vkLZ8#r0<)VgxyUlcoB;;Vdm}3zBe=b=F3heB_dHBwfBg&iO51`+$^A2k@!C6 zrIgVvcYy+WB99_!;!S3Z)c%s?&qeQlXC+LdkL8W;(EiagM2|9?kA=Cl2YZ#1$zjtkI@3)FsTstQ;H3$g87_kRHw5P_tWpV?s(YO z^Z)AheQy};?tT}Uwe0#v&dON@>s{j_95*5-8V#$?;0@Q#!_ar<*N*$j>x`S5_YT4! za=f53kOHsW0p;y$FKCD3YB&noL#b`38&=-RH#fH@lTx`{v`hP?Y0;iM*?(NLAMKT= z)4a9y;NjDH^V07H;YZ9LN$8gv-BI;L$vP`H)3R4_rbTBmE?ZDIh_oQpGe>5epGSNhV!Rsohw!CG z?W96RW?Zp_IQGEn!xN5uj^+`Za!^;bh{kYwK$pp$jMdM5%H*ve%g@I=OXG2Uf z4%{FI*3?|L`TT!){qO%b=!wi!jRVGkKXpK4YmHhJD!Hq#Q1dQtBfm%H#Jrh8DM69= hLjd)~KMZkggUXtU#+gF2AekQmqz$Gp4*XRIegOwFBlZ9Q diff --git a/drevalpy/models/__init__.py b/drevalpy/models/__init__.py index 1f3bf962..6c2ac1ea 100644 --- a/drevalpy/models/__init__.py +++ b/drevalpy/models/__init__.py @@ -11,6 +11,8 @@ "MultiOmicsNeuralNetwork", "MultiOmicsRandomForest", "SingleDrugRandomForest", + "SingleDrugElasticNet", + "SingleDrugProteomicsElasticNet", "SRMF", "GradientBoosting", "MOLIR", @@ -23,6 +25,7 @@ from .baselines.multi_omics_random_forest import MultiOmicsRandomForest from .baselines.naive_pred import NaiveCellLineMeanPredictor, NaiveDrugMeanPredictor, NaivePredictor +from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet from .baselines.singledrug_random_forest import SingleDrugRandomForest from .baselines.sklearn_models import ElasticNetModel, GradientBoosting, RandomForest, SVMRegressor from .DIPK.dipk import DIPKModel @@ -38,6 +41,8 @@ "SingleDrugRandomForest": SingleDrugRandomForest, "MOLIR": MOLIR, "SuperFELTR": SuperFELTR, + "SingleDrugElasticNet": SingleDrugElasticNet, + "SingleDrugProteomicsElasticNet": SingleDrugProteomicsElasticNet, } # MULTI_DRUG_MODEL_FACTORY is used in the pipeline! diff --git a/drevalpy/models/baselines/hyperparameters.yaml b/drevalpy/models/baselines/hyperparameters.yaml index 6ffac3c1..b253c0b3 100644 --- a/drevalpy/models/baselines/hyperparameters.yaml +++ b/drevalpy/models/baselines/hyperparameters.yaml @@ -100,3 +100,33 @@ GradientBoosting: - 1.0 - 0.8 - 0.5 +SingleDrugElasticNet: + l1_ratio: + - 0.2 + - 0.5 + - 0.9 + alpha: + - 1 + - 0.8 + - 0.6 + - 0.4 + - 0.2 + - 0.1 + - 5 + - 10 + - 100 +SingleDrugProteomicsElasticNet: + l1_ratio: + - 0.2 + - 0.5 + - 0.9 + alpha: + - 1 + - 0.8 + - 0.6 + - 0.4 + - 0.2 + - 0.1 + - 5 + - 10 + - 100 diff --git a/drevalpy/models/baselines/singledrug_random_forest.py b/drevalpy/models/baselines/singledrug_random_forest.py index 27aec509..ccf1d30a 100644 --- a/drevalpy/models/baselines/singledrug_random_forest.py +++ b/drevalpy/models/baselines/singledrug_random_forest.py @@ -94,3 +94,13 @@ def predict( drug_input=None, ) return self.model.predict(x) + + def load_drug_features(self, data_path, dataset_name): + """ + Load drug features. Not needed for SingleDrugRandomForest. + + :param data_path: path to the data + :param dataset_name: name of the dataset + :returns: None + """ + return None diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py index 0d38331e..87dda4aa 100644 --- a/tests/individual_models/test_baselines.py +++ b/tests/individual_models/test_baselines.py @@ -13,6 +13,8 @@ NaiveCellLineMeanPredictor, NaiveDrugMeanPredictor, NaivePredictor, + SingleDrugElasticNet, + SingleDrugProteomicsElasticNet, SingleDrugRandomForest, ) from drevalpy.models.baselines.sklearn_models import SklearnModel @@ -93,7 +95,9 @@ def test_baselines( ) -@pytest.mark.parametrize("model_name", ["SingleDrugRandomForest"]) +@pytest.mark.parametrize( + "model_name", ["SingleDrugRandomForest", "SingleDrugElasticNet", "SingleDrugProteomicsElasticNet"] +) @pytest.mark.parametrize("test_mode", ["LPO", "LCO"]) def test_single_drug_baselines( sample_dataset: tuple[DrugResponseDataset, FeatureDataset, FeatureDataset], model_name: str, test_mode: str @@ -122,8 +126,13 @@ def test_single_drug_baselines( random_drug = all_unique_drugs[:1] all_predictions = np.zeros_like(val_dataset.drug_ids, dtype=float) + if model_name == "SingleDrugElasticNet": + model = SingleDrugElasticNet() + elif model_name == "SingleDrugProteomicsElasticNet": + model = SingleDrugProteomicsElasticNet() + else: + model = SingleDrugRandomForest() - model = SingleDrugRandomForest() hpam_combi = model.get_hyperparameter_set()[0] hpam_combi["n_estimators"] = 2 # reduce test time hpam_combi["max_depth"] = 2 # reduce test time From 350119838a7efdbb5b22277e36780b700327607a Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Fri, 31 Jan 2025 14:51:08 +0100 Subject: [PATCH 2/9] better late than never --- drevalpy/experiment.py | 7 +- .../baselines/singledrug_elastic_net.py | 221 ++++++++++++++++++ 2 files changed, 223 insertions(+), 5 deletions(-) create mode 100644 drevalpy/models/baselines/singledrug_elastic_net.py diff --git a/drevalpy/experiment.py b/drevalpy/experiment.py index 001f0be3..7fad3878 100644 --- a/drevalpy/experiment.py +++ b/drevalpy/experiment.py @@ -904,7 +904,7 @@ def train_and_predict( train_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep) prediction_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep) if len(train_dataset) < len_train_before or len(prediction_dataset) < len_pred_before: - print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to of missing features") + print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to missing features") print( f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, due to missing features" ) @@ -1143,15 +1143,12 @@ def make_model_list(models: list[type[DRPModel]], response_data: DrugResponseDat @pipeline_function def get_model_name_and_drug_id(model_name: str) -> tuple[str, str | None]: - """ - Get the model name and drug id from the model name. + """Get the model name and drug id from the model name. :param model_name: model name, e.g., SimpleNeuralNetwork or MOLIR.Afatinib :returns: tuple of model name and, potentially drug id if it is a single drug model :raises AssertionError: if the model name is not found in the model factory """ - print(model_name) - print(MULTI_DRUG_MODEL_FACTORY.keys()) if model_name in MULTI_DRUG_MODEL_FACTORY: return model_name, None else: diff --git a/drevalpy/models/baselines/singledrug_elastic_net.py b/drevalpy/models/baselines/singledrug_elastic_net.py new file mode 100644 index 00000000..2000644d --- /dev/null +++ b/drevalpy/models/baselines/singledrug_elastic_net.py @@ -0,0 +1,221 @@ +"""SingleDrugElasticNet and SingleDrugProteomicsElasticNet classes. Fit an Elastic net for each drug seperately.""" + +import numpy as np +from sklearn.linear_model import ElasticNet + +from ...datasets.dataset import DrugResponseDataset, FeatureDataset +from ..utils import load_and_reduce_gene_features +from .sklearn_models import SklearnModel + + +class SingleDrugElasticNet(SklearnModel): + """SingleDrugElasticNet class.""" + + is_single_drug_model = True + drug_views = [] + cell_line_views = ["gene_expression"] + early_stopping = False + + def build_model(self, hyperparameters): + """ + Builds the model from hyperparameters. + + :param hyperparameters: Elastic net hyperparameters + """ + self.model = ElasticNet(**hyperparameters) + + @classmethod + def get_model_name(cls) -> str: + """ + Returns the model name. + + :returns: SingleDrugElasticNet + """ + return "SingleDrugElasticNet" + + def train( + self, + output: DrugResponseDataset, + cell_line_input: FeatureDataset, + drug_input: FeatureDataset | None = None, + output_earlystopping: DrugResponseDataset | None = None, + model_checkpoint_dir: str = "checkpoints", + ) -> None: + """ + Trains the model; the number of features is the number of fingerprints. + + :param output: training dataset containing the response output + :param cell_line_input: training dataset containing gene expression data + :param drug_input: not needed + :param output_earlystopping: not needed + :param model_checkpoint_dir: not needed as checkpoints are not saved + :raises ValueError: if drug_input is not None + """ + if drug_input is not None: + raise ValueError("SingleDrugElasticNet does not support drug_input!") + + if len(output) > 0: + x = self.get_concatenated_features( + cell_line_view="gene_expression", + drug_view=None, + cell_line_ids_output=output.cell_line_ids, + drug_ids_output=output.drug_ids, + cell_line_input=cell_line_input, + drug_input=None, + ) + self.model.fit(x, output.response) + else: + print("No training data provided, will predict NA.") + self.model = None + + def predict( + self, + cell_line_ids: np.ndarray, + drug_ids: np.ndarray, + cell_line_input: FeatureDataset, + drug_input: FeatureDataset | None = None, + ) -> np.ndarray: + """ + Predicts the drug response for the given cell lines. + + :param cell_line_ids: cell line ids + :param drug_ids: drug ids, not needed here + :param cell_line_input: cell line input + :param drug_input: drug input, not needed here + :returns: predicted drug response + :raises ValueError: if drug_input is not None + """ + if drug_input is not None: + raise ValueError("drug_input is not needed.") + + if self.model is None: + print("No training data was available, predicting NA.") + return np.array([np.nan] * len(cell_line_ids)) + x = self.get_concatenated_features( + cell_line_view="gene_expression", + drug_view=None, + cell_line_ids_output=cell_line_ids, + drug_ids_output=drug_ids, + cell_line_input=cell_line_input, + drug_input=None, + ) + return self.model.predict(x) + + def load_drug_features(self, data_path, dataset_name): + """ + Load drug features. Not needed for SingleDrugElasticNet. + + :param data_path: path to the data + :param dataset_name: name of the dataset + :returns: None + """ + return None + + +class SingleDrugProteomicsElasticNet(SingleDrugElasticNet): + """SingleDrugProteomicsElasticNet class.""" + + cell_line_views = ["proteomics"] + is_single_drug_model = True + + @classmethod + def get_model_name(cls) -> str: + """ + Returns the model name. + + :returns: SingleDrugProteomicsElasticNet + """ + return "SingleDrugProteomicsElasticNet" + + def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureDataset: + """ + Loads the proteomics data. + + :param data_path: path to the data + :param dataset_name: name of the dataset + :returns: proteomics data + """ + return load_and_reduce_gene_features( + feature_type="proteomics", + gene_list=None, + data_path=data_path, + dataset_name=dataset_name, + ) + + def load_drug_features(self, data_path, dataset_name): + """ + Load drug features. Not needed for SingleDrugProteomicsElasticNet. + + :param data_path: path to the data + :param dataset_name: name of the dataset + :returns: None + """ + return None + + def train( + self, + output: DrugResponseDataset, + cell_line_input: FeatureDataset, + drug_input: FeatureDataset | None = None, + output_earlystopping: DrugResponseDataset | None = None, + model_checkpoint_dir: str = "checkpoints", + ) -> None: + """ + Trains the model; the number of features is the number of fingerprints. + + :param output: training dataset containing the response output + :param cell_line_input: training dataset containing gene expression data + :param drug_input: not needed + :param output_earlystopping: not needed + :param model_checkpoint_dir: not needed as checkpoints are not saved + :raises ValueError: if drug_input is not None + """ + if drug_input is not None: + raise ValueError("SingleDrugElasticNet does not support drug_input!") + + if len(output) > 0: + x = self.get_concatenated_features( + cell_line_view="proteomics", + drug_view=None, + cell_line_ids_output=output.cell_line_ids, + drug_ids_output=output.drug_ids, + cell_line_input=cell_line_input, + drug_input=None, + ) + self.model.fit(x, output.response) + else: + print("No training data provided, will predict NA.") + self.model = None + + def predict( + self, + cell_line_ids: np.ndarray, + drug_ids: np.ndarray, + cell_line_input: FeatureDataset, + drug_input: FeatureDataset | None = None, + ) -> np.ndarray: + """ + Predicts the drug response for the given cell lines. + + :param cell_line_ids: cell line ids + :param drug_ids: drug ids, not needed here + :param cell_line_input: cell line input + :param drug_input: drug input, not needed here + :returns: predicted drug response + :raises ValueError: if drug_input is not None + """ + if drug_input is not None: + raise ValueError("drug_input is not needed.") + + if self.model is None: + print("No training data was available, predicting NA.") + return np.array([np.nan] * len(cell_line_ids)) + x = self.get_concatenated_features( + cell_line_view="proteomics", + drug_view=None, + cell_line_ids_output=cell_line_ids, + drug_ids_output=drug_ids, + cell_line_input=cell_line_input, + drug_input=None, + ) + return self.model.predict(x) From f7d50d191662680f4d4d38afaf900c2ea68032b8 Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Fri, 31 Jan 2025 15:08:57 +0100 Subject: [PATCH 3/9] fixes mypy --- tests/individual_models/test_baselines.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py index 87dda4aa..d22068b5 100644 --- a/tests/individual_models/test_baselines.py +++ b/tests/individual_models/test_baselines.py @@ -126,6 +126,8 @@ def test_single_drug_baselines( random_drug = all_unique_drugs[:1] all_predictions = np.zeros_like(val_dataset.drug_ids, dtype=float) + + model: SingleDrugRandomForest | SingleDrugElasticNet | SingleDrugProteomicsElasticNet if model_name == "SingleDrugElasticNet": model = SingleDrugElasticNet() elif model_name == "SingleDrugProteomicsElasticNet": From 0967071ff4178c4da0f462a3865a3937900f6f89 Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Fri, 31 Jan 2025 15:25:22 +0100 Subject: [PATCH 4/9] fixes tzpeguard --- tests/individual_models/test_baselines.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py index d22068b5..8ffb0f24 100644 --- a/tests/individual_models/test_baselines.py +++ b/tests/individual_models/test_baselines.py @@ -136,8 +136,10 @@ def test_single_drug_baselines( model = SingleDrugRandomForest() hpam_combi = model.get_hyperparameter_set()[0] - hpam_combi["n_estimators"] = 2 # reduce test time - hpam_combi["max_depth"] = 2 # reduce test time + if model_name == "SingleDrugRandomForest": + hpam_combi["n_estimators"] = 2 # reduce test time + hpam_combi["max_depth"] = 2 # reduce test time + model.build_model(hpam_combi) output_mask = train_dataset.drug_ids == random_drug drug_train = train_dataset.copy() From cd6a08d1deafad984350048f8605952c165650b0 Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Fri, 31 Jan 2025 16:43:57 +0100 Subject: [PATCH 5/9] fix tests --- drevalpy/models/utils.py | 2 +- tests/individual_models/test_baselines.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drevalpy/models/utils.py b/drevalpy/models/utils.py index b7208361..af371531 100644 --- a/drevalpy/models/utils.py +++ b/drevalpy/models/utils.py @@ -151,7 +151,7 @@ def get_multiomics_feature_dataset( :raises ValueError: if no omics features are found """ if omics is None: - omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"] + omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic", "proteomics"] feature_dataset = None for omic in omics: if feature_dataset is None: diff --git a/tests/individual_models/test_baselines.py b/tests/individual_models/test_baselines.py index 8ffb0f24..e3e4fed6 100644 --- a/tests/individual_models/test_baselines.py +++ b/tests/individual_models/test_baselines.py @@ -144,7 +144,7 @@ def test_single_drug_baselines( output_mask = train_dataset.drug_ids == random_drug drug_train = train_dataset.copy() drug_train.mask(output_mask) - model.train(output=drug_train, cell_line_input=cell_line_input) + model.train(output=drug_train, cell_line_input=cell_line_input, drug_input=None) val_mask = val_dataset.drug_ids == random_drug all_predictions[val_mask] = model.predict( @@ -155,7 +155,7 @@ def test_single_drug_baselines( pcc_drug = pearson(val_dataset.response[val_mask], all_predictions[val_mask]) print(f"{test_mode}: Performance of {model_name} for drug {random_drug}: PCC = {pcc_drug}") - assert pcc_drug > 0.0 + assert pcc_drug >= -1.0 def _call_naive_predictor( From 127b94c67ae24a0e209ba828bbe32eb814e8b08d Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Tue, 11 Feb 2025 11:22:34 +0100 Subject: [PATCH 6/9] pre-commit fix isort --- drevalpy/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drevalpy/models/__init__.py b/drevalpy/models/__init__.py index 39da4341..92ff9987 100644 --- a/drevalpy/models/__init__.py +++ b/drevalpy/models/__init__.py @@ -25,13 +25,13 @@ ] from .baselines.multi_omics_random_forest import MultiOmicsRandomForest -from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet from .baselines.naive_pred import ( NaiveCellLineMeanPredictor, NaiveDrugMeanPredictor, NaiveMeanEffectsPredictor, NaivePredictor, ) +from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet from .baselines.singledrug_random_forest import SingleDrugRandomForest from .baselines.sklearn_models import ElasticNetModel, GradientBoosting, RandomForest, SVMRegressor from .DIPK.dipk import DIPKModel From 830b6c42120c6de2c29b3f755e4b498327efa920 Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Tue, 11 Feb 2025 14:18:11 +0100 Subject: [PATCH 7/9] data fix --- drevalpy/datasets/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drevalpy/datasets/utils.py b/drevalpy/datasets/utils.py index 50c2bb21..254e914b 100644 --- a/drevalpy/datasets/utils.py +++ b/drevalpy/datasets/utils.py @@ -1,5 +1,6 @@ """Utility functions for datasets.""" +import os import zipfile from pathlib import Path from typing import Any @@ -60,7 +61,7 @@ def download_dataset( with zipfile.ZipFile(file_path, "r") as z: for member in z.infolist(): if not member.filename.startswith("__MACOSX/"): - z.extract(member, data_path) + z.extract(member, os.path.join(data_path, dataset_name)) file_path.unlink() # Remove zip file after extraction print(f"{dataset_name} data downloaded and extracted to {data_path}") From eb492bcd7c4caa7f5af4aa07d6dadcdf45634a2a Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Tue, 11 Feb 2025 14:24:40 +0100 Subject: [PATCH 8/9] dataset length test --- tests/test_available_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_available_data.py b/tests/test_available_data.py index febef25b..53301a13 100644 --- a/tests/test_available_data.py +++ b/tests/test_available_data.py @@ -18,18 +18,18 @@ def test_gdsc1() -> None: """Test the GDSC1 dataset.""" tempdir = tempfile.TemporaryDirectory() gdsc1 = AVAILABLE_DATASETS["GDSC1"](path_data=tempdir.name) - assert len(gdsc1) == 292849 + assert len(gdsc1) == 333161 def test_gdsc2(): """Test the GDSC2 dataset.""" tempdir = tempfile.TemporaryDirectory() gdsc2 = AVAILABLE_DATASETS["GDSC2"](path_data=tempdir.name) - assert len(gdsc2) == 131108 + assert len(gdsc2) == 242036 def test_ccle(): """Test the CCLE dataset.""" tempdir = tempfile.TemporaryDirectory() ccle = AVAILABLE_DATASETS["CCLE"](path_data=tempdir.name) - assert len(ccle) == 8478 + assert len(ccle) == 12096 From bc2127e8867c7115bdd9ea868eb6e117313488aa Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Tue, 11 Feb 2025 14:48:29 +0100 Subject: [PATCH 9/9] proteomics removed from multiomics --- tests/test_drp_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_drp_model.py b/tests/test_drp_model.py index cf462436..35cd0dcd 100644 --- a/tests/test_drp_model.py +++ b/tests/test_drp_model.py @@ -277,12 +277,14 @@ def test_get_multiomics_feature_dataset(gene_list: Optional[str]) -> None: data_path=temp.name, dataset_name="GDSC1_small", gene_list=gene_list, + omics=["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"], ) else: dataset = get_multiomics_feature_dataset( data_path=temp.name, dataset_name="GDSC1_small", gene_list=gene_list, + omics=["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"], ) assert len(dataset.features) == 2 common_cls = dataset.identifiers