Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed drevalpy/.DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion drevalpy/datasets/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utility functions for datasets."""

import os
import zipfile
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -60,7 +61,7 @@ def download_dataset(
with zipfile.ZipFile(file_path, "r") as z:
for member in z.infolist():
if not member.filename.startswith("__MACOSX/"):
z.extract(member, data_path)
z.extract(member, os.path.join(data_path, dataset_name))
file_path.unlink() # Remove zip file after extraction

print(f"{dataset_name} data downloaded and extracted to {data_path}")
Expand Down
12 changes: 6 additions & 6 deletions drevalpy/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,10 +903,11 @@ def train_and_predict(

train_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
prediction_dataset.reduce_to(cell_line_ids=cell_lines_to_keep, drug_ids=drugs_to_keep)
print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, because of missing features")
print(
f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, because of missing features"
)
if len(train_dataset) < len_train_before or len(prediction_dataset) < len_pred_before:
print(f"Reduced training dataset from {len_train_before} to {len(train_dataset)}, due to missing features")
print(
f"Reduced prediction dataset from {len_pred_before} to {len(prediction_dataset)}, due to missing features"
)

if early_stopping_dataset is not None:
len_es_before = len(early_stopping_dataset)
Expand Down Expand Up @@ -1142,8 +1143,7 @@ def make_model_list(models: list[type[DRPModel]], response_data: DrugResponseDat

@pipeline_function
def get_model_name_and_drug_id(model_name: str) -> tuple[str, str | None]:
"""
Get the model name and drug id from the model name.
"""Get the model name and drug id from the model name.

:param model_name: model name, e.g., SimpleNeuralNetwork or MOLIR.Afatinib
:returns: tuple of model name and, potentially drug id if it is a single drug model
Expand Down
Binary file removed drevalpy/models/.DS_Store
Binary file not shown.
5 changes: 5 additions & 0 deletions drevalpy/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
"MultiOmicsNeuralNetwork",
"MultiOmicsRandomForest",
"SingleDrugRandomForest",
"SingleDrugElasticNet",
"SingleDrugProteomicsElasticNet",
"SRMF",
"GradientBoosting",
"MOLIR",
Expand All @@ -29,6 +31,7 @@
NaiveMeanEffectsPredictor,
NaivePredictor,
)
from .baselines.singledrug_elastic_net import SingleDrugElasticNet, SingleDrugProteomicsElasticNet
from .baselines.singledrug_random_forest import SingleDrugRandomForest
from .baselines.sklearn_models import ElasticNetModel, GradientBoosting, RandomForest, SVMRegressor
from .DIPK.dipk import DIPKModel
Expand All @@ -44,6 +47,8 @@
"SingleDrugRandomForest": SingleDrugRandomForest,
"MOLIR": MOLIR,
"SuperFELTR": SuperFELTR,
"SingleDrugElasticNet": SingleDrugElasticNet,
"SingleDrugProteomicsElasticNet": SingleDrugProteomicsElasticNet,
}

# MULTI_DRUG_MODEL_FACTORY is used in the pipeline!
Expand Down
30 changes: 30 additions & 0 deletions drevalpy/models/baselines/hyperparameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,33 @@ GradientBoosting:
- 1.0
- 0.8
- 0.5
SingleDrugElasticNet:
l1_ratio:
- 0.2
- 0.5
- 0.9
alpha:
- 1
- 0.8
- 0.6
- 0.4
- 0.2
- 0.1
- 5
- 10
- 100
SingleDrugProteomicsElasticNet:
l1_ratio:
- 0.2
- 0.5
- 0.9
alpha:
- 1
- 0.8
- 0.6
- 0.4
- 0.2
- 0.1
- 5
- 10
- 100
221 changes: 221 additions & 0 deletions drevalpy/models/baselines/singledrug_elastic_net.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
"""SingleDrugElasticNet and SingleDrugProteomicsElasticNet classes. Fit an Elastic net for each drug seperately."""

import numpy as np
from sklearn.linear_model import ElasticNet

from ...datasets.dataset import DrugResponseDataset, FeatureDataset
from ..utils import load_and_reduce_gene_features
from .sklearn_models import SklearnModel


class SingleDrugElasticNet(SklearnModel):
    """
    Elastic net that is fitted for a single drug, using cell line features only.

    The omics view fed into the model is the first entry of ``cell_line_views``. Subclasses can therefore switch
    the input modality by overriding ``cell_line_views`` (and ``load_cell_line_features``) without having to
    re-implement ``train`` and ``predict``.
    """

    is_single_drug_model = True
    drug_views = []
    cell_line_views = ["gene_expression"]
    early_stopping = False

    def build_model(self, hyperparameters):
        """
        Builds the model from hyperparameters.

        :param hyperparameters: keyword arguments forwarded to sklearn's ElasticNet, e.g., alpha and l1_ratio
        """
        self.model = ElasticNet(**hyperparameters)

    @classmethod
    def get_model_name(cls) -> str:
        """
        Returns the model name.

        :returns: SingleDrugElasticNet
        """
        return "SingleDrugElasticNet"

    def train(
        self,
        output: DrugResponseDataset,
        cell_line_input: FeatureDataset,
        drug_input: FeatureDataset | None = None,
        output_earlystopping: DrugResponseDataset | None = None,
        model_checkpoint_dir: str = "checkpoints",
    ) -> None:
        """
        Trains the model on the cell line view given by ``cell_line_views[0]``.

        If the training dataset is empty, no model is fitted and ``self.model`` is set to None so that
        ``predict`` returns NaN for every requested sample.

        :param output: training dataset containing the response output
        :param cell_line_input: training dataset containing the cell line features of the configured view
        :param drug_input: not needed
        :param output_earlystopping: not needed
        :param model_checkpoint_dir: not needed as checkpoints are not saved
        :raises ValueError: if drug_input is not None
        """
        if drug_input is not None:
            # Use the concrete model name so that subclasses report themselves correctly.
            raise ValueError(f"{self.get_model_name()} does not support drug_input!")

        if len(output) == 0:
            print("No training data provided, will predict NA.")
            self.model = None
            return

        x = self.get_concatenated_features(
            cell_line_view=self.cell_line_views[0],
            drug_view=None,
            cell_line_ids_output=output.cell_line_ids,
            drug_ids_output=output.drug_ids,
            cell_line_input=cell_line_input,
            drug_input=None,
        )
        self.model.fit(x, output.response)

    def predict(
        self,
        cell_line_ids: np.ndarray,
        drug_ids: np.ndarray,
        cell_line_input: FeatureDataset,
        drug_input: FeatureDataset | None = None,
    ) -> np.ndarray:
        """
        Predicts the drug response for the given cell lines.

        :param cell_line_ids: cell line ids
        :param drug_ids: drug ids, only passed on to the feature concatenation
        :param cell_line_input: cell line input containing the configured view
        :param drug_input: drug input, not needed here
        :returns: predicted drug response; an array of NaN (one per cell line id) if no model was trained
        :raises ValueError: if drug_input is not None
        """
        if drug_input is not None:
            raise ValueError("drug_input is not needed.")

        if self.model is None:
            # train() received no data for this drug, so there is nothing to predict with.
            print("No training data was available, predicting NA.")
            return np.full(len(cell_line_ids), np.nan)

        x = self.get_concatenated_features(
            cell_line_view=self.cell_line_views[0],
            drug_view=None,
            cell_line_ids_output=cell_line_ids,
            drug_ids_output=drug_ids,
            cell_line_input=cell_line_input,
            drug_input=None,
        )
        return self.model.predict(x)

    def load_drug_features(self, data_path, dataset_name):
        """
        Load drug features. Not needed for single drug elastic nets.

        :param data_path: path to the data
        :param dataset_name: name of the dataset
        :returns: None
        """
        return None


class SingleDrugProteomicsElasticNet(SingleDrugElasticNet):
    """
    SingleDrugElasticNet variant that is trained on proteomics instead of gene expression.

    Training, prediction and the (absent) drug features are inherited from SingleDrugElasticNet; only the cell
    line view, the model name and the feature loading differ.
    """

    cell_line_views = ["proteomics"]
    is_single_drug_model = True

    @classmethod
    def get_model_name(cls) -> str:
        """
        Returns the model name.

        :returns: SingleDrugProteomicsElasticNet
        """
        return "SingleDrugProteomicsElasticNet"

    def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureDataset:
        """
        Loads the proteomics data.

        :param data_path: path to the data
        :param dataset_name: name of the dataset
        :returns: proteomics data
        """
        return load_and_reduce_gene_features(
            feature_type="proteomics",
            gene_list=None,
            data_path=data_path,
            dataset_name=dataset_name,
        )
10 changes: 10 additions & 0 deletions drevalpy/models/baselines/singledrug_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,13 @@ def predict(
drug_input=None,
)
return self.model.predict(x)

def load_drug_features(self, data_path, dataset_name):
    """
    Returns no drug features, as SingleDrugRandomForest does not need any.

    :param data_path: path to the data, unused
    :param dataset_name: name of the dataset, unused
    :returns: None
    """
    return None
2 changes: 1 addition & 1 deletion drevalpy/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def get_multiomics_feature_dataset(
:raises ValueError: if no omics features are found
"""
if omics is None:
omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic"]
omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic", "proteomics"]
feature_dataset = None
for omic in omics:
if feature_dataset is None:
Expand Down
Loading
Loading