Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion create_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,10 @@ def draw_per_grouping_algorithm_plots(
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate reports from evaluation results")
parser.add_argument("--run_id", required=True, help="Run ID for the current execution")
parser.add_argument("--dataset", required=True, help="Dataset name for which to render the result file")
args = parser.parse_args()
run_id = args.run_id
dataset = args.dataset

# assert that the run_id folder exists
if not os.path.exists(f"results/{run_id}"):
Expand All @@ -280,7 +282,7 @@ def draw_per_grouping_algorithm_plots(
evaluation_results_per_drug,
evaluation_results_per_cell_line,
true_vs_pred,
) = parse_results(path_to_results=f"results/{run_id}")
) = parse_results(path_to_results=f"results/{run_id}", dataset=dataset)

# part of pipeline: EVALUATE_FINAL, COLLECT_RESULTS
(
Expand Down
15 changes: 4 additions & 11 deletions drevalpy/models/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Utility functions for loading and processing data."""

import os.path
import warnings

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -94,17 +93,11 @@ def iterate_features(df: pd.DataFrame, feature_type: str) -> dict[str, dict[str,
if cl in features.keys():
continue
rows = df.loc[cl]
rows = rows.astype(float).to_numpy()
if (len(rows.shape) > 1) and (rows.shape[0] > 1): # multiple rows returned
warnings.warn(
f"Multiple rows returned for Cell Line {cl} (and maybe others) "
f"in feature {feature_type}, taking the first one.",
stacklevel=2,
)

rows = rows.iloc[0]
# convert to float values
rows = rows.astype(float)
features[cl] = {feature_type: rows.values}
# take mean
rows = np.mean(rows, axis=0)
features[cl] = {feature_type: rows}
return features


Expand Down
10 changes: 6 additions & 4 deletions drevalpy/visualization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,12 @@ def _parse_layout(f: TextIO, path_to_layout: str) -> None:
f.write("".join(layout))


def parse_results(path_to_results: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
def parse_results(path_to_results: str, dataset: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
"""
Parse the results from the given directory.

:param path_to_results: path to the results directory
:param dataset: dataset name, e.g., GDSC2
:returns: evaluation results, evaluation results per drug, evaluation results per cell line, and true vs. predicted
values
"""
Expand All @@ -54,7 +55,7 @@ def parse_results(path_to_results: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.
# Convert the path to a forward-slash version for the regex (for Windows)
result_dir_str = str(result_dir).replace("\\", "/")
pattern = re.compile(
rf"{result_dir_str}/(LPO|LCO|LDO)/[^/]+/(predictions|cross_study|randomization|robustness)/.*\.csv$"
rf"{result_dir_str}/{dataset}/(LPO|LCO|LDO)/[^/]+/(predictions|cross_study|randomization|robustness)/.*\.csv$"
)
result_files = [file for file in result_files if pattern.match(str(file).replace("\\", "/"))]

Expand All @@ -69,8 +70,9 @@ def parse_results(path_to_results: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.
rel_file = str(os.path.normpath(file.relative_to(result_dir))).replace("\\", "/")
print(f'Evaluating file: "{rel_file}" ...')
file_parts = rel_file.split("/")
lpo_lco_ldo = file_parts[0]
algorithm = file_parts[1]
dataset = file_parts[0]
lpo_lco_ldo = file_parts[1]
algorithm = file_parts[2]
(
overall_eval,
eval_results_per_drug,
Expand Down
5 changes: 2 additions & 3 deletions tests/test_drp_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,9 @@ def test_iterate_features() -> None:
"""Test the iteration over features."""
df = pd.DataFrame({"GeneA": [1, 2, 3, 2], "GeneB": [4, 5, 6, 2], "GeneC": [7, 8, 9, 2]})
df.index = ["CellLine1", "CellLine2", "CellLine3", "CellLine1"]
with pytest.warns(UserWarning):
features = iterate_features(df, "gene_expression")
features = iterate_features(df, "gene_expression")
assert len(features) == 3
assert np.all(features["CellLine1"]["gene_expression"] == [1, 4, 7])
assert np.all(features["CellLine1"]["gene_expression"] == [1.5, 3, 4.5])


def test_load_drug_ids_from_csv() -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_run_suite(args):
evaluation_results_per_drug,
evaluation_results_per_cell_line,
true_vs_pred,
) = parse_results(path_to_results=os.path.join(temp_dir.name, args.run_id, args.dataset_name))
) = parse_results(path_to_results=os.path.join(temp_dir.name, args.run_id), dataset="Toy_Data")

(
evaluation_results,
Expand Down
Loading