diff --git a/create_report.py b/create_report.py index a38ce223..97ed1c31 100644 --- a/create_report.py +++ b/create_report.py @@ -267,8 +267,10 @@ def draw_per_grouping_algorithm_plots( if __name__ == "__main__": parser = argparse.ArgumentParser(description="Generate reports from evaluation results") parser.add_argument("--run_id", required=True, help="Run ID for the current execution") + parser.add_argument("--dataset", required=True, help="Dataset name for which to render the result file") args = parser.parse_args() run_id = args.run_id + dataset = args.dataset # assert that the run_id folder exists if not os.path.exists(f"results/{run_id}"): @@ -280,7 +282,7 @@ def draw_per_grouping_algorithm_plots( evaluation_results_per_drug, evaluation_results_per_cell_line, true_vs_pred, - ) = parse_results(path_to_results=f"results/{run_id}") + ) = parse_results(path_to_results=f"results/{run_id}", dataset=dataset) # part of pipeline: EVALUATE_FINAL, COLLECT_RESULTS ( diff --git a/drevalpy/models/utils.py b/drevalpy/models/utils.py index 7731aa3f..375c0075 100644 --- a/drevalpy/models/utils.py +++ b/drevalpy/models/utils.py @@ -1,7 +1,6 @@ """Utility functions for loading and processing data.""" import os.path -import warnings import numpy as np import pandas as pd @@ -94,17 +93,11 @@ def iterate_features(df: pd.DataFrame, feature_type: str) -> dict[str, dict[str, if cl in features.keys(): continue rows = df.loc[cl] + rows = rows.astype(float).to_numpy() if (len(rows.shape) > 1) and (rows.shape[0] > 1): # multiple rows returned - warnings.warn( - f"Multiple rows returned for Cell Line {cl} (and maybe others) " - f"in feature {feature_type}, taking the first one.", - stacklevel=2, - ) - - rows = rows.iloc[0] - # convert to float values - rows = rows.astype(float) - features[cl] = {feature_type: rows.values} + # take mean + rows = np.mean(rows, axis=0) + features[cl] = {feature_type: rows} return features diff --git a/drevalpy/visualization/utils.py b/drevalpy/visualization/utils.py index a643b12e..078bdf5c 100644 --- a/drevalpy/visualization/utils.py +++ b/drevalpy/visualization/utils.py @@ -37,11 +37,12 @@ def _parse_layout(f: TextIO, path_to_layout: str) -> None: f.write("".join(layout)) -def parse_results(path_to_results: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: +def parse_results(path_to_results: str, dataset: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: """ Parse the results from the given directory. :param path_to_results: path to the results directory + :param dataset: dataset name, e.g., GDSC2 :returns: evaluation results, evaluation results per drug, evaluation results per cell line, and true vs. predicted values """ @@ -54,7 +55,7 @@ def parse_results(path_to_results: str) -> tuple[pd.DataFrame, pd.DataFrame, pd. # Convert the path to a forward-slash version for the regex (for Windows) result_dir_str = str(result_dir).replace("\\", "/") pattern = re.compile( - rf"{result_dir_str}/(LPO|LCO|LDO)/[^/]+/(predictions|cross_study|randomization|robustness)/.*\.csv$" + rf"{result_dir_str}/{dataset}/(LPO|LCO|LDO)/[^/]+/(predictions|cross_study|randomization|robustness)/.*\.csv$" ) result_files = [file for file in result_files if pattern.match(str(file).replace("\\", "/"))] @@ -69,8 +70,9 @@ def parse_results(path_to_results: str) -> tuple[pd.DataFrame, pd.DataFrame, pd. rel_file = str(os.path.normpath(file.relative_to(result_dir))).replace("\\", "/") print(f'Evaluating file: "{rel_file}" ...') file_parts = rel_file.split("/") - lpo_lco_ldo = file_parts[0] - algorithm = file_parts[1] + dataset = file_parts[0] + lpo_lco_ldo = file_parts[1] + algorithm = file_parts[2] ( overall_eval, eval_results_per_drug, diff --git a/tests/test_drp_model.py b/tests/test_drp_model.py index ca0b910a..32db04e4 100644 --- a/tests/test_drp_model.py +++ b/tests/test_drp_model.py @@ -151,10 +151,9 @@ def test_iterate_features() -> None: """Test the iteration over features.""" df = pd.DataFrame({"GeneA": [1, 2, 3, 2], "GeneB": [4, 5, 6, 2], "GeneC": [7, 8, 9, 2]}) df.index = ["CellLine1", "CellLine2", "CellLine3", "CellLine1"] - with pytest.warns(UserWarning): - features = iterate_features(df, "gene_expression") + features = iterate_features(df, "gene_expression") assert len(features) == 3 - assert np.all(features["CellLine1"]["gene_expression"] == [1, 4, 7]) + assert np.all(features["CellLine1"]["gene_expression"] == [1.5, 3, 4.5]) def test_load_drug_ids_from_csv() -> None: diff --git a/tests/test_run_suite.py b/tests/test_run_suite.py index 3bfd19cd..9ad1173c 100644 --- a/tests/test_run_suite.py +++ b/tests/test_run_suite.py @@ -53,7 +53,7 @@ def test_run_suite(args): evaluation_results_per_drug, evaluation_results_per_cell_line, true_vs_pred, - ) = parse_results(path_to_results=os.path.join(temp_dir.name, args.run_id, args.dataset_name)) + ) = parse_results(path_to_results=os.path.join(temp_dir.name, args.run_id), dataset="Toy_Data") ( evaluation_results,