From 8ffd82d9e6ace912f4fc638cd038522bd5407d01 Mon Sep 17 00:00:00 2001 From: Jonathan Karlsen Date: Fri, 14 Mar 2025 09:04:43 +0100 Subject: [PATCH] Validate `DESIGN_MATRIX` when running `DESIGN2PARAMS` This commit makes us run validation for DESIGN_MATRIX when parsing configs using DESIGN2PARAMS, and log a warning if it would have failed. --- src/ert/config/design_matrix.py | 4 +- src/ert/config/ert_config.py | 33 +++++++- .../ert/unit_tests/config/test_ert_config.py | 75 +++++++++++++++++++ 3 files changed, 109 insertions(+), 3 deletions(-) diff --git a/src/ert/config/design_matrix.py b/src/ert/config/design_matrix.py index 1664db7ecc6..5c305549cc3 100644 --- a/src/ert/config/design_matrix.py +++ b/src/ert/config/design_matrix.py @@ -33,7 +33,7 @@ def __post_init__(self) -> None: self.active_realizations, self.design_matrix_df, self.parameter_configuration, - ) = self.read_design_matrix() + ) = self.read_and_validate_design_matrix() except (ValueError, AttributeError) as exc: raise ConfigValidationError.with_context( f"Error reading design matrix {self.xls_filename}: {exc}", @@ -157,7 +157,7 @@ def merge_with_existing_parameters( new_param_config += [parameter_group] return new_param_config, design_parameter_group - def read_design_matrix( + def read_and_validate_design_matrix( self, ) -> tuple[list[bool], pd.DataFrame, GenKwConfig]: # Read the parameter names (first row) as strings to prevent pandas from modifying them. diff --git a/src/ert/config/ert_config.py b/src/ert/config/ert_config.py index 3670b4f6298..549930c7849 100644 --- a/src/ert/config/ert_config.py +++ b/src/ert/config/ert_config.py @@ -23,6 +23,7 @@ from pydantic import field_validator from pydantic.dataclasses import dataclass, rebuild_dataclass +from ert.config.design_matrix import DesignMatrix from ert.config.parsing.context_values import ContextBoolEncoder from ert.plugins import ErtPluginManager from ert.plugins.workflow_config import ErtScriptWorkflow @@ -459,7 +460,7 @@ def create_list_of_forward_model_steps_to_run( env_pr_fm_step: dict[str, dict[str, Any]], ) -> list[ForwardModelStep]: errors = [] - fm_steps = [] + fm_steps: list[ForwardModelStep] = [] env_vars = {} for key, val in config_dict.get("SETENV", []): @@ -510,7 +511,17 @@ def create_list_of_forward_model_steps_to_run( if should_add_step: fm_steps.append(fm_step) + design_matrices: list[DesignMatrix] = [] for fm_step in fm_steps: + if fm_step.name == "DESIGN2PARAMS": + xls_filename = fm_step.private_args.get("") + designsheet = fm_step.private_args.get("") + defaultsheet = fm_step.private_args.get("") + if design_matrix := validate_ert_design_matrix( + xls_filename, designsheet, defaultsheet + ): + design_matrices.append(design_matrix) + if fm_step.name in preinstalled_forward_model_steps: try: substituted_json = create_forward_model_json( @@ -539,6 +550,15 @@ def create_list_of_forward_model_steps_to_run( f"Unexpected plugin forward model exception: {e!s}", context=fm_step.name, ) + try: + main_design_matrix: DesignMatrix | None = None + for design_matrix in design_matrices: + if main_design_matrix is None: + main_design_matrix = design_matrix + else: + main_design_matrix = main_design_matrix.merge_with_other(design_matrix) + except Exception as exc: + logger.warning(f"Design matrix merging would have failed due to: {exc}") if errors: raise ConfigValidationError.from_collected(errors) @@ -1253,5 +1273,16 @@ def _forward_model_step_from_config_file( ) +def validate_ert_design_matrix( + xlsfilename, designsheetname, defaultssheetname +) -> DesignMatrix | None: + try: + return DesignMatrix(xlsfilename, designsheetname, defaultssheetname) + except Exception as exc: + logger.warning( + f"DESIGN_MATRIX validation of DESIGN2PARAMS would have failed with: {exc!s}" + ) + + # Due to circular dependency in type annotations between ErtConfig -> WorkflowJob -> ErtScript -> ErtConfig rebuild_dataclass(ErtConfig) diff --git a/tests/ert/unit_tests/config/test_ert_config.py b/tests/ert/unit_tests/config/test_ert_config.py index ea3da75d3b8..625f4b487b6 100644 --- a/tests/ert/unit_tests/config/test_ert_config.py +++ b/tests/ert/unit_tests/config/test_ert_config.py @@ -10,6 +10,7 @@ from textwrap import dedent from unittest.mock import MagicMock +import pandas as pd import pytest from hypothesis import HealthCheck, assume, given, settings from hypothesis import strategies as st @@ -17,6 +18,7 @@ from ert.config import ConfigValidationError, ErtConfig, HookRuntime from ert.config.ert_config import _split_string_into_sections, create_forward_model_json +from ert.config.forward_model_step import ForwardModelStep from ert.config.parsing import ConfigKeys, ConfigWarning from ert.config.parsing.context_values import ( ContextBool, @@ -29,6 +31,7 @@ from ert.config.parsing.queue_system import QueueSystem from ert.plugins import ErtPluginManager from ert.shared import ert_share_path +from tests.ert.ui_tests.cli.analysis.test_design_matrix import _create_design_matrix from .config_dict_generator import config_generators @@ -1991,3 +1994,75 @@ def run(self, *args): assert ert_config.substitutions[""] == "ertconfig_foo" assert ert_config.substitutions[""] == "ertconfig_foo2" + + +def test_design2params_also_validates_design_matrix(tmp_path, caplog, monkeypatch): + design_matrix_file = tmp_path / "my_design_matrix.xlsx" + _create_design_matrix( + design_matrix_file, + pd.DataFrame( + { + "REAL": ["not_a_valid_real"], + "a": [1], + "category": ["cat1"], + } + ), + pd.DataFrame([["b", 1], ["c", 2]]), + ) + mock_design2params = ForwardModelStep( + "DESIGN2PARAMS", + "/usr/bin/env", + arglist=["", "", "", ""], + ) + monkeypatch.setattr( + ErtConfig, + "PREINSTALLED_FORWARD_MODEL_STEPS", + {"DESIGN2PARAMS": mock_design2params}, + ) + ErtConfig.from_file_contents( + f"NUM_REALIZATIONS 1\nFORWARD_MODEL DESIGN2PARAMS(={design_matrix_file}, =DesignSheet01,=DefaultSheet)" + ) + assert "DESIGN_MATRIX validation of DESIGN2PARAMS" in caplog.text + + +def test_multiple_design2params_also_validates_design_matrix_merging( + tmp_path, caplog, monkeypatch +): + design_matrix_file = tmp_path / "my_design_matrix.xlsx" + design_matrix_file2 = tmp_path / "my_design_matrix2.xlsx" + _create_design_matrix( + design_matrix_file, + pd.DataFrame( + { + "REAL": [0, 1], + "letters": ["x", "y"], + } + ), + pd.DataFrame([["a", 1], ["c", 2]]), + ) + _create_design_matrix( + design_matrix_file2, + pd.DataFrame({"REAL": [1, 2], "numbers": [99, 98]}), + pd.DataFrame(), + ) + mock_design2params = ForwardModelStep( + "DESIGN2PARAMS", + "/usr/bin/env", + arglist=["", "", "", ""], + ) + monkeypatch.setattr( + ErtConfig, + "PREINSTALLED_FORWARD_MODEL_STEPS", + {"DESIGN2PARAMS": mock_design2params}, + ) + ErtConfig.from_file_contents( + f"""\ + NUM_REALIZATIONS 1 + FORWARD_MODEL DESIGN2PARAMS(={design_matrix_file}, =DesignSheet01,=DefaultSheet) + FORWARD_MODEL DESIGN2PARAMS(={design_matrix_file2}, =DesignSheet01,=DefaultSheet) + """ + ) + assert ( + "Design matrix merging would have failed due to: Design Matrices don't have the same active realizations" + in caplog.text + )