Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate DESIGN_MATRIX when running DESIGN2PARAMS #10316

Merged
merged 1 commit into from
Mar 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/ert/config/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __post_init__(self) -> None:
self.active_realizations,
self.design_matrix_df,
self.parameter_configuration,
) = self.read_design_matrix()
) = self.read_and_validate_design_matrix()
except (ValueError, AttributeError) as exc:
raise ConfigValidationError.with_context(
f"Error reading design matrix {self.xls_filename}: {exc}",
Expand Down Expand Up @@ -157,7 +157,7 @@ def merge_with_existing_parameters(
new_param_config += [parameter_group]
return new_param_config, design_parameter_group

def read_design_matrix(
def read_and_validate_design_matrix(
self,
) -> tuple[list[bool], pd.DataFrame, GenKwConfig]:
# Read the parameter names (first row) as strings to prevent pandas from modifying them.
Expand Down
33 changes: 32 additions & 1 deletion src/ert/config/ert_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pydantic import field_validator
from pydantic.dataclasses import dataclass, rebuild_dataclass

from ert.config.design_matrix import DesignMatrix
from ert.config.parsing.context_values import ContextBoolEncoder
from ert.plugins import ErtPluginManager
from ert.plugins.workflow_config import ErtScriptWorkflow
Expand Down Expand Up @@ -459,7 +460,7 @@ def create_list_of_forward_model_steps_to_run(
env_pr_fm_step: dict[str, dict[str, Any]],
) -> list[ForwardModelStep]:
errors = []
fm_steps = []
fm_steps: list[ForwardModelStep] = []

env_vars = {}
for key, val in config_dict.get("SETENV", []):
Expand Down Expand Up @@ -510,7 +511,17 @@ def create_list_of_forward_model_steps_to_run(
if should_add_step:
fm_steps.append(fm_step)

design_matrices: list[DesignMatrix] = []
for fm_step in fm_steps:
if fm_step.name == "DESIGN2PARAMS":
xls_filename = fm_step.private_args.get("<xls_filename>")
designsheet = fm_step.private_args.get("<designsheet>")
defaultsheet = fm_step.private_args.get("<defaultssheet>")
if design_matrix := validate_ert_design_matrix(
xls_filename, designsheet, defaultsheet
):
design_matrices.append(design_matrix)

if fm_step.name in preinstalled_forward_model_steps:
try:
substituted_json = create_forward_model_json(
Expand Down Expand Up @@ -539,6 +550,15 @@ def create_list_of_forward_model_steps_to_run(
f"Unexpected plugin forward model exception: {e!s}",
context=fm_step.name,
)
try:
main_design_matrix: DesignMatrix | None = None
for design_matrix in design_matrices:
if main_design_matrix is None:
main_design_matrix = design_matrix
else:
main_design_matrix = main_design_matrix.merge_with_other(design_matrix)
except Exception as exc:
logger.warning(f"Design matrix merging would have failed due to: {exc}")

if errors:
raise ConfigValidationError.from_collected(errors)
Expand Down Expand Up @@ -1253,5 +1273,16 @@ def _forward_model_step_from_config_file(
)


def validate_ert_design_matrix(
xlsfilename, designsheetname, defaultssheetname
) -> DesignMatrix | None:
try:
return DesignMatrix(xlsfilename, designsheetname, defaultssheetname)
except Exception as exc:
logger.warning(
f"DESIGN_MATRIX validation of DESIGN2PARAMS would have failed with: {exc!s}"
)


# Due to circular dependency in type annotations between ErtConfig -> WorkflowJob -> ErtScript -> ErtConfig
rebuild_dataclass(ErtConfig)
75 changes: 75 additions & 0 deletions tests/ert/unit_tests/config/test_ert_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
from textwrap import dedent
from unittest.mock import MagicMock

import pandas as pd
import pytest
from hypothesis import HealthCheck, assume, given, settings
from hypothesis import strategies as st
from pydantic import RootModel, TypeAdapter

from ert.config import ConfigValidationError, ErtConfig, HookRuntime
from ert.config.ert_config import _split_string_into_sections, create_forward_model_json
from ert.config.forward_model_step import ForwardModelStep
from ert.config.parsing import ConfigKeys, ConfigWarning
from ert.config.parsing.context_values import (
ContextBool,
Expand All @@ -29,6 +31,7 @@
from ert.config.parsing.queue_system import QueueSystem
from ert.plugins import ErtPluginManager
from ert.shared import ert_share_path
from tests.ert.ui_tests.cli.analysis.test_design_matrix import _create_design_matrix

from .config_dict_generator import config_generators

Expand Down Expand Up @@ -1991,3 +1994,75 @@ def run(self, *args):

assert ert_config.substitutions["<FOO>"] == "ertconfig_foo"
assert ert_config.substitutions["<FOO2>"] == "ertconfig_foo2"


def test_design2params_also_validates_design_matrix(tmp_path, caplog, monkeypatch):
design_matrix_file = tmp_path / "my_design_matrix.xlsx"
_create_design_matrix(
design_matrix_file,
pd.DataFrame(
{
"REAL": ["not_a_valid_real"],
"a": [1],
"category": ["cat1"],
}
),
pd.DataFrame([["b", 1], ["c", 2]]),
)
mock_design2params = ForwardModelStep(
"DESIGN2PARAMS",
"/usr/bin/env",
arglist=["<IENS>", "<xls_filename>", "<designsheet>", "<defaultssheet>"],
)
monkeypatch.setattr(
ErtConfig,
"PREINSTALLED_FORWARD_MODEL_STEPS",
{"DESIGN2PARAMS": mock_design2params},
)
ErtConfig.from_file_contents(
f"NUM_REALIZATIONS 1\nFORWARD_MODEL DESIGN2PARAMS(<xls_filename>={design_matrix_file}, <designsheet>=DesignSheet01,<defaultssheet>=DefaultSheet)"
)
assert "DESIGN_MATRIX validation of DESIGN2PARAMS" in caplog.text


def test_multiple_design2params_also_validates_design_matrix_merging(
tmp_path, caplog, monkeypatch
):
design_matrix_file = tmp_path / "my_design_matrix.xlsx"
design_matrix_file2 = tmp_path / "my_design_matrix2.xlsx"
_create_design_matrix(
design_matrix_file,
pd.DataFrame(
{
"REAL": [0, 1],
"letters": ["x", "y"],
}
),
pd.DataFrame([["a", 1], ["c", 2]]),
)
_create_design_matrix(
design_matrix_file2,
pd.DataFrame({"REAL": [1, 2], "numbers": [99, 98]}),
pd.DataFrame(),
)
mock_design2params = ForwardModelStep(
"DESIGN2PARAMS",
"/usr/bin/env",
arglist=["<IENS>", "<xls_filename>", "<designsheet>", "<defaultssheet>"],
)
monkeypatch.setattr(
ErtConfig,
"PREINSTALLED_FORWARD_MODEL_STEPS",
{"DESIGN2PARAMS": mock_design2params},
)
ErtConfig.from_file_contents(
f"""\
NUM_REALIZATIONS 1
FORWARD_MODEL DESIGN2PARAMS(<xls_filename>={design_matrix_file}, <designsheet>=DesignSheet01,<defaultssheet>=DefaultSheet)
FORWARD_MODEL DESIGN2PARAMS(<xls_filename>={design_matrix_file2}, <designsheet>=DesignSheet01,<defaultssheet>=DefaultSheet)
"""
)
assert (
"Design matrix merging would have failed due to: Design Matrices don't have the same active realizations"
in caplog.text
)