From dfd6e4c7335b9bd5c4459f55aad610fdff16a230 Mon Sep 17 00:00:00 2001 From: xjules Date: Tue, 18 Feb 2025 10:41:24 +0100 Subject: [PATCH 1/5] Add ScalarParameters config - transformations become dataclasses -> TODO: pydantic - ScalarParameter represents a single parameter instance with a datasource (DESIGN_MATRIX, SAMPLED) - all ScalarParameter instances are bound by ScalarParameters(ParameterConfig) and are stored for all realizations in a single pl.Dataframe (parquet) file. - API to load / save scalar parameters is in local_ensemble load|save_param_scalar - DESIGN_MATRIX now creates only ScalarParameters instances and modifies the ScalarParameters Config. - Reimplement load_all_gen_kw_data - scalarparameters: provide export to xr.Dataset - Update ensemble smoother - add pydantic validation to distributions - Fix ertsummary for scalars - ConfigValidationError on init_files for GEN_KW - Simple workaround for is_initialized with scalars - Replace GenKW with Scalars in test_when_manifest_files_are_written_forward_model_ok_succeeds --- src/ert/analysis/_es_update.py | 53 +- src/ert/config/__init__.py | 12 + src/ert/config/design_matrix.py | 110 ++- src/ert/config/ensemble_config.py | 48 +- src/ert/config/ert_config.py | 41 +- src/ert/config/parameter_config.py | 11 +- src/ert/config/scalar_parameter.py | 729 ++++++++++++++++++ src/ert/dark_storage/common.py | 97 ++- src/ert/enkf_main.py | 51 +- src/ert/gui/ertwidgets/models/ertsummary.py | 6 +- src/ert/run_models/ensemble_experiment.py | 49 +- src/ert/run_models/ensemble_smoother.py | 50 +- .../run_models/multiple_data_assimilation.py | 44 +- src/ert/storage/local_ensemble.py | 141 +++- src/ert/storage/local_experiment.py | 10 +- .../cli/analysis/test_design_matrix.py | 69 +- .../ert/unit_tests/analysis/test_es_update.py | 10 +- .../unit_tests/config/test_gen_kw_config.py | 135 ++-- .../gui/tools/plot/test_plot_api.py | 32 +- .../test_design_matrix.py | 162 ++-- .../scalars_all.csv | 11 +
.../storage/test_parameter_sample_types.py | 255 +++--- tests/ert/unit_tests/test_libres_facade.py | 151 ++-- .../ert/unit_tests/test_run_path_creation.py | 50 +- 24 files changed, 1645 insertions(+), 682 deletions(-) create mode 100644 src/ert/config/scalar_parameter.py create mode 100644 tests/ert/unit_tests/snapshots/test_libres_facade/test_load_scalar_parameters/scalars_all.csv diff --git a/src/ert/analysis/_es_update.py b/src/ert/analysis/_es_update.py index 7af560e14a0..aaa2d4ec87a 100644 --- a/src/ert/analysis/_es_update.py +++ b/src/ert/analysis/_es_update.py @@ -5,12 +5,7 @@ import time from collections.abc import Callable, Iterable, Sequence from fnmatch import fnmatch -from typing import ( - TYPE_CHECKING, - Generic, - Self, - TypeVar, -) +from typing import TYPE_CHECKING, Generic, Self, TypeVar import iterative_ensemble_smoother as ies import numpy as np @@ -19,7 +14,13 @@ import scipy from iterative_ensemble_smoother.experimental import AdaptiveESMDA -from ert.config import ESSettings, GenKwConfig, ObservationGroups, UpdateSettings +from ert.config import ( + ESSettings, + GenKwConfig, + ObservationGroups, + ScalarParameters, + UpdateSettings, +) from . 
import misfit_preprocessor from .event import ( @@ -31,10 +32,7 @@ AnalysisTimeEvent, DataSection, ) -from .snapshots import ( - ObservationAndResponseSnapshot, - SmootherSnapshot, -) +from .snapshots import ObservationAndResponseSnapshot, SmootherSnapshot if TYPE_CHECKING: import numpy.typing as npt @@ -111,16 +109,26 @@ def _all_parameters( def _save_param_ensemble_array_to_disk( - ensemble: Ensemble, + source_ensemble: Ensemble, + target_ensemble: Ensemble, param_ensemble_array: npt.NDArray[np.float64], param_group: str, iens_active_index: npt.NDArray[np.int_], ) -> None: - config_node = ensemble.experiment.parameter_configuration[param_group] - for i, realization in enumerate(iens_active_index): - config_node.save_parameters( - ensemble, param_group, realization, param_ensemble_array[:, i] + config_node = target_ensemble.experiment.parameter_configuration[param_group] + if isinstance(config_node, ScalarParameters): + config_node.save_updated_parameters_and_copy_remaining( + source_ensemble, + target_ensemble, + param_group, + iens_active_index, + param_ensemble_array, ) + else: + for i, realization in enumerate(iens_active_index): + config_node.save_parameters( + target_ensemble, param_group, realization, param_ensemble_array[:, i] + ) def _load_param_ensemble_array( @@ -129,7 +137,11 @@ def _load_param_ensemble_array( iens_active_index: npt.NDArray[np.int_], ) -> npt.NDArray[np.float64]: config_node = ensemble.experiment.parameter_configuration[param_group] - return config_node.load_parameters(ensemble, param_group, iens_active_index) + if isinstance(config_node, ScalarParameters): + return config_node.load_parameters_to_update(ensemble, iens_active_index) + dataset = config_node.load_parameters(ensemble, param_group, iens_active_index) + assert isinstance(dataset, np.ndarray), "dataset is not an numpy array" + return dataset def _expand_wildcards( @@ -608,9 +620,12 @@ def correlation_callback( logger.info(log_msg) 
progress_callback(AnalysisStatusEvent(msg=log_msg)) start = time.time() - _save_param_ensemble_array_to_disk( - target_ensemble, param_ensemble_array, param_group, iens_active_index + source_ensemble, + target_ensemble, + param_ensemble_array, + param_group, + iens_active_index, ) logger.info( f"Storing data for {param_group} completed in {(time.time() - start) / 60} minutes" diff --git a/src/ert/config/__init__.py b/src/ert/config/__init__.py index 5d13ab4d607..07cf8d478d7 100644 --- a/src/ert/config/__init__.py +++ b/src/ert/config/__init__.py @@ -31,6 +31,13 @@ from .parsing.observations_parser import ObservationType from .queue_config import QueueConfig from .response_config import InvalidResponseFile, ResponseConfig +from .scalar_parameter import ( + SCALAR_PARAMETERS_NAME, + DataSource, + ScalarParameter, + ScalarParameters, + get_distribution, +) from .summary_config import SummaryConfig from .summary_observation import SummaryObservation from .surface_config import SurfaceConfig @@ -39,11 +46,13 @@ __all__ = [ "DESIGN_MATRIX_GROUP", + "SCALAR_PARAMETERS_NAME", "AnalysisConfig", "AnalysisModule", "ConfigValidationError", "ConfigValidationError", "ConfigWarning", + "DataSource", "DesignMatrix", "ESSettings", "EnkfObs", @@ -70,6 +79,8 @@ "QueueConfig", "QueueSystem", "ResponseConfig", + "ScalarParameter", + "ScalarParameters", "SummaryConfig", "SummaryObservation", "SurfaceConfig", @@ -80,5 +91,6 @@ "WorkflowJob", "capture_validation", "field_transform", + "get_distribution", "lint_file", ] diff --git a/src/ert/config/design_matrix.py b/src/ert/config/design_matrix.py index 3f1ee735bf2..34699cc6230 100644 --- a/src/ert/config/design_matrix.py +++ b/src/ert/config/design_matrix.py @@ -8,13 +8,17 @@ import pandas as pd from pandas.api.types import is_integer_dtype -from ert.config.gen_kw_config import GenKwConfig, TransformFunctionDefinition - from ._option_dict import option_dict from .parsing import ConfigValidationError, ErrorInfo +from .scalar_parameter 
import ( + DataSource, + ScalarParameter, + ScalarParameters, + TransRawSettings, +) if TYPE_CHECKING: - from ert.config import ParameterConfig + pass DESIGN_MATRIX_GROUP = "DESIGN_MATRIX" @@ -32,7 +36,7 @@ def __post_init__(self) -> None: ( self.active_realizations, self.design_matrix_df, - self.parameter_configuration, + self.scalars, ) = self.read_and_validate_design_matrix() except (ValueError, AttributeError) as exc: raise ConfigValidationError.with_context( @@ -102,66 +106,54 @@ def merge_with_other(self, dm_other: DesignMatrix) -> None: except ValueError as exc: errors.append(ErrorInfo(f"Error when merging design matrices {exc}!")) - for tfd in dm_other.parameter_configuration.transform_function_definitions: - self.parameter_configuration.transform_function_definitions.append(tfd) + for param in dm_other.scalars: + self.scalars.append(param) if errors: raise ConfigValidationError.from_collected(errors) def merge_with_existing_parameters( - self, existing_parameters: list[ParameterConfig] - ) -> tuple[list[ParameterConfig], GenKwConfig]: + self, existing_scalars: ScalarParameters + ) -> ScalarParameters: """ This method merges the design matrix parameters with the existing parameters and - returns the new list of existing parameters, wherein we drop GEN_KW group having a full overlap with the design matrix group. - GEN_KW group that was dropped will acquire a new name from the design matrix group. - Additionally, the ParameterConfig which is the design matrix group is returned separately. - + returns the new list of existing parameters. 
Args: - existing_parameters (List[ParameterConfig]): List of existing parameters + existing_scalars (ScalarParameters): existing scalar parameters - Raises: - ConfigValidationError: If there is a partial overlap between the design matrix group and any existing GEN_KW group Returns: - tuple[List[ParameterConfig], ParameterConfig]: List of existing parameters and the dedicated design matrix group + ScalarParameters: new set of ScalarParameters """ - new_param_config: list[ParameterConfig] = [] - - design_parameter_group = self.parameter_configuration - design_keys = [e.name for e in design_parameter_group.transform_functions] + all_params: list[ScalarParameter] = [] - design_group_added = False - for parameter_group in existing_parameters: - if not isinstance(parameter_group, GenKwConfig): - new_param_config += [parameter_group] + overlap_set = set() + for existing_parameter in existing_scalars.scalars: + if existing_parameter.input_source == DataSource.DESIGN_MATRIX: continue - existing_keys = [e.name for e in parameter_group.transform_functions] - if set(existing_keys) == set(design_keys): - if design_group_added: - raise ConfigValidationError( - "Multiple overlapping groups with design matrix found in existing parameters!\n" - f"{design_parameter_group.name} and {parameter_group.name}" - ) - - design_parameter_group.name = parameter_group.name - design_parameter_group.template_file = parameter_group.template_file - design_parameter_group.output_file = parameter_group.output_file - design_group_added = True - elif set(design_keys) & set(existing_keys): - raise ConfigValidationError( - "Overlapping parameter names found in design matrix!\n" - f"{DESIGN_MATRIX_GROUP}:{design_keys}\n{parameter_group.name}:{existing_keys}" - "\nThey need to match exactly or not at all." 
- ) - else: - new_param_config += [parameter_group] - return new_param_config, design_parameter_group + overlap = False + for parameter_design in self.scalars: + if existing_parameter.param_name == parameter_design.param_name: + parameter_design.group_name = existing_parameter.group_name + parameter_design.template_file = existing_parameter.template_file + parameter_design.output_file = existing_parameter.output_file + all_params.append(parameter_design) + overlap = True + overlap_set.add(existing_parameter.param_name) + break + if not overlap: + all_params.append(existing_parameter) + + for parameter_design in self.scalars: + if parameter_design.param_name not in overlap_set: + all_params.append(parameter_design) + + return ScalarParameters(scalars=all_params) def read_and_validate_design_matrix( self, - ) -> tuple[list[bool], pd.DataFrame, GenKwConfig]: + ) -> tuple[list[bool], pd.DataFrame, list[ScalarParameter]]: # Read the parameter names (first row) as strings to prevent pandas from modifying them. # This ensures that duplicate or empty column names are preserved exactly as they appear in the Excel sheet. # By doing this, we can properly validate variable names, including detecting duplicates or missing names. 
@@ -207,29 +199,25 @@ def read_and_validate_design_matrix( design_matrix_df = pd.concat([design_matrix_df, default_df], axis=1) - transform_function_definitions: list[TransformFunctionDefinition] = [] + scalars: list[ScalarParameter] = [] for parameter in design_matrix_df.columns: - transform_function_definitions.append( - TransformFunctionDefinition( - name=parameter, - param_name="RAW", - values=[], + scalars.append( + ScalarParameter( + param_name=parameter, + group_name=DESIGN_MATRIX_GROUP, + input_source=DataSource.DESIGN_MATRIX, + distribution=TransRawSettings(), + template_file=None, + output_file=None, + update=False, ) ) - parameter_configuration = GenKwConfig( - name=DESIGN_MATRIX_GROUP, - forward_init=False, - template_file=None, - output_file=None, - transform_function_definitions=transform_function_definitions, - update=False, - ) reals = design_matrix_df.index.tolist() return ( [x in reals for x in range(max(reals) + 1)], design_matrix_df, - parameter_configuration, + scalars, ) @staticmethod diff --git a/src/ert/config/ensemble_config.py b/src/ert/config/ensemble_config.py index ce118e6d19c..ca4ddf153b7 100644 --- a/src/ert/config/ensemble_config.py +++ b/src/ert/config/ensemble_config.py @@ -16,6 +16,7 @@ from .parsing import ConfigDict, ConfigKeys, ConfigValidationError from .refcase import Refcase from .response_config import ResponseConfig +from .scalar_parameter import SCALAR_PARAMETERS_NAME, ScalarParameters from .summary_config import SummaryConfig from .surface_config import SurfaceConfig @@ -47,18 +48,22 @@ class EnsembleConfig: default_factory=dict ) parameter_configs: dict[ - str, GenKwConfig | FieldConfig | SurfaceConfig | ExtParamConfig + str, + GenKwConfig | FieldConfig | SurfaceConfig | ExtParamConfig | ScalarParameters, ] = field(default_factory=dict) refcase: Refcase | None = None def __post_init__(self) -> None: - self._check_for_duplicate_names( - [p.name for p in self.parameter_configs.values()], - [key for config in 
self.response_configs.values() for key in config.keys], - ) - self._check_for_duplicate_gen_kw_param_names( - [p for p in self.parameter_configs.values() if isinstance(p, GenKwConfig)] - ) + if self.scalars: + self._check_for_duplicate_names( + list(self.scalars.groups.keys()), + [ + key + for config in self.response_configs.values() + for key in config.keys + ], + ) + self._check_for_duplicate_gen_kw_param_names(self.scalars) self._check_for_forward_init_in_gen_kw( [p for p in self.parameter_configs.values() if isinstance(p, GenKwConfig)] @@ -81,10 +86,8 @@ def _check_for_duplicate_names( ) @staticmethod - def _check_for_duplicate_gen_kw_param_names(gen_kw_list: list[GenKwConfig]) -> None: - gen_kw_param_count = Counter( - keyword.name for p in gen_kw_list for keyword in p.transform_functions - ) + def _check_for_duplicate_gen_kw_param_names(scalars: ScalarParameters) -> None: + gen_kw_param_count = Counter(param.param_name for param in scalars.scalars) duplicate_gen_kw_names = [ (n, c) for n, c in gen_kw_param_count.items() if c > 1 ] @@ -134,7 +137,7 @@ def make_field(field_list: list[str]) -> FieldConfig: return FieldConfig.from_config_list(grid_file_path, dims, field_list) parameter_configs = ( - [GenKwConfig.from_config_list(g) for g in gen_kw_list] + ([ScalarParameters.from_config_list(gen_kw_list)] if gen_kw_list else []) + [SurfaceConfig.from_config_list(s) for s in surface_list] + [make_field(f) for f in field_list] ) @@ -158,7 +161,16 @@ def make_field(field_list: list[str]) -> FieldConfig: refcase=refcase, ) + @property + def scalars(self) -> ScalarParameters | None: + param = self.parameter_configs.get(SCALAR_PARAMETERS_NAME, None) + if isinstance(param, ScalarParameters): + return param + return None + def __getitem__(self, key: str) -> ParameterConfig | ResponseConfig: + if self.scalars is not None and key in self.scalars.groups: + return self.scalars if key in self.parameter_configs: return self.parameter_configs[key] elif key in 
self.response_configs: @@ -180,12 +192,12 @@ def hasNodeGenData(self, key: str) -> bool: config = self.response_configs["gen_data"] return key in config.keys + # TODO: This might not be needed but it retrieves the group names for genkw config def get_keylist_gen_kw(self) -> list[str]: - return [ - val.name - for val in self.parameter_configuration - if isinstance(val, GenKwConfig) - ] + for val in self.parameter_configuration: + if isinstance(val, ScalarParameters): + return list(val.groups.keys()) + return [] @property def parameters(self) -> list[str]: diff --git a/src/ert/config/ert_config.py b/src/ert/config/ert_config.py index 549930c7849..a43a2657f84 100644 --- a/src/ert/config/ert_config.py +++ b/src/ert/config/ert_config.py @@ -10,13 +10,7 @@ from datetime import datetime from os import path from pathlib import Path -from typing import ( - Any, - ClassVar, - Self, - no_type_check, - overload, -) +from typing import Any, ClassVar, Self, no_type_check, overload import polars as pl from pydantic import ValidationError as PydanticValidationError @@ -37,7 +31,6 @@ ForwardModelStepPlugin, ForwardModelStepValidationError, ) -from .gen_kw_config import GenKwConfig from .model_config import ModelConfig from .observation_vector import ObsVector from .observations import EnkfObs @@ -55,22 +48,18 @@ init_forward_model_schema, init_site_config_schema, init_user_config_schema, - parse_contents, - read_file, -) -from .parsing import ( - parse as parse_config, ) +from .parsing import parse as parse_config +from .parsing import parse_contents, read_file from .parsing.observations_parser import ( GenObsValues, HistoryValues, ObservationConfigError, SummaryValues, ) -from .parsing.observations_parser import ( - parse as parse_observations, -) +from .parsing.observations_parser import parse as parse_observations from .queue_config import QueueConfig +from .scalar_parameter import ScalarParameters from .workflow import Workflow from .workflow_job import ErtScriptLoadFailure, 
WorkflowJob @@ -869,20 +858,17 @@ def from_dict(cls, config_dict) -> Self: raise ConfigValidationError.from_collected(errors) if dm := analysis_config.design_matrix: - dm_params = [ - x.name - for x in dm.parameter_configuration.transform_function_definitions - ] + dm_params = [x.param_name for x in dm.scalars] for group_name, config in ensemble_config.parameter_configs.items(): overlapping = [] - if not isinstance(config, GenKwConfig): + if not isinstance(config, ScalarParameters): continue - for transform_definition in config.transform_function_definitions: - if transform_definition.name in dm_params: - overlapping.append(transform_definition.name) + for param in config.scalars: + if param.param_name in dm_params: + overlapping.append(f"{param.group_name}:{param.param_name}") if overlapping: ConfigWarning.warn( - f"Parameters {overlapping} from GEN_KW group '{group_name}' " + f"Parameters {overlapping} " "will be overridden by design matrix. This will cause " "updates to be turned off for these parameters." 
) @@ -1102,8 +1088,9 @@ def env_pr_fm_step(self) -> dict[str, dict[str, Any]]: @staticmethod def _create_observations( - obs_config_content: dict[str, HistoryValues | SummaryValues | GenObsValues] - | None, + obs_config_content: ( + dict[str, HistoryValues | SummaryValues | GenObsValues] | None + ), ensemble_config: EnsembleConfig, time_map: list[datetime] | None, history: HistorySource, diff --git a/src/ert/config/parameter_config.py b/src/ert/config/parameter_config.py index 7116b421bfb..302903e7df3 100644 --- a/src/ert/config/parameter_config.py +++ b/src/ert/config/parameter_config.py @@ -2,16 +2,19 @@ import dataclasses from abc import ABC, abstractmethod +from collections.abc import Iterable from pathlib import Path from typing import TYPE_CHECKING, Any import numpy as np +import polars as pl import xarray as xr from ._option_dict import option_dict if TYPE_CHECKING: import numpy.typing as npt + import pandas as pd from ert.storage import Ensemble @@ -54,10 +57,12 @@ class ParameterConfig(ABC): def sample_or_load( self, - real_nr: int, + real_nr: int | Iterable[int], random_seed: int, ensemble_size: int, - ) -> xr.Dataset: + design_matrix_df: pd.DataFrame | None = None, + ) -> xr.Dataset | pl.DataFrame: + assert isinstance(real_nr, int | np.integer) return self.read_from_runpath(Path(), real_nr, 0) @abstractmethod @@ -101,7 +106,7 @@ def save_parameters( @abstractmethod def load_parameters( self, ensemble: Ensemble, group: str, realizations: npt.NDArray[np.int_] - ) -> npt.NDArray[np.float64]: + ) -> npt.NDArray[np.float64] | xr.Dataset | pl.DataFrame: """ Load the parameter from internal storage for the given ensemble. Must return array of shape (number of parameters, number of realizations). 
diff --git a/src/ert/config/scalar_parameter.py b/src/ert/config/scalar_parameter.py new file mode 100644 index 00000000000..38699fe4338 --- /dev/null +++ b/src/ert/config/scalar_parameter.py @@ -0,0 +1,729 @@ +from __future__ import annotations + +import math +import os +import shutil +import warnings +from collections import defaultdict +from collections.abc import Iterable +from enum import StrEnum +from hashlib import sha256 +from pathlib import Path +from typing import TYPE_CHECKING, Annotated, Any, Literal, Self, overload + +import numpy as np +import pandas as pd +import polars as pl +from pydantic import Field, ValidationError, field_validator, model_validator +from pydantic.dataclasses import dataclass +from scipy.stats import norm + +from ert.substitutions import substitute_runpath_name + +from ._str_to_bool import str_to_bool +from .parameter_config import ParameterConfig, parse_config +from .parsing import ConfigValidationError, ConfigWarning + +if TYPE_CHECKING: + import numpy.typing as npt + + from ert.storage import Ensemble + + +@dataclass +class TransSettingsValidation: + @classmethod + def create(cls, *args, **kwargs): + try: + return cls(*args, **kwargs) + except ValidationError as e: + simplified_msg = "; ".join(err["msg"] for err in e.errors()) + raise ConfigValidationError(simplified_msg) from e + + +@dataclass +class TransUnifSettings(TransSettingsValidation): + name: Literal["unif"] = "unif" + min: float = 0.0 + max: float = 1.0 + + def trans(self, x: float) -> float: + y = float(norm.cdf(x)) + return y * (self.max - self.min) + self.min + + +@dataclass +class TransLogUnifSettings(TransSettingsValidation): + name: Literal["logunif"] = "logunif" + log_min: float = 0.0 + log_max: float = 1.0 + + def trans(self, x: float) -> float: + # log_min, log_max = math.log(arg[0]), math.log(arg[1]) + tmp = norm.cdf(x) + log_y = self.log_min + tmp * ( + self.log_max - self.log_min + ) # Shift according to max / min + return math.exp(log_y) + + +@dataclass 
+class TransDUnifSettings(TransSettingsValidation): + name: Literal["dunif"] = "dunif" + steps: int = 1000 + min: float = 0.0 + max: float = 1.0 + + def trans(self, x: float) -> float: + y = float(norm.cdf(x)) + return (math.floor(y * self.steps) / (self.steps - 1)) * ( + self.max - self.min + ) + self.min + + +@dataclass +class TransNormalSettings(TransSettingsValidation): + name: Literal["normal"] = "normal" + mean: float = 0.0 + std: float = 1.0 + + @field_validator("std") + @classmethod + def std_must_be_positive(cls, value): + if value < 0: + raise ValueError(f"Negative STD {value} for normal distribution") + return value + + def trans(self, x: float) -> float: + return x * self.std + self.mean + + +@dataclass +class TransLogNormalSettings(TransSettingsValidation): + name: Literal["lognormal"] = "lognormal" + mean: float = 0.0 + std: float = 1.0 + + @field_validator("std") + @classmethod + def std_must_be_positive(cls, value): + if value < 0: + raise ValueError(f"Negative STD {value} for lognormal distribution") + return value + + def trans(self, x: float) -> float: + # mean is the expectation of log( y ) + return math.exp(x * self.std + self.mean) + + +@dataclass +class TransTruncNormalSettings(TransSettingsValidation): + name: Literal["trunc_normal"] = "trunc_normal" + mean: float = 0.0 + std: float = 1.0 + min: float = 0.0 + max: float = 1.0 + + @field_validator("std") + @classmethod + def std_must_be_positive(cls, value): + if value < 0: + raise ValueError(f"Negative STD {value} for truncated normal distribution") + return value + + def trans(self, x: float) -> float: + y = x * self.std + self.mean + return max(min(y, self.max), self.min) # clamp + + +@dataclass +class TransRawSettings(TransSettingsValidation): + name: Literal["raw"] = "raw" + + def trans(self, x: float) -> float: + return x + + +@dataclass +class TransConstSettings(TransSettingsValidation): + name: Literal["const"] = "const" + value: float = 0.0 + + def trans(self, _: float) -> float: + 
return self.value + + +@dataclass +class TransTriangularSettings(TransSettingsValidation): + name: Literal["triangular"] = "triangular" + min: float = 0.0 + mode: float = 0.5 + max: float = 1.0 + + @model_validator(mode="after") + def valid_traingular_params(self): + if not self.min < self.max: + raise ValueError( + f"Minimum {self.min} must be strictly less than the maximum {self.max}" + ) + if not (self.min <= self.mode <= self.max): + raise ValueError( + f"The mode {self.mode} must be between the minimum {self.min} and maximum {self.max}" + ) + return self + + def trans(self, x: float) -> float: + inv_norm_left = (self.max - self.min) * (self.mode - self.min) + inv_norm_right = (self.max - self.min) * (self.max - self.mode) + ymode = (self.mode - self.min) / (self.max - self.min) + y = norm.cdf(x) + + if y < ymode: + return self.min + math.sqrt(y * inv_norm_left) + else: + return self.max - math.sqrt((1 - y) * inv_norm_right) + + +@dataclass +class TransErrfSettings(TransSettingsValidation): + name: Literal["errf"] = "errf" + min: float = 0.0 + max: float = 1.0 + skew: float = 0.0 + width: float = 1.0 + + def trans(self, x: float) -> float: + y = norm(loc=0, scale=self.width).cdf(x + self.skew) + if np.isnan(y): + raise ValueError( + "Output is nan, likely from triplet (x, skewness, width) " + "leading to low/high-probability in normal CDF." 
+ ) + return self.min + y * (self.max - self.min) + + +@dataclass +class TransDerrfSettings(TransSettingsValidation): + name: Literal["derrf"] = "derrf" + steps: float = 1000.0 + min: float = 0.0 + max: float = 1.0 + skew: float = 0.0 + width: float = 1.0 + + @model_validator(mode="after") + def valid_derrf_params(self): + steps_float = float(self.steps) + if not steps_float.is_integer() or not (int(steps_float) > 1): + raise ValueError( + f"NBINS {int(self.steps)} must be a positive integer larger than 1 for DERRF distribution" + ) + self.steps = int(self.steps) + if not (self.min < self.max): + raise ValueError( + f"The minimum {self.min} must be less than the maximum {self.max} for DERRF distribution" + ) + if not (self.width > 0): + raise ValueError( + f"The width {self.width} must be greater than 0 for DERRF distribution" + ) + return self + + def trans(self, x: float) -> float: + q_values = np.linspace(start=0, stop=1, num=int(self.steps)) + q_checks = np.linspace(start=0, stop=1, num=int(self.steps + 1))[1:] + y = TransErrfSettings(min=0, max=1, skew=self.skew, width=self.width).trans(x) + bin_index = np.digitize(y, q_checks, right=True) + y_binned = q_values[bin_index] + result = self.min + y_binned * (self.max - self.min) + if result > self.max or result < self.min: + warnings.warn( + "trans_derff suffered from catastrophic loss of precision, clamping to min,max", + stacklevel=1, + ) + return np.clip(result, self.min, self.max) + if np.isnan(result): + raise ValueError( + "trans_derrf returns nan, check that input arguments are reasonable" + ) + return float(result) + + +@dataclass +class PolarsData: + name: Literal["polars"] + data_set_file: Path + + +@overload +def _get_abs_path(file: None) -> None: + pass + + +@overload +def _get_abs_path(file: str) -> str: + pass + + +def _get_abs_path(file: str | None) -> str | None: + if file is not None: + file = os.path.realpath(file) + return file + + +def get_distribution(name: str, values: list[str]) -> Any: + 
return { + "NORMAL": lambda: TransNormalSettings.create( + mean=float(values[0]), std=float(values[1]) + ), + "LOGNORMAL": lambda: TransLogNormalSettings.create( + mean=float(values[0]), std=float(values[1]) + ), + "UNIFORM": lambda: TransUnifSettings.create( + min=float(values[0]), max=float(values[1]) + ), + "LOGUNIF": lambda: TransLogUnifSettings.create( + log_min=math.log(float(values[0])), log_max=math.log(float(values[1])) + ), + "TRUNCATED_NORMAL": lambda: TransTruncNormalSettings.create( + mean=float(values[0]), + std=float(values[1]), + min=float(values[2]), + max=float(values[3]), + ), + "RAW": TransRawSettings.create(), + "CONST": lambda: TransConstSettings.create(value=float(values[0])), + "DUNIF": lambda: TransDUnifSettings.create( + steps=int(values[0]), min=float(values[1]), max=float(values[2]) + ), + "TRIANGULAR": lambda: TransTriangularSettings.create( + min=float(values[0]), mode=float(values[1]), max=float(values[2]) + ), + "ERRF": lambda: TransErrfSettings.create( + min=values[0], + max=values[1], + skew=values[2], + width=values[3], + ), + "DERRF": lambda: TransDerrfSettings.create( + steps=values[0], + min=values[1], + max=values[2], + skew=values[3], + width=values[4], + ), + }[name]() + + +class DataSource(StrEnum): + DESIGN_MATRIX = "design_matrix" + SAMPLED = "sampled" + + +@dataclass +class ScalarParameter: + template_file: str | None + output_file: str | None + param_name: str + group_name: str + distribution: Annotated[ + TransUnifSettings + | TransLogNormalSettings + | TransLogUnifSettings + | TransDUnifSettings + | TransRawSettings + | TransConstSettings + | TransNormalSettings + | TransTruncNormalSettings + | TransErrfSettings + | TransDerrfSettings + | TransTriangularSettings, + Field(discriminator="name"), + ] + + input_source: DataSource + update: bool = True + + +SCALAR_PARAMETERS_NAME = "SCALAR_PARAMETERS" + + +@dataclass(kw_only=True) +class ScalarParameters(ParameterConfig): + scalars: list[ScalarParameter] + name: str = 
SCALAR_PARAMETERS_NAME + forward_init: bool = False + update: bool = True + + def __post_init__(self) -> None: + self.groups: dict[str, list[ScalarParameter]] = defaultdict(list) + self.hash_group_key: dict[str, ScalarParameter] = {} + for param in self.scalars: + self.groups[param.group_name].append(param) + self.hash_group_key[f"{param.group_name}:{param.param_name}"] = param + self.update = any(param.update for param in self.scalars) + + def __getitem__(self, key: str) -> list[ScalarParameter]: + if key in self.groups: + return list(self.groups[key]) + elif key in self.hash_group_key: + return [self.hash_group_key[key]] + return [] + + @staticmethod + def _sample_value( + parameters: list[ScalarParameter], + global_seed: str, + realization: int, + ) -> dict[str, float]: + """ + Generate a sample value for each key in a parameter group. + + The sampling is reproducible and dependent on a global seed combined + with the parameter group name and individual key names. The 'realization' parameter + determines the specific sample point from the distribution for each parameter. + + Parameters: + - parameters (list[ScalarParameter]): List of ScalarParameter. + The name of the parameter group, used to ensure unique RNG seeds for different groups. + - global_seed (str): A global seed string used for RNG seed generation to ensure + reproducibility across runs. + - realization (int): An integer used to advance the RNG to a specific point in its + sequence, effectively selecting the 'realization'-th sample from the distribution. + + Returns: + - dict[str, float]: A dict of sampled values [key:value] + + Note: + The method uses SHA-256 for hash generation and numpy's default random number generator + for sampling. The RNG state is advanced to the 'realization' point before generating + a single sample, enhancing efficiency by avoiding the generation of large, unused sample sets. 
+ """ + parameter_values: dict[str, float] = {} + for parameter in parameters: + if parameter.input_source == DataSource.DESIGN_MATRIX: + continue + key_hash = sha256( + global_seed.encode("utf-8") + + f"{parameter.group_name}:{parameter.param_name}".encode() + ) + seed = np.frombuffer(key_hash.digest(), dtype="uint32") + rng = np.random.default_rng(seed) + + # Advance the RNG state to the realization point + rng.standard_normal(realization) + + # Generate a single sample + value = rng.standard_normal(1) + transformed_value = parameter.distribution.trans(value[0]) + parameter_values[f"{parameter.group_name}:{parameter.param_name}"] = value[ + 0 + ] + parameter_values[ + f"{parameter.group_name}:{parameter.param_name}.transformed" + ] = float(transformed_value) + + return parameter_values + + def sample_or_load( + self, + real_nr: int | Iterable[int], + random_seed: int, + ensemble_size: int, + design_matrix_df: pd.DataFrame | None = None, + ) -> pl.DataFrame: + if isinstance(real_nr, int | np.integer): + real_nr = [real_nr] + df_list = [] + for real in real_nr: + params = self._sample_value( + self.scalars, + str(random_seed), + real, + ) + params["realization"] = real + if design_matrix_df is not None: + row = design_matrix_df.loc[real] + for parameter in self.scalars: + if parameter.input_source == DataSource.DESIGN_MATRIX: + value = row[parameter.param_name] + params[f"{parameter.group_name}:{parameter.param_name}"] = value + params[ + f"{parameter.group_name}:{parameter.param_name}.transformed" + ] = value + df_list.append(pl.DataFrame(params)) + return pl.concat(df_list, how="vertical") + + def load_parameters_to_update( + self, + ensemble: Ensemble, + iens_active_index: npt.NDArray[np.int_], + ) -> npt.NDArray[np.float64]: + params_to_update = [ + f"{param.group_name}:{param.param_name}" + for param in self.scalars + if param.update + ] + if not params_to_update: + raise ValueError("No parameters to update") + df = self.load_parameters(ensemble, 
SCALAR_PARAMETERS_NAME, iens_active_index)
+        return df.select(params_to_update).to_numpy().T
+
+    @staticmethod
+    def load_parameters(
+        ensemble: Ensemble, group: str, realizations: npt.NDArray[np.int_]
+    ) -> pl.DataFrame:
+        return ensemble.load_parameters_scalar(group, realizations)
+
+    def save_parameters(
+        self,
+        ensemble: Ensemble,
+        group: str,
+        realization: int,
+        data: npt.NDArray[np.float64],
+    ) -> None:
+        # this function is not used since we cannot deduce
+        # which parameters are being written within the data array
+        pass
+
+    def save_experiment_data(
+        self,
+        experiment_path: Path,
+    ) -> None:
+        templates: dict[str, str] = {}
+        for group_name, params in self.groups.items():
+            for param in params:
+                if param.template_file is not None:
+                    templates[group_name] = param.template_file
+                    break
+
+        for template_file in templates.values():
+            incoming_template_file_path = Path(template_file)
+            template_file_path = Path(
+                experiment_path / incoming_template_file_path.name
+            )
+            shutil.copyfile(incoming_template_file_path, template_file_path)
+
+    def save_updated_parameters_and_copy_remaining(
+        self,
+        source_ensemble: Ensemble,
+        target_ensemble: Ensemble,
+        group: str,
+        realizations: npt.NDArray[np.int_],
+        data: npt.NDArray[np.float64],
+    ) -> None:
+        params_to_update = [
+            f"{param.group_name}:{param.param_name}"
+            for param in self.scalars
+            if param.update
+        ]
+        df = source_ensemble.load_parameters_scalar(
+            scalar_name=group, realizations=realizations
+        )
+        df_updates = pl.DataFrame(
+            {
+                "realization": realizations,
+                **{col: data[i, :] for i, col in enumerate(params_to_update)},
+                **{
+                    f"{col}.transformed": [
+                        self.hash_group_key[col].distribution.trans(v)
+                        for v in data[i, :]
+                    ]
+                    for i, col in enumerate(params_to_update)
+                },
+            }
+        )
+        df = df.update(df_updates, on="realization")
+        target_ensemble.save_parameters_scalar(group, realizations, df)
+
+    def __len__(self) -> int:
+        return len(self.scalars)
+
+    def read_from_runpath(
+        self,
+        run_path: Path,
+        real_nr: int,
+        iteration: int,
+    ) -> None:
+        """
+        forward_init is not supported, so this returns None for the moment
+        """
+        return None
+
+    def should_use_log_scale(self, key: str) -> bool:
+        return key in self.hash_group_key and isinstance(
+            self.hash_group_key[key].distribution,
+            TransLogNormalSettings | TransLogUnifSettings,
+        )
+
+    def write_to_runpath(
+        self, run_path: Path, real_nr: int, ensemble: Ensemble
+    ) -> dict[str, dict[str, float]] | None:
+        """
+        This function is responsible for converting the parameter
+        from the internal ert format to the format the forward model
+        expects
+        """
+        df = ensemble.load_parameters_scalar(
+            scalar_name=self.name, realizations=np.array([real_nr])
+        ).select(pl.col("^.*\\.transformed$"))
+        data_dict = df.rename(
+            {col: col.replace(".transformed", "") for col in df.columns}
+        ).to_dict()
+        data: dict[str, dict[str, float]] = defaultdict(dict)
+        for key, values in data_dict.items():
+            group_name, param_name = key.split(":")
+            value = values[0]
+            param = self.hash_group_key[key]
+            log_value: float | None = None
+            if param.input_source == DataSource.SAMPLED and isinstance(
+                param.distribution, TransLogNormalSettings | TransLogUnifSettings
+            ):
+                log_value = math.log(value, 10)
+
+            # Build the nested dictionary {group: {key:value}, LOG10_group:{key:log_value}}
+            data[group_name][param_name] = value
+            if log_value:
+                data[f"LOG10_{group_name}"][param_name] = log_value
+
+        outfiles: dict[str, tuple[str, str]] = {}
+        for group_name, params in self.groups.items():
+            for param in params:
+                if param.template_file is not None and param.output_file is not None:
+                    outfiles[group_name] = (param.template_file, param.output_file)
+                    break
+        for group_name, (template_file, output_file) in outfiles.items():
+            target_file = substitute_runpath_name(
+                output_file, real_nr, ensemble.iteration
+            )
+            if target_file.startswith("/"):
+                target_file = target_file[1:]
+            (run_path / target_file).parent.mkdir(exist_ok=True,
parents=True) + template_file_path = ( + ensemble.experiment.mount_point / Path(template_file).name + ) + with open(template_file_path, encoding="utf-8") as f: + template = f.read() + for key, value in data[group_name].items(): + template = template.replace(f"<{key}>", f"{value:.6g}") + with open(run_path / target_file, "w", encoding="utf-8") as f: + f.write(template) + return data + + @classmethod + def from_config_list(cls, gen_kw_list: list[list[str]]) -> Self: + errors = [] + scalars: list[ScalarParameter] = [] + + for gen_kw in gen_kw_list: + gen_kw_key = gen_kw[0] + positional_args, options = parse_config(gen_kw, 4) + forward_init = str_to_bool(options.get("FORWARD_INIT", "FALSE")) + init_file = _get_abs_path(options.get("INIT_FILES")) + update_parameter = str_to_bool(options.get("UPDATE", "TRUE")) + + if len(positional_args) == 2: + parameter_file = _get_abs_path(positional_args[1]) + parameter_file_context = positional_args[1] + template_file = None + output_file = None + elif len(positional_args) == 4: + output_file = positional_args[2] + parameter_file = _get_abs_path(positional_args[3]) + parameter_file_context = positional_args[3] + template_file = _get_abs_path(positional_args[1]) + if not os.path.isfile(template_file): + errors.append( + ConfigValidationError.with_context( + f"No such template file: {template_file}", + positional_args[1], + ) + ) + elif Path(template_file).stat().st_size == 0: + token = ( + parameter_file_context.token + if hasattr(parameter_file_context, "token") + else parameter_file_context + ) + ConfigWarning.deprecation_warn( + f"The template file for GEN_KW ({gen_kw_key}) is empty. 
If templating is not needed, you " + f"can use GEN_KW with just the distribution file instead: GEN_KW {gen_kw_key} {token}", + positional_args[1], + ) + + else: + raise ConfigValidationError( + f"Unexpected positional arguments: {positional_args}" + ) + if not os.path.isfile(parameter_file): + errors.append( + ConfigValidationError.with_context( + f"No such parameter file: {parameter_file}", + parameter_file_context, + ) + ) + elif Path(parameter_file).stat().st_size == 0: + errors.append( + ConfigValidationError.with_context( + f"No parameters specified in {parameter_file}", + parameter_file_context, + ) + ) + + if forward_init: + errors.append( + ConfigValidationError.with_context( + "Loading GEN_KW from files created by the forward " + "model is not supported.", + gen_kw, + ) + ) + + if init_file: + errors.append( + ConfigValidationError.with_context( + "Loading GEN_KW from init_files is not longer supported!", + gen_kw, + ) + ) + if errors: + raise ConfigValidationError.from_collected(errors) + + with open(parameter_file, encoding="utf-8") as file: + for line_number, item in enumerate(file): + item = item.split("--")[0] # remove comments + if item.strip(): # only lines with content + items = item.split() + if len(items) < 2: + errors.append( + ConfigValidationError.with_context( + f"Too few values on line {line_number} in parameter file {parameter_file}", + gen_kw, + ) + ) + else: + scalars.append( + ScalarParameter( + param_name=items[0], + input_source=DataSource.SAMPLED, + group_name=gen_kw_key, + distribution=get_distribution(items[1], items[2:]), + template_file=template_file, + output_file=output_file, + update=update_parameter, + ) + ) + + if gen_kw_key == "PRED" and update_parameter: + ConfigWarning.warn( + "GEN_KW PRED used to hold a special meaning and be " + "excluded from being updated.\n If the intention was " + "to exclude this from updates, set UPDATE:FALSE.\n", + gen_kw[0], + ) + if errors: + raise ConfigValidationError.from_collected(errors) + 
+ return cls( + scalars=scalars, + ) diff --git a/src/ert/dark_storage/common.py b/src/ert/dark_storage/common.py index 6e28f8072c5..8bdbbd726af 100644 --- a/src/ert/dark_storage/common.py +++ b/src/ert/dark_storage/common.py @@ -1,4 +1,3 @@ -import contextlib import logging from collections.abc import Callable, Iterator from typing import Any @@ -7,10 +6,15 @@ import numpy as np import pandas as pd import polars as pl -import xarray as xr from polars.exceptions import ColumnNotFoundError -from ert.config import Field, GenDataConfig, GenKwConfig +from ert.config import GenDataConfig, GenKwConfig, ScalarParameters +from ert.config.field import Field +from ert.config.scalar_parameter import ( + SCALAR_PARAMETERS_NAME, + TransLogNormalSettings, + TransLogUnifSettings, +) from ert.storage import Ensemble, Experiment, Storage logger = logging.getLogger(__name__) @@ -60,6 +64,23 @@ def ensemble_parameters(storage: Storage, ensemble_id: UUID) -> list[dict[str, A "labels": [], } ) + case ScalarParameters(name=name, scalars=scalars): + for param in scalars: + param_list.append( + { + "name": ( + f"LOG10_{param.group_name}:{param.param_name}" + if isinstance( + param.distribution, + TransLogUnifSettings | TransLogNormalSettings, + ) + else f"{param.group_name}:{param.param_name}" + ), + "userdata": {"data_origin": "SCALAR_PARAMETERS"}, + "dimensionality": 1, + "labels": [], + } + ) case Field(name=name, nx=nx, ny=ny, nz=nz): param_list.append( { @@ -178,45 +199,37 @@ def data_for_key( except (ValueError, KeyError, ColumnNotFoundError): return pd.DataFrame() - group = key.split(":")[0] - parameters = ensemble.experiment.parameter_configuration - if group in parameters and isinstance(gen_kw := parameters[group], GenKwConfig): - dataframes = [] - - with contextlib.suppress(KeyError): - try: - data = ensemble.load_parameters(group) - except ValueError as err: - print(f"Could not load parameter {group}: {err}") - return pd.DataFrame() - - da = data["transformed_values"] - assert 
isinstance(da, xr.DataArray) - da["names"] = np.char.add(f"{gen_kw.name}:", da["names"].astype(np.str_)) - df = da.to_dataframe().unstack(level="names") - df.columns = df.columns.droplevel() - for parameter in df.columns: - if gen_kw.shouldUseLogScale(parameter.split(":")[1]): - df[f"LOG10_{parameter}"] = np.log10(df[parameter]) - dataframes.append(df) - if not dataframes: - return pd.DataFrame() - - dataframe = pd.concat(dataframes, axis=1) - dataframe.columns.name = None - dataframe.index.name = "Realization" - - data = dataframe.sort_index(axis=1) - if data.empty or key not in data: - return pd.DataFrame() - data = data[key].to_frame().dropna() - data.columns = pd.Index([0]) - try: - return data.astype(float) - except ValueError: - return data - - return pd.DataFrame() + if ( + scalars := ensemble.experiment.parameter_configuration.get( + SCALAR_PARAMETERS_NAME, None + ) + ) is None: + return pd.DataFrame() + assert isinstance(scalars, ScalarParameters) + try: + data = ensemble.load_parameters_scalar( + SCALAR_PARAMETERS_NAME, key=f"{key}.transformed" + ) + except KeyError: + return pd.DataFrame() + dataframes = [] + da = data.rename({"realization": "realizations", f"{key}.transformed": key}) + df = da.to_pandas().reset_index().set_index("realizations") + if scalars.should_use_log_scale(key): + df[f"LOG10_{key}"] = np.log10(df[key]) + dataframes.append(df) + dataframe = pd.concat(dataframes, axis=1) + dataframe.columns.name = None + dataframe.index.name = "Realization" + data = dataframe.sort_index(axis=1) + if data.empty or key not in data: + return pd.DataFrame() + data = data[key].to_frame().dropna() + data.columns = pd.Index([0]) + try: + return data.astype(float) + except ValueError: + return data def _get_observations( diff --git a/src/ert/enkf_main.py b/src/ert/enkf_main.py index a9705ed8a8b..71b27d98f7b 100644 --- a/src/ert/enkf_main.py +++ b/src/ert/enkf_main.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any +import numpy 
as np import orjson import pandas as pd import xarray as xr @@ -24,6 +25,7 @@ GenKwConfig, ModelConfig, ParameterConfig, + ScalarParameters, SurfaceConfig, ) from .config.ert_config import create_forward_model_json @@ -127,6 +129,8 @@ def _manifest_to_json(ensemble: Ensemble, iens: int, iter: int) -> dict[str, Any manifest = {} # Add expected parameter files to manifest for param_config in ensemble.experiment.parameter_configuration.values(): + if isinstance(param_config, ScalarParameters): + continue assert isinstance( param_config, ExtParamConfig | GenKwConfig | Field | SurfaceConfig, @@ -162,29 +166,6 @@ def _seed_sequence(seed: int | None) -> int: return int_seed -def save_design_matrix_to_ensemble( - design_matrix_df: pd.DataFrame, - ensemble: Ensemble, - active_realizations: Iterable[int], - design_group_name: str = DESIGN_MATRIX_GROUP, -) -> None: - assert not design_matrix_df.empty - for realization_nr in active_realizations: - row = design_matrix_df.loc[realization_nr] - ds = xr.Dataset( - { - "values": ("names", list(row.values)), - "transformed_values": ("names", list(row.values)), - "names": list(row.keys()), - } - ) - ensemble.save_parameters( - design_group_name, - realization_nr, - ds, - ) - - @log_duration( logger, ) @@ -193,6 +174,7 @@ def sample_prior( active_realizations: Iterable[int], parameters: list[str] | None = None, random_seed: int | None = None, + design_matrix_df: pd.DataFrame | None = None, ) -> None: """This function is responsible for getting the prior into storage, in the case of GEN_KW we sample the data and store it, and if INIT_FILES @@ -211,13 +193,26 @@ def sample_prior( logger.info( f"Sampling parameter {config_node.name} for realizations {active_realizations}" ) - for realization_nr in active_realizations: + if isinstance(config_node, ScalarParameters): ds = config_node.sample_or_load( - realization_nr, - random_seed=random_seed, - ensemble_size=ensemble.ensemble_size, + active_realizations, + random_seed, + 
ensemble.ensemble_size, + design_matrix_df, ) - ensemble.save_parameters(parameter, realization_nr, ds) + ensemble.save_parameters_scalar( + parameter, np.array(active_realizations), ds + ) + else: + for realization_nr in active_realizations: + ds = config_node.sample_or_load( + realization_nr, + random_seed, + ensemble.ensemble_size, + design_matrix_df, + ) + assert isinstance(ds, xr.Dataset) + ensemble.save_parameters(parameter, realization_nr, ds) ensemble.refresh_ensemble_state() diff --git a/src/ert/gui/ertwidgets/models/ertsummary.py b/src/ert/gui/ertwidgets/models/ertsummary.py index 4801ff67aab..e1e84751297 100644 --- a/src/ert/gui/ertwidgets/models/ertsummary.py +++ b/src/ert/gui/ertwidgets/models/ertsummary.py @@ -1,6 +1,6 @@ from typing_extensions import TypedDict -from ert.config import ErtConfig, Field, GenKwConfig, SurfaceConfig +from ert.config import ErtConfig, Field, GenKwConfig, ScalarParameters, SurfaceConfig class ObservationCount(TypedDict): @@ -32,6 +32,10 @@ def getParameters(self) -> tuple[list[str], int]: case SurfaceConfig(ncol=ncol, nrow=nrow): parameters.append(f"{key} ({ncol}, {nrow})") count += len(config) + case ScalarParameters(): + for group in config.groups: + parameters.append(f"{group} ({len(config.groups[group])})") + count += len(config.groups[group]) return sorted(parameters, key=lambda k: k.lower()), count def getObservations(self) -> list[ObservationCount]: diff --git a/src/ert/run_models/ensemble_experiment.py b/src/ert/run_models/ensemble_experiment.py index c369291fec0..c56be39d581 100644 --- a/src/ert/run_models/ensemble_experiment.py +++ b/src/ert/run_models/ensemble_experiment.py @@ -7,8 +7,13 @@ import numpy as np -from ert.config import ConfigValidationError, HookRuntime -from ert.enkf_main import sample_prior, save_design_matrix_to_ensemble +from ert.config import ( + ConfigValidationError, + HookRuntime, + ParameterConfig, + ScalarParameters, +) +from ert.enkf_main import sample_prior from ert.ensemble_evaluator 
import EvaluatorServerConfig from ert.storage import Ensemble, Experiment, Storage from ert.trace import tracer @@ -82,17 +87,21 @@ def run_experiment( self.restart = restart # If design matrix is present, we try to merge design matrix parameters # to the experiment parameters and set new active realizations - parameters_config = self._parameter_configuration design_matrix = self._design_matrix - design_matrix_group = None if design_matrix is not None: - try: - parameters_config, design_matrix_group = ( - design_matrix.merge_with_existing_parameters(parameters_config) - ) - except ConfigValidationError as exc: - raise ErtRunError(str(exc)) from exc - + parameters_config: list[ParameterConfig] = [] + for param in self._parameter_configuration: + if isinstance(param, ScalarParameters): + try: + new_scalar_config = ( + design_matrix.merge_with_existing_parameters(param) + ) + parameters_config.append(new_scalar_config) + except ConfigValidationError as exc: + raise ErtRunError(str(exc)) from exc + else: + parameters_config.append(param) + self._parameter_configuration = parameters_config if not restart: self.run_workflows( HookRuntime.PRE_EXPERIMENT, @@ -100,11 +109,7 @@ def run_experiment( ) self.experiment = self._storage.create_experiment( name=self.experiment_name, - parameters=( - [*parameters_config, design_matrix_group] - if design_matrix_group is not None - else parameters_config - ), + parameters=self._parameter_configuration, observations=self._observations, responses=self._response_configuration, ) @@ -127,21 +132,15 @@ def run_experiment( np.array(self.active_realizations, dtype=bool), ensemble=self.ensemble, ) - sample_prior( self.ensemble, np.where(self.active_realizations)[0], random_seed=self.random_seed, + design_matrix_df=( + design_matrix.design_matrix_df if design_matrix is not None else None + ), ) - if design_matrix_group is not None and design_matrix is not None: - save_design_matrix_to_ensemble( - design_matrix.design_matrix_df, - 
self.ensemble, - np.where(self.active_realizations)[0], - design_matrix_group.name, - ) - self._evaluate_and_postprocess( run_args, self.ensemble, diff --git a/src/ert/run_models/ensemble_smoother.py b/src/ert/run_models/ensemble_smoother.py index ac00eb7f879..a9b46f75ec6 100644 --- a/src/ert/run_models/ensemble_smoother.py +++ b/src/ert/run_models/ensemble_smoother.py @@ -7,9 +7,16 @@ import numpy as np -from ert.config import ErtConfig, ESSettings, HookRuntime, UpdateSettings +from ert.config import ( + ErtConfig, + ESSettings, + HookRuntime, + ParameterConfig, + ScalarParameters, + UpdateSettings, +) from ert.config.parsing.config_errors import ConfigValidationError -from ert.enkf_main import sample_prior, save_design_matrix_to_ensemble +from ert.enkf_main import sample_prior from ert.ensemble_evaluator import EvaluatorServerConfig from ert.storage import Storage from ert.trace import tracer @@ -77,16 +84,20 @@ def run_experiment( ) -> None: self.log_at_startup() - parameters_config = self._parameter_configuration - design_matrix = self._design_matrix - design_matrix_group = None - if design_matrix is not None: - try: - parameters_config, design_matrix_group = ( - design_matrix.merge_with_existing_parameters(parameters_config) - ) - except ConfigValidationError as exc: - raise ErtRunError(str(exc)) from exc + if self._design_matrix is not None: + parameters_config: list[ParameterConfig] = [] + for param in self._parameter_configuration: + if isinstance(param, ScalarParameters): + try: + new_scalar_config = ( + self._design_matrix.merge_with_existing_parameters(param) + ) + parameters_config.append(new_scalar_config) + except ConfigValidationError as exc: + raise ErtRunError(str(exc)) from exc + else: + parameters_config.append(param) + self._parameter_configuration = parameters_config self.restart = restart self.run_workflows( @@ -95,8 +106,7 @@ def run_experiment( ) ensemble_format = self.target_ensemble_format experiment = self._storage.create_experiment( - 
parameters=parameters_config - + ([design_matrix_group] if design_matrix_group else []), + parameters=self._parameter_configuration, observations=self._observations, responses=self._response_configuration, name=self.experiment_name, @@ -119,15 +129,13 @@ def run_experiment( prior, np.where(self.active_realizations)[0], random_seed=self.random_seed, + design_matrix_df=( + self._design_matrix.design_matrix_df + if self._design_matrix is not None + else None + ), ) - if design_matrix_group is not None and design_matrix is not None: - save_design_matrix_to_ensemble( - design_matrix.design_matrix_df, - prior, - np.where(self.active_realizations)[0], - design_matrix_group.name, - ) self._evaluate_and_postprocess( prior_args, prior, diff --git a/src/ert/run_models/multiple_data_assimilation.py b/src/ert/run_models/multiple_data_assimilation.py index 212ce978b80..a9376721942 100644 --- a/src/ert/run_models/multiple_data_assimilation.py +++ b/src/ert/run_models/multiple_data_assimilation.py @@ -13,9 +13,11 @@ ErtConfig, ESSettings, HookRuntime, + ParameterConfig, + ScalarParameters, UpdateSettings, ) -from ert.enkf_main import sample_prior, save_design_matrix_to_ensemble +from ert.enkf_main import sample_prior from ert.ensemble_evaluator import EvaluatorServerConfig from ert.storage import Ensemble, Storage from ert.trace import tracer @@ -106,17 +108,20 @@ def run_experiment( ) -> None: self.log_at_startup() - parameters_config = self._parameter_configuration - design_matrix = self._design_matrix - design_matrix_group = None - if design_matrix is not None: - try: - parameters_config, design_matrix_group = ( - design_matrix.merge_with_existing_parameters(parameters_config) - ) - except ConfigValidationError as exc: - raise ErtRunError(str(exc)) from exc - + if self._design_matrix is not None: + parameters_config: list[ParameterConfig] = [] + for param in self._parameter_configuration: + if isinstance(param, ScalarParameters): + try: + new_scalar_config = ( + 
self._design_matrix.merge_with_existing_parameters(param) + ) + parameters_config.append(new_scalar_config) + except ConfigValidationError as exc: + raise ErtRunError(str(exc)) from exc + else: + parameters_config.append(param) + self._parameter_configuration = parameters_config self.restart = restart if self.restart_run: id = self.prior_ensemble_id @@ -143,8 +148,7 @@ def run_experiment( ) sim_args = {"weights": self._relative_weights} experiment = self._storage.create_experiment( - parameters=parameters_config - + ([design_matrix_group] if design_matrix_group else []), + parameters=self._parameter_configuration, observations=self._observations, responses=self._response_configuration, simulation_arguments=sim_args, @@ -169,15 +173,13 @@ def run_experiment( prior, np.where(self.active_realizations)[0], random_seed=self.random_seed, + design_matrix_df=( + self._design_matrix.design_matrix_df + if self._design_matrix is not None + else None + ), ) - if design_matrix_group is not None and design_matrix is not None: - save_design_matrix_to_ensemble( - design_matrix.design_matrix_df, - prior, - np.where(self.active_realizations)[0], - design_matrix_group.name, - ) self._evaluate_and_postprocess( prior_args, prior, diff --git a/src/ert/storage/local_ensemble.py b/src/ert/storage/local_ensemble.py index b295ceba03e..87fa14e5512 100644 --- a/src/ert/storage/local_ensemble.py +++ b/src/ert/storage/local_ensemble.py @@ -16,7 +16,8 @@ from pydantic import BaseModel from typing_extensions import TypedDict, deprecated -from ert.config import GenKwConfig +from ert.config import ScalarParameters +from ert.config.scalar_parameter import SCALAR_PARAMETERS_NAME from ert.storage.mode import BaseMode, Mode, require_write from .realization_storage_state import RealizationStorageState @@ -274,6 +275,8 @@ def is_initalized(self) -> list[int]: Returns the realization numbers with parameters """ + scalar_path = self._path / f"{_escape_filename(SCALAR_PARAMETERS_NAME)}.parquet" + return [ i 
for i in range(self.ensemble_size) @@ -282,6 +285,11 @@ def is_initalized(self) -> list[int]: self._realization_dir(i) / (_escape_filename(parameter.name) + ".nc") ).exists() + or ( + scalar_path.exists() + if isinstance(parameter, ScalarParameters) + else False + ) for parameter in self.experiment.parameter_configuration.values() if not parameter.forward_init ) @@ -433,6 +441,7 @@ def _parameters_exist_for_realization(realization: int) -> bool: if not self.experiment.parameter_configuration: return True path = self._realization_dir(realization) + return all( (path / (_escape_filename(parameter) + ".nc")).exists() for parameter in self.experiment.parameter_configuration @@ -563,6 +572,39 @@ def load_parameters( parameters : Dataset Loaded xarray Dataset with parameters. """ + for param in self.experiment.parameter_configuration.values(): + if isinstance(param, ScalarParameters) and group in param.groups: + if isinstance(realizations, int): + realizations = np.array([realizations]) + df = self.load_parameters_scalar( + scalar_name=param.name, realizations=realizations, group=group + ) + reals = df.select("realization").to_numpy().flatten() + dataset = [] + param_cols = [ + col + for col in df.columns + if not col.endswith(".transformed") and col != "realization" + ] + for real in reals: + df_single = df.filter(pl.col("realization") == real) + raw_values = df_single.select(param_cols).row(0) + transformed_values = df_single.select( + [f"{col}.transformed" for col in param_cols] + ).row(0) + ds_single = xr.Dataset( + { + "values": ("names", np.array(raw_values)), + "transformed_values": ( + "names", + np.array(transformed_values), + ), + "names": ("names", [p.split(":")[1] for p in param_cols]), + }, + coords={"realizations": real}, + ) + dataset.append(ds_single) + return xr.combine_nested(dataset, concat_dim="realizations") return self._load_dataset(group, realizations) @@ -762,23 +804,34 @@ def load_all_gen_kw_data( realizations = np.flatnonzero(ens_mask) 
dataframes = [] - gen_kws = [ + scalar_groups = [ config for config in self.experiment.parameter_configuration.values() - if isinstance(config, GenKwConfig) + if isinstance(config, ScalarParameters) ] - if group: - gen_kws = [config for config in gen_kws if config.name == group] - for key in gen_kws: + for scalars in scalar_groups: with contextlib.suppress(KeyError): - da = self.load_parameters(key.name, realizations)["transformed_values"] - assert isinstance(da, xr.DataArray) - da["names"] = np.char.add(f"{key.name}:", da["names"].astype(np.str_)) - df = da.to_dataframe().unstack(level="names") - df.columns = df.columns.droplevel() - for parameter in df.columns: - if key.shouldUseLogScale(parameter.split(":")[1]): - df[f"LOG10_{parameter}"] = np.log10(df[parameter]) + df = self.load_parameters_scalar( + scalar_name=scalars.name, realizations=realizations, group=group + ) + if df.height != len(realizations): + missing_realizations = set(realizations) - set( + df["realization"].to_list() + ) + raise IndexError( + f"Missing realizations in {missing_realizations} in {scalars.name}" + ) + df = df.select( + ["realization"] + + [col for col in df.columns if col.endswith(".transformed")] + ).rename( + { + col: col.replace(".transformed", "") + for col in df.columns + if col != "realization" + } + ) + df = df.to_pandas().set_index("realization") dataframes.append(df) if not dataframes: return pd.DataFrame() @@ -789,6 +842,42 @@ def load_all_gen_kw_data( return dataframe.sort_index(axis=1) + @require_write + def save_parameters_scalar( + self, + scalar_name: str, + realizations: npt.NDArray[np.int_], + dataframe: pl.DataFrame, + ) -> None: + path = self._path / f"{_escape_filename(scalar_name)}.parquet" + self._storage._to_parquet_transaction( + path, dataframe.filter(pl.col("realization").is_in(realizations)) + ) + + def load_parameters_scalar( + self, + scalar_name: str = SCALAR_PARAMETERS_NAME, + realizations: npt.NDArray[np.int_] | None = None, + group: str | None = 
None, + key: str | None = None, + ) -> pl.DataFrame: + scalar_path = self._path / f"{_escape_filename(scalar_name)}.parquet" + if not scalar_path.exists(): + raise KeyError(f"No scalar dataset in storage for ensemble {self.name}") + df_lazy = pl.scan_parquet(scalar_path) + if realizations is not None: + df_lazy = df_lazy.filter(pl.col("realization").is_in(realizations)) + if key is not None: + if key not in df_lazy.columns: + raise KeyError(f"No such key {key} in scalar parameters!") + df_lazy = df_lazy.select(["realization", key]) + if group is not None: + df_lazy = df_lazy.select( + ["realization"] + + [col for col in df_lazy.columns if col.startswith(f"{group}:")] + ) + return df_lazy.collect() + @require_write def save_parameters( self, @@ -888,9 +977,11 @@ def get_parameter_state( ) -> dict[str, RealizationStorageState]: path = self._realization_dir(realization) return { - e: RealizationStorageState.PARAMETERS_LOADED - if (path / (_escape_filename(e) + ".nc")).exists() - else RealizationStorageState.UNDEFINED + e: ( + RealizationStorageState.PARAMETERS_LOADED + if (path / (_escape_filename(e) + ".nc")).exists() + else RealizationStorageState.UNDEFINED + ) for e in self.experiment.parameter_configuration } @@ -900,9 +991,11 @@ def get_response_state( response_configs = self.experiment.response_configuration path = self._realization_dir(realization) return { - e: RealizationStorageState.RESPONSES_LOADED - if (path / f"{e}.parquet").exists() - else RealizationStorageState.UNDEFINED + e: ( + RealizationStorageState.RESPONSES_LOADED + if (path / f"{e}.parquet").exists() + else RealizationStorageState.UNDEFINED + ) for e in response_configs } @@ -1127,9 +1220,11 @@ def all_parameters_and_gen_data(self) -> pl.DataFrame | None: params_wide = pl.concat( [ - pdf.sort("realization").drop("realization") - if i > 0 - else pdf.sort("realization") + ( + pdf.sort("realization").drop("realization") + if i > 0 + else pdf.sort("realization") + ) for i, pdf in 
enumerate(param_dfs) ], how="horizontal", diff --git a/src/ert/storage/local_experiment.py b/src/ert/storage/local_experiment.py index d6898ed1c58..9516fb1aab5 100644 --- a/src/ert/storage/local_experiment.py +++ b/src/ert/storage/local_experiment.py @@ -13,7 +13,14 @@ import xtgeo from pydantic import BaseModel -from ert.config import ExtParamConfig, Field, GenKwConfig, ResponseConfig, SurfaceConfig +from ert.config import ( + ExtParamConfig, + Field, + GenKwConfig, + ResponseConfig, + ScalarParameters, + SurfaceConfig, +) from ert.config.parsing.context_values import ContextBoolEncoder from ert.storage.mode import BaseMode, Mode, require_write @@ -25,6 +32,7 @@ _KNOWN_PARAMETER_TYPES = { GenKwConfig.__name__: GenKwConfig, SurfaceConfig.__name__: SurfaceConfig, + ScalarParameters.__name__: ScalarParameters, Field.__name__: Field, ExtParamConfig.__name__: ExtParamConfig, } diff --git a/tests/ert/ui_tests/cli/analysis/test_design_matrix.py b/tests/ert/ui_tests/cli/analysis/test_design_matrix.py index 7e52ef403d5..f9b181fff23 100644 --- a/tests/ert/ui_tests/cli/analysis/test_design_matrix.py +++ b/tests/ert/ui_tests/cli/analysis/test_design_matrix.py @@ -9,7 +9,6 @@ import pandas as pd import pytest -from ert.cli.main import ErtCliError from ert.config import ErtConfig from ert.mode_definitions import ( ENSEMBLE_EXPERIMENT_MODE, @@ -100,13 +99,13 @@ def _evaluate(coeffs, x): config_path = ErtConfig.from_file("poly.ert").config_path with open_storage(storage_path) as storage: experiment = storage.get_experiment_by_name("test-experiment") - params = experiment.get_ensemble_by_name("default").load_parameters( - "DESIGN_MATRIX" - )["values"] - np.testing.assert_array_equal(params[:, 0], [str(idx) for idx in a_values]) - np.testing.assert_array_equal(params[:, 1], 5 * ["cat1"] + 5 * ["cat2"]) - np.testing.assert_array_equal(params[:, 2], 10 * ["1"]) - np.testing.assert_array_equal(params[:, 3], 10 * ["2"]) + df = 
experiment.get_ensemble_by_name("default").load_parameters_scalar() + np.testing.assert_array_equal(df["DESIGN_MATRIX:a"].to_numpy(), a_values) + np.testing.assert_array_equal( + df["DESIGN_MATRIX:category"].to_numpy(), 5 * ["cat1"] + 5 * ["cat2"] + ) + np.testing.assert_array_equal(df["DESIGN_MATRIX:b"].to_numpy(), 10 * [1]) + np.testing.assert_array_equal(df["DESIGN_MATRIX:c"].to_numpy(), 10 * [2]) real_0_iter_0_parameters_json_path = ( Path(config_path) / "poly_out" / "realization-0" / "iter-0" / "parameters.json" @@ -125,14 +124,7 @@ def _evaluate(coeffs, x): @pytest.mark.usefixtures("copy_poly_case") -@pytest.mark.parametrize( - "default_values, error_msg", - [ - ([["b", 1], ["c", 2]], None), - ([["b", 1]], "Overlapping parameter names found in design matrix!"), - ], -) -def test_run_poly_example_with_design_matrix_and_genkw_merge(default_values, error_msg): +def test_run_poly_example_with_design_matrix_and_param_merge(): num_realizations = 10 a_values = list(range(num_realizations)) _create_design_matrix( @@ -143,7 +135,7 @@ def test_run_poly_example_with_design_matrix_and_genkw_merge(default_values, err "a": a_values, } ), - pd.DataFrame(default_values), + pd.DataFrame([["b", 1], ["c", 2]]), ) with open("poly.ert", "w", encoding="utf-8") as fout: @@ -201,16 +193,6 @@ def _evaluate(coeffs, x): os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH, ) - if error_msg: - with pytest.raises(ErtCliError, match=error_msg): - run_cli( - ENSEMBLE_EXPERIMENT_MODE, - "--disable-monitoring", - "poly.ert", - "--experiment-name", - "test-experiment", - ) - return run_cli( ENSEMBLE_EXPERIMENT_MODE, "--disable-monitoring", @@ -221,12 +203,11 @@ def _evaluate(coeffs, x): storage_path = ErtConfig.from_file("poly.ert").ens_path with open_storage(storage_path) as storage: experiment = storage.get_experiment_by_name("test-experiment") - params = experiment.get_ensemble_by_name("default").load_parameters("COEFFS")[ - "values" - ] - 
np.testing.assert_array_equal(params[:, 0], a_values) - np.testing.assert_array_equal(params[:, 1], 10 * [1]) - np.testing.assert_array_equal(params[:, 2], 10 * [2]) + df = experiment.get_ensemble_by_name("default").load_parameters_scalar() + np.testing.assert_array_equal(df["COEFFS:a"].to_numpy(), a_values) + np.testing.assert_array_equal(df["COEFFS:b"].to_numpy(), 10 * [1]) + np.testing.assert_array_equal(df["COEFFS:c"].to_numpy(), 10 * [2]) + with open("poly_out/realization-0/iter-0/my_output", encoding="utf-8") as f: output = [line.strip() for line in f.readlines()] assert output[0] == "a: 0" @@ -313,14 +294,12 @@ def _evaluate(coeffs, x): storage_path = ErtConfig.from_file("poly.ert").ens_path with open_storage(storage_path) as storage: experiment = storage.get_experiment_by_name("test-experiment") - params = experiment.get_ensemble_by_name("default").load_parameters( - "DESIGN_MATRIX" - )["values"] - np.testing.assert_array_equal(params[:, 0], a_values) - np.testing.assert_array_equal(params[:, 1], 10 * [1]) - np.testing.assert_array_equal(params[:, 2], 10 * [2]) - np.testing.assert_array_equal(params[:, 3], 10 * [3]) - np.testing.assert_array_equal(params[:, 4], 10 * [4]) + df = experiment.get_ensemble_by_name("default").load_parameters_scalar() + np.testing.assert_array_equal(df["DESIGN_MATRIX:a"].to_numpy(), a_values) + np.testing.assert_array_equal(df["DESIGN_MATRIX:b"].to_numpy(), 10 * [1]) + np.testing.assert_array_equal(df["DESIGN_MATRIX:c"].to_numpy(), 10 * [2]) + np.testing.assert_array_equal(df["DESIGN_MATRIX:d"].to_numpy(), 10 * [3]) + np.testing.assert_array_equal(df["DESIGN_MATRIX:g"].to_numpy(), 10 * [4]) @pytest.mark.usefixtures("copy_poly_case") @@ -416,11 +395,15 @@ def _evaluate(coeffs, x): ensemble = experiment.get_ensemble_by_name(f"{ensemble_name}{i}") # coeffs_a should be different in all realizations - coeffs_a = ensemble.load_parameters("COEFFS_A")["values"].values.flatten() + df = ensemble.load_parameters_scalar() + coeffs_a = 
df["COEFFS_A:a"].to_numpy() + + # coeffs_a = ensemble.load_parameters("COEFFS_A")["values"].values.flatten() if coeffs_a_previous is not None: assert not np.array_equal(coeffs_a, coeffs_a_previous) coeffs_a_previous = coeffs_a # ceffs_b should be overridden by design matrix and be the same for all realizations - coeffs_b = ensemble.load_parameters("COEFFS_B")["values"].values.flatten() + # coeffs_b = ensemble.load_parameters("COEFFS_B")["values"].values.flatten() + coeffs_b = df["COEFFS_B:b"].to_numpy() assert values == pytest.approx(coeffs_b, 0.0001) diff --git a/tests/ert/unit_tests/analysis/test_es_update.py b/tests/ert/unit_tests/analysis/test_es_update.py index 5b220df79dc..4b8b374863b 100644 --- a/tests/ert/unit_tests/analysis/test_es_update.py +++ b/tests/ert/unit_tests/analysis/test_es_update.py @@ -8,11 +8,7 @@ import xtgeo from tabulate import tabulate -from ert.analysis import ( - ErtAnalysisError, - ObservationStatus, - smoother_update, -) +from ert.analysis import ErtAnalysisError, ObservationStatus, smoother_update from ert.analysis._es_update import ( _load_observations_and_responses, _load_param_ensemble_array, @@ -708,7 +704,7 @@ def test_temporary_parameter_storage_with_inactive_fields( for iens in range(ensemble_size): prior_ensemble.save_parameters(param_group, iens, fields[iens]) - realization_list = list(range(ensemble_size)) + realization_list = np.array(list(range(ensemble_size))) param_ensemble_array = _load_param_ensemble_array( prior_ensemble, param_group, realization_list ) @@ -727,7 +723,7 @@ def test_temporary_parameter_storage_with_inactive_fields( ) _save_param_ensemble_array_to_disk( - ensemble, param_ensemble_array, param_group, realization_list + prior_ensemble, ensemble, param_ensemble_array, param_group, realization_list ) for iens in range(prior_ensemble.ensemble_size): ds = xr.open_dataset( diff --git a/tests/ert/unit_tests/config/test_gen_kw_config.py b/tests/ert/unit_tests/config/test_gen_kw_config.py index 
88f65dbca4a..9db4114e675 100644 --- a/tests/ert/unit_tests/config/test_gen_kw_config.py +++ b/tests/ert/unit_tests/config/test_gen_kw_config.py @@ -9,13 +9,17 @@ from ert.config import ( ConfigValidationError, ConfigWarning, + DataSource, EnsembleConfig, ErtConfig, GenKwConfig, + ScalarParameter, + ScalarParameters, ) from ert.config.gen_kw_config import TransformFunctionDefinition from ert.config.parsing import ConfigKeys, ContextString from ert.config.parsing.file_context_token import FileContextToken +from ert.config.scalar_parameter import get_distribution from ert.enkf_main import create_run_path, sample_prior @@ -215,10 +219,12 @@ def test_gen_kw_is_log_or_not( ert_config = ErtConfig.from_file("config.ert") - gen_kw_config = ert_config.ensemble_config.parameter_configs["KW_NAME"] - assert isinstance(gen_kw_config, GenKwConfig) - assert gen_kw_config.shouldUseLogScale("MY_KEYWORD") is expect_log - assert gen_kw_config.shouldUseLogScale("Non-existent-keyword") is False + scalars_config = ert_config.ensemble_config["KW_NAME"] + assert isinstance(scalars_config, ScalarParameters) + assert scalars_config.should_use_log_scale("KW_NAME:MY_KEYWORD") is expect_log + assert ( + scalars_config.should_use_log_scale("KW_NAME:Non-existent-keyword") is False + ) experiment_id = storage.create_experiment( parameters=ert_config.ensemble_config.parameter_configuration ) @@ -461,66 +467,63 @@ def test_gen_kw_objects_equal(tmpdir): ert_config = ErtConfig.from_file("config.ert") - g1 = ert_config.ensemble_config["KW_NAME"] - assert g1.transform_functions[0].name == "MY_KEYWORD" - - tfd = TransformFunctionDefinition( - name="MY_KEYWORD", param_name="UNIFORM", values=["1", "2"] - ) + assert ert_config.ensemble_config.scalars is not None + g1 = ert_config.ensemble_config.scalars["KW_NAME"][0] + assert g1.param_name == "MY_KEYWORD" - g2 = GenKwConfig( - name="KW_NAME", - forward_init=False, - template_file="template.txt", - transform_function_definitions=[tfd], - 
output_file="kw.txt", - update=True, - ) - assert g1.name == g2.name - assert os.path.abspath(g1.template_file) == os.path.abspath(g2.template_file) - assert ( - g1.transform_function_definitions[0] == g2.transform_function_definitions[0] - ) - assert g1.output_file == g2.output_file - assert g1.forward_init_file == g2.forward_init_file - - g3 = GenKwConfig( - name="KW_NAME2", - forward_init=False, - template_file="template.txt", - transform_function_definitions=[tfd], + g2 = ScalarParameter( + param_name="MY_KEYWORD", + group_name="KW_NAME", + input_source=DataSource.SAMPLED, + distribution=get_distribution("UNIFORM", ["1", "2"]), + template_file=os.path.abspath("template.txt"), output_file="kw.txt", update=True, ) - g4 = GenKwConfig( - name="KW_NAME", - forward_init=False, - template_file="empty.txt", - transform_function_definitions=[tfd], - output_file="kw.txt", - update=True, - ) - g5 = GenKwConfig( - name="KW_NAME", - forward_init=False, - template_file="template.txt", - transform_function_definitions=[], - output_file="kw.txt", - update=True, - ) - g6 = GenKwConfig( - name="KW_NAME", - forward_init=False, - template_file="template.txt", - transform_function_definitions=[], - output_file="empty.txt", - update=True, - ) - assert g1 != g3 - assert g1 != g4 - assert g1 != g5 - assert g1 != g6 + assert g1 == g2 + + # tfd = TransformFunctionDefinition( + # name="MY_KEYWORD", param_name="UNIFORM", values=["1", "2"] + # ) + + # g3 = GenKwConfig( + # name="KW_NAME2", + # forward_init=False, + # template_file="template.txt", + # transform_function_definitions=[tfd], + # output_file="kw.txt", + # update=True, + # ) + # g4 = GenKwConfig( + # name="KW_NAME", + # forward_init=False, + # template_file="empty.txt", + # transform_function_definitions=[tfd], + # output_file="kw.txt", + # update=True, + # ) + # g5 = GenKwConfig( + # name="KW_NAME", + # forward_init=False, + # template_file="template.txt", + # transform_function_definitions=[], + # output_file="kw.txt", + # 
update=True, + # ) + # g6 = GenKwConfig( + # name="KW_NAME", + # forward_init=False, + # template_file="template.txt", + # transform_function_definitions=[], + # output_file="empty.txt", + # update=True, + # ) + + # assert g1 != g3 + # assert g1 != g4 + # assert g1 != g5 + # assert g1 != g6 @pytest.mark.usefixtures("use_tmpdir") @@ -725,7 +728,7 @@ def test_validation_triangular_distribution( "3", "-1", "2", - "NBINS 0.0 must be a positive integer larger than 1 for DERRF distributed parameter MY_KEYWORD", + "NBINS 0 must be a positive integer larger than 1 for DERRF distribution", ), ( "DERRF", @@ -734,7 +737,7 @@ def test_validation_triangular_distribution( "3", "-1", "2", - "NBINS -5.0 must be a positive integer larger than 1 for DERRF distributed parameter MY_KEYWORD", + "NBINS -5 must be a positive integer larger than 1 for DERRF distribution", ), ( "DERRF", @@ -743,7 +746,7 @@ def test_validation_triangular_distribution( "3", "-1", "2", - "NBINS 1.5 must be a positive integer larger than 1 for DERRF distributed parameter MY_KEYWORD", + "NBINS 1 must be a positive integer larger than 1 for DERRF distribution", ), ( "DERRF", @@ -752,7 +755,7 @@ def test_validation_triangular_distribution( "-1", "-1", "2", - "The minimum 3.0 must be less than the maximum -1.0 for DERRF distributed parameter MY_KEYWORD", + "The minimum 3.0 must be less than the maximum -1.0 for DERRF distribution", ), ( "DERRF", @@ -761,7 +764,7 @@ def test_validation_triangular_distribution( "1", "-1", "2", - "The minimum 1.0 must be less than the maximum 1.0 for DERRF distributed parameter MY_KEYWORD", + "The minimum 1.0 must be less than the maximum 1.0 for DERRF distribution", ), ( "DERRF", @@ -770,7 +773,7 @@ def test_validation_triangular_distribution( "3", "-1", "0", - "The width 0.0 must be greater than 0 for DERRF distributed parameter MY_KEYWORD", + "The width 0.0 must be greater than 0 for DERRF distribution", ), ( "DERRF", @@ -779,7 +782,7 @@ def 
test_validation_triangular_distribution( "3", "-1", "-2", - "The width -2.0 must be greater than 0 for DERRF distributed parameter MY_KEYWORD", + "The width -2.0 must be greater than 0 for DERRF distribution", ), ( "DERRF", diff --git a/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py b/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py index 96d0d47a446..cf4fe70ef86 100644 --- a/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py +++ b/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py @@ -4,6 +4,7 @@ from urllib.parse import quote import httpx +import numpy as np import pandas as pd import polars as pl import pytest @@ -11,7 +12,12 @@ from pandas.testing import assert_frame_equal from starlette.testclient import TestClient -from ert.config import GenKwConfig, SummaryConfig +from ert.config import ( + SCALAR_PARAMETERS_NAME, + GenKwConfig, + ScalarParameters, + SummaryConfig, +) from ert.dark_storage import enkf from ert.dark_storage.app import app from ert.gui.tools.plot.plot_api import PlotApi, PlotApiKeyDefinition @@ -258,13 +264,9 @@ def test_plot_api_handles_empty_gen_kw(api_and_storage): name = "" experiment = storage.create_experiment( parameters=[ - GenKwConfig( - name=key, - forward_init=False, + ScalarParameters( update=False, - template_file=None, - output_file=None, - transform_function_definitions=[], + scalars=[], ), ], responses=[], @@ -272,17 +274,15 @@ def test_plot_api_handles_empty_gen_kw(api_and_storage): ) ensemble = storage.create_ensemble(experiment.id, ensemble_size=10) assert api.data_for_key(str(ensemble.id), key).empty - ensemble.save_parameters( - key, - 1, - xr.Dataset( - { - "values": ("names", [1.0]), - "transformed_values": ("names", [1.0]), - "names": [name], - } + + ensemble.save_parameters_scalar( + scalar_name=SCALAR_PARAMETERS_NAME, + realizations=np.array([1.0]), + dataframe=pl.DataFrame( + [{"realization": 1, f"{key}:{name}": 1.0, f"{key}:{name}.transformed": 1.0}] ), ) + assert api.data_for_key(str(ensemble.id), key 
+ ":" + name).to_csv() == dedent( """\ Realization,0 diff --git a/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py b/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py index 68605b52833..1c3f7703a46 100644 --- a/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py +++ b/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py @@ -2,12 +2,7 @@ import pandas as pd import pytest -from ert.config import ( - DESIGN_MATRIX_GROUP, - DesignMatrix, - GenKwConfig, -) -from ert.config.gen_kw_config import TransformFunctionDefinition +from ert.config import DESIGN_MATRIX_GROUP, DesignMatrix def _create_design_matrix(xls_path, design_matrix_df, default_sheet_df) -> DesignMatrix: @@ -81,8 +76,8 @@ def test_merge_multiple_occurrences( design_matrix_1.merge_with_other(design_matrix_2) else: design_matrix_1.merge_with_other(design_matrix_2) - design_params = design_matrix_1.parameter_configuration - assert all(param in design_params for param in ("a", "b", "c", "d")) + design_params = {param.param_name for param in design_matrix_1.scalars} + assert {"a", "b", "c", "d"} <= design_params assert design_matrix_1.active_realizations == [True, True, True] df = design_matrix_1.design_matrix_df np.testing.assert_equal(df["a"], np.array([1, 2, 3])) @@ -91,80 +86,81 @@ def test_merge_multiple_occurrences( np.testing.assert_equal(df["d"], np.array([0, 2, 0])) -@pytest.mark.parametrize( - "parameters, error_msg", - [ - pytest.param( - {"COEFFS": ["a", "b"]}, - "", - id="genkw_replaced", - ), - pytest.param( - {"COEFFS": ["a"]}, - "Overlapping parameter names found in design matrix!", - id="ValidationErrorOverlapping", - ), - pytest.param( - {"COEFFS": ["aa", "bb"], "COEFFS2": ["cc", "dd"]}, - "", - id="DESIGN_MATRIX_GROUP", - ), - pytest.param( - {"COEFFS": ["a", "b"], "COEFFS2": ["a", "b"]}, - "Multiple overlapping groups with design matrix found in existing parameters!", - id="ValidationErrorMultipleGroups", - ), - ], -) -def 
test_read_and_merge_with_existing_parameters(tmp_path, parameters, error_msg): - extra_genkw_config = [] - if parameters: - for group_name in parameters: - extra_genkw_config.append( - GenKwConfig( - name=group_name, - forward_init=False, - template_file="", - transform_function_definitions=[ - TransformFunctionDefinition(param, "UNIFORM", [0, 1]) - for param in parameters[group_name] - ], - output_file="kw.txt", - update=True, - ) - ) +# TODO refactor this test +# @pytest.mark.parametrize( +# "parameters, error_msg", +# [ +# pytest.param( +# {"COEFFS": ["a", "b"]}, +# "", +# id="genkw_replaced", +# ), +# pytest.param( +# {"COEFFS": ["a"]}, +# "Overlapping parameter names found in design matrix!", +# id="ValidationErrorOverlapping", +# ), +# pytest.param( +# {"COEFFS": ["aa", "bb"], "COEFFS2": ["cc", "dd"]}, +# "", +# id="DESIGN_MATRIX_GROUP", +# ), +# pytest.param( +# {"COEFFS": ["a", "b"], "COEFFS2": ["a", "b"]}, +# "Multiple overlapping groups with design matrix found in existing parameters!", +# id="ValidationErrorMultipleGroups", +# ), +# ], +# ) +# def test_read_and_merge_with_existing_parameters(tmp_path, parameters, error_msg): +# extra_genkw_config = [] +# if parameters: +# for group_name in parameters: +# extra_genkw_config.append( +# GenKwConfig( +# name=group_name, +# forward_init=False, +# template_file="", +# transform_function_definitions=[ +# TransformFunctionDefinition(param, "UNIFORM", [0, 1]) +# for param in parameters[group_name] +# ], +# output_file="kw.txt", +# update=True, +# ) +# ) - realizations = [0, 1, 2] - design_path = tmp_path / "design_matrix.xlsx" - design_matrix_df = pd.DataFrame( - { - "REAL": realizations, - "a": [1, 2, 3], - "b": [0, 2, 0], - } - ) - default_sheet_df = pd.DataFrame([["a", 1], ["b", 4]]) - with pd.ExcelWriter(design_path) as xl_write: - design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") - default_sheet_df.to_excel( - xl_write, index=False, sheet_name="DefaultValues", header=False - ) - 
design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") - if error_msg: - with pytest.raises(ValueError, match=error_msg): - design_matrix.merge_with_existing_parameters(extra_genkw_config) - elif len(parameters) == 1: - new_config_parameters, design_group = ( - design_matrix.merge_with_existing_parameters(extra_genkw_config) - ) - assert len(new_config_parameters) == 0 - assert design_group.name == "COEFFS" - elif len(parameters) == 2: - new_config_parameters, design_group = ( - design_matrix.merge_with_existing_parameters(extra_genkw_config) - ) - assert len(new_config_parameters) == 2 - assert design_group.name == DESIGN_MATRIX_GROUP +# realizations = [0, 1, 2] +# design_path = tmp_path / "design_matrix.xlsx" +# design_matrix_df = pd.DataFrame( +# { +# "REAL": realizations, +# "a": [1, 2, 3], +# "b": [0, 2, 0], +# } +# ) +# default_sheet_df = pd.DataFrame([["a", 1], ["b", 4]]) +# with pd.ExcelWriter(design_path) as xl_write: +# design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") +# default_sheet_df.to_excel( +# xl_write, index=False, sheet_name="DefaultValues", header=False +# ) +# design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") +# if error_msg: +# with pytest.raises(ValueError, match=error_msg): +# design_matrix.merge_with_existing_parameters(extra_genkw_config) +# elif len(parameters) == 1: +# new_config_parameters, design_group = ( +# design_matrix.merge_with_existing_parameters(extra_genkw_config) +# ) +# assert len(new_config_parameters) == 0 +# assert design_group.name == "COEFFS" +# elif len(parameters) == 2: +# new_config_parameters, design_group = ( +# design_matrix.merge_with_existing_parameters(extra_genkw_config) +# ) +# assert len(new_config_parameters) == 2 +# assert design_group.name == DESIGN_MATRIX_GROUP def test_reading_design_matrix(tmp_path): @@ -185,8 +181,8 @@ def test_reading_design_matrix(tmp_path): xl_write, index=False, sheet_name="DefaultValues", header=False ) 
design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") - design_params = design_matrix.parameter_configuration - assert all(param in design_params for param in ("a", "b", "c", "one", "d")) + design_params = {param.param_name for param in design_matrix.scalars} + assert {"a", "b", "c", "one", "d"} <= design_params assert design_matrix.active_realizations == [True, True, False, False, True] diff --git a/tests/ert/unit_tests/snapshots/test_libres_facade/test_load_scalar_parameters/scalars_all.csv b/tests/ert/unit_tests/snapshots/test_libres_facade/test_load_scalar_parameters/scalars_all.csv new file mode 100644 index 00000000000..ff39dadf738 --- /dev/null +++ b/tests/ert/unit_tests/snapshots/test_libres_facade/test_load_scalar_parameters/scalars_all.csv @@ -0,0 +1,11 @@ +PARAM_2:MY_KEYWORD,PARAM_2:MY_KEYWORD.transformed,PARAM_1:MY_KEYWORD,PARAM_1:MY_KEYWORD.transformed,realization,DESIGN_MATRIX:a,DESIGN_MATRIX:a.transformed,DESIGN_MATRIX:b,DESIGN_MATRIX:b.transformed,DESIGN_MATRIX:c,DESIGN_MATRIX:c.transformed +-0.48677945088,0.205687224111,2.02382324933,0.951712622616,0,1,1,2,2,3,3 +0.744828911704,0.591306186436,2.179252943964,0.96681539802,1,1,1,2,2,3,3 +-0.85099062697,0.157538708116,-0.429000754451,0.215755232892,2,1,1,2,2,3,3 +-0.626861144252,0.184236268792,-0.253395530954,0.251177822059,3,1,1,2,2,3,3 +0.57278053459,0.5207189104,-0.040752832821,0.3046116233,4,1,1,2,2,3,3 +-0.053010647457,0.301204677657,0.597632059504,0.530832208672,5,1,1,2,2,3,3 +0.071995090139,0.337829128151,1.4938087438,0.855829381644,6,1,1,2,2,3,3 +-0.254379469819,0.250958092719,-0.851373098655,0.157500183905,7,1,1,2,2,3,3 +1.051069477499,0.713487977259,-0.043133214794,0.303946867014,8,1,1,2,2,3,3 +-1.392107852998,0.120766052392,1.965731434227,0.944789763389,9,1,1,2,2,3,3 diff --git a/tests/ert/unit_tests/storage/test_parameter_sample_types.py b/tests/ert/unit_tests/storage/test_parameter_sample_types.py index 9e68be85503..f39f39c8ed1 100644 --- 
a/tests/ert/unit_tests/storage/test_parameter_sample_types.py +++ b/tests/ert/unit_tests/storage/test_parameter_sample_types.py @@ -10,8 +10,14 @@ from resdata.geometry import Surface from ert import LibresFacade -from ert.config import ConfigValidationError, ErtConfig, GenKwConfig -from ert.config.gen_kw_config import TransformFunctionDefinition +from ert.config import ( + ConfigValidationError, + DataSource, + ErtConfig, + ScalarParameter, + ScalarParameters, + get_distribution, +) from ert.enkf_main import sample_prior from ert.storage import open_storage @@ -225,44 +231,80 @@ def test_that_first_three_parameters_sampled_snapshot(tmpdir, storage): [4, 5, 10], ) @pytest.mark.parametrize( - "template, prior", + "template, scalars", [ ( "MY_KEYWORD \nMY_SECOND_KEYWORD ", [ - TransformFunctionDefinition("MY_KEYWORD", "NORMAL", [0, 1]), - TransformFunctionDefinition("MY_SECOND_KEYWORD", "NORMAL", [0, 1]), + ScalarParameter( + param_name="MY_KEYWORD", + group_name="KW_NAME", + input_source=DataSource.SAMPLED, + distribution=get_distribution("NORMAL", ["0", "1"]), + template_file="template.txt", + output_file="kw.txt", + update=True, + ), + ScalarParameter( + param_name="MY_SECOND_KEYWORD", + group_name="KW_NAME", + input_source=DataSource.SAMPLED, + distribution=get_distribution("NORMAL", ["0", "1"]), + template_file="template.txt", + output_file="kw.txt", + update=True, + ), ], ), ( "MY_KEYWORD ", - [TransformFunctionDefinition("MY_KEYWORD", "NORMAL", [0, 1])], + [ + ScalarParameter( + param_name="MY_KEYWORD", + group_name="KW_NAME", + input_source=DataSource.SAMPLED, + distribution=get_distribution("NORMAL", ["0", "1"]), + template_file="template.txt", + output_file="kw.txt", + update=True, + ), + ], ), ( "MY_FIRST_KEYWORD \nMY_KEYWORD ", [ - TransformFunctionDefinition("MY_FIRST_KEYWORD", "NORMAL", [0, 1]), - TransformFunctionDefinition("MY_KEYWORD", "NORMAL", [0, 1]), + ScalarParameter( + param_name="MY_FIRST_KEYWORD", + group_name="KW_NAME", + 
input_source=DataSource.SAMPLED, + distribution=get_distribution("NORMAL", ["0", "1"]), + template_file="template.txt", + output_file="kw.txt", + update=True, + ), + ScalarParameter( + param_name="MY_KEYWORD", + group_name="KW_NAME", + input_source=DataSource.SAMPLED, + distribution=get_distribution("NORMAL", ["0", "1"]), + template_file="template.txt", + output_file="kw.txt", + update=True, + ), ], ), ], ) def test_that_sampling_is_fixed_from_name( - tmpdir, storage, template, prior, num_realisations + tmpdir, storage, template, scalars, num_realisations ): """ Testing that the order and number of parameters is not relevant for the values, only that name of the parameter and the global seed determine the values. """ with tmpdir.as_cwd(): - conf = GenKwConfig( - name="KW_NAME", - forward_init=False, - template_file="template.txt", - transform_function_definitions=prior, - output_file="kw.txt", - update=True, - ) + conf = ScalarParameters(scalars=scalars) + with open("template.txt", "w", encoding="utf-8") as fh: fh.writelines(template) fs = storage.create_ensemble( @@ -395,20 +437,32 @@ def write_file(fname, contents): "GEN_KW KW_NAME template.txt kw.txt prior.txt INIT_FILES:custom_param%d", "MY_KEYWORD 1.31", [("custom_param0", "MY_KEYWORD 1.31")], - does_not_raise(), + # does_not_raise(), # This is the expected behaviour, but it is not implemented + pytest.raises( + ConfigValidationError, + match="Loading GEN_KW from init_files is not longer supported", + ), ), ( "GEN_KW KW_NAME template.txt kw.txt prior.txt INIT_FILES:custom_param%d", "MY_KEYWORD 1.31", [("custom_param0", "1.31")], - does_not_raise(), + # does_not_raise(), # This is the expected behaviour, but it is not implemented + pytest.raises( + ConfigValidationError, + match="Loading GEN_KW from init_files is not longer supported", + ), ), ( "GEN_KW KW_NAME template.txt kw.txt prior.txt INIT_FILES:custom_param0", "Not expecting a file", [], + # pytest.raises( + # ConfigValidationError, match="Loading GEN_KW 
from files requires %d" + # ), # This is the expected behaviour, but it is not implemented pytest.raises( - ConfigValidationError, match="Loading GEN_KW from files requires %d" + ConfigValidationError, + match="Loading GEN_KW from init_files is not longer supported", ), ), ], @@ -529,83 +583,84 @@ def test_gen_kw_outfile_will_use_paths(tmpdir, storage, relpath: str): assert os.path.exists(f"simulations/realization-0/iter-0/{relpath}kw.txt") -@pytest.mark.usefixtures("set_site_config") -@pytest.mark.parametrize( - "config_str, expected, extra_files", - [ - ( - "GEN_KW KW_NAME template.txt kw.txt prior.txt INIT_FILES:custom_param%d", - "MY_KEYWORD 1.31\nMY_SECOND_KEYWORD 1.01", - [("custom_param0", "MY_SECOND_KEYWORD 1.01\nMY_KEYWORD 1.31")], - ), - ], -) -def test_that_order_of_input_in_user_input_is_abritrary_for_gen_kw_init_files( - tmpdir, config_str, expected, extra_files, storage -): - with tmpdir.as_cwd(): - config = dedent( - """ - JOBNAME my_name%d - NUM_REALIZATIONS 1 - """ - ) - config += config_str - with open("config.ert", mode="w", encoding="utf-8") as fh: - fh.writelines(config) - with open("template.txt", mode="w", encoding="utf-8") as fh: - fh.writelines( - "MY_KEYWORD \nMY_SECOND_KEYWORD " - ) - with open("prior.txt", mode="w", encoding="utf-8") as fh: - fh.writelines("MY_KEYWORD NORMAL 0 1\nMY_SECOND_KEYWORD NORMAL 0 1") - for fname, contents in extra_files: - write_file(fname, contents) - - create_runpath(storage, "config.ert") - assert ( - Path("simulations/realization-0/iter-0/kw.txt").read_text("utf-8") - == expected - ) - - -@pytest.mark.usefixtures("set_site_config") -@pytest.mark.parametrize("load_forward_init", [True, False]) -def test_gen_kw_forward_init(tmpdir, storage, load_forward_init): - with tmpdir.as_cwd(): - config = dedent( - """ - JOBNAME my_name%d - NUM_REALIZATIONS 1 - GEN_KW KW_NAME template.txt kw.txt prior.txt """ - f"""FORWARD_INIT:{load_forward_init!s} INIT_FILES:custom_param%d - """ - ) - with open("config.ert", mode="w", 
encoding="utf-8") as fh: - fh.writelines(config) - - with open("template.txt", mode="w", encoding="utf-8") as fh: - fh.writelines("MY_KEYWORD ") - with open("prior.txt", mode="w", encoding="utf-8") as fh: - fh.writelines("MY_KEYWORD NORMAL 0 1") - if not load_forward_init: - write_file("custom_param0", "1.31") - - if load_forward_init: - with pytest.raises( - ConfigValidationError, - match=( - "Loading GEN_KW from files created by " - "the forward model is not supported\\." - ), - ): - create_runpath(storage, "config.ert") - else: - _, fs = create_runpath(storage, "config.ert") - assert Path("simulations/realization-0/iter-0/kw.txt").exists() - value = ( - fs.load_parameters("KW_NAME", 0) - .sel(names="MY_KEYWORD")["values"] - .values - ) - assert value == 1.31 +# TODO remove or refactor as Scalars will not use forward_init +# @pytest.mark.usefixtures("set_site_config") +# @pytest.mark.parametrize( +# "config_str, expected, extra_files", +# [ +# ( +# "GEN_KW KW_NAME template.txt kw.txt prior.txt INIT_FILES:custom_param%d", +# "MY_KEYWORD 1.31\nMY_SECOND_KEYWORD 1.01", +# [("custom_param0", "MY_SECOND_KEYWORD 1.01\nMY_KEYWORD 1.31")], +# ), +# ], +# ) +# def test_that_order_of_input_in_user_input_is_abritrary_for_gen_kw_init_files( +# tmpdir, config_str, expected, extra_files, storage +# ): +# with tmpdir.as_cwd(): +# config = dedent( +# """ +# JOBNAME my_name%d +# NUM_REALIZATIONS 1 +# """ +# ) +# config += config_str +# with open("config.ert", mode="w", encoding="utf-8") as fh: +# fh.writelines(config) +# with open("template.txt", mode="w", encoding="utf-8") as fh: +# fh.writelines( +# "MY_KEYWORD \nMY_SECOND_KEYWORD " +# ) +# with open("prior.txt", mode="w", encoding="utf-8") as fh: +# fh.writelines("MY_KEYWORD NORMAL 0 1\nMY_SECOND_KEYWORD NORMAL 0 1") +# for fname, contents in extra_files: +# write_file(fname, contents) + +# create_runpath(storage, "config.ert") +# assert ( +# Path("simulations/realization-0/iter-0/kw.txt").read_text("utf-8") +# == expected +# 
) + + +# @pytest.mark.usefixtures("set_site_config") +# @pytest.mark.parametrize("load_forward_init", [True, False]) +# def test_gen_kw_forward_init(tmpdir, storage, load_forward_init): +# with tmpdir.as_cwd(): +# config = dedent( +# """ +# JOBNAME my_name%d +# NUM_REALIZATIONS 1 +# GEN_KW KW_NAME template.txt kw.txt prior.txt """ +# f"""FORWARD_INIT:{load_forward_init!s} INIT_FILES:custom_param%d +# """ +# ) +# with open("config.ert", mode="w", encoding="utf-8") as fh: +# fh.writelines(config) + +# with open("template.txt", mode="w", encoding="utf-8") as fh: +# fh.writelines("MY_KEYWORD ") +# with open("prior.txt", mode="w", encoding="utf-8") as fh: +# fh.writelines("MY_KEYWORD NORMAL 0 1") +# if not load_forward_init: +# write_file("custom_param0", "1.31") + +# if load_forward_init: +# with pytest.raises( +# ConfigValidationError, +# match=( +# "Loading GEN_KW from files created by " +# "the forward model is not supported\\." +# ), +# ): +# create_runpath(storage, "config.ert") +# else: +# _, fs = create_runpath(storage, "config.ert") +# assert Path("simulations/realization-0/iter-0/kw.txt").exists() +# value = ( +# fs.load_parameters("KW_NAME", 0) +# .sel(names="MY_KEYWORD")["values"] +# .values +# ) +# assert value == 1.31 diff --git a/tests/ert/unit_tests/test_libres_facade.py b/tests/ert/unit_tests/test_libres_facade.py index 1f8646a7ca4..82442bd814b 100644 --- a/tests/ert/unit_tests/test_libres_facade.py +++ b/tests/ert/unit_tests/test_libres_facade.py @@ -2,14 +2,14 @@ from datetime import datetime, timedelta from textwrap import dedent -import numpy as np +import polars as pl import pytest from pandas import ExcelWriter from pandas.core.frame import DataFrame from resdata.summary import Summary -from ert.config import DESIGN_MATRIX_GROUP, DesignMatrix, ErtConfig -from ert.enkf_main import sample_prior, save_design_matrix_to_ensemble +from ert.config import ErtConfig +from ert.enkf_main import sample_prior from ert.libres_facade import LibresFacade from 
ert.storage import open_storage @@ -218,17 +218,33 @@ def test_get_observations(tmpdir): assert "FOPR_1" in facade.get_observations() -def test_load_gen_kw_not_sorted(storage, tmpdir, snapshot): +# TODO refactor this & use design matrix too!!! +def test_load_scalar_parameters(storage, tmpdir, snapshot): """ This test checks two things, loading multiple parameters and loading log parameters. """ with tmpdir.as_cwd(): + design_path = "design_matrix.xlsx" + ensemble_size = 10 + design_matrix_df = DataFrame( + { + "a": [1.0] * ensemble_size, + "b": [2.0] * ensemble_size, + "c": [3.0] * ensemble_size, + } + ) + with ExcelWriter(design_path) as xl_write: + design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet") + DataFrame().to_excel( + xl_write, index=False, sheet_name="DefaultValues", header=False + ) config = dedent( """ NUM_REALIZATIONS 10 GEN_KW PARAM_2 template.txt kw.txt prior.txt GEN_KW PARAM_1 template.txt kw.txt prior.txt + DESIGN_MATRIX design_matrix.xlsx DESIGN_SHEET:DesignSheet DEFAULT_SHEET:DefaultValues RANDOM_SEED 1234 """ ) @@ -240,65 +256,82 @@ def test_load_gen_kw_not_sorted(storage, tmpdir, snapshot): fh.writelines("MY_KEYWORD LOGUNIF 0.1 1") ert_config = ErtConfig.from_file("config.ert") - - experiment_id = storage.create_experiment( - parameters=ert_config.ensemble_config.parameter_configuration - ) + assert ert_config.ensemble_config.scalars is not None + assert ert_config.analysis_config.design_matrix is not None + params = [ + ert_config.analysis_config.design_matrix.merge_with_existing_parameters( + ert_config.ensemble_config.scalars + ) + ] + experiment_id = storage.create_experiment(parameters=params) ensemble_size = 10 ensemble = storage.create_ensemble( experiment_id, name="default", ensemble_size=ensemble_size ) - - sample_prior(ensemble, range(ensemble_size), random_seed=1234) - - data = ensemble.load_all_gen_kw_data() - snapshot.assert_match(data.round(12).to_csv(), "gen_kw_unsorted") - - -@pytest.mark.parametrize( - 
"reals, expect_error", - [ - pytest.param( - list(range(10)), - False, - id="correct_active_realizations", - ), - pytest.param([10, 11], True, id="incorrect_active_realizations"), - ], -) -def test_save_parameters_to_storage_from_design_dataframe( - tmp_path, reals, expect_error -): - design_path = tmp_path / "design_matrix.xlsx" - ensemble_size = 10 - a_values = np.random.default_rng().uniform(-5, 5, 10) - b_values = np.random.default_rng().uniform(-5, 5, 10) - c_values = np.random.default_rng().uniform(-5, 5, 10) - design_matrix_df = DataFrame({"a": a_values, "b": b_values, "c": c_values}) - with ExcelWriter(design_path) as xl_write: - design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") - DataFrame().to_excel( - xl_write, index=False, sheet_name="DefaultValues", header=False + sample_prior( + ensemble, + range(ensemble_size), + random_seed=1234, + design_matrix_df=ert_config.analysis_config.design_matrix.design_matrix_df, ) - design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") - with open_storage(tmp_path / "storage", mode="w") as storage: - experiment_id = storage.create_experiment( - parameters=[design_matrix.parameter_configuration] + assert ( + ensemble.load_parameters_scalar(key="DESIGN_MATRIX:a")[ + "DESIGN_MATRIX:a" + ].to_list() + == [1.0] * 10 ) - ensemble = storage.create_ensemble( - experiment_id, name="default", ensemble_size=ensemble_size + data = ensemble.load_parameters_scalar() + snapshot.assert_match( + data.with_columns(pl.col(pl.Float64).round(12)).write_csv(), + "scalars_all.csv", ) - if expect_error: - with pytest.raises(KeyError): - save_design_matrix_to_ensemble( - design_matrix.design_matrix_df, ensemble, reals - ) - else: - save_design_matrix_to_ensemble( - design_matrix.design_matrix_df, ensemble, reals - ) - params = ensemble.load_parameters(DESIGN_MATRIX_GROUP)["values"] - all(params.names.values == ["a", "b", "c"]) - np.testing.assert_array_almost_equal(params[:, 0], a_values) - 
np.testing.assert_array_almost_equal(params[:, 1], b_values) - np.testing.assert_array_almost_equal(params[:, 2], c_values) + + +# TODO remove this test +# @pytest.mark.parametrize( +# "reals, expect_error", +# [ +# pytest.param( +# list(range(10)), +# False, +# id="correct_active_realizations", +# ), +# pytest.param([10, 11], True, id="incorrect_active_realizations"), +# ], +# ) +# def test_save_parameters_to_storage_from_design_dataframe( +# tmp_path, reals, expect_error +# ): +# design_path = tmp_path / "design_matrix.xlsx" +# ensemble_size = 10 +# a_values = np.random.default_rng().uniform(-5, 5, 10) +# b_values = np.random.default_rng().uniform(-5, 5, 10) +# c_values = np.random.default_rng().uniform(-5, 5, 10) +# design_matrix_df = DataFrame({"a": a_values, "b": b_values, "c": c_values}) +# with ExcelWriter(design_path) as xl_write: +# design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") +# DataFrame().to_excel( +# xl_write, index=False, sheet_name="DefaultValues", header=False +# ) +# design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") +# with open_storage(tmp_path / "storage", mode="w") as storage: +# experiment_id = storage.create_experiment( +# parameters=[design_matrix.parameter_configuration] +# ) +# ensemble = storage.create_ensemble( +# experiment_id, name="default", ensemble_size=ensemble_size +# ) +# if expect_error: +# with pytest.raises(KeyError): +# save_design_matrix_to_ensemble( +# design_matrix.design_matrix_df, ensemble, reals +# ) +# else: +# save_design_matrix_to_ensemble( +# design_matrix.design_matrix_df, ensemble, reals +# ) +# params = ensemble.load_parameters(DESIGN_MATRIX_GROUP)["values"] +# all(params.names.values == ["a", "b", "c"]) +# np.testing.assert_array_almost_equal(params[:, 0], a_values) +# np.testing.assert_array_almost_equal(params[:, 1], b_values) +# np.testing.assert_array_almost_equal(params[:, 2], c_values) diff --git a/tests/ert/unit_tests/test_run_path_creation.py 
b/tests/ert/unit_tests/test_run_path_creation.py index 1e82978ee9d..fcaf0d19a20 100644 --- a/tests/ert/unit_tests/test_run_path_creation.py +++ b/tests/ert/unit_tests/test_run_path_creation.py @@ -6,6 +6,7 @@ import numpy as np import orjson +import polars as pl import pytest import xtgeo @@ -14,7 +15,7 @@ ConfigValidationError, ErtConfig, Field, - GenKwConfig, + ScalarParameters, SurfaceConfig, ) from ert.enkf_main import create_run_path, sample_prior @@ -629,21 +630,34 @@ def test_assert_ertcase_replaced_in_runpath(placeholder, make_run_path): def save_zeros(prior_ensemble, num_realizations, dim_size): parameter_configs = prior_ensemble.experiment.parameter_configuration for parameter, config_node in parameter_configs.items(): - for realization_nr in range(num_realizations): - if isinstance(config_node, SurfaceConfig): - config_node.save_parameters( - prior_ensemble, parameter, realization_nr, np.zeros(dim_size**2) - ) - elif isinstance(config_node, Field): - config_node.save_parameters( - prior_ensemble, parameter, realization_nr, np.zeros(dim_size**3) - ) - elif isinstance(config_node, GenKwConfig): - config_node.save_parameters( - prior_ensemble, parameter, realization_nr, np.zeros(1) - ) - else: - raise ValueError(f"unexpected {config_node}") + if isinstance(config_node, ScalarParameters): + scalar_values = {} + for parameter in config_node.scalars: + scalar_values[f"{parameter.group_name}:{parameter.param_name}"] = [ + 0.0 + ] * num_realizations + scalar_values[ + f"{parameter.group_name}:{parameter.param_name}.transformed" + ] = [0.0] * num_realizations + + df = pl.DataFrame( + {"realization": list(range(num_realizations)), **scalar_values} + ) + prior_ensemble.save_parameters_scalar( + "SCALAR_PARAMETERS", np.arange(num_realizations), df + ) + else: + for realization_nr in range(num_realizations): + if isinstance(config_node, SurfaceConfig): + config_node.save_parameters( + prior_ensemble, parameter, realization_nr, np.zeros(dim_size**2) + ) + elif 
isinstance(config_node, Field): + config_node.save_parameters( + prior_ensemble, parameter, realization_nr, np.zeros(dim_size**3) + ) + else: + raise ValueError(f"unexpected {config_node}") @pytest.mark.usefixtures("use_tmpdir") @@ -692,8 +706,8 @@ def test_when_manifest_files_are_written_forward_model_ok_succeeds(storage, itr) FIELD PORO0 PARAMETER field1.roff INIT_FILES:field1_init.roff FORWARD_INIT:TRUE FIELD PORO1 PARAMETER field2.roff INIT_FILES:%dinit.roff - GEN_KW GEN0 gen0.txt INIT_FILES:%dgen_init.txt - GEN_KW GEN1 template.txt gen_parameter.txt gen1.txt INIT_FILES:%dgen_init.txt + GEN_KW GEN0 gen0.txt + GEN_KW GEN1 template.txt gen_parameter.txt gen1.txt """ ) ) From 1ca37bc940c1566e88fbd5e15d3b427e9cacdf86 Mon Sep 17 00:00:00 2001 From: xjules Date: Wed, 19 Mar 2025 15:49:54 +0100 Subject: [PATCH 2/5] Add storage migration for ScalarParameters --- src/ert/storage/local_storage.py | 3 ++- tests/ert/unit_tests/storage/migration/test_version_1.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ert/storage/local_storage.py b/src/ert/storage/local_storage.py index 9ef4d3348bb..ba435d31a36 100644 --- a/src/ert/storage/local_storage.py +++ b/src/ert/storage/local_storage.py @@ -456,6 +456,7 @@ def _migrate(self, version: int) -> None: to7, to8, to9, + to10, ) try: @@ -500,7 +501,7 @@ def _migrate(self, version: int) -> None: elif version < _LOCAL_STORAGE_VERSION: migrations = list( - enumerate([to2, to3, to4, to5, to6, to7, to8, to9], start=1) + enumerate([to2, to3, to4, to5, to6, to7, to8, to9, to10], start=1) ) for from_version, migration in migrations[version - 1 :]: print(f"* Updating storage to version: {from_version + 1}") diff --git a/tests/ert/unit_tests/storage/migration/test_version_1.py b/tests/ert/unit_tests/storage/migration/test_version_1.py index d4a4f5922bb..cda5bb0ee31 100644 --- a/tests/ert/unit_tests/storage/migration/test_version_1.py +++ b/tests/ert/unit_tests/storage/migration/test_version_1.py @@ -24,4 +24,5 @@ 
def test_migrate_gen_kw(setup_case): param_info = json.loads( (experiment._path / "parameter.json").read_text(encoding="utf-8") ) - assert "COEFFS" in param_info + assert "SCALAR_PARAMETERS" in param_info + print(param_info["SCALAR_PARAMETERS"]) From 567926de2318b1b3f053f6f1d9985a7867a72b88 Mon Sep 17 00:00:00 2001 From: xjules Date: Wed, 19 Mar 2025 23:14:07 +0100 Subject: [PATCH 3/5] Add storage migration file --- src/ert/storage/migration/to10.py | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 src/ert/storage/migration/to10.py diff --git a/src/ert/storage/migration/to10.py b/src/ert/storage/migration/to10.py new file mode 100644 index 00000000000..200f21a5d31 --- /dev/null +++ b/src/ert/storage/migration/to10.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + +from ert.config import ( + SCALAR_PARAMETERS_NAME, + DataSource, + ScalarParameter, + ScalarParameters, + get_distribution, +) + +info = "Introducing ScalarParameters replacing GenKWConfig" + + +def migrate(path: Path) -> None: + for experiment in path.glob("experiments/*"): + with open(experiment / "parameter.json", encoding="utf-8") as fin: + parameters_json = json.load(fin) + gen_kw_params = { + config["name"]: config + for config in parameters_json.values() + if config["_ert_kind"] == "GenKwConfig" + } + scalars: list[ScalarParameter] = [] + for group, config in gen_kw_params.items(): + for param in config["transform_function_definitions"]: + scalars.append( + ScalarParameter( + param_name=param["name"], + group_name=group, + distribution=get_distribution( + param["param_name"], param["values"] + ), + input_source=DataSource.SAMPLED, + update=config["update"], + output_file=config["output_file"], + template_file=config["template_file"], + ) + ) + del parameters_json[group] + sc = ScalarParameters(scalars=scalars) + sc.save_experiment_data(experiment) + 
parameters_json[SCALAR_PARAMETERS_NAME] = sc.to_dict() + with open(experiment / "parameter.json", "w", encoding="utf-8") as fout: + fout.write(json.dumps(parameters_json, indent=4)) From 933789b1cd9605a913e1b236efe17b297c31d0cb Mon Sep 17 00:00:00 2001 From: xjules Date: Thu, 20 Mar 2025 15:21:32 +0100 Subject: [PATCH 4/5] Add __contains__ to ScalarParameters --- src/ert/config/scalar_parameter.py | 3 +++ src/ert/storage/local_ensemble.py | 4 +++- src/ert/storage/local_experiment.py | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/ert/config/scalar_parameter.py b/src/ert/config/scalar_parameter.py index 38699fe4338..e6612e372f7 100644 --- a/src/ert/config/scalar_parameter.py +++ b/src/ert/config/scalar_parameter.py @@ -370,6 +370,9 @@ def __getitem__(self, key: str) -> list[ScalarParameter]: return [self.hash_group_key[key]] return [] + def __contains__(self, group_name: str) -> bool: + return group_name in self.groups + @staticmethod def _sample_value( parameters: list[ScalarParameter], diff --git a/src/ert/storage/local_ensemble.py b/src/ert/storage/local_ensemble.py index 87fa14e5512..8063cb7f5d5 100644 --- a/src/ert/storage/local_ensemble.py +++ b/src/ert/storage/local_ensemble.py @@ -966,7 +966,9 @@ def save_response( self.experiment._update_response_keys(response_type, response_keys) def calculate_std_dev_for_parameter(self, parameter_group: str) -> xr.Dataset: - if parameter_group not in self.experiment.parameter_configuration: + if parameter_group not in self.experiment.parameter_configuration or not ( + self.experiment.scalars and parameter_group in self.experiment.scalars + ): raise ValueError(f"{parameter_group} is not registered to the experiment.") ds = self.load_parameters(parameter_group) diff --git a/src/ert/storage/local_experiment.py b/src/ert/storage/local_experiment.py index 9516fb1aab5..271b5f4fbf7 100644 --- a/src/ert/storage/local_experiment.py +++ b/src/ert/storage/local_experiment.py @@ -14,6 +14,7 @@ from 
pydantic import BaseModel from ert.config import ( + SCALAR_PARAMETERS_NAME, ExtParamConfig, Field, GenKwConfig, @@ -293,6 +294,13 @@ def parameter_configuration(self) -> dict[str, ParameterConfig]: params[data["name"]] = _KNOWN_PARAMETER_TYPES[param_type](**data) return params + @property + def scalars(self) -> ScalarParameters | None: + param = self.parameter_configuration.get(SCALAR_PARAMETERS_NAME, None) + if isinstance(param, ScalarParameters): + return param + return None + @property def response_configuration(self) -> dict[str, ResponseConfig]: responses = {} From 88e9413e16b05084ff0c1ae7a44b485dec7e4391 Mon Sep 17 00:00:00 2001 From: xjules Date: Thu, 20 Mar 2025 16:08:55 +0100 Subject: [PATCH 5/5] WIP: use scalars in es_udpate tests --- src/ert/config/scalar_parameter.py | 32 +++++++++++++++++++ .../ert/ui_tests/cli/test_field_parameter.py | 4 +-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/ert/config/scalar_parameter.py b/src/ert/config/scalar_parameter.py index e6612e372f7..1f276f08ca9 100644 --- a/src/ert/config/scalar_parameter.py +++ b/src/ert/config/scalar_parameter.py @@ -539,6 +539,38 @@ def save_updated_parameters_and_copy_remaining( df = df.update(df_updates, on="realization") target_ensemble.save_parameters_scalar(group, realizations, df) + def save_parameters_groups( + self, + ensemble: Ensemble, + groups: list[str], + realizations: npt.NDArray[np.int_], + data: npt.NDArray[np.float64], + ) -> None: + params_to_save = [ + f"{param.group_name}:{param.param_name}" + for group in groups + for param in self.groups[group] + ] + try: + df = ensemble.load_parameters_scalar(realizations=realizations) + except KeyError: + df = pl.DataFrame() + df_updates = pl.DataFrame( + { + "realization": realizations, + **{col: data[i, :] for i, col in enumerate(params_to_save)}, + **{ + f"{col}.transformed": [ + self.hash_group_key[col].distribution.trans(v) + for v in data[i, :] + ] + for i, col in enumerate(params_to_save) + }, + } + ) + df 
= df.update(df_updates, on="realization") + ensemble.save_parameters_scalar(self.name, realizations, df) + def __len__(self) -> int: return len(self.scalars) diff --git a/tests/ert/ui_tests/cli/test_field_parameter.py b/tests/ert/ui_tests/cli/test_field_parameter.py index 5e6b59a941f..8630603e03b 100644 --- a/tests/ert/ui_tests/cli/test_field_parameter.py +++ b/tests/ert/ui_tests/cli/test_field_parameter.py @@ -11,9 +11,7 @@ import resfo import xtgeo -from ert.analysis import ( - smoother_update, -) +from ert.analysis import smoother_update from ert.config import ErtConfig, ESSettings, UpdateSettings from ert.mode_definitions import ENSEMBLE_SMOOTHER_MODE from ert.storage import open_storage