Type everest/storage

frode-aarstad · frode-aarstad · commit 85b56ad79392 · 2025-02-26T09:44:26.000+01:00
diff --git a/.mypy.ini b/.mypy.ini
@@ -103,25 +103,6 @@ disable_error_code = dict-item,
                      name-defined
 
 
-[mypy-everest.everest_storage.*]
-disable_error_code = dict-item,
-                     no-untyped-def,
-                     call-overload,
-                     union-attr,
-                     no-untyped-call,
-                     var-annotated,
-                     index,
-                     call-arg,
-                     unused-ignore,
-                     arg-type,
-                     type-arg,
-                     type-var,
-                     assignment,
-                     typeddict-item,
-                     attr-defined,
-                     comparison-overlap,
-                     return-value,
-                     name-defined
 
 
 
diff --git a/src/everest/everest_storage.py b/src/everest/everest_storage.py
@@ -4,6 +4,7 @@
 import logging
 import os
 import traceback
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from functools import partial
 from pathlib import Path
@@ -24,7 +25,7 @@
 @dataclass
 class OptimalResult:
     batch: int
-    controls: list[Any]
+    controls: dict[str, Any]
     total_objective: float
 
 
@@ -155,7 +156,7 @@ def read_from_experiment(self, experiment: _OptimizerOnlyExperiment) -> None:
             self.batches.append(
                 BatchStorageData(
                     batch_id=info["batch_id"],
-                    **{
+                    **{  # type: ignore
                         df_name: try_read_df(
                             Path(ens.optimizer_mount_point) / f"{df_name}.parquet"
                         )
@@ -198,8 +199,8 @@ class _OptimizerOnlyExperiment:
     """
 
     def __init__(self, output_dir: Path) -> None:
-        self._output_dir = output_dir
-        self._ensembles = {}
+        self._output_dir: Path = output_dir
+        self._ensembles: dict[str, _OptimizerOnlyEnsemble] = {}
 
     @property
     def optimizer_mount_point(self) -> Path:
@@ -329,7 +330,7 @@ def _ropt_to_df(
         field: str,
         *,
         values: list[str],
-        select: list,
+        select: list[str],
     ) -> pl.DataFrame:
         df = pl.from_pandas(
             results.to_dataframe(field, select=values).reset_index(),
@@ -340,17 +341,23 @@ def _ropt_to_df(
         # retrieved from the everest configuration and were stored in the init
         # method. Here we replace the indices with those names:
         ropt_to_everest_names = {
-            "variable": self.data.controls["control_name"],
-            "objective": self.data.objective_functions["objective_name"],
+            "variable": self.data.controls["control_name"]
+            if self.data.controls is not None
+            else None,
+            "objective": self.data.objective_functions["objective_name"]
+            if self.data.objective_functions is not None
+            else None,
             "nonlinear_constraint": (
                 self.data.nonlinear_constraints["constraint_name"]
                 if self.data.nonlinear_constraints is not None
                 else None
             ),
-            "realization": self.data.realization_weights["realization"],
+            "realization": self.data.realization_weights["realization"]
+            if self.data.realization_weights is not None
+            else None,
         }
         df = df.with_columns(
-            pl.col(ropt_name).replace_strict(dict(enumerate(everest_names)))
+            pl.col(ropt_name).replace_strict(dict(enumerate(everest_names)))  # type: ignore
             for ropt_name, everest_names in ropt_to_everest_names.items()
             if ropt_name in select
         )
@@ -367,7 +374,7 @@ def write_to_output_dir(self) -> None:
         self.data.write_to_experiment(exp)
 
     @staticmethod
-    def check_for_deprecated_seba_storage(config_file: str):
+    def check_for_deprecated_seba_storage(config_file: str) -> None:
         config = EverestConfig.load_file(config_file)
         output_dir = Path(config.optimization_output_dir)
         if os.path.exists(output_dir / "seba.db") or os.path.exists(
@@ -509,7 +516,7 @@ def _store_function_results(self, results: FunctionResults) -> _EvaluationResult
             separator=":",
         )
 
-        realization_objectives = realization_objectives.pivot(
+        realization_objectives = realization_objectives.pivot(  # type: ignore
             values="objective_value",
             index=[
                 "batch_id",
@@ -673,14 +680,16 @@ def _on_batch_evaluation_finished(self, event: Event) -> None:
                 and item.functions is not None
                 and item.functions.weighted_objective > best_value
             ):
-                best_value = item.functions.weighted_objective
+                best_value = float(item.functions.weighted_objective)
                 best_results = item
 
         if best_results is not None:
             results = [best_results, *results]
 
-        batch_dicts = {}
+        batch_dicts: dict[int, Any] = {}
         for item in results:
+            assert item.batch_id is not None
+
             if item.batch_id not in batch_dicts:
                 batch_dicts[item.batch_id] = {}
 
@@ -710,7 +719,7 @@ def _on_batch_evaluation_finished(self, event: Event) -> None:
                 )
             )
 
-    def _on_optimization_finished(self, _) -> None:
+    def _on_optimization_finished(self, _: Any) -> None:
         logger.debug("Storing final results Everest storage")
 
         merit_values = self._get_merit_values()
@@ -729,6 +738,7 @@ def _on_optimization_finished(self, _) -> None:
                 if merit_value is None:
                     continue
 
+                assert b.batch_objectives is not None
                 b.batch_objectives = b.batch_objectives.with_columns(
                     pl.lit(merit_value).alias("merit_value")
                 )
@@ -754,8 +764,9 @@ def get_optimal_result(self) -> OptimalResult | None:
         )
 
         def find_best_batch(
-            filter_by, sort_by
-        ) -> tuple[BatchStorageData | None, dict | None]:
+            filter_by: Callable[[BatchStorageData], bool],
+            sort_by: Callable[[BatchStorageData], Any],
+        ) -> tuple[BatchStorageData | None, dict[str, Any] | None]:
             matching_batches = [b for b in self.data.batches if filter_by(b)]
 
             if not matching_batches:
@@ -780,14 +791,17 @@ def find_best_batch(
                     b.batch_objectives is not None
                     and "merit_value" in b.batch_objectives.columns
                 ),
-                sort_by=lambda b: b.batch_objectives.select(
+                sort_by=lambda b: b.batch_objectives.select(  # type: ignore
                     pl.col("merit_value").min()
                 ).item(),
             )
 
             if batch is None:
                 return None
 
+            assert controls_dict is not None
+            assert batch.batch_objectives is not None
+
             return OptimalResult(
                 batch=batch.batch_id,
                 controls=controls_dict,
@@ -800,14 +814,17 @@ def find_best_batch(
             batch, controls_dict = find_best_batch(
                 filter_by=lambda b: b.batch_objectives is not None
                 and not b.batch_objectives.is_empty(),
-                sort_by=lambda b: -b.batch_objectives.select(
+                sort_by=lambda b: -b.batch_objectives.select(  # type: ignore
                     pl.col("total_objective_value").sample(n=1)
                 ).item(),
             )
 
             if batch is None:
                 return None
 
+            assert controls_dict is not None
+            assert batch.batch_objectives is not None
+
             return OptimalResult(
                 batch=batch.batch_id,
                 controls=controls_dict,