OpenSTEF
diff --git a/‎examples/benchmarks/liander_2024_ensemble.py‎
Lines changed: 132 additions & 0 deletions b/‎examples/benchmarks/liander_2024_ensemble.py‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎examples/examples/configuring_model_pipeline_example.py‎
Lines changed: 7 additions & 10 deletions b/‎examples/examples/configuring_model_pipeline_example.py‎
Lines changed: 7 additions & 10 deletions
diff --git a/‎examples/examples/isotonic_calibration_example.py‎
Lines changed: 8 additions & 13 deletions b/‎examples/examples/isotonic_calibration_example.py‎
Lines changed: 8 additions & 13 deletions
diff --git a/‎packages/openstef-beam/src/openstef_beam/benchmarking/baselines/openstef4.py‎
Lines changed: 62 additions & 27 deletions b/‎packages/openstef-beam/src/openstef_beam/benchmarking/baselines/openstef4.py‎
Lines changed: 62 additions & 27 deletions
diff --git a/‎packages/openstef-core/src/openstef_core/datasets/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎packages/openstef-core/src/openstef_core/datasets/__init__.py‎
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,132 @@
+"""Liander 2024 Benchmark Example.
+
+====================================
+
+This example demonstrates how to set up and run the Liander 2024 STEF benchmark using OpenSTEF BEAM.
+The benchmark evaluates an ensemble forecaster (LGBM and GBLinear base models by default) on the dataset from HuggingFace.
+"""
+
+# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <[email protected]>
+#
+# SPDX-License-Identifier: MPL-2.0
+
+import os
+import time
+
+os.environ["OMP_NUM_THREADS"] = "1"  # Set OMP_NUM_THREADS to 1 to avoid issues with parallel execution and xgboost
+os.environ["OPENBLAS_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
+
+import logging
+import multiprocessing
+from datetime import timedelta
+from pathlib import Path
+
+from openstef_beam.backtesting.backtest_forecaster import BacktestForecasterConfig
+from openstef_beam.benchmarking.baselines import (
+    create_openstef4_preset_backtest_forecaster,
+)
+from openstef_beam.benchmarking.benchmarks.liander2024 import Liander2024Category, create_liander2024_benchmark_runner
+from openstef_beam.benchmarking.callbacks.strict_execution_callback import StrictExecutionCallback
+from openstef_beam.benchmarking.storage.local_storage import LocalBenchmarkStorage
+from openstef_core.types import LeadTime, Q
+from openstef_meta.presets import (
+    EnsembleForecastingWorkflowConfig,
+)
+from openstef_models.integrations.mlflow.mlflow_storage import MLFlowStorage
+from openstef_models.transforms.general import SampleWeightConfig
+
+logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s] %(message)s")
+
+OUTPUT_PATH = Path("./benchmark_results")
+
+N_PROCESSES = 1 if True else multiprocessing.cpu_count()  # Number of parallel processes for the benchmark; the hard-coded `True` pins this to 1
+
+ensemble_type = "learned_weights"  # "stacking", "learned_weights" or "rules"
+base_models = ["lgbm", "gblinear"]  # combination of "lgbm", "gblinear", "xgboost" and "lgbm_linear"
+combiner_model = (
+    "lgbm"  # "lgbm", "xgboost", "rf" or "logistic" for learned weights combiner, gblinear for stacking combiner
+)
+
+model = "Ensemble_" + "_".join(base_models) + "_" + ensemble_type + "_" + combiner_model
+
+# Model configuration
+FORECAST_HORIZONS = [LeadTime.from_string("PT36H")]  # Forecast horizon(s)
+PREDICTION_QUANTILES = [
+    Q(0.05),
+    Q(0.1),
+    Q(0.3),
+    Q(0.5),
+    Q(0.7),
+    Q(0.9),
+    Q(0.95),
+]  # Quantiles for probabilistic forecasts
+
+BENCHMARK_FILTER: list[Liander2024Category] | None = None
+
+USE_MLFLOW_STORAGE = True  # NOTE(review): `storage` is built below, but workflow_config passes mlflow_storage=None - confirm this is intended
+
+if USE_MLFLOW_STORAGE:
+    storage = MLFlowStorage(
+        tracking_uri=str(OUTPUT_PATH / "mlflow_artifacts"),
+        local_artifacts_path=OUTPUT_PATH / "mlflow_tracking_artifacts",
+    )
+else:
+    storage = None
+
+workflow_config = EnsembleForecastingWorkflowConfig(
+    model_id="common_model_",
+    ensemble_type=ensemble_type,
+    base_models=base_models,  # type: ignore
+    combiner_model=combiner_model,
+    horizons=FORECAST_HORIZONS,
+    quantiles=PREDICTION_QUANTILES,
+    model_reuse_enable=False,
+    mlflow_storage=None,
+    radiation_column="shortwave_radiation",
+    rolling_aggregate_features=["mean", "median", "max", "min"],
+    wind_speed_column="wind_speed_80m",
+    pressure_column="surface_pressure",
+    temperature_column="temperature_2m",
+    relative_humidity_column="relative_humidity_2m",
+    energy_price_column="EPEX_NL",
+    forecaster_sample_weights={
+        "gblinear": SampleWeightConfig(method="exponential", weight_exponent=1.0),
+        "lgbm": SampleWeightConfig(weight_exponent=0.0),
+        "xgboost": SampleWeightConfig(weight_exponent=0.0),
+        "lgbm_linear": SampleWeightConfig(weight_exponent=0.0),
+    },
+)
+
+
+# Create the backtest configuration. NOTE(review): not passed to create_openstef4_preset_backtest_forecaster below - confirm
+backtest_config = BacktestForecasterConfig(
+    requires_training=True,
+    predict_length=timedelta(days=7),
+    predict_min_length=timedelta(minutes=15),
+    predict_context_length=timedelta(days=14),  # Context needed for lag features
+    predict_context_min_coverage=0.5,
+    training_context_length=timedelta(days=90),  # Three months of training data
+    training_context_min_coverage=0.5,
+    predict_sample_interval=timedelta(minutes=15),
+)
+
+
+if __name__ == "__main__":
+    start_time = time.time()
+    create_liander2024_benchmark_runner(
+        storage=LocalBenchmarkStorage(base_path=OUTPUT_PATH / model),
+        data_dir=None,  # Path("../data/liander2024-energy-forecasting-benchmark"),
+        callbacks=[StrictExecutionCallback()],
+    ).run(
+        forecaster_factory=create_openstef4_preset_backtest_forecaster(
+            workflow_config=workflow_config,
+            cache_dir=OUTPUT_PATH / "cache",
+        ),
+        run_name=model,
+        n_processes=N_PROCESSES,
+        filter_args=BENCHMARK_FILTER,
+    )
+
+    end_time = time.time()
+    print(f"Benchmark completed in {end_time - start_time:.2f} seconds.")
@@ -45,7 +45,6 @@
 from openstef_models.integrations.mlflow.mlflow_storage import MLFlowStorage
 from openstef_models.models.forecasting.gblinear_forecaster import (
     GBLinearForecaster,
-    GBLinearForecasterConfig,
     GBLinearHyperParams,
 )
 from openstef_models.models.forecasting_model import ForecastingModel
@@ -88,15 +87,13 @@
         ],
     ),
     forecaster=GBLinearForecaster(
-        config=GBLinearForecasterConfig(
-            horizons=[LeadTime.from_string("PT36H")],
-            quantiles=[Q(0.5), Q(0.1), Q(0.9)],
-            hyperparams=GBLinearHyperParams(
-                n_steps=1000,
-                learning_rate=0.3,
-            ),
-            verbosity=True,
-        )
+        horizons=[LeadTime.from_string("PT36H")],
+        quantiles=[Q(0.5), Q(0.1), Q(0.9)],
+        hyperparams=GBLinearHyperParams(
+            n_steps=1000,
+            learning_rate=0.3,
+        ),
+        verbosity=True,
     ),
     target_column="load",
     tags={
 
@@ -27,7 +27,6 @@
 from openstef_core.types import LeadTime, Q
 from openstef_models.models.forecasting.gblinear_forecaster import (
     GBLinearForecaster,
-    GBLinearForecasterConfig,
     GBLinearHyperParams,
 )
 from openstef_models.models.forecasting_model import ForecastingModel
@@ -53,12 +52,10 @@
 # Step 2: Configure model without calibration (for comparison)
 model_uncalibrated = ForecastingModel(
     forecaster=GBLinearForecaster(
-        config=GBLinearForecasterConfig(
-            horizons=[LeadTime.from_string("PT1H")],
-            quantiles=[Q(0.1), Q(0.5), Q(0.9)],
-            hyperparams=GBLinearHyperParams(n_steps=100),
-            verbosity=0,
-        )
+        horizons=[LeadTime.from_string("PT1H")],
+        quantiles=[Q(0.1), Q(0.5), Q(0.9)],
+        hyperparams=GBLinearHyperParams(n_steps=100),
+        verbosity=0,
     ),
     target_column="load",
 )
@@ -70,12 +67,10 @@
 # Step 3: Configure model with windowed isotonic quantile calibration
 model_calibrated = ForecastingModel(
     forecaster=GBLinearForecaster(
-        config=GBLinearForecasterConfig(
-            horizons=[LeadTime.from_string("PT1H")],
-            quantiles=[Q(0.1), Q(0.5), Q(0.9)],
-            hyperparams=GBLinearHyperParams(n_steps=100),
-            verbosity=0,
-        )
+        horizons=[LeadTime.from_string("PT1H")],
+        quantiles=[Q(0.1), Q(0.5), Q(0.9)],
+        hyperparams=GBLinearHyperParams(n_steps=100),
+        verbosity=0,
     ),
     postprocessing=TransformPipeline(
         transforms=[
 
@@ -11,18 +11,31 @@
 from pathlib import Path
 from typing import Any, cast, override
 
+import pandas as pd
 from pydantic import Field, PrivateAttr
 from pydantic_extra_types.coordinate import Coordinate
 
-from openstef_beam.backtesting.backtest_forecaster.mixins import BacktestForecasterConfig, BacktestForecasterMixin
-from openstef_beam.backtesting.restricted_horizon_timeseries import RestrictedHorizonVersionedTimeSeries
-from openstef_beam.benchmarking.benchmark_pipeline import BenchmarkContext, BenchmarkTarget, ForecasterFactory
+from openstef_beam.backtesting.backtest_forecaster.mixins import (
+    BacktestForecasterConfig,
+    BacktestForecasterMixin,
+)
+from openstef_beam.backtesting.restricted_horizon_timeseries import (
+    RestrictedHorizonVersionedTimeSeries,
+)
+from openstef_beam.benchmarking.benchmark_pipeline import (
+    BenchmarkContext,
+    BenchmarkTarget,
+    ForecasterFactory,
+)
 from openstef_core.base_model import BaseConfig, BaseModel
 from openstef_core.datasets import TimeSeriesDataset
 from openstef_core.exceptions import FlatlinerDetectedError, NotFittedError
 from openstef_core.types import Q
+from openstef_meta.presets import EnsembleForecastingWorkflowConfig, create_ensemble_forecasting_workflow
 from openstef_models.presets import ForecastingWorkflowConfig
-from openstef_models.workflows.custom_forecasting_workflow import CustomForecastingWorkflow
+from openstef_models.workflows.custom_forecasting_workflow import (
+    CustomForecastingWorkflow,
+)
 
 
 class WorkflowCreationContext(BaseConfig):
@@ -54,6 +67,10 @@ class OpenSTEF4BacktestForecaster(BaseModel, BacktestForecasterMixin):
         default=False,
         description="When True, saves intermediate input data for debugging",
     )
+    contributions: bool = Field(
+        default=False,
+        description="When True, saves base forecaster prediction contributions for ensemble models",
+    )
 
     _workflow: CustomForecastingWorkflow | None = PrivateAttr(default=None)
     _is_flatliner_detected: bool = PrivateAttr(default=False)
@@ -62,7 +79,7 @@ class OpenSTEF4BacktestForecaster(BaseModel, BacktestForecasterMixin):
 
     @override
     def model_post_init(self, context: Any) -> None:
-        if self.debug:
+        if self.debug or self.contributions:
             self.cache_dir.mkdir(parents=True, exist_ok=True)
 
     @property
@@ -71,8 +88,8 @@ def quantiles(self) -> list[Q]:
         # Create a workflow instance if needed to get quantiles
         if self._workflow is None:
             self._workflow = self.workflow_factory(WorkflowCreationContext())
-        # Extract quantiles from the workflow's model
-        return self._workflow.model.forecaster.config.quantiles
+
+        return self._workflow.model.quantiles
 
     @override
     def fit(self, data: RestrictedHorizonVersionedTimeSeries) -> None:
@@ -82,7 +99,9 @@ def fit(self, data: RestrictedHorizonVersionedTimeSeries) -> None:
 
         # Extract the dataset for training
         training_data = data.get_window(
-            start=data.horizon - self.config.training_context_length, end=data.horizon, available_before=data.horizon
+            start=data.horizon - self.config.training_context_length,
+            end=data.horizon,
+            available_before=data.horizon,
         )
 
         if self.debug:
@@ -102,7 +121,7 @@ def fit(self, data: RestrictedHorizonVersionedTimeSeries) -> None:
 
         if self.debug:
             id_str = data.horizon.strftime("%Y%m%d%H%M%S")
-            self._workflow.model.prepare_input(training_data).to_parquet(  # pyright: ignore[reportPrivateUsage]
+            self._workflow.model.prepare_input(training_data).to_parquet(
                 path=self.cache_dir / f"debug_{id_str}_prepared_training.parquet"
             )
 
@@ -136,6 +155,15 @@ def predict(self, data: RestrictedHorizonVersionedTimeSeries) -> TimeSeriesDatas
             predict_data.to_parquet(path=self.cache_dir / f"debug_{id_str}_predict.parquet")
             forecast.to_parquet(path=self.cache_dir / f"debug_{id_str}_forecast.parquet")
 
+        if self.contributions:
+            id_str = data.horizon.strftime("%Y%m%d%H%M%S")
+            try:
+                contributions = self._workflow.model.predict_contributions(predict_data, forecast_start=data.horizon)
+            except NotImplementedError:
+                pass
+            else:
+                df = pd.concat([contributions.data, forecast.data.drop(columns=["load"])], axis=1)
+                df.to_parquet(path=self.cache_dir / f"contrib_{id_str}_predict.parquet")
         return forecast
 
 
@@ -144,7 +172,7 @@ class OpenSTEF4PresetBacktestForecaster(OpenSTEF4BacktestForecaster):
 
 
 def _preset_target_forecaster_factory(
-    base_config: ForecastingWorkflowConfig,
+    base_config: ForecastingWorkflowConfig | EnsembleForecastingWorkflowConfig,
     backtest_config: BacktestForecasterConfig,
     cache_dir: Path,
     context: BenchmarkContext,
@@ -158,23 +186,26 @@ def _preset_target_forecaster_factory(
 
     def _create_workflow(context: WorkflowCreationContext) -> CustomForecastingWorkflow:
         # Create a new workflow instance with fresh model.
-        return create_forecasting_workflow(
-            config=base_config.model_copy(
-                update={
-                    "model_id": f"{prefix}_{target.name}",
-                    "run_name": context.step_name,
-                    "location": LocationConfig(
-                        name=target.name,
-                        description=target.description,
-                        coordinate=Coordinate(
-                            latitude=target.latitude,
-                            longitude=target.longitude,
-                        ),
-                    ),
-                }
-            )
+        location = LocationConfig(
+            name=target.name,
+            description=target.description,
+            coordinate=Coordinate(
+                latitude=target.latitude,
+                longitude=target.longitude,
+            ),
         )
 
+        update = {
+            "model_id": f"{prefix}_{target.name}",
+            "location": location,
+            "run_name": context.step_name,
+        }
+
+        if isinstance(base_config, EnsembleForecastingWorkflowConfig):
+            return create_ensemble_forecasting_workflow(config=base_config.model_copy(update=update))
+
+        return create_forecasting_workflow(config=base_config.model_copy(update=update))
+
     return OpenSTEF4BacktestForecaster(
         config=backtest_config,
         workflow_factory=_create_workflow,
@@ -184,7 +215,7 @@ def _create_workflow(context: WorkflowCreationContext) -> CustomForecastingWorkf
 
 
 def create_openstef4_preset_backtest_forecaster(
-    workflow_config: ForecastingWorkflowConfig,
+    workflow_config: ForecastingWorkflowConfig | EnsembleForecastingWorkflowConfig,
     backtest_config: BacktestForecasterConfig | None = None,
     cache_dir: Path = Path("cache"),
 ) -> ForecasterFactory[BenchmarkTarget]:
@@ -225,4 +256,8 @@ def create_openstef4_preset_backtest_forecaster(
     )
 
 
-__all__ = ["OpenSTEF4BacktestForecaster", "WorkflowCreationContext", "create_openstef4_preset_backtest_forecaster"]
+__all__ = [
+    "OpenSTEF4BacktestForecaster",
+    "WorkflowCreationContext",
+    "create_openstef4_preset_backtest_forecaster",
+]
@@ -19,13 +19,15 @@
 from openstef_core.datasets.timeseries_dataset import TimeSeriesDataset, validate_horizons_present
 from openstef_core.datasets.validated_datasets import (
     EnergyComponentDataset,
+    EnsembleForecastDataset,
     ForecastDataset,
     ForecastInputDataset,
 )
 from openstef_core.datasets.versioned_timeseries_dataset import VersionedTimeSeriesDataset
 
 __all__ = [
     "EnergyComponentDataset",
+    "EnsembleForecastDataset",
     "ForecastDataset",
     "ForecastInputDataset",
     "TimeSeriesDataset",