Skip to content

Commit d926bd6

Browse files
committed
fix(STEF-2854): renormalize ensemble weights when base model predictions are NaN
When a base model cannot predict certain timestamps (e.g. gblinear limited to 2-day weather horizon while lgbm predicts 7 days), the combiner must redistribute the missing model's weight proportionally to the remaining models. Previously, pandas sum(axis=1, skipna=True) silently dropped the NaN model's weight contribution, causing predictions to be systematically scaled down by ~35% for timestamps beyond the weather horizon. Now weights are reindexed to match predictions, zeroed where predictions are NaN, and the weighted sum is divided by the available weight total. When all models are NaN, the result is 0 (matching prior behavior). Includes regression test with seeded data verifying no NaN propagation and no systematic downscaling. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>
1 parent 7f1a06d commit d926bd6

2 files changed

Lines changed: 61 additions & 2 deletions

File tree

packages/openstef-meta/src/openstef_meta/models/forecast_combiners/learned_weights_combiner.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,15 @@ def _predict_quantile(
295295
# Convert soft probabilities to hard selection: max weight → 1.0, ties distributed equally
296296
weights = (weights == weights.max(axis=1).to_frame().to_numpy()) / weights.sum(axis=1).to_frame().to_numpy()
297297

298-
# Weighted average: multiply each forecaster's prediction by its weight and sum
299-
return dataset.input_data().mul(weights).sum(axis=1)
298+
# Weighted average: renormalize weights so NaN base-model predictions don't shrink the sum.
299+
# When a base model has no prediction (NaN), its weight is redistributed proportionally
300+
# to the remaining models. Reindex weights to predictions so that rows without
301+
# additional_features (dropped by _prepare_input_data's inner join) get zero weight.
302+
predictions = dataset.input_data()
303+
weights = weights.reindex(predictions.index, fill_value=0)
304+
available_weight = weights.where(predictions.notna(), 0).sum(axis=1)
305+
weighted_sum = predictions.fillna(0).mul(weights).sum(axis=1)
306+
return weighted_sum / available_weight.replace(0, 1)
300307

301308
@override
302309
def predict(

packages/openstef-meta/tests/unit/models/forecast_combiners/test_learned_weights_combiner.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,55 @@ def test_quantile_weights_combiner__fit_with_additional_features_shorter_index(
114114

115115
# Assert
116116
assert combiner.is_fitted
117+
118+
119+
def test_predict_renormalizes_weights_when_base_model_predictions_are_nan() -> None:
    """Predict should renormalize weights when a base model has NaN predictions.

    Regression test: when one base model cannot predict certain timestamps (e.g.
    gblinear limited to 2-day horizon while lgbm predicts 7 days), the combiner
    must redistribute the missing model's weight to the remaining models. Without
    renormalization, sum(axis=1, skipna=True) drops the NaN contribution, causing
    predictions to be systematically scaled down.
    """
    # Arrange: seeded generator so the fixture (and the assertions below) are
    # reproducible across runs.
    rng = np.random.default_rng(42)
    index = pd.date_range("2023-01-01", periods=100, freq="15min")

    # Two forecasters: lgbm has all values, gblinear is NaN for the last 50 rows.
    # NOTE: rng.normal already returns a fresh array, so no defensive .copy() is
    # needed before mutating gblinear_vals in place. Draw order (lgbm, gblinear,
    # load) is fixed so the seeded values stay deterministic.
    lgbm_vals = rng.normal(1000, 100, 100)
    gblinear_vals = rng.normal(1000, 100, 100)
    gblinear_vals[50:] = np.nan

    data = pd.DataFrame(
        {
            "LGBMForecaster__quantile_P10": lgbm_vals * 0.8,
            "LGBMForecaster__quantile_P50": lgbm_vals,
            "LGBMForecaster__quantile_P90": lgbm_vals * 1.2,
            "GBLinearForecaster__quantile_P10": gblinear_vals * 0.8,
            "GBLinearForecaster__quantile_P50": gblinear_vals,
            "GBLinearForecaster__quantile_P90": gblinear_vals * 1.2,
            "load": rng.normal(1000, 100, 100),
        },
        index=index,
    )
    dataset = EnsembleForecastDataset(data=data, sample_interval=timedelta(minutes=15))

    combiner = WeightsCombiner(
        hyperparams=LGBMCombinerHyperParams(n_leaves=5, n_estimators=10),
        quantiles=[Q(0.1), Q(0.5), Q(0.9)],
        horizons=[LeadTime(timedelta(days=1))],
    )
    combiner.fit(dataset)

    # Act
    result = combiner.predict(dataset)

    # Assert — rows where gblinear is NaN should still produce valid (non-NaN) predictions
    nan_rows = result.data[["quantile_P10", "quantile_P50", "quantile_P90"]].iloc[50:]
    assert not nan_rows.isna().any().any(), (
        "Predictions should not be NaN when at least one base model has valid predictions"
    )
    # And the predictions should be in the ballpark of the lgbm values (not scaled down)
    assert nan_rows["quantile_P50"].mean() > 500, (
        "Predictions in the NaN region should not be systematically scaled down"
    )

0 commit comments

Comments
 (0)