Skip to content

Commit d926bd6

Browse files
committed
fix(STEF-2854): renormalize ensemble weights when base model predictions are NaN
When a base model cannot predict certain timestamps (e.g. gblinear limited to 2-day weather horizon while lgbm predicts 7 days), the combiner must redistribute the missing model's weight proportionally to the remaining models. Previously, pandas sum(axis=1, skipna=True) silently dropped the NaN model's weight contribution, causing predictions to be systematically scaled down by ~35% for timestamps beyond the weather horizon. Now weights are reindexed to match predictions, zeroed where predictions are NaN, and the weighted sum is divided by the available weight total. When all models are NaN, the result is 0 (matching prior behavior). Includes regression test with seeded data verifying no NaN propagation and no systematic downscaling. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>
1 parent 7f1a06d commit d926bd6

2 files changed

Lines changed: 61 additions & 2 deletions

File tree

packages/openstef-meta/src/openstef_meta/models/forecast_combiners/learned_weights_combiner.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,15 @@ def _predict_quantile(
295295
# Convert soft probabilities to hard selection: max weight → 1.0, ties distributed equally
296296
weights = (weights == weights.max(axis=1).to_frame().to_numpy()) / weights.sum(axis=1).to_frame().to_numpy()
297297

298-
# Weighted average: multiply each forecaster's prediction by its weight and sum
299-
return dataset.input_data().mul(weights).sum(axis=1)
298+
# Weighted average: renormalize weights so NaN base-model predictions don't shrink the sum.
299+
# When a base model has no prediction (NaN), its weight is redistributed proportionally
300+
# to the remaining models. Reindex weights to predictions so that rows without
301+
# additional_features (dropped by _prepare_input_data's inner join) get zero weight.
302+
predictions = dataset.input_data()
303+
weights = weights.reindex(predictions.index, fill_value=0)
304+
available_weight = weights.where(predictions.notna(), 0).sum(axis=1)
305+
weighted_sum = predictions.fillna(0).mul(weights).sum(axis=1)
306+
return weighted_sum / available_weight.replace(0, 1)
300307

301308
@override
302309
def predict(

packages/openstef-meta/tests/unit/models/forecast_combiners/test_learned_weights_combiner.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,55 @@ def test_quantile_weights_combiner__fit_with_additional_features_shorter_index(
114114

115115
# Assert
116116
assert combiner.is_fitted
117+
118+
119+
def test_predict_renormalizes_weights_when_base_model_predictions_are_nan() -> None:
    """Predict should renormalize weights when a base model has NaN predictions.

    Regression test: when one base model cannot predict certain timestamps (e.g.
    gblinear limited to 2-day horizon while lgbm predicts 7 days), the combiner
    must redistribute the missing model's weight to the remaining models. Without
    renormalization, sum(axis=1, skipna=True) drops the NaN contribution, causing
    predictions to be systematically scaled down.
    """
    # Arrange: seeded generator so the fixture (and the assertions below) are
    # reproducible across runs.
    rng = np.random.default_rng(42)
    index = pd.date_range("2023-01-01", periods=100, freq="15min")

    # Two forecasters: lgbm has all values, gblinear is NaN for the last 50 rows.
    # NOTE: rng.normal already returns a fresh array, so no defensive .copy() is
    # needed before mutating gblinear_vals in place. Draw order (lgbm, gblinear,
    # load) is fixed so the seeded values stay deterministic.
    lgbm_vals = rng.normal(1000, 100, 100)
    gblinear_vals = rng.normal(1000, 100, 100)
    gblinear_vals[50:] = np.nan

    data = pd.DataFrame(
        {
            "LGBMForecaster__quantile_P10": lgbm_vals * 0.8,
            "LGBMForecaster__quantile_P50": lgbm_vals,
            "LGBMForecaster__quantile_P90": lgbm_vals * 1.2,
            "GBLinearForecaster__quantile_P10": gblinear_vals * 0.8,
            "GBLinearForecaster__quantile_P50": gblinear_vals,
            "GBLinearForecaster__quantile_P90": gblinear_vals * 1.2,
            "load": rng.normal(1000, 100, 100),
        },
        index=index,
    )
    dataset = EnsembleForecastDataset(data=data, sample_interval=timedelta(minutes=15))

    combiner = WeightsCombiner(
        hyperparams=LGBMCombinerHyperParams(n_leaves=5, n_estimators=10),
        quantiles=[Q(0.1), Q(0.5), Q(0.9)],
        horizons=[LeadTime(timedelta(days=1))],
    )
    combiner.fit(dataset)

    # Act
    result = combiner.predict(dataset)

    # Assert — rows where gblinear is NaN should still produce valid (non-NaN) predictions
    nan_rows = result.data[["quantile_P10", "quantile_P50", "quantile_P90"]].iloc[50:]
    assert not nan_rows.isna().any().any(), (
        "Predictions should not be NaN when at least one base model has valid predictions"
    )
    # And the predictions should be in the ballpark of the lgbm values (not scaled down)
    assert nan_rows["quantile_P50"].mean() > 500, (
        "Predictions in the NaN region should not be systematically scaled down"
    )

0 commit comments

Comments
 (0)