Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions openstef/data_classes/prediction_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ class PredictionJobDataClass(BaseModel):
False,
description="If True, flatliners are also detected on non-zero values (median of the load).",
)
predict_non_zero_flatliner: bool = Field(
False,
description="If True, the flatliner model predicts the median of the load measurements instead of zero.",
)
data_balancing_ratio: Optional[float] = Field(
None,
description="If data balancing is enabled, the data will be balanced with data from 1 year ago in the future.",
Expand Down
1 change: 1 addition & 0 deletions openstef/model/model_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
],
ModelType.FLATLINER: [
"quantiles",
"predict_median",
],
ModelType.LINEAR_QUANTILE: [
"alpha",
Expand Down
33 changes: 24 additions & 9 deletions openstef/model/regressors/flatliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,23 @@
class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
feature_names_: List[str] = []

def __init__(self, quantiles=None, predict_median: bool = False):
    """Initialize FlatlinerRegressor.

    The model always predicts a constant value, regardless of the input features.
    The model is meant to be used for flatliner locations that still expect a
    prediction while preserving the prediction interface.

    Args:
        quantiles: Quantiles to predict (optional).
        predict_median: If True, predicts the median of the training load data.
            If False, predicts 0.0.

    """
    super().__init__()
    self.quantiles = quantiles
    self.predict_median = predict_median
    # Constant prediction value; starts at 0.0 and is overwritten by fit().
    self.predicted_value_: float = 0.0

@property
def feature_names(self) -> list:
Expand All @@ -48,38 +56,44 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:

Args:
x: Feature matrix
y: Labels
y: Labels (load measurements)

Returns:
Fitted LinearQuantile model
Fitted FlatlinerRegressor model

"""
self.feature_names_ = list(x.columns)
self.feature_importances_ = np.ones(len(self.feature_names_)) / (
len(self.feature_names_) or 1.0
)

# Calculate the predicted value based on predict_median setting
if self.predict_median and len(y) > 0:
self.predicted_value_ = float(y.median())
else:
self.predicted_value_ = 0.0

return self

def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.ndarray:
    """Make a constant prediction for every row of the feature matrix.

    Args:
        x: Feature matrix; only its number of rows is used.
        quantile: Quantile for which a prediction is desired. Accepted for
            compatibility with the quantile-model prediction interface; it is
            not used, so the same value is returned for every quantile.
        **kwargs: Accepted for interface compatibility and ignored.

    Returns:
        Array of length ``x.shape[0]`` in which every element equals
        ``self.predicted_value_``.

    Raises:
        Whatever ``check_is_fitted`` raises for an unfitted estimator
        (``NotFittedError`` in scikit-learn).

    """
    check_is_fitted(self)

    # Single return: the fitted constant (0.0 unless fit() stored a median).
    return np.full(x.shape[0], self.predicted_value_)

def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
check_is_fitted(self)
Expand All @@ -89,6 +103,7 @@ def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array
def _get_param_names(cls):
return [
"quantiles",
"predict_median",
]

def __sklearn_is_fitted__(self) -> bool:
Expand Down
7 changes: 6 additions & 1 deletion openstef/pipeline/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,16 @@ def train_pipeline_step_train_model(
"'load' column should be first and 'horizon' column last."
)

# Prepare model kwargs, including predict_median for flatliner models
model_kwargs = dict(pj.model_kwargs or {})
if pj.get("predict_non_zero_flatliner", False):
model_kwargs["predict_median"] = True

# Create relevant model
model = ModelCreator.create_model(
pj["model"],
quantiles=pj["quantiles"],
**(pj.model_kwargs or {}),
**model_kwargs,
)

# split x and y data
Expand Down
36 changes: 36 additions & 0 deletions test/unit/model/regressors/test_flatliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,39 @@ def test_get_feature_names_from_linear(self):
self.assertTrue(
(feature_importance == np.array([0, 0, 0], dtype=np.float32)).all()
)

def test_predict_median_when_enabled(self):
    """A regressor built with predict_median=True predicts the median of its target."""
    # Arrange: the first fixture column is used as target, the rest as features
    features = train_input.iloc[:, 1:]
    target = train_input.iloc[:, 0]
    median_of_target = target.median()
    regressor = FlatlinerRegressor(predict_median=True)

    # Act
    regressor.fit(features, target)
    prediction = regressor.predict(features)

    # Assert
    # the regressor must report itself as fitted
    self.assertIsNone(sklearn.utils.validation.check_is_fitted(regressor))
    # one prediction per input row, each equal to the median of the target
    self.assertEqual(len(prediction), len(features))
    self.assertTrue(np.allclose(prediction, median_of_target))
    self.assertAlmostEqual(regressor.predicted_value_, median_of_target)

def test_predict_zero_when_predict_median_disabled(self):
    """A regressor built with predict_median=False predicts zero for every row."""
    # Arrange: the first fixture column is used as target, the rest as features
    features = train_input.iloc[:, 1:]
    target = train_input.iloc[:, 0]
    regressor = FlatlinerRegressor(predict_median=False)

    # Act
    regressor.fit(features, target)
    prediction = regressor.predict(features)

    # Assert
    self.assertTrue((prediction == 0).all())
    self.assertEqual(regressor.predicted_value_, 0.0)