Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions openstef/data_classes/prediction_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ class PredictionJobDataClass(BaseModel):
False,
description="If True, flatliners are also detected on non-zero values (median of the load).",
)
predict_non_zero_flatliner: bool = Field(
False,
description="If True, the flatliner model predicts the median of the load measurements instead of zero.",
)
data_balancing_ratio: Optional[float] = Field(
None,
description="If data balancing is enabled, the data will be balanced with data from 1 year ago in the future.",
Expand Down
1 change: 1 addition & 0 deletions openstef/model/model_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
],
ModelType.FLATLINER: [
"quantiles",
"predict_median",
],
ModelType.LINEAR_QUANTILE: [
"alpha",
Expand Down
33 changes: 24 additions & 9 deletions openstef/model/regressors/flatliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,23 @@
class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
feature_names_: List[str] = []

def __init__(self, quantiles=None):
def __init__(self, quantiles=None, predict_median: bool = False):
"""Initialize FlatlinerRegressor.

The model always predicts 0.0, regardless of the input features. The model is meant to be used for flatliner
locations that still expect a prediction while preserving the prediction interface.
The model always predicts a constant value, regardless of the input features.
The model is meant to be used for flatliner locations that still expect a
prediction while preserving the prediction interface.

Args:
quantiles: Quantiles to predict (optional).
predict_median: If True, predicts the median of the training load data.
If False, predicts 0.0.

"""
super().__init__()
self.quantiles = quantiles
self.predict_median = predict_median
self.predicted_value_: float = 0.0

@property
def feature_names(self) -> list:
Expand All @@ -48,38 +56,44 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:

Args:
x: Feature matrix
y: Labels
y: Labels (load measurements)

Returns:
Fitted LinearQuantile model
Fitted FlatlinerRegressor model

"""
self.feature_names_ = list(x.columns)
self.feature_importances_ = np.ones(len(self.feature_names_)) / (
len(self.feature_names_) or 1.0
)

# Calculate the predicted value based on predict_median setting
if self.predict_median and len(y) > 0:
self.predicted_value_ = float(y.median())
else:
self.predicted_value_ = 0.0

return self

def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
"""Makes a prediction for a desired quantile.

Args:
x: Feature matrix
quantile: Quantile for which a prediciton is desired,
note that only quantile are available for which a model is trained,
quantile: Quantile for which a prediction is desired,
note that only quantiles are available for which a model is trained,
and that this is a quantile-model specific keyword

Returns:
Prediction
Prediction (constant value for all rows)

Raises:
ValueError in case no model is trained for the requested quantile

"""
check_is_fitted(self)

return np.zeros(x.shape[0])
return np.full(x.shape[0], self.predicted_value_)

def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
check_is_fitted(self)
Expand All @@ -89,6 +103,7 @@ def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array
def _get_param_names(cls):
return [
"quantiles",
"predict_median",
]

def __sklearn_is_fitted__(self) -> bool:
Expand Down
7 changes: 6 additions & 1 deletion openstef/pipeline/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,16 @@ def train_pipeline_step_train_model(
"'load' column should be first and 'horizon' column last."
)

# Prepare model kwargs, including predict_median for flatliner models
model_kwargs = dict(pj.model_kwargs or {})
if pj.get("predict_non_zero_flatliner", False):
model_kwargs["predict_median"] = True

# Create relevant model
model = ModelCreator.create_model(
pj["model"],
quantiles=pj["quantiles"],
**(pj.model_kwargs or {}),
**model_kwargs,
)

# split x and y data
Expand Down
36 changes: 36 additions & 0 deletions test/unit/model/regressors/test_flatliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,39 @@ def test_get_feature_names_from_linear(self):
self.assertTrue(
(feature_importance == np.array([0, 0, 0], dtype=np.float32)).all()
)

def test_predict_median_when_enabled(self):
    """A regressor built with predict_median=True predicts the median of the fitted labels."""
    # Arrange: column 0 of train_input is used as the label series,
    # the remaining columns as the feature matrix.
    load = train_input.iloc[:, 0]
    features = train_input.iloc[:, 1:]
    median_load = load.median()
    regressor = FlatlinerRegressor(predict_median=True)

    # Act
    regressor.fit(features, load)
    prediction = regressor.predict(features)

    # Assert
    # the fitted-check must pass silently (it returns None on success)
    fitted_check = sklearn.utils.validation.check_is_fitted(regressor)
    self.assertIsNone(fitted_check)
    # one prediction per input row, every one of them close to the median
    self.assertEqual(len(prediction), len(features))
    matches_median = np.allclose(prediction, median_load)
    self.assertTrue(matches_median)
    # the stored constant is the median as well
    self.assertAlmostEqual(regressor.predicted_value_, median_load)

def test_predict_zero_when_predict_median_disabled(self):
    """A regressor built with predict_median=False predicts zero for every row."""
    # Arrange: column 0 of train_input is used as the label series,
    # the remaining columns as the feature matrix.
    load = train_input.iloc[:, 0]
    features = train_input.iloc[:, 1:]
    regressor = FlatlinerRegressor(predict_median=False)

    # Act
    regressor.fit(features, load)
    prediction = regressor.predict(features)

    # Assert: every predicted element equals zero, and so does the stored constant
    all_zero = (prediction == 0).all()
    self.assertTrue(all_zero)
    self.assertEqual(regressor.predicted_value_, 0.0)