From a6e9c6577253ea66f48ecee074cacb804fa1b585 Mon Sep 17 00:00:00 2001 From: George Oblapenko Date: Fri, 21 Sep 2018 15:23:50 +0300 Subject: [PATCH] LogTransformer raises error if transform is called before it's fitted --- src/transforms.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/transforms.py b/src/transforms.py index 71fb81e..ae38f1f 100644 --- a/src/transforms.py +++ b/src/transforms.py @@ -1,6 +1,8 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin import pandas as pd +from .exceptions import DataProcessorError +from sklearn.exceptions import NotFittedError class LogTransformer(BaseEstimator, TransformerMixin): @@ -9,13 +11,18 @@ class LogTransformer(BaseEstimator, TransformerMixin): replace the values in the column with a logarithm: X[i, col] = np.log(1 + X[i, col] - min(X[:, col])) """ def __init__(self, threshold=1e5): + self.fitted = False self.threshold = threshold + def __str__(self): + return 'LogTransformer(threshold={})'.format(self.threshold) + def _reset(self): if hasattr(self, 'columns_'): del self.columns_ del self.column_names_ del self.min_vals_ + self.fitted = False def fit(self, X): self._reset() @@ -50,15 +57,20 @@ def fit(self, X): if type(X) == pd.DataFrame: self.column_names_ = [X.columns[i] for i in self.columns_] + self.fitted = True + return self def transform(self, X): - if type(X) == pd.DataFrame: - Y = X.values - else: - Y = X + if self.fitted: + if type(X) == pd.DataFrame: + Y = X.values + else: + Y = X - for i, col in enumerate(self.columns_): - Y[:, col] = np.log1p(Y[:, col] - self.min_vals_[i]) + for i, col in enumerate(self.columns_): + Y[:, col] = np.log1p(Y[:, col] - self.min_vals_[i]) - return X + return X + else: + raise NotFittedError('This LogTransformer has not been fitted yet')