Skip to content

Commit

Permalink
Added __str__ methods, and additional check when CorrelatedFeatureRemover loads data from disk
Browse files Browse the repository at this point in the history
  • Loading branch information
knstmrd committed Sep 21, 2018
1 parent c45337f commit 6d44f6c
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions src/removers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
import numpy as np
from .exceptions import DataProcessorError


class BaseFeatureRemover:
Expand Down Expand Up @@ -27,13 +28,21 @@ def __init__(self, correlation_threshold=0.9, verbose=True, force_recompute=Fals
self.fitted = False
self.verbose = verbose
self.force_recompute = force_recompute
self.persistent = True
self.write_to_file = write_to_file
self.load_from_file = load_from_file

def __str__(self):
    """Return a readable, constructor-style label for this remover.

    The label consists of the class name followed by the configured
    ``correlation_threshold`` value in parentheses.
    """
    template = 'CorrelatedFeatureRemover(correlation_threshold={})'
    return template.format(self.correlation_threshold)

def fit(self, df, feature_columns):
if self.load_from_file:
corr = pd.read_csv(self.load_from_file, index_col=0)
corr = corr.abs()

for col in feature_columns:
if col not in corr.columns:
raise DataProcessorError("Column '{}' not found in correlation file".format(col))
else:
corr = df[feature_columns].corr()
if self.write_to_file:
Expand All @@ -53,24 +62,23 @@ def fit(self, df, feature_columns):


class AlmostConstantFeatureRemover:
def __init__(self, max_count_percent=90, verbose=True, force_recompute=False, write_to_file=False,
load_from_file=False):
def __init__(self, max_count_percent=90, verbose=True, force_recompute=False):
"""
If a column has a single value that makes up more than max_count_percent of the values, remove it
:param max_count_percent:
:param verbose:
:param force_recompute:
:param write_to_file:
:param load_from_file:
"""
self.max_count_percent = max_count_percent
self.columns_to_remove = []
self.columns_to_leave = []
self.fitted = False
self.verbose = verbose
self.force_recompute = force_recompute
self.write_to_file = write_to_file
self.load_from_file = load_from_file
self.persistent = False

def __str__(self):
    """Return a readable, constructor-style label for this remover.

    The label consists of the class name followed by the configured
    ``max_count_percent`` value in parentheses.
    """
    template = 'AlmostConstantFeatureRemover(max_count_percent={})'
    return template.format(self.max_count_percent)

def fit(self, df, feature_columns):
len_df = len(df)
Expand Down

0 comments on commit 6d44f6c

Please sign in to comment.