Skip to content

Commit

Permalink
Some code for writing CV results
Browse files Browse the repository at this point in the history
  • Loading branch information
knstmrd committed Sep 21, 2018
1 parent 41506e9 commit 661cc83
Showing 1 changed file with 30 additions and 6 deletions.
36 changes: 30 additions & 6 deletions src/dataprocessor.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import pathlib
from datetime import datetime
from itertools import chain
import pandas as pd
import numpy as np
import json


Expand All @@ -11,8 +8,9 @@ def __init__(self, path: str, df, non_feature_columns=None, fname_prefix='', ver
if not path.endswith('/'):
path += '/'
self.base_path = path
self.fname = fname_prefix + str(datetime.now()).replace(':', '_').replace(' ', '_')[5:19]
self.removers = []
self.fname_prefix = fname_prefix
self.fname = self.fname_prefix + str(datetime.now()).replace(':', '_').replace(' ', '_')[5:19]
self.remover_params = []
self.transforms = []
self.transform_params = []
Expand Down Expand Up @@ -107,13 +105,34 @@ def transform(self, df, use_features: str='selected'):
for transform, transform_params in zip(self.transforms, self.transform_params):
df[features_to_use] = transform.transform(df[features_to_use], **transform_params)

def cv(self, scorer, predict_proba=False, df_test=None):
# TODO: write check for whether is saved, if not, save first
def cv(self, df, predictor, scorers, predict_proba=False, df_test=None, use_features: str='selected'):
    """Append one cross-validation report record per scorer to the CV log.

    Records are appended to ``<base_path>dataprocessor_files/cv/cv.log``.
    If the processor settings have not been saved yet, ``self.save()`` is
    called first so that the feature-list paths written to the log refer
    to the current ``self.fname``.

    NOTE: no cross-validation is actually run yet. The mean/std values are
    0.0 placeholders, and ``df``, ``df_test`` and the scorer callables are
    accepted but not used.

    Args:
        df: Training dataframe (currently unused).
        predictor: Model under evaluation; only ``str(predictor)`` is logged.
        scorers: Sequence of ``(scorer, scorer_name)`` pairs.
        predict_proba: Flag recorded in the log.
        df_test: Optional test dataframe (currently unused).
        use_features: Name of the feature set, recorded in the log.
    """
    # Placeholder results: one slot per scorer until scoring is implemented.
    mean_score = [0.0] * len(scorers)
    std_score = [0.0] * len(scorers)

    if not self.saved:
        print('Dataprocessor settings not saved, will save now')
        self.save()

    # Built after the optional save(), because save() refreshes self.fname.
    selected_list = self.base_path + 'dataprocessor_files/features/selected/' + self.fname
    removed_list = self.base_path + 'dataprocessor_files/features/removed/' + self.fname

    with open(self.base_path + 'dataprocessor_files/cv/cv.log', 'a') as f:
        # Pair every scorer with its own result slot; previously the whole
        # result lists were written under each individual scorer's name.
        for (_scorer, scorer_name), mean, std in zip(scorers, mean_score, std_score):
            f.write('Mean(score): {}\nStd(score): {}\n'.format(mean, std))
            f.write('Scorer={}, mean(score): {}\nScorer={}, std(score): {}\n'.format(scorer_name, mean,
                                                                                     scorer_name, std))
            f.write('Predictor: {}\nPredict_proba: {}\n'.format(str(predictor), str(predict_proba)))
            f.write('Features used: {}\n'.format(use_features))
            f.write('Selected features list: {}\n'.format(selected_list))
            f.write('Removed features list: {}\n'.format(removed_list))

            f.write('\n\n\n')
    # TODO: write the list of feature removers and transforms with their settings;
    # the method could also run on df_test and predict there.

def save(self):
self.fname = self.fname_prefix + str(datetime.now()).replace(':', '_').replace(' ', '_')[5:19]
with open(self.base_path + 'dataprocessor_files/features/removed/' + self.fname, 'w') as f:
for feature in self.features['removed']:
f.write('{}\n'.format(feature))
Expand All @@ -140,3 +159,8 @@ def save(self):
with open(self.base_path + 'dataprocessor_files/settings/current_settings.log', 'w') as f:
json.dump(self.settings, f)
self.saved = True

def write_features(self, df, df_format):
    """Placeholder: not implemented yet, does nothing and returns ``None``.

    Planned behaviour: write the selected and transformed features to a
    dataframe and add that to the settings.
    """
    return None

0 comments on commit 661cc83

Please sign in to comment.