-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
44 lines (40 loc) · 1.45 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
from fit_predict_categorical_encoding import *
'''
Learning with dirty categorical variables.
'''
# Experiment configuration ####################################################
# Every value defined below is handed positionally, in this same order, to
# fit_predict_categorical_encoding() at the bottom of the script.

# Dataset names to run on. Commented-out entries are disabled alternatives;
# uncomment one to include it.
datasets = [
    # 'midwest_survey',
    # 'employee_salaries',
    # 'medical_charge',
    'traffic_violations',
    # 'road_safety',
    # 'docs_payments',
    # 'beer_reviews'
]

# NOTE(review): presumably the number of parallel workers used by the callee
# -- confirm against fit_predict_categorical_encoding.
n_jobs = 26

# NOTE(review): presumably the number of train/test splits and the fraction
# of each split held out for testing -- confirm against the callee.
n_splits = 100
test_size = 1. / 3

# Encoder names to evaluate. Commented-out entries are disabled alternatives.
encoders = [
    # 'one-hot_encoding_sparse',
    # '3gram_similarity2',
    # '3gram_similarity2_1',
    # '3gram_similarity4',
    # '3gram_similarity2_2',
    # '3gram_similarity5',
    # '3grams_count_vectorizer',
    # '3grams_hot_vectorizer',
    # '3grams_tfidf_vectorizer',
    '3gram_presence_fisher_kernel',
]

# NOTE(review): presumably toggles string clean-up before encoding -- confirm.
str_preprocess = True

# List of [method, value] pairs. Method names noted by the original author:
# '-', 'RandomProjectionsGaussian', 'MostFrequentCategories', 'KMeans',
# NOTE(review): ['-', -1] presumably means "no dimension reduction" -- confirm.
dimension_reductions = [['-', -1]]

# Directory path passed to the callee as results_path.
results_path = os.path.join('results', '2018-02-09_100splits')
# Alternative path kept from the original script:
# results_path = os.path.join('results', '2017-12-05_DimRed')
###############################################################################

# Launch the experiment with the configuration above.
fit_predict_categorical_encoding(
    datasets,
    n_jobs,
    n_splits,
    test_size,
    encoders,
    str_preprocess,
    dimension_reductions,
    results_path,
    model_path='',
)