-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathExample-iris-smoothing.py
76 lines (61 loc) · 2.72 KB
/
Example-iris-smoothing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# n_folds = 5
from huber_svm import HuberSVC
from smoothing_regularization import Smoothing_Regularization
import pandas
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import ElasticNet, Lasso, RidgeClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold, cross_val_score
from sklearn.preprocessing import scale
from DataLoader import classificationDataLoader
import warnings
warnings.filterwarnings("ignore")
# data = load_iris()
# # X = data['data']
# X = scale(data['data'])
# y = data['target']
#
X, y = classificationDataLoader('dataset/dermatology.categorical.csv', labelCol=-1, sparsify=True)
print "X.shape = \n", X.shape
print "y.shape = \n", y.shape
idx = np.random.permutation(X.shape[0])
X = X[idx]
y = y[idx]
# Candidate estimators and the hyper-parameter grids searched for each of them.
# Every estimator except RidgeClassifier is wrapped in OneVsRestClassifier,
# which is why its grid keys carry the 'estimator__' prefix.

# Strengths shared by most grids: 100 down to 1e-3 in factors of ten.
# Each grid receives its own copy of the list.
log_grid = [100, 10, 1, 0.1, 1e-2, 1e-3]

lasso = OneVsRestClassifier(Lasso())
param_lasso = dict(estimator__alpha=list(log_grid))

elastic = OneVsRestClassifier(ElasticNet())
param_elastic = dict(
    estimator__alpha=list(log_grid),
    estimator__l1_ratio=np.linspace(0.01, 0.99, 5),
)

ridge = RidgeClassifier(solver='lsqr')
param_ridge = dict(alpha=list(log_grid))

huber = OneVsRestClassifier(HuberSVC())
param_huber = dict(
    estimator__C=list(log_grid),
    estimator__lambd=list(log_grid),
    estimator__mu=list(log_grid),
)

smoothing = OneVsRestClassifier(Smoothing_Regularization())
param_smoothing = dict(
    estimator__C=list(log_grid),
    # Wider range than the shared grid: 1e4 down to 1e-4.
    estimator__alpha=[10000, 1000] + list(log_grid) + [1e-4],
)

# Evaluation settings: number of outer folds, number of folds used inside the
# grid search, and the metric the grid search optimises.
n_folds = 5
param_folds = 3
scoring = 'accuracy'
result_df = pandas.DataFrame()
for i, (train_index, test_index) in enumerate(StratifiedKFold(y, n_folds=n_folds)):
for clf_name, clf, param_grid in [('Smoothing_Regularization', smoothing, param_smoothing),
('ElasticNet', elastic, param_elastic),
('Ridge', ridge, param_ridge),
# ('HuberSVC', huber, param_huber),
('Lasso', lasso, param_lasso)]:
print "clf_name: \n", clf_name
gs = GridSearchCV(clf, param_grid, scoring=scoring, cv=param_folds, n_jobs=-1)
gs.fit(X[train_index], y[train_index])
best_clf = gs.best_estimator_
score = accuracy_score(y[test_index], best_clf.predict(X[test_index]))
result_df.loc[i, clf_name] = score
print "result shows: \n"
result_df.loc['Mean'] = result_df.mean()
pandas.options.display.float_format = '{:,.3f}'.format
result_df
print result_df.values