# svm-rfe.age.py
# Python 2 script: SVM-RFE feature-selection experiments (training time and
# accuracy) on samples labelled by age group.
import time
import cPickle as pickle
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFE
from sklearn.svm import SVC
from sklearn.datasets import load_digits
def read_data(path):
    """Load and return the object pickled in ``path + ".pickle"``.

    After loading, prints the object's length, the path and the current
    time on one line.

    Args:
        path: File path without the ``.pickle`` suffix.

    Returns:
        The unpickled object (it must support ``len()``).
    """
    # Pickle streams are binary data: text mode is subject to newline
    # translation on some platforms and is rejected by Python 3's pickle.
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(path + ".pickle", "rb") as fp:
        obj = pickle.load(fp)
    # One pre-joined string prints the same whether ``print`` is a statement
    # or a function.
    print(" ".join([str(len(obj)), path + " elements load over.", time.ctime()]))
    return obj
def age_beta_D(line):
    """Return the age-group label for one sample row.

    ``line[0]`` is used as the key into the module-level ``GSM_info``
    mapping, and element 1 of the record found there is read as the age.

    Args:
        line: Indexable row whose first element is the sample key.

    Returns:
        ``"ABC"`` when the age is below 60, otherwise ``"D"``.
    """
    sample_id = line[0]
    record = GSM_info[sample_id]
    return "ABC" if record[1] < 60 else "D"
def age_distribution(matrix):
    """Count how many rows of ``matrix`` fall into each age group.

    Args:
        matrix: Iterable of rows accepted by ``age_beta_D``.

    Returns:
        A dict with the keys ``'ABC'`` and ``'D'`` mapping to the number of
        rows that received each label.
    """
    labels = [age_beta_D(row) for row in matrix]
    return {'ABC': labels.count('ABC'), 'D': labels.count('D')}
def svm_rfe(X_train):
    """Fit recursive feature elimination (RFE) with a linear SVM.

    Each row's class label comes from ``age_beta_D(row)``; the feature
    vector is the row without its first element.

    Args:
        X_train: Sequence of rows whose first element is the sample key and
            whose remaining elements are the features.

    Returns:
        The fitted ``RFE`` object.
    """
    labels = [age_beta_D(row) for row in X_train]
    features = [row[1:] for row in X_train]
    # Per the scikit-learn RFE documentation, a fractional step removes that
    # share of the features on every iteration, and
    # n_features_to_select=None keeps half of them.
    selector = RFE(
        estimator=SVC(kernel="linear", C=1),
        n_features_to_select=None,
        step=0.5,
    )
    selector.fit(features, labels)
    return selector
def sampling(data, n):
    """Take up to ``n`` rows from a pickled dataset, label-proportionally.

    Rows are visited in stored order. Each label ("ABC" / "D") gets a share
    of ``n`` proportional to its frequency in the whole dataset. A row is
    kept when it is the first of its label, or when the number of rows
    already kept for that label does not exceed the label's share (so a
    label may end up one row above its share). Collection stops as soon as
    ``n`` rows have been kept. The per-label counts are printed before
    returning.

    Args:
        data: Dataset name handed to ``read_data`` (no ``.pickle`` suffix).
        n: Maximum number of rows to return.

    Returns:
        List of the selected rows; empty when the dataset is empty or ``n``
        is not positive.
    """
    matrix = read_data(data)
    count = {}
    kept = []
    # Degenerate inputs are skipped entirely: an empty dataset would divide
    # by zero when computing the shares, and a non-positive n would still
    # yield one row because the size check only runs after an append.
    if len(matrix) > 0 and n > 0:
        subsum = age_distribution(matrix)
        expect = {}
        for label in ("ABC", "D"):
            expect[label] = n * 1.0 * subsum[label] / len(matrix)
        for line in matrix:
            beta = age_beta_D(line)
            taken = count.get(beta, 0)
            # taken == 0 means this label has not been seen yet; its first
            # row is always kept regardless of the share.
            if taken == 0 or taken <= expect[beta]:
                kept.append(line)
                count[beta] = taken + 1
            if len(kept) >= n:
                break
    print(count)
    return kept
def timing():
    """Time SVM-RFE training on growing samples; save and plot the results.

    For each sample size a sample is drawn from the "age_train" dataset and
    ``svm_rfe`` is timed on it with ``time.clock()``. The fitted models are
    pickled to "svm_rfe.models.pickle", the elapsed times to
    "svm_rfe.timing.pickle", and the times are plotted against the sample
    sizes.
    """
    # Single source of truth for the sizes, shared by the loop and the plot.
    sizes = [1000, 2000, 4000, 6000, 8000, 10000]
    models = []
    durations = []
    for size in sizes:
        sample = sampling("age_train", size)
        # Only the model fitting is timed, not the sampling.
        start = time.clock()
        model = svm_rfe(sample)
        durations.append(time.clock() - start)
        models.append(model)
    # Pickle streams are binary data, so the files are opened in "wb".
    with open("svm_rfe.models.pickle", "wb") as fp:
        pickle.dump(models, fp)
    with open("svm_rfe.timing.pickle", "wb") as fp:
        pickle.dump(durations, fp)
    plt.plot(sizes, durations, "-")
    plt.ylabel("CPU time(second)")
    plt.xlabel("# of samples")
    plt.grid(True)
    plt.show()
def acc():
    """Score the saved SVM-RFE models on the test set; save and plot results.

    Loads the "age_test" dataset, prints its label distribution, scores
    every model stored in "svm_rfe.models.pickle" on it, pickles the list
    of scores to "acc.svm_rfe.pickle" and plots the scores.
    """
    X_test = read_data("age_test")
    print(age_distribution(X_test))
    # Labels must be derived before the key column is stripped from the rows.
    Y_test = [age_beta_D(line) for line in X_test]
    X_test = [line[1:] for line in X_test]
    rfes = read_data("svm_rfe.models")
    scores = [rfe.score(X_test, Y_test) for rfe in rfes]
    # Pickle streams are binary data, so the file is opened in "wb".
    with open("acc.svm_rfe.pickle", "wb") as fp:
        pickle.dump(scores, fp)
    # NOTE(review): the x axis assumes exactly six stored models, one per
    # sample size listed here - confirm against whatever wrote the models.
    plt.plot([1000, 2000, 4000, 6000, 8000, 10000], scores, "-")
    plt.ylabel("Accuracy")
    plt.xlabel("# of samples")
    plt.grid(True)
    plt.show()
def demo():
    """Rank the pixels of the scikit-learn digits images with RFE and plot them.

    Fits RFE around a linear SVM on the flattened digit images, prints the
    score on that same data, and shows the per-pixel ranking as an image.
    """
    digits = load_digits()
    n_images = len(digits.images)
    flat_pixels = digits.images.reshape((n_images, -1))
    targets = digits.target

    # Build the selector around a linear SVM and rank every pixel.
    selector = RFE(
        estimator=SVC(kernel="linear", C=1),
        n_features_to_select=None,
        step=1,
    )
    selector.fit(flat_pixels, targets)
    image_shape = digits.images[0].shape
    pixel_ranks = selector.ranking_.reshape(image_shape)
    print(selector.score(flat_pixels, targets))

    # Show the ranking laid out in the shape of a single image.
    plt.matshow(pixel_ranks, cmap=plt.cm.Blues)
    plt.colorbar()
    plt.title("Ranking of pixels with RFE")
    plt.show()
if __name__ == "__main__":
    print(" ".join(["Start.", time.ctime()]))
    # GSM_info is read as a module-level global by age_beta_D, so it must be
    # loaded before any experiment runs.
    GSM_info = read_data("GSM_info")
    # Disabled entry points, kept as a selector for which experiment to run:
    #demo()
    #svm_rfe()
    #timing()
    acc()
    print(" ".join(["End.", time.ctime()]))