-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathbisecting_kmeans.py
40 lines (32 loc) · 1.02 KB
/
bisecting_kmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import numpy as np
import k_means
class BisectingKMeans:
    '''
    Bisecting k-means clustering.

    Starts with a single cluster holding every sample and repeatedly splits
    the cluster with the largest sum of squared errors (SSE) in two using
    ``k_means.KMeans`` until ``n_clusters`` clusters exist.
    '''

    @staticmethod
    def _sse(points):
        '''Return the sum of squared distances from ``points`` to their centroid.'''
        # A cluster with fewer than two samples cannot be split. Scoring it 0
        # also avoids the NaN mean of an empty cluster, which np.argmax would
        # otherwise select.
        if points.shape[0] < 2:
            return 0.0
        return float(np.sum((points - points.mean(axis=0)) ** 2))

    def fit(self, X, n_clusters):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        n_clusters : The number of clusters

        Returns
        -------
        y : shape (n_samples,)
            Predicted cluster label per sample.

        Raises
        ------
        ValueError
            If ``n_clusters`` is smaller than 1 or larger than ``n_samples``.
        '''
        X = np.asarray(X)
        n_samples = X.shape[0]
        if not 1 <= n_clusters <= n_samples:
            raise ValueError(
                "n_clusters must be between 1 and n_samples (%d), got %r"
                % (n_samples, n_clusters)
            )

        # Every cluster is an array of row indices into the ORIGINAL X, so the
        # indices stay valid no matter which sub-cluster is split next.
        clusters = [np.arange(n_samples)]
        while len(clusters) < n_clusters:
            sse = [self._sse(X[members]) for members in clusters]
            worst = int(np.argmax(sse))
            if clusters[worst].size < 2:
                # No remaining cluster can be split any further.
                break
            parent = clusters.pop(worst)

            # NOTE(review): assumes KMeans.fit(data, k, max_iter) returns one
            # 0/1 label per row of the data it was given - confirm against
            # the k_means module.
            label = np.asarray(k_means.KMeans().fit(X[parent], 2, 100))

            # Labels are positions within X[parent]; map them back to rows of X.
            clusters.append(parent[np.flatnonzero(label == 0)])
            clusters.append(parent[np.flatnonzero(label == 1)])

        y = np.zeros(n_samples)
        for i, members in enumerate(clusters):
            y[members] = i
        return y