Skip to content

Commit 8bed0f4

Browse files
committed
added adjustable distances for kmeans and fuzzy
1 parent 9854777 commit 8bed0f4

5 files changed

+24
-21
lines changed

clustering_api.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,19 @@
1515

1616
########################################################################################################################
1717

18-
@app.route('/kmeans/<k>/<initMethod>/<datasetID>')
19-
def kmeansClustering(k, initMethod, datasetID):
18+
@app.route('/kmeans/<k>/<initMethod>/<distance>/<datasetID>')
19+
def kmeansClustering(k, initMethod, distance, datasetID):
2020
"""
2121
Access k-means clustering plugin.
2222
:param k: number of clusters
2323
:param initMethod: initialization method for initial clusters
24+
:param distance: distance measurement
2425
:param datasetID: identifier of data set
2526
:return: jsonified output
2627
"""
2728
try:
2829
data = loadData(datasetID)
29-
response = runKMeans(data, int(k), initMethod)
30+
response = runKMeans(data, int(k), initMethod, distance)
3031
return flask.jsonify(response)
3132
except:
3233
return flask.jsonify({})
@@ -72,18 +73,19 @@ def affinityPropagationClustering(damping, factor, preference, distance, dataset
7273

7374
########################################################################################################################
7475

75-
@app.route('/fuzzy/<numClusters>/<m>/<threshold>/<datasetID>')
76-
def fuzzyClustering(numClusters, m, threshold, datasetID):
76+
@app.route('/fuzzy/<numClusters>/<m>/<threshold>/<distance>/<datasetID>')
77+
def fuzzyClustering(numClusters, m, threshold, distance, datasetID):
7778
"""
7879
:param numClusters:
7980
:param m:
8081
:param threshold:
82+
:param distance:
8183
:param datasetID:
8284
:return:
8385
"""
8486
try:
8587
data = loadData(datasetID)
86-
response = runFuzzy(data, int(numClusters), float(m), float(threshold))
88+
response = runFuzzy(data, int(numClusters), float(m), float(threshold), distance)
8789
return flask.jsonify(response)
8890
except:
8991
return flask.jsonify({})

clustering_fuzzy.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -205,12 +205,12 @@ def _plugin_initialize():
205205

206206
# ----------------------------------------------------------------------------------------------------------------------
207207

208-
def create(data, numCluster, m, threshold):
208+
def create(data, numCluster, m, threshold, distance):
209209
"""
210210
By convention, this module contains a factory called create that returns the extension implementation.
211211
:return:
212212
"""
213-
return Fuzzy(data, numCluster, m, threshold)
213+
return Fuzzy(data, numCluster, m, threshold, distance)
214214

215215
########################################################################################################################
216216

clustering_kmeans.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,15 @@ class KMeans:
2828
Implementation detail: <https://en.wikipedia.org/wiki/K-means_clustering>
2929
"""
3030

31-
def __init__(self, obs, k, initMode='kmeans++', iters=1000, compare='sqeuclidean'):
31+
def __init__(self, obs, k, initMode='kmeans++', distance='sqeuclidean', iters=1000):
3232
"""
3333
Initializes the algorithm with observation, number of k clusters, the initial method, the distance measurement and
3434
the maximum number of iterations.
3535
Initialization method of random cluster choice can be: forgy, uniform, random, plusplus
3636
:param obs: genomic data / matrix
3737
:param k: number of clusters
3838
:param initMode: initialization method
39+
:param distance: distance measurement
3940
:param iters: number of maximum iterations
4041
:return:
4142
"""
@@ -59,7 +60,7 @@ def __init__(self, obs, k, initMode='kmeans++', iters=1000, compare='sqeuclidean
5960
# initialization method
6061
self.__initMode = initMode
6162
# distance measurement used to compare observations with cluster means
62-
self.__compare = compare
63+
self.__distance = distance
6364

6465
# ------------------------------------------------------------------------------------------------------------------
6566

@@ -150,7 +151,7 @@ def __plusplusMethod(self):
150151
probs.fill(maxValue)
151152
# compute new probabilities, choose min of all distances
152153
for j in range(0, i):
153-
dists = similarityMeasurement(self.__obs, self.__clusterMeans[j], self.__compare)
154+
dists = similarityMeasurement(self.__obs, self.__clusterMeans[j], self.__distance)
154155
# collect minimum distances (per the configured distance measurement) to cluster centroids
155156
probs = np.minimum(probs, dists)
156157

@@ -210,7 +211,7 @@ def __assignment(self):
210211
value = self.__obs[i]
211212

212213
# compute distances (per the configured distance measurement) to each mean
213-
dists = similarityMeasurement(self.__clusterMeans, value, self.__compare)
214+
dists = similarityMeasurement(self.__clusterMeans, value, self.__distance)
214215
# nearest cluster
215216
nearestID = np.argmin(dists)
216217

@@ -347,12 +348,12 @@ def _plugin_initialize():
347348

348349
# ----------------------------------------------------------------------------------------------------------------------
349350

350-
def create(data, k, initMethod):
351+
def create(data, k, initMethod, distance):
351352
"""
352353
By convention, this module contains a factory called create that returns the extension implementation.
353354
:return:
354355
"""
355-
return KMeans(data, k, initMethod)
356+
return KMeans(data, k, initMethod, distance)
356357

357358
########################################################################################################################
358359

@@ -377,7 +378,7 @@ def create(data, k, initMethod):
377378

378379
for i in range(10):
379380
s1 = timer()
380-
kMeansPlus = KMeans(data, k, 'kmeans++', 10)
381+
kMeansPlus = KMeans(data, k, 'kmeans++', 'sqeuclidean', 10)
381382
result1 = kMeansPlus.run()
382383
#print(result)
383384
e1 = timer()

clustering_service.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def loadPlugin(pluginID, *args, **kwargs):
4848

4949
########################################################################################################################
5050

51-
def runKMeans(data, k, initMethod):
51+
def runKMeans(data, k, initMethod, distance):
5252
"""
5353
Runs the k-Means clustering algorithm given the loaded data set, the number of clusters k and the initialization
5454
method, using the given distance measurement.
@@ -57,7 +57,7 @@ def runKMeans(data, k, initMethod):
5757
:param initMethod: initialization method for initial clusters
5858
:return: result of k-means
5959
"""
60-
KMeans = loadPlugin('caleydo-clustering-kmeans', data, k, initMethod)
60+
KMeans = loadPlugin('caleydo-clustering-kmeans', data, k, initMethod, distance)
6161
# and run the kmeans extension
6262
centroids, labels, clusterLabels = KMeans()
6363
# clusterLabels, clusterDists = KMeans.getDistsPerCentroid()
@@ -108,8 +108,8 @@ def runAffinityPropagation(data, damping, factor, preference, distance):
108108

109109
########################################################################################################################
110110

111-
def runFuzzy(data, numClusters, m, threshold):
112-
Fuzzy = loadPlugin('caleydo-clustering-fuzzy', data, numClusters, m, threshold)
111+
def runFuzzy(data, numClusters, m, threshold, distance):
112+
Fuzzy = loadPlugin('caleydo-clustering-fuzzy', data, numClusters, m, threshold, distance)
113113

114114
centroids, clusterLabels, partitionMatrix, maxProb = Fuzzy()
115115

clustering_util.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def similarityMeasurement(matrix, vector, method='euclidean'):
247247
spatialMethods = ['cityblock', 'chebyshev', 'canberra', 'correlation', 'hamming', 'mahalanobis',]
248248

249249
if method in spatialMethods:
250-
return cdist(matrix, np.atleast_2d(vector), method).flatten()
250+
return np.nan_to_num(cdist(matrix, np.atleast_2d(vector), method).flatten())
251251

252252
corrMethods = ['spearman', 'pearson', 'kendall']
253253

@@ -379,7 +379,7 @@ def similarityMeasurementMatrix(matrix, method):
379379
spatialMethods = ['cityblock', 'chebyshev', 'canberra', 'correlation', 'hamming', 'mahalanobis']
380380

381381
if method in spatialMethods:
382-
return squareform(pdist(matrix, method))
382+
return squareform(np.nan_to_num(pdist(matrix, method)))
383383

384384
corrMethods = ['spearman', 'pearson', 'kendall']
385385

0 commit comments

Comments
 (0)