-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathclustering_service.py
151 lines (123 loc) · 5.78 KB
/
clustering_service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Module metadata: author name, module version and contact e-mail address.
__author__ = 'Michael Kern'
__version__ = '0.0.1'
__email__ = '[email protected]'
import numpy as np
from clustering_hierarchical import getClusters
########################################################################################################################
def loadData(datasetID):
    """
    Loads the genomic data with given identifier datasetID.
    :param datasetID: identifier of the Caleydo dataset to load
    :return: numpy array of the genomic data
    :raises Exception: if the dataset cannot be turned into a numpy array; the message names the
                       dataset and the underlying error
    """
    import caleydo_server.dataset as dt
    # obtain Caleydo dataset from ID
    dataset = dt.get(datasetID)
    # somewhat of a hack: materialise the dataset's numpy view as a list first and wrap
    # the list in a fresh numpy array
    try:
        arr = np.array(list(dataset.asnumpy()))
    except Exception as err:
        # Keep raising the generic Exception type that callers may rely on, but do not
        # discard the cause, and let KeyboardInterrupt / SystemExit propagate untouched
        # (the former bare 'except:' swallowed those as well).
        raise Exception('could not load dataset %r as numpy array: %s' % (datasetID, err))
    return arr
########################################################################################################################
def loadPlugin(pluginID, *args, **kwargs):
    """
    Loads the clustering plugin with given arguments.
    :param pluginID: identifier of plugin
    :param *args: positional arguments forwarded to the plugin's factory
    :param **kwargs: keyword arguments forwarded to the plugin's factory
    :return: object created by the factory of the matching plugin
    :raises NotImplementedError: if none of the listed 'clustering' plugins has the given identifier
    """
    import caleydo_server.plugin
    # obtain all plugins listed under the 'clustering' extension type
    plugins = caleydo_server.plugin.list('clustering')
    # choose plugin with given ID
    for plugin in plugins:
        if plugin.id == pluginID:
            # load the implementation of the plugin and build it with the caller's arguments
            return plugin.load().factory(*args, **kwargs)
    # name the missing plugin so that a wrong or uninstalled ID is easy to diagnose
    raise NotImplementedError('no clustering plugin with id %r found' % (pluginID,))
########################################################################################################################
def runKMeans(data, k, initMethod, distance):
    """
    Executes k-means clustering through the 'caleydo-clustering-kmeans' plugin.
    :param data: observation matrix
    :param k: number of clusters
    :param initMethod: initialization method
    :param distance: forwarded unchanged to the plugin (presumably the distance metric -- confirm
                     against the plugin)
    :return: dict with the keys 'centroids' and 'clusterLabels'
    """
    # build the plugin instance; all arguments are handed over as they are
    kmeansPlugin = loadPlugin('caleydo-clustering-kmeans', data, k, initMethod, distance)
    # calling the instance yields three values; the middle one is not part of the response
    clusterCenters, _, labelsPerCluster = kmeansPlugin()
    return dict(centroids=clusterCenters, clusterLabels=labelsPerCluster)
########################################################################################################################
def runHierarchical(data, k, method, distance):
    """
    Executes hierarchical clustering through the 'caleydo-clustering-hierarchical' plugin and
    obtains k clusters from the resulting tree.
    :param data: observation matrix
    :param k: number of clusters to obtain from the tree
    :param method: linkage method
    :param distance: forwarded unchanged to the plugin (presumably the distance metric -- confirm
                     against the plugin)
    :return: dict with the keys 'centroids', 'clusterLabels' and 'dendrogram' (json of the tree)
    """
    hierarchicalPlugin = loadPlugin('caleydo-clustering-hierarchical', data, method, distance)
    # run the plugin; its return value is ignored, the result is read from the 'tree' attribute
    hierarchicalPlugin()
    # obtain k clusters from the tree (the third returned value is not needed here)
    clusterCenters, labelsPerCluster, _ = getClusters(k, data, hierarchicalPlugin.tree, False)
    response = {'centroids': clusterCenters, 'clusterLabels': labelsPerCluster}
    response['dendrogram'] = hierarchicalPlugin.tree.json()
    return response
# print('\t-> creating dendrogram tree...')
# tree = Hierarchical.generateTree(linkage)
# print('\t-> creating json string ...')
# dendrogram = tree.jsonify()
# print('\t-> finished.')
# return {'dendrogram': dendrogram} --> if needed later
########################################################################################################################
def runAffinityPropagation(data, damping, factor, preference, distance):
    """
    Executes affinity propagation through the 'caleydo-clustering-affinity' plugin.
    :param data: loaded dataset
    :param damping: damping value
    :param factor: factor forwarded unchanged to the plugin
    :param preference: preference method
    :param distance: forwarded unchanged to the plugin (presumably the distance metric -- confirm
                     against the plugin)
    :return: dict with the keys 'centroids' and 'clusterLabels'
    """
    # build the plugin instance; all arguments are handed over as they are
    affinityPlugin = loadPlugin('caleydo-clustering-affinity', data, damping, factor, preference, distance)
    # calling the instance yields three values; the middle one is not part of the response
    clusterCenters, _, labelsPerCluster = affinityPlugin()
    return dict(centroids=clusterCenters, clusterLabels=labelsPerCluster)
########################################################################################################################
def runFuzzy(data, numClusters, m, threshold, distance):
    """
    Executes the 'caleydo-clustering-fuzzy' plugin on the given data.
    :param data: forwarded unchanged to the plugin
    :param numClusters: forwarded unchanged to the plugin (presumably the number of clusters)
    :param m: forwarded unchanged to the plugin (meaning not visible here -- confirm against the plugin)
    :param threshold: forwarded unchanged to the plugin
    :param distance: forwarded unchanged to the plugin
    :return: dict with the keys 'centroids', 'clusterLabels', 'partitionMatrix' and 'maxProbability'
    """
    fuzzyPlugin = loadPlugin('caleydo-clustering-fuzzy', data, numClusters, m, threshold, distance)
    # calling the instance yields exactly four values, reported one-to-one in the response
    clusterCenters, labelsPerCluster, membership, highestProb = fuzzyPlugin()
    response = {'centroids': clusterCenters, 'clusterLabels': labelsPerCluster}
    response['partitionMatrix'] = membership
    response['maxProbability'] = highestProb
    return response
########################################################################################################################
def getClusterDistances(data, labels, metric, externLabels = None, sorted = True):
    """
    Computes the cluster distances in the given data among certain rows (labels) and, when
    externLabels is given, additionally the distances with respect to those extern labels.
    :param data: genomic data
    :param labels: indices of rows
    :param metric: distance metric
    :param externLabels: optional extern labels; None skips the extern distance computation
    :param sorted: flag forwarded to computeClusterInternDistances (presumably toggles the
                   ascending sort of the result -- confirm in clustering_util)
    :return: dict with the keys 'labels' and 'distances', plus 'externDistances' if externLabels
             is not None
    """
    from clustering_util import computeClusterInternDistances, computeClusterExternDistances
    innerLabels, innerDists = computeClusterInternDistances(data, labels, sorted, metric)
    response = {'labels': innerLabels, 'distances': innerDists}
    # nothing more to do without extern labels
    if externLabels is None:
        return response
    # extern distances are computed for the labels returned by the intern computation
    response['externDistances'] = computeClusterExternDistances(data, innerLabels, externLabels, metric)
    return response
########################################################################################################################
def getClustersFromDendrogram(data, dendrogram, numClusters):
    """
    Obtains numClusters clusters from an existing dendrogram by delegating to getClusters.
    :param data: forwarded unchanged to getClusters
    :param dendrogram: forwarded unchanged to getClusters
    :param numClusters: number of clusters, passed as first argument to getClusters
    :return: dict with the keys 'centroids' and 'clusterLabels'
    """
    clusterResult = getClusters(numClusters, data, dendrogram)
    # getClusters yields three values; the last one is not part of the response
    clusterCenters, labelsPerCluster, _ = clusterResult
    return dict(centroids=clusterCenters, clusterLabels=labelsPerCluster)