diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..c2cdfb8
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,21 @@
+# EditorConfig helps developers define and maintain consistent
+# coding styles between different editors and IDEs
+# editorconfig.org
+
+root = true
+
+
+[*]
+
+# Change these settings to your own preference
+indent_style = space
+indent_size = 2
+
+# We recommend you keep these unchanged
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..833eaab
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,126 @@
+# These settings are for any web project
+
+# Handle line endings automatically for files detected as text
+# and leave all files detected as binary untouched.
+* text=auto
+
+#
+# The above will handle all files NOT found below
+#
+
+#
+## These files are text and should be normalized (Convert crlf => lf)
+#
+
+# source code
+*.php text
+*.css text
+*.sass text
+*.scss text
+*.less text
+*.styl text
+*.js text
+*.ts text
+*.coffee text
+*.json text
+*.htm text
+*.html text
+*.xml text
+*.txt text
+*.ini text
+*.inc text
+*.pl text
+*.rb text
+*.py text
+*.scm text
+*.sql text
+*.sh text eol=lf
+*.bat text
+
+# templates
+*.hbt text
+*.jade text
+*.haml text
+*.hbs text
+*.dot text
+*.tmpl text
+*.phtml text
+
+# server config
+.htaccess text
+
+# git config
+.gitattributes text
+.gitignore text
+
+# code analysis config
+.jshintrc text
+.jscsrc text
+.jshintignore text
+.csslintrc text
+
+# misc config
+*.yaml text
+*.yml text
+.editorconfig text
+
+# build config
+*.npmignore text
+*.bowerrc text
+
+# Heroku
+Procfile text
+.slugignore text
+
+# Documentation
+*.md text
+LICENSE text
+AUTHORS text
+
+
+#
+## These files are binary and should be left untouched
+#
+
+# (binary is a macro for -text -diff)
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.ico binary
+*.mov binary
+*.mp4 binary
+*.mp3 binary
+*.flv binary
+*.fla binary
+*.swf binary
+*.gz binary
+*.zip binary
+*.7z binary
+*.ttf binary
+*.pyc binary
+*.pdf binary
+
+# Source files
+# ============
+*.pxd text
+*.py text
+*.py3 text
+*.pyw text
+*.pyx text
+*.sh text eol=lf
+*.json text
+
+# Binary files
+# ============
+*.db binary
+*.p binary
+*.pkl binary
+*.pyc binary
+*.pyd binary
+*.pyo binary
+
+# Note: .db, .p, and .pkl files are associated
+# with the python modules ``pickle``, ``dbm.*``,
+# ``shelve``, ``marshal``, ``anydbm``, & ``bsddb``
+# (among others).
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 0d20b64..a00b0c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,19 @@
+# general things to ignore
+/.tscache
+/.idea
+/build/
+/dist/
+*.egg-info/
+*.egg
+*.py[cod]
+__pycache__/
+*.so
+*~
+*.log
+*.pot
*.pyc
+*.swp
+*.lock
+# due to using tox and pytest
+.tox
+.cache
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..80a94cb
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,27 @@
+language: python
+sudo: required
+env:
+ - TOXENV=py27
+ - TOXENV=py34
+
+install:
+ - (!(test -f docker_packages.txt) || (cat docker_packages.txt | xargs sudo apt-get install -y))
+ - pip install -r requirements_dev.txt
+ - pip install -r requirements.txt
+
+script: npm run dist
+
+deploy:
+ provider: releases
+ api_key:
+ secure: TK9/P34Bi3WuppiDrBCwVcn41yCBwmILaU8hXTBzUPbT7TbeFIwsC6/4CtH85Z+ZrUve4S5pTmWRNf2dQDxWw3uYu7+bJuemV2J1LHG76mognj+TNEiYxfLQUt3Gql4W7C7FcI4Rlx5/uMN9wY1wro8TWUBMwT6jjSrUWIvK3GXoojd5bHvJx07XpjWl9wCon4D0ruZiFoM2mdeP23lbc2GckETi32oEKswnQXxkMACmxbPzoWbvkxH4aK8Bt2Rj2sl2TbPhVkN6DAkHGkGAvLI+2/aRfG27+oo3OKsaDjbuGABct8TfZccJ970CbQ8kbnCjYxstvqkg1JWjF0W67sX/flBZZOEUA5l0OLWo6HqMGMxm7/lEQhIdPMsRmvXL+HVOxkMrB2dda58QzxVwiZp+rRqUaeabPZp8Kl5xodGrVxsBvxe6zAbJ5jCtCSumG6+kLyKI00/kYlghqQNrgUw0ZsYJlQ34h3lo/24QpaeyDpQoCkGWQgtgqiXGpeKSu7bCnOqIqAy3nbT9Utwj7K8gIasTG5idosEAz/THMampNbGDuyxxc340sYGNMg9Bhm1g2ILWRdtV470p5hwBtIDTKi3/PAizEO26+Wh0zI47Sg3ao57avcbCsTmzbZUeA5J4bojmchhJCHX8su9cSCGh/2fJA/1eBIgEvOQ8LNE=
+ file_glob: true
+ file: dist/phovea_clustering*.egg
+ on:
+ tags: true
+
+notifications:
+ slack:
+ secure: E8/1UIdHSczUbN+6i6gd1d5LM4vmLdwLQ30tpyjvnM0wvfDce76oPxLJAy240WJ5ybXRZUtNrttpVpt4tEXCy8aLFCmxD7s77rVloH+q1J8R/ptTFWZGhFGEujk1awEmVbzcWxJkV9/JENQaeGBKxwv8/EQwWwEkAb7p/+AJb9owmH88b3wUZUGHBWtbMiyyaF4Rm1Wg1stJB8Z1Ga7PRF4cqufTgcDdsCPVv9gAY+VxOIGqX/Vfuc9UWpUH8vq8lHUE7Inn5QS78kuFfSgLWga3H6Mu/Gko1XNlWk0QWWQBUvEZ6ZC6Wuo68KzvUjJHDTnx8WyfHue2JNHIslcX+eJq2WHLeEgM24VeNkILCGo/H/60NGHiSjrIv/Y9h6bQ9FDjo6TUyE4nbdPYN1RN9FQ5UbI9Y4Gi753H9mqnHWlEywBOzHxdZCAuz9Wh03CCF/blsvJ+Obbyo6Jrfe+g44jyi9kQdBNQ78qG6v4EXws8FiYao6x3PpgIwFix42Cpr+soAh5FpA3C1zHSAyZZpXF65/lrDl5yPNofK7Wy0B9bw+0I6Z/u7ZKFNVZXvYPGYvtUVcsALGBdmYc61+LCta36Po0KZseWVAlJj6QnOJDYzv0wvV/zsuf9A5KpYFGiqV9Q7zmtiO5FYF5sBy+lE7O9tHVO4O18IRndhRQgxhs=
+ on_success: change
+ on_failure: always
diff --git a/.yo-rc.json b/.yo-rc.json
new file mode 100644
index 0000000..308c8c3
--- /dev/null
+++ b/.yo-rc.json
@@ -0,0 +1,52 @@
+{
+ "generator-phovea": {
+ "type": "slib",
+ "name": "phovea_clustering",
+ "author": "The Caleydo Team",
+ "githubAccount": "phovea",
+ "modules": [
+ "phovea_server"
+ ],
+ "extensions": [],
+ "sextensions": [
+ {
+ "type": "clustering",
+ "id": "caleydo-clustering-kmeans",
+ "module": "clustering_kmeans",
+ "extras": {}
+ },
+ {
+ "type": "clustering",
+ "id": "caleydo-clustering-hierarchical",
+ "module": "clustering_hierarchical",
+ "extras": {}
+ },
+ {
+ "type": "clustering",
+ "id": "caleydo-clustering-affinity",
+ "module": "clustering_affinity",
+ "extras": {}
+ },
+ {
+ "type": "clustering",
+ "id": "caleydo-clustering-fuzzy",
+ "module": "clustering_fuzzy",
+ "extras": {}
+ },
+ {
+ "type": "namespace",
+ "id": "caleydo-clustering",
+ "module": "clustering_api",
+ "extras": {
+ "namespace": "/api/clustering"
+ }
+ }
+ ],
+ "libraries": [],
+ "unknown": {
+ "requirements": [],
+ "dockerPackages": []
+ },
+ "today": "Fri, 23 Dec 2016 03:02:24 GMT"
+ }
+}
\ No newline at end of file
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..bdb156f
--- /dev/null
+++ b/ISSUE_TEMPLATE.md
@@ -0,0 +1,17 @@
+* Release number or git hash:
+* Web browser version and OS:
+* Environment (local or deployed):
+
+### Steps to reproduce
+
+1.
+2.
+
+### Observed behavior
+* Any unexpected output or action (or lack of expected output or action)
+* Web browser console errors (including tracebacks)
+* Server errors (relevant messages and tracebacks)
+* Static or animated images showing the UI behavior
+
+### Expected behavior
+*
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..fd6f461
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+Copyright (c) 2016, The Caleydo Team
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+ Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+ Neither the name of the Caleydo Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
index 1a3242f..97caba2 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,44 @@
-Caleydo Clustering
-==================
-
-This repository is a server-side plugin for Caleydo to apply clustering algorithms on arbitrary matrices.
-
-Supported Algorithms:
---------------------
-- K-Means (Init methods: Forgy, Uniform, Random, Kmeans++)
-- Hierarchical Clustering (Single, Complete, Weighted, Median)
-- Affinity Propagation
-- Fuzzy Clustering
-- Various Distance Measurements (Euclidean, Chebyshev, Manhattan, Pearson, Spearman, ...)
-
-General Information:
--------------------
-- All these algorithms expect the input as a matrix (or 2D numpy array)
-- It is assumed that the matrix is dense (no sparse matrix support right now)
-- NaN values will be converted to zero
-
-Future Work:
+phovea_clustering [![Phovea][phovea-image]][phovea-url] [![NPM version][npm-image]][npm-url] [![Build Status][travis-image]][travis-url] [![Dependency Status][daviddm-image]][daviddm-url]
+=====================
+
+
+
+Installation
------------
-- Improvement of algorithms
-- Combination of several clustering results
+
+```
+git clone https://github.com/phovea/phovea_clustering.git
+cd phovea_clustering
+npm install
+```
+
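+Depending on your setup you may also want to resolve the server-side Python dependencies, as the CI build does:
+
+```
+pip install -r requirements.txt
+```
+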
+Testing
+-------
+
+```
+npm test
+```
+
+Building
+--------
+
+```
+npm run build
+```
+
+
+
+***
+
+
+This repository is part of **[Phovea](http://phovea.caleydo.org/)**, a platform for developing web-based visualization applications. For tutorials, API docs, and more information about the build and deployment process, see the [documentation page](http://caleydo.org/documentation/).
+
+
+[phovea-image]: https://img.shields.io/badge/Phovea-Server%20Plugin-10ACDF.svg
+[phovea-url]: https://phovea.caleydo.org
+[npm-image]: https://badge.fury.io/js/phovea_clustering.svg
+[npm-url]: https://npmjs.org/package/phovea_clustering
+[travis-image]: https://travis-ci.org/phovea/phovea_clustering.svg?branch=master
+[travis-url]: https://travis-ci.org/phovea/phovea_clustering
+[daviddm-image]: https://david-dm.org/phovea/phovea_clustering.svg?theme=shields.io
+[daviddm-url]: https://david-dm.org/phovea/phovea_clustering
diff --git a/__init__.py b/__init__.py
deleted file mode 100644
index f425e42..0000000
--- a/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-__author__ = 'Michael Kern'
-__version__ = '1.0.0'
-__email__ = 'kernm@in.tum.de'
diff --git a/clustering_affinity.py b/clustering_affinity.py
deleted file mode 100644
index 80a4963..0000000
--- a/clustering_affinity.py
+++ /dev/null
@@ -1,304 +0,0 @@
-__author__ = 'Michael Kern'
-__version__ = '0.0.1'
-__email__ = 'kernm@in.tum.de'
-
-########################################################################################################################
-# libraries
-
-# module to load own configurations
-import caleydo_server.config
-# request config if needed for the future
-config = caleydo_server.config.view('caleydo-clustering')
-
-import numpy as np
-from clustering_util import similarityMeasurementMatrix
-from timeit import default_timer as timer
-
-########################################################################################################################
-
-class AffinityPropagation:
- """
- This is an implementation of the affinity propagation algorithm to cluster genomic data / matrices.
- Implementation details: .
- Matlab implementation:
- Returns the centroids and labels / stratification of each row belonging to one cluster.
- """
-
- def __init__(self, obs, damping=0.5, factor=1.0, prefMethod='minimum', distance='euclidean'):
- """
- Initializes the algorithm.
- :param obs: genomic data / matrix
- :param damping: controls update process to dampen oscillations
- :param factor: controls the preference value (influences number of clusters)
- :param prefMethod: all points are chosen equally with a given preference (median or minimum of similarity matrix)
- :return:
- """
- self.__n = np.shape(obs)[0]
- # observations, can be 1D array or 2D matrix with genes as rows and conditions as columns
- # remove all NaNs in data
- self.__obs = np.nan_to_num(obs)
- # variables influencing output of clustering algorithm
- self.__damping = damping
- self.__factor = factor
- self.__prefMethod = prefMethod
-
- # similarity matrix
- self.__S = np.zeros((self.__n, self.__n))
- # availability matrix
- self.__A = np.zeros((self.__n, self.__n))
- # responsibility matrix
- self.__R = np.zeros((self.__n, self.__n))
-
- self.minValue = np.finfo(np.float).min
-
- # self.__mx1 = np.full(self.__n, self.minValue)
- # self.__mx2 = np.full(self.__n, self.minValue)
-
- self.__idx = np.zeros(self.__n)
-
- # set similarity computation
- self.__distance = distance
-
- self.__computeSimilarity()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __call__(self):
- """
- Caller function for server API.
- """
- return self.run()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __computeSimilarity(self):
- """
- Compute the similarity matrix from the original observation matrix and set preference of each element.
- :return: Similarity matrix
- """
- # compute the similarity matrix as negative distances, e.g. -|| xi - xj ||**2 for squared euclidean
- self.__S = -similarityMeasurementMatrix(self.__obs, self.__distance)
-
- # determine the preferences S(k,k) to control the output of clusters
- pref = 0
- # could be median or minimum
- if self.__prefMethod == 'median':
- pref = float(np.median(self.__S)) * self.__factor
- elif self.__prefMethod == 'minimum':
- pref = np.min(self.__S) * self.__factor
- else:
- raise AttributeError
-
- np.fill_diagonal(self.__S, pref)
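-
- # note: the shared preference controls the number of clusters; smaller
- # (more negative) values yield fewer clusters, larger values yield more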
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def run(self):
- """
- Runs the algorithm of affinity propagation. Conducts at least 100 iterations and checks if the outcome of
- current exemplars/clusters has converged. If not, the algorithm will continue until convergence is found
- or the maximum number of iterations (200) is reached.
- :return:
- """
- maxIter = 200
- maxConvIter = 100
-
- # sum all decisions for exemplars per round
- decisionSum = np.zeros(self.__n)
- # collect decisions for one exemplar per iteration round
- decisionIter = np.zeros((maxConvIter, self.__n))
- # counter for decisions (= consider data element as exemplar in each algorithm iteration)
- decisionCounter = maxConvIter
- # indicates if algorithm has converged
- isConverged = False
-
- centroids = []
- it = 0
- clusterI = []
-
- # helpful variables (that do not need recomputation)
- indexDiag = np.arange(self.__n)
- indicesDiag = np.diag_indices_from(self.__R)
- newA = np.zeros((self.__n, self.__n))
- newR = np.zeros((self.__n, self.__n))
-
- for it in range(1, maxIter + 1):
-
- # ----------------------------------------------------------------------------------------------------------
-
- # compute responsibility matrix
- AS = self.__A + self.__S
-
- maxY = np.max(AS, axis=1)
- indexY = np.argmax(AS, axis=1)
-
- # mask the row maxima in the AS matrix (set them to the smallest float)
- AS[indexDiag, indexY] = self.minValue
-
- # look for second maxima
- maxY2 = np.max(AS, axis=1)
-
- # perform responsibility update
- for ii in range(self.__n):
- # s(i, k) - max({ a(i, k') + s(i, k') })
- newR[ii] = self.__S[ii] - maxY[ii]
-
- # subtract second maximum from row -> column entry with maximum value
- newR[indexDiag, indexY] = self.__S[indexDiag, indexY] - maxY2[indexDiag]
-
- # dampen values
- # self.__R = self.__damping * self.__R + (1 - self.__damping) * newR
- self.__R *= self.__damping
- self.__R += (1 - self.__damping) * newR
-
- # ----------------------------------------------------------------------------------------------------------
-
- # compute availability matrix
- # cut out negative elements
- # TODO! slow because of copy operation
- Rp = np.maximum(self.__R, 0)
-
- # write back all diagonal elements as self-representatives
- Rp[indicesDiag] = self.__R[indicesDiag]
- sumCols = np.sum(Rp, axis=0)
-
- # apply availability update
- newA[:,] = sumCols
- newA -= Rp
- # for ii in range(self.__n):
- # # r(k, k) + sum(max(0, r(i',k))
- # newA[:, ii] = sumCols[ii] - Rp[:, ii]
-
- diagA = np.diag(newA)
- # take minimum of all the values in A, cut out all values above zero
- # newA = np.minimum(newA, 0)
- newA[newA > 0] = 0
- newA[indicesDiag] = diagA[indexDiag]
-
- # dampen values
- # self.__A = self.__damping * self.__A + (1 - self.__damping) * newA
- self.__A *= self.__damping
- self.__A += (1 - self.__damping) * newA
-
- # ----------------------------------------------------------------------------------------------------------
-
- # find exemplars for new clusters
- # old version which is slower
- # E = self.__R + self.__A
- # diagE = np.diag(E)
-
- # take the diagonal elements of the matrix E = R + A
- diagE = np.diag(self.__R) + np.diag(self.__A)
-
- # all elements > 0 are considered appropriate exemplars for the dataset
- clusterI = np.argwhere(diagE > 0).flatten()
-
- # count the number of clusters
- numClusters = len(clusterI)
-
- # ----------------------------------------------------------------------------------------------------------
-
- decisionCounter += 1
- if decisionCounter >= maxConvIter:
- decisionCounter = 0
-
- # subtract outcome of previous iteration (< 100) from the total sum of the decisions
- decisionSum -= decisionIter[decisionCounter]
-
- decisionIter[decisionCounter].fill(0)
- decisionIter[decisionCounter][clusterI] = 1
-
- # compute sum of decisions for each element being an exemplar
- decisionSum += decisionIter[decisionCounter]
-
- # check for convergence
- if it >= maxConvIter or it >= maxIter:
- isConverged = True
-
- for ii in range(self.__n):
- # if the element is considered to be an exemplar in at least one iteration
- # and the total of decisions in the last 100 iterations is not 100 --> no convergence
- if decisionSum[ii] != 0 and decisionSum[ii] != maxConvIter:
- isConverged = False
- break
-
- if isConverged and numClusters > 0:
- break
-
- # --------------------------------------------------------------------------------------------------------------
-
- # obtain centroids
- centroids = self.__obs[clusterI]
-
- # find maximum columns in AS matrix to assign elements to clusters / exemplars
- # fill A with negative values
- self.__A.fill(self.minValue)
- # set values of clusters to zero (as we only want to regard these values)
- self.__A[:, clusterI] = 0.0
- # set the diagonal of the similarity matrix to zero (remove preferences)
- np.fill_diagonal(self.__S, 0.0)
-
- # compute AS matrix
- AS = self.__A + self.__S
- # since values are < 0, look for the maximum number in each row and return its column index
- self.__idx = np.argmax(AS, axis=1)
-
- clusterI = clusterI.tolist()
- clusterLabels = [[] for _ in range(numClusters)]
-
- # create labels per cluster
- for ii in range(self.__n):
- index = clusterI.index(self.__idx[ii])
- self.__idx[ii] = index
- clusterLabels[index].append(ii)
-
- # return sorted cluster labels (that's why we call compute cluster distances, might be redundant)
- # for ii in range(numClusters):
- # clusterLabels[ii], _ = computeClusterInternDistances(self.__obs, clusterLabels[ii])
-
- # if isConverged:
- # print('Algorithm has converged after {} iterations'.format(it))
- # else:
- # print('Algorithm has not converged after 200 iterations')
- #
- # print('Number of detected clusters {}'.format(numClusters))
- # print('Centroids: {}'.format(centroids))
-
- return centroids.tolist(), self.__idx.tolist(), clusterLabels
-
-########################################################################################################################
-
-def _plugin_initialize():
- """
- optional initialization method of this module, will be called once
- :return:
- """
- pass
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def create(data, damping, factor, preference, distance):
- """
- by convention, plugins contain a factory called create returning the extension implementation
- :return:
- """
- return AffinityPropagation(data, damping, factor, preference, distance)
-
-########################################################################################################################
-
-# from timeit import default_timer as timer
-
-if __name__ == '__main__':
- np.random.seed(200)
- # data = np.array([[1,2,3],[5,4,5],[3,2,2],[8,8,7],[9,6,7],[2,3,4]])
- # data = np.array([np.random.rand(8000) * 4 - 2 for _ in range(500)])
- # data = np.array([[0.9],[1],[1.1],[10],[11],[12],[20],[21],[22]])
- data = np.array([1,1.1,5,8,5.2,8.3])
-
- s = timer()
- aff = AffinityPropagation(data, 0.9, 1.0, 'median', 'euclidean')
- result = aff.run()
- e = timer()
- print(result)
- print('time elapsed: {}'.format(e - s))
-
diff --git a/clustering_api.py b/clustering_api.py
deleted file mode 100644
index 3d9941c..0000000
--- a/clustering_api.py
+++ /dev/null
@@ -1,148 +0,0 @@
-__author__ = 'Michael Kern'
-__version__ = '0.0.1'
-__email__ = 'kernm@in.tum.de'
-
-########################################################################################################################
-# libraries
-
-# use flask library for server activities
-import flask
-# load services (that are executed by the server when certain website is called)
-from clustering_service import *
-
-# create new flask application for hosting namespace
-app = flask.Flask(__name__)
-
-########################################################################################################################
-
-@app.route('/kmeans/<k>/<initMethod>/<distance>/<datasetID>')
-def kmeansClustering(k, initMethod, distance, datasetID):
- """
- Access k-means clustering plugin.
- :param k: number of clusters
- :param initMethod: initialization method for initial clusters
- :param distance: distance measurement
- :param datasetID: identifier of data set
- :return: jsonified output
- """
- try:
- data = loadData(datasetID)
- response = runKMeans(data, int(k), initMethod, distance)
- return flask.jsonify(response)
- except:
- return flask.jsonify({})
-
-########################################################################################################################
-
-@app.route('/hierarchical/<k>/<method>/<distance>/<datasetID>')
-def hierarchicalClustering(k, method, distance, datasetID):
- """
- Access hierarchical clustering plugin.
- :param k: number of desired clusters
- :param method: type of single linkage
- :param distance: distance measurement
- :param datasetID: identifier of data set
- :return: jsonified output
- """
- try:
- data = loadData(datasetID)
- response = runHierarchical(data, int(k), method, distance)
- return flask.jsonify(response)
- except:
- return flask.jsonify({})
-
-########################################################################################################################
-
-@app.route('/affinity/<damping>/<factor>/<preference>/<distance>/<datasetID>')
-def affinityPropagationClustering(damping, factor, preference, distance, datasetID):
- """
- Access affinity propagation clustering plugin.
- :param damping:
- :param factor:
- :param preference:
- :param distance: distance measurement
- :param datasetID:
- :return:
- """
- try:
- data = loadData(datasetID)
- response = runAffinityPropagation(data, float(damping), float(factor), preference, distance)
- return flask.jsonify(response)
- except:
- return flask.jsonify({})
-
-########################################################################################################################
-
-@app.route('/fuzzy/<numClusters>/<m>/<threshold>/<distance>/<datasetID>')
-def fuzzyClustering(numClusters, m, threshold, distance, datasetID):
- """
- :param numClusters:
- :param m:
- :param threshold:
- :param distance:
- :param datasetID:
- :return:
- """
- try:
- data = loadData(datasetID)
- response = runFuzzy(data, int(numClusters), float(m), float(threshold), distance)
- return flask.jsonify(response)
- except:
- return flask.jsonify({})
-
-########################################################################################################################
-
-def loadAttribute(jsonData, attr):
- import json
- data = json.loads(jsonData)
- if attr in data:
- return data[attr]
- else:
- return None
-
-########################################################################################################################
-
-@app.route('/distances/<metric>/<datasetID>/<sorted>', methods=['POST'])
-def getDistances(metric, datasetID, sorted):
- """
- Compute the distances of the current stratification values to its centroid.
- :param metric:
- :param datasetID:
- :return: distances and labels sorted in ascending order
- """
- data = loadData(datasetID)
- labels = []
- externLabels = None
-
- if 'group' in flask.request.values:
- labels = loadAttribute(flask.request.values['group'], 'labels')
- externLabels = loadAttribute(flask.request.values['group'], 'externLabels')
- else:
- return ''
-
- response = getClusterDistances(data, labels, metric, externLabels, sorted)
- return flask.jsonify(response)
-
-########################################################################################################################
-
-@app.route('/dendrogram/<numClusters>/<datasetID>', methods=['POST'])
-def dendrogramClusters(numClusters, datasetID):
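- """
- Cut the dendrogram of a previous hierarchical clustering (passed as the
- 'group' attribute of the POST body) into numClusters clusters.
- :param numClusters: number of desired clusters
- :param datasetID: identifier of data set
- :return: jsonified output
- """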
- data = loadData(datasetID)
-
- if 'group' in flask.request.values:
- dendrogram = loadAttribute(flask.request.values['group'], 'dendrogram')
- else:
- return ''
-
- response = getClustersFromDendrogram(data, dendrogram, int(numClusters))
- return flask.jsonify(response)
-
-
-########################################################################################################################
-
-def create():
- """
- Standard Caleydo convention for creating the service when server is initialized.
- :return: Returns implementation of this plugin with given name
- """
- return app
diff --git a/clustering_fuzzy.py b/clustering_fuzzy.py
deleted file mode 100644
index ff4a607..0000000
--- a/clustering_fuzzy.py
+++ /dev/null
@@ -1,223 +0,0 @@
-__author__ = 'Michael Kern'
-__version__ = '0.0.3'
-__email__ = 'kernm@in.tum.de'
-
-########################################################################################################################
-# libraries
-
-# module to load own configurations
-import caleydo_server.config
-# request config if needed for the future
-config = caleydo_server.config.view('caleydo-clustering')
-
-# library to conduct matrix/vector calculus
-import numpy as np
-
-from clustering_util import similarityMeasurement
-
-########################################################################################################################
-# class definition
-
-class Fuzzy(object):
- """
- Formulas: https://en.wikipedia.org/wiki/Fuzzy_clustering
- """
-
- def __init__(self, obs, numClusters, m=2.0, threshold=-1, distance='euclidean', init=None, error=0.0001):
- """
- Initializes algorithm.
- :param obs: observation matrix / genomic data
- :param numClusters: number of clusters
- :param m: fuzzifier, controls degree of fuzziness, from [1; inf]
- :return:
- """
- # observation
- self.__obs = np.nan_to_num(obs)
-
- self.__n = obs.shape[0]
-
- # fuzzifier value
- self.__m = np.float(m)
- # number of clusters
- self.__c = numClusters
-
- # matrix u containing all the weights describing the degree of membership of each patient to the centroid
- if init is None:
- init = np.random.rand(self.__c, self.__n)
-
- self.__u = np.copy(init)
-
- # TODO! scikit normalizes the values at the beginning and at each step to [0; 1]
- self.__u /= np.ones((self.__c, 1)).dot(np.atleast_2d(np.sum(self.__u, axis=0))).astype(np.float64)
- # remove all zero values and set them to smallest possible value
- self.__u = np.fmax(self.__u, np.finfo(np.float64).eps)
- # centroids
- self.__centroids = np.zeros(self.__c)
- # threshold for stopping criterion
- self.__error = error
- # distance function
- self.__distance = distance
-
- # threshold or minimum probability used for cluster assignments
- if threshold == -1:
- self.__threshold = 1.0 / numClusters
- else:
- self.__threshold = threshold
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __call__(self):
- """
- Caller function for server API
- :return:
- """
- return self.run()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def computeCentroid(self):
- """
- Compute the new centroids using the computed partition matrix.
- :return:
- """
- uM = self.__u ** self.__m
-
- sumDataWeights = np.dot(uM, self.__obs)
- if self.__obs.ndim == 1:
- m = 1
- else:
- m = self.__obs.shape[1]
-
- sumWeights = np.sum(uM, axis=1)
- # tile array (sum of weights repeated in every row)
- sumWeights = np.ones((m, 1)).dot(np.atleast_2d(sumWeights)).T
-
- if self.__obs.ndim == 1:
- sumWeights = sumWeights.flatten()
-
- # divide by total sum to get new centroids
- self.__centroids = sumDataWeights / sumWeights
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def computeCoefficients(self):
- """
- Compute new partition matrix / weights describing the degree of membership of each patient to all clusters.
- :return:
- """
-
- # TODO you can also use cdist of scipy.spatial.distance module
- distMat = np.zeros((self.__c, self.__n))
-
- for ii in range(self.__c):
- distMat[ii] = similarityMeasurement(self.__obs, self.__centroids[ii], self.__distance)
-
- # set zero values to smallest values to prevent inf results
- distMat = np.fmax(distMat, np.finfo(np.float64).eps)
-
- # apply coefficient formula
- denom = np.float(self.__m - 1.0)
- self.__u = distMat ** (-2.0 / denom)
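-
- # raising the distances to the power -2/(m-1) and then normalizing each
- # column (below) is algebraically equivalent to the membership formula u_ij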
-
- sumCoeffs = np.sum(self.__u, axis=0)
-
- self.__u /= np.ones((self.__c, 1)).dot(np.atleast_2d(sumCoeffs))
- self.__u = np.fmax(self.__u, np.finfo(np.float64).eps)
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def run(self):
- """
- Perform the c-means fuzzy clustering.
- :return:
- """
- MAX_ITER = 100
- iter = 0
-
- while iter < MAX_ITER:
- # save last partition matrix
- uOld = np.copy(self.__u)
- # compute centroids with given weights
- self.computeCentroid()
- # compute new coefficient matrix
- self.computeCoefficients()
-
- # normalize weight / partition matrix u
- self.__u /= np.ones((self.__c, 1)).dot(np.atleast_2d(np.sum(self.__u, axis=0)))
- self.__u = np.fmax(self.__u, np.finfo(np.float64).eps)
-
- # compute the difference between the old and new matrix
- epsilon = np.linalg.norm(self.__u - uOld)
-
- # stop if difference (epsilon) is smaller than the user-defined threshold
- if epsilon < self.__error:
- break
-
- iter += 1
-
- self.__end()
-
- u = self.__u.T
- # print(self.__u.T)
-
- return self.__centroids.tolist(), self.__clusterLabels, u.tolist(), self.__threshold
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __end(self):
- """
- Conducts the cluster assignments and creates the cluster label array.
- :return:
- """
- # assign patient to clusters
- # transpose to get a (n, c) matrix
- u = self.__u.T
-
- self.__labels = np.zeros(self.__n, dtype=np.int)
- self.__clusterLabels = [[] for _ in range(self.__c)]
- # gather all probabilities / degree of memberships of each patient to the clusters
- # self.__clusterProbs = [[] for _ in range(self.__c)]
- # probability that the patients belongs to each cluster
- maxProb = np.float64(self.__threshold)
-
- for ii in range(self.__n):
- # clusterID = np.argmax(u[ii])
- # self.__labels = clusterID
- # self.__clusterLabels[clusterID].append(ii)
-
- for jj in range(self.__c):
- if u[ii][jj] >= maxProb:
- clusterID = jj
- self.__labels[ii] = clusterID
- self.__clusterLabels[clusterID].append(int(ii))
-
- # for ii in range(self.__c):
- # self.__clusterLabels[ii], _ = computeClusterInternDistances(self.__obs, self.__clusterLabels[ii])
-
-########################################################################################################################
-
-def _plugin_initialize():
- """
- optional initialization method of this module, will be called once
- :return:
- """
- pass
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def create(data, numCluster, m, threshold, distance):
- """
- by convention, plugins contain a factory called create returning the extension implementation
- :return:
- """
- return Fuzzy(data, numCluster, m, threshold, distance)
-
-########################################################################################################################
-
-if __name__ == '__main__':
-
- data = np.array([[1,1,2],[5,4,5],[3,2,2],[8,8,7],[9,8,9],[2,2,2]])
- # data = np.array([1,1.1,5,8,5.2,8.3])
-
- fuz = Fuzzy(data, 3, 1.5)
- print(fuz.run())
diff --git a/clustering_hierarchical.py b/clustering_hierarchical.py
deleted file mode 100644
index b432f40..0000000
--- a/clustering_hierarchical.py
+++ /dev/null
@@ -1,462 +0,0 @@
-__author__ = 'Michael Kern'
-__version__ = '0.0.3'
-__email__ = 'kernm@in.tum.de'
-
-########################################################################################################################
-# libraries
-
-# module to load own configurations
-import caleydo_server.config
-# request config if needed for the future
-config = caleydo_server.config.view('caleydo-clustering')
-
-# library to conduct matrix/vector calculus
-import numpy as np
-# fastest distance computation by scipy
-import scipy.spatial as spt
-
-# utility functions for clustering and creating the dendrogram trees
-from clustering_util import BinaryNode, BinaryTree
-from clustering_util import similarityMeasurementMatrix
-from clustering_util import computeClusterInternDistances
-
-########################################################################################################################
-
-class Hierarchical(object):
- """
- This is an implementation of hierarchical clustering on genomic data using the Lance-Williams dissimilarity update
- to compute different distance metrics (single linkage, complete linkage, ...).
- Lance-Williams explained in: http://arxiv.org/pdf/1105.0121.pdf
- """
-
- def __init__(self, obs, method='single', distance='euclidean'):
- """
- Initializes the algorithm
- :param obs: genomic data / matrix
- :param method: linkage method
- :return:
- """
- # genomic data / matrix
- # observations, can be 1D array or 2D matrix with genes as rows and conditions as columns
- # remove all NaNs in data
- self.__obs = np.nan_to_num(obs)
-
- numGenes = np.shape(self.__obs)[0]
- self.__n = numGenes
-
- # check if dimension is 2D
- # if self.__obs.ndim == 2:
- # # obtain number of observations (rows)
- # numGenes, _ = np.shape(self.__obs)
- # self.__n = numGenes
-
- # else:
- # print("[Error]:\tdata / observations must be 2D. 1D observation arrays are not supported")
- # raise AttributeError
-
- # distance measurement
- self.__distance = distance
-
- # distance / proximity matrix
- self.__d = []
- self.__computeProximityMatrix()
- # dictionary mapping the string id (i,j,k,...) of clusters to corresponding index in matrix
- self.__idMap = {}
- # inverse mapping of idMap --> returns the string id given a certain index
- self.__keyMap = {}
- # contains actual index of all clusters, old clusters are from [0, n - 1], new clusters have indices in range
- # [n, 2n - 1]
- self.__clusterMap = {}
- for ii in range(self.__n):
- self.__idMap[str(ii)] = ii
- self.__keyMap[ii] = str(ii)
- self.__clusterMap[str(ii)] = ii
-
- # linkage method for hierarchical clustering
- self.__method = method
-
- # internal dendrogram tree
- self.__tree = None
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __call__(self):
- """
- Caller function for server API
- :return:
- """
- return self.run()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- @property
- def tree(self):
- return self.__tree
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __getCoefficients(self, clusterI, clusterJ):
- """
- Compute the coefficients for the Lance-Williams algorithm
- :param clusterI:
- :param clusterJ:
- :return:
- """
- # TODO! use hash map for storing numbers instead of computing them every time
- if self.__method == 'single':
- return 0.5, 0.5, 0, -0.5
- elif self.__method == 'complete':
- return 0.5, 0.5, 0, 0.5
- elif self.__method == 'weighted':
- return 0.5, 0.5, 0, 0
- elif self.__method == 'median':
- return 0.5, 0.5, -0.25, 0
-
- # TODO! ATTENTION! average method should compute the cluster centroids using the average
- # TODO! || clusterI - clusterJ || ** 2
- elif self.__method == 'average':
- nI = np.float(clusterI.count(',') + 1)
- nJ = np.float(clusterJ.count(',') + 1)
- sumN = nI + nJ
- return (nI / sumN), (nJ / sumN), 0, 0
-
- # TODO! ATTENTION! centroid method should compute the cluster centroids using the mean
- # TODO! || clusterI - clusterJ || ** 2
- elif self.__method == 'centroid':
- nI = np.float(clusterI.count(',') + 1)
- nJ = np.float(clusterJ.count(',') + 1)
- sumN = nI + nJ
- return (nI / sumN), (nJ / sumN), -((nI * nJ) / (sumN ** 2)), 0
-
- # TODO! Support ward method
- # TODO! (|clusterI| * |clusterJ|) / (|clusterI| + |clusterJ) * || clusterI - clusterJ || ** 2
- # elif self.__method == 'ward':
- # nI = np.float(clusterI.count(',') + 1)
- # nJ = np.float(clusterJ.count(',') + 1)
- # nK = np.float(clusterK.count(',') + 1)
- # sumN = nI + nJ + nK
- # return (nI + nK) / sumN, (nJ + nK) / sumN, -nK / sumN, 0
- else:
- raise AttributeError
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __computeProximityMatrix(self):
- """
- Compute the proximity of each observation and store the results in a nxn matrix
- :return:
- """
-
- # create distance matrix of size n x n
- self.__d = np.zeros((self.__n, self.__n))
-
- # compute euclidean distance
- # TODO! implement generic distance functions
- # TODO! look for an alternative proximity analysis without computing all distances
- self.__d = similarityMeasurementMatrix(self.__obs, self.__distance)
-
- # sentinel value larger than any distance in the matrix
- self.__maxValue = self.__d.max() + 1
-
- # fill diagonals with max value to exclude them from min dist process
- # TODO! operate only on upper triangle matrix of distance matrix
- np.fill_diagonal(self.__d, self.__maxValue)
-
- # print('\t-> finished.')
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __getMatrixMinimumIndices(self):
- """
- Searches for the minimum distance in the distance matrix
- :return: indices of both clusters having the smallest distance
- """
- minDist = self.__d.min()
- minList = np.argwhere(self.__d == minDist)
-
- minI, minJ = 0, 0
-
- # look for indices, where i < j
- # TODO! for the future --> use upper triangle matrix
- for ii in range(len(minList)):
- minI, minJ = minList[ii]
- if minI < minJ:
- break
-
- if minI == minJ:
- print("ERROR")
-
- return self.__keyMap[minI], self.__keyMap[minJ], minDist
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __deleteClusters(self, i, j):
- """
- Reorders and reduces the matrix to insert the new cluster formed of cluster i and j
- and its distance values, and removes the old clusters by cutting the last row.
- :param i: cluster index i
- :param j: cluster index j
- :return:
- """
- idI = self.__idMap[str(i)]
- idJ = self.__idMap[str(j)]
-
- minID = min(idI, idJ)
- maxID = max(idI, idJ)
-
- # now set column max ID to last column -> swap last and i column
- lastRow = self.__d[self.__n - 1]
- self.__d[maxID] = lastRow
- self.__d[:, maxID] = self.__d[:, (self.__n - 1)]
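-
- # swapping the last row/column into position maxID keeps the matrix dense,
- # so the last row/column can simply be cut off afterwards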
-
- # set key of last column (cluster) to column of the cluster with index maxID
- key = self.__keyMap[self.__n - 1]
- self.__idMap[key] = maxID
- self.__keyMap[maxID] = key
-
- # delete entries in id and key map --> not required anymore
- try:
- del self.__idMap[i]
- del self.__idMap[j]
- del self.__keyMap[self.__n - 1]
- except KeyError:
- print("\nERROR: Key {} not found in idMap".format(j))
- print("ERROR: Previous key: {} in idMap".format(i))
- print("Given keys: ")
- for key in self.__idMap:
- print(key)
- return
-
- # reduce dimension of matrix by one column and row
- self.__n -= 1
- self.__d = self.__d[:-1, :-1]
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __mergeClusters(self, i, j):
- """
- Merges cluster i and j, computes the new ID and distances of the newly formed cluster
- and stores required information
- :param i: cluster index i
- :param j: cluster index j
- :return:
- """
- idI = self.__idMap[str(i)]
- idJ = self.__idMap[str(j)]
-
- minID = min(idI, idJ)
- maxID = max(idI, idJ)
-
- # use Lance-Williams formula to compute linkages
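- # d(i u j, k) = ai * d(i,k) + aj * d(j,k) + b * d(i,j) + y * |d(i,k) - d(j,k)|
- # with the coefficients (ai, aj, b, y) chosen per linkage method above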
- DKI = self.__d[:, minID]
- DKJ = self.__d[:, maxID]
- DIJ = self.__d[minID, maxID]
- distIJ = np.abs(DKI - DKJ)
-
- # compute coefficients
- ai, aj, b, y = self.__getCoefficients(i, j)
-
- newEntries = ai * DKI + aj * DKJ + b * DIJ + y * distIJ
- newEntries[minID] = self.__maxValue
- newEntries[maxID] = self.__maxValue
-
- # add new column and row
- self.__d[minID] = newEntries
- self.__d[:, minID] = newEntries
-
- idIJ = minID
- newKey = i + ',' + j
- self.__idMap[newKey] = idIJ
- self.__keyMap[idIJ] = newKey
- self.__clusterMap[newKey] = len(self.__clusterMap)
-
- # delete old clusters
- self.__deleteClusters(i, j)
-
- # count number of elements
- return newKey.count(',') + 1
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def run(self):
- """
- Conducts the algorithm until there's only one cluster.
- :return:
- """
-
- # number of the current iteration
- m = 0
-
- # resulting matrix containing information Z[i,x], x=0: cluster i, x=1: cluster j, x=2: dist(i,j), x=3: num(i,j)
- runs = self.__n - 1
- Z = np.array([[0 for _ in range(4)] for _ in range(runs)], dtype=np.float)
-
- while m < runs:
- m += 1
-
- i, j, distIJ = self.__getMatrixMinimumIndices()
- numIJ = self.__mergeClusters(i, j)
-
- clusterI, clusterJ = self.__clusterMap[i], self.__clusterMap[j]
- Z[m - 1] = [int(min(clusterI, clusterJ)), int(max(clusterI, clusterJ)), np.float(distIJ), int(numIJ)]
-
- # reset number n to length of first dimension (number of genes)
- self.__n = np.shape(self.__obs)[0]
-
- self.__tree = self.generateTree(Z)
- return Z.tolist()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def generateTree(self, linkageMatrix):
- """
- Computes the dendrogram tree for a given linkage matrix.
- :param linkageMatrix:
- :return:
- """
- self.__tree = None
-
- treeMap = {}
- numTrees = len(linkageMatrix)
-
- for ii in range(numTrees):
- entry = linkageMatrix[ii]
- currentID = self.__n + ii
- leftIndex, rightIndex, value, num = int(entry[1]), int(entry[0]), entry[2], int(entry[3])
- left = right = None
-
- if leftIndex < self.__n:
- left = BinaryNode(self.__obs[leftIndex].tolist(), leftIndex, 1, None, None)
- else:
- left = treeMap[leftIndex]
-
- if rightIndex < self.__n:
- right = BinaryNode(self.__obs[rightIndex].tolist(), rightIndex, 1, None, None)
- else:
- right = treeMap[rightIndex]
-
- if isinstance(left, BinaryNode) and isinstance(right, BinaryNode):
- treeMap[currentID] = BinaryTree(left, right, currentID, value)
- elif isinstance(left, BinaryNode):
- treeMap[currentID] = right.addNode(left, currentID, value)
- del treeMap[rightIndex]
- elif isinstance(right, BinaryNode):
- treeMap[currentID] = left.addNode(right, currentID, value)
- del treeMap[leftIndex]
- else:
- treeMap[currentID] = left.merge(right, currentID, value)
- del treeMap[rightIndex]
- del treeMap[leftIndex]
-
- self.__tree = treeMap[numTrees + self.__n - 1]
- return self.__tree
-
- # ------------------------------------------------------------------------------------------------------------------
-
-########################################################################################################################
-
-from clustering_util import cutJsonTreeByClusters
-
-def getClusters(k, obs, dendrogram, sorted=True):
- """
- First implementation that cuts the dendrogram tree automatically by choosing the nodes with the greatest
- node values, i.e. the largest distance to the other node / potential cluster
- :param k: number of desired clusters
- :param obs: set of observations
- :param dendrogram: dendrogram tree
- :return: centroids, sorted cluster labels and normal label list
- """
- obs = np.nan_to_num(obs)
- n = obs.shape[0]
-
- if isinstance(dendrogram, BinaryTree):
- clusterLabels = dendrogram.cutTreeByClusters(k)
- else:
- clusterLabels = cutJsonTreeByClusters(dendrogram, k)
-
- clusterCentroids = []
- labels = np.zeros(n, dtype=np.int)
- clusterID = 0
-
- for ii in range(len(clusterLabels)):
- cluster = clusterLabels[ii]
- subObs = obs[cluster]
- clusterCentroids.append(np.mean(subObs, axis=0).tolist())
-
- for id in cluster:
- labels[id] = clusterID
-
- # sort labels according to their distance
- if sorted:
- clusterLabels[ii], _ = computeClusterInternDistances(obs, cluster)
-
- clusterID += 1
-
- return clusterCentroids, clusterLabels, labels.tolist()
-
-########################################################################################################################
-
-def _plugin_initialize():
- """
- optional initialization method of this module, will be called once
- :return:
- """
- pass
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def create(data, method, distance):
- """
- by convention, plugins contain a factory called create returning the extension implementation
- :return:
- """
- return Hierarchical(data, method, distance)
-
-########################################################################################################################
-
-from timeit import default_timer as timer
-from scipy.cluster.hierarchy import linkage, leaves_list
-
-if __name__ == '__main__':
-
- np.random.seed(200)
- # data = np.array([[1,2,3],[5,4,5],[3,2,2],[8,8,7],[9,6,7],[2,3,4]])
- data = np.array([1,1.1,5,8,5.2,8.3])
-
- timeMine = 0
- timeTheirs = 0
-
-
- n = 10
-
- for i in range(n):
- # data = np.array([np.random.rand(6000) * 4 - 2 for _ in range(249)])
- # import time
- s1 = timer()
- hier = Hierarchical(data, 'complete')
- # s = time.time()
- linkageMatrix = hier.run()
- e1 = timer()
- print(linkageMatrix)
- tree = hier.generateTree(linkageMatrix)
- # print(tree.getLeaves())
- # print(tree.jsonify())
- # print(hier.getClusters(3))
- import json
- jsonTree = json.loads(tree.jsonify())
- getClusters(3, data, jsonTree)
-
-
- s2 = timer()
- linkageMatrix2 = linkage(data, 'complete')
- # print(leaves_list(linkageMatrix2))
- e2 = timer()
-
- timeMine += e1 - s1
- timeTheirs += e2 - s2
-
- # print(linkageMatrix)
- # print(linkageMatrix2)
- print('mine: {}'.format(timeMine / n))
- print('theirs: {}'.format(timeTheirs / n))
-
diff --git a/clustering_kmeans.py b/clustering_kmeans.py
deleted file mode 100644
index 5e5d2a2..0000000
--- a/clustering_kmeans.py
+++ /dev/null
@@ -1,398 +0,0 @@
-__author__ = 'Michael Kern'
-__version__ = '0.0.2'
-__email__ = 'kernm@in.tum.de'
-
-########################################################################################################################
-# libraries
-
-# module to load own configurations
-import caleydo_server.config
-# request config if needed in the future
-config = caleydo_server.config.view('caleydo-clustering')
-
-# numpy important to conduct matrix/vector calculus
-import numpy as np
-# creates random numbers
-import random
-
-# contains utility functions
-from clustering_util import weightedChoice, similarityMeasurement, computeClusterInternDistances
-
-########################################################################################################################
-
-class KMeans:
- """
- This is an implementation of the k-means algorithm to cluster genomic data / matrices.
- Returns the centroids, the labels / stratification of each row belonging to one cluster,
- distance matrix for cluster-cluster distance and distance arrays for row-clusterCentroid distance.
- Implementation detail:
- """
-
- def __init__(self, obs, k, initMode='kmeans++', distance='sqeuclidean', iters=1000):
- """
- Initializes the algorithm with observation, number of k clusters, the initial method and
- the maximum number of iterations.
- Initialization method of random cluster choice can be: forgy, uniform, random, plusplus
- :param obs: genomic data / matrix
- :param k: number of clusters
- :param initMode: initialization method
- :param distance: distance measurement
- :param iters: number of maximum iterations
- :return:
- """
-
- # number of clusters
- self.__k = k
- # observations, can be 1D array or 2D matrix with genes as rows and conditions as columns
- # remove all NaNs in data
- self.__obs = np.nan_to_num(obs)
- # number of observations / genes
- self.__n = np.shape(obs)[0]
- # maps the element ids to clusters
- self.__labelMap = np.zeros(self.__n, dtype=np.int)
- # cluster means and number of elements
- self.__clusterMeans = np.array([obs[0] for _ in range(k)], dtype=np.float)
- self.__clusterNums = np.array([0 for _ in range(k)], dtype=np.int)
- # tells if any cluster has changed or rather if any data item was moved
- self.__changed = True
- # number of iterations
- self.__iters = iters
- # initialization method
- self.__initMode = initMode
- # compare function
- self.__distance = distance
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __call__(self):
- """
- Caller function for server API.
- """
- return self.run()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __init(self):
- """
- Initialize clustering with random clusters using a user-specified method
- :return:
- """
- # TODO! consider to init k-Means algorithm with Principal Component Analysis (PCA)
- # TODO! see
- # init cluster
- if self.__initMode == 'forgy':
- self.__forgyMethod()
- elif self.__initMode == 'uniform':
- self.__uniformMethod()
- elif self.__initMode == 'random':
- self.__randomMethod()
- elif self.__initMode == 'kmeans++':
- self.__plusplusMethod()
- else:
- raise AttributeError
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __forgyMethod(self):
- """
- Initialization method:
- Randomly choose k observations from the data using a uniform random distribution.
- :return:
- """
- for ii in range(self.__k):
- self.__clusterMeans[ii] = (self.__obs[random.randint(0, self.__n - 1)])
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __uniformMethod(self):
- """
- Initialization method:
- Randomly assign each observation to one of the k clusters using uniform random distribution
- and compute the centroids of each cluster.
- :return:
- """
- for i in range(self.__n):
- self.__labelMap[i] = random.randint(0, self.__k - 1)
-
- self.__update()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __randomMethod(self):
- """
- Initialization method:
- Randomly choose k observations from the data by estimating the mean and standard deviation of the data and
- using the gaussian random distribution.
- :return:
- """
- mean = np.mean(self.__obs, axis=0)
- std = np.std(self.__obs, axis=0)
-
- for ii in range(self.__k):
- self.__clusterMeans[ii] = np.random.normal(mean, std)
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __plusplusMethod(self):
- """
- Initialization method:
- Chooses k observations by computing probabilities for each observation and using a weighted random distribution.
- Algorithm: . This method should accelerate the algorithm by finding
- the appropriate clusters right at the beginning and hence should make it more robust.
- :return:
- """
- # 1) choose random center out of data
- self.__clusterMeans[0] = (random.choice(self.__obs))
-
- maxValue = np.max(self.__obs) + 1
- probs = np.array([maxValue for _ in range(self.__n)])
-
- for i in range(1, self.__k):
- probs.fill(maxValue)
- # compute new probabilities, choose min of all distances
- for j in range(0, i):
- dists = similarityMeasurement(self.__obs, self.__clusterMeans[j], self.__distance)
- # collect minimum squared distances to cluster centroids
- probs = np.minimum(probs, dists)
-
- # sum all squared distances
- sumProbs = np.float(np.sum(probs))
-
- if sumProbs != 0:
- probs /= sumProbs
- # 3) choose new center based on probabilities
- self.__clusterMeans[i] = (self.__obs[weightedChoice(probs)])
- else:
- print('ERROR: cannot find enough cluster centroids for given k = ' + str(self.__k))
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def getClusterMean(self, num):
- """
- Returns the centroid of the cluster with index num.
- :param num:
- :return:
- """
- if num >= self.__k:
- return None
- else:
- return self.__clusterMeans[num]
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def getClusterOfElement(self, index):
- """
- :param index: number of element in observation array
- :return: cluster id of observation with given index.
- """
- if index >= self.__n:
- return None
- else:
- return self.__labelMap[index]
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def printClusters(self):
- """
- Print the cluster centroids and the labels.
- :return:
- """
- print('Centroids: ' + str(self.__centroids) + ' | Labels: ' + str(self.__labels))
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __assignment(self):
- """
- Assignment step:
- Compute distance of current observation to each cluster centroid and move gene to the nearest cluster.
- :return:
- """
- for i in range(self.__n):
- value = self.__obs[i]
-
- # compute squared distances to each mean
- dists = similarityMeasurement(self.__clusterMeans, value, self.__distance)
- # nearest cluster
- nearestID = np.argmin(dists)
-
- if self.__labelMap[i] != nearestID:
- self.__changed = True
- self.__labelMap[i] = nearestID
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __update(self):
- """
- Update step:
- Compute the new centroids of each cluster after the assignment.
- :return:
- """
- self.__clusterMeans.fill(0)
- self.__clusterNums.fill(0)
-
- self.__clusterLabels = [[] for _ in range(self.__k)]
-
- for ii in range(self.__n):
- clusterID = self.__labelMap[ii]
- self.__clusterLabels[clusterID].append(ii)
- self.__clusterNums[clusterID] += 1
-
- for ii in range(self.__k):
- self.__clusterMeans[ii] = np.mean(self.__obs[self.__clusterLabels[ii]], axis=0)
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __end(self):
- """
- Writes the results to the corresponding member variables.
- :return:
- """
- # returned values | have to be reinitialized in case of sequential running
- # centroids
- self.__centroids = np.array([self.__obs[0] for _ in range(self.__k)], dtype=np.float)
- # labels of observations
- self.__labels = np.array([0 for _ in range(self.__n)], dtype=np.int)
- # distances between centroids
- # self.__centroidDistMat = np.zeros((self.__k, self.__k))
-
- # we do not use OrderedDict here, so obtain dict.values and fill array manually
- for index in range(self.__n):
- clusterID = self.__labelMap[index]
- self.__labels[index] = clusterID
-
- # collect centroids
- for ii in range(self.__k):
- # self.__centroids.append(self.__clusterMeans[ii].tolist())
- self.__centroids[ii] = self.__clusterMeans[ii]
-
- # compute distances between each centroids
- # for ii in range(self.__k - 1):
- # # compute indices of other clusters
- # jj = range(ii + 1, self.__k)
- # # select matrix of cluster centroids
- # centroidMat = self.__centroids[jj]
- # distances = np.sqrt(self.__compare(centroidMat, self.__centroids[ii]))
- # self.__centroidDistMat[ii, jj] = distances
- # self.__centroidDistMat[jj, ii] = distances
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def run(self):
- """
- Runs the k-means algorithm, using the initialization method and the assignment/update steps.
- Conducts at most iters iterations and terminates early if no observation
- was moved to another cluster.
- :return:
- """
- # 1) init algorithm by choosing cluster centroids
- self.__init()
-
- MAX_ITERS = self.__iters
- counter = 0
- # 2) run clustering
- while self.__changed and counter < MAX_ITERS:
- self.__changed = False
-
- self.__assignment()
- self.__update()
-
- counter += 1
-
- self.numIters = counter
-
- # write results to the class members
- self.__end()
- return self.__centroids.tolist(), self.__labels.tolist(), self.__clusterLabels
- #, self.__centroidDistMat.tolist()
-
- # ------------------------------------------------------------------------------------------------------------------
-
- # def getDistsPerCentroid(self):
- # """
- # Compute the distances between observations belonging to one cluster and the corresponding cluster centroid.
- # Cluster labels are sorted in ascending order using their distances
- # :return: array of distance arrays for each cluster and ordered labels
- # """
- #
- # # labels per centroid
- # # self.__clusterLabels = [[] for _ in range(self.__k)]
- # # distances of obs to their cluster
- # self.__centroidDists = [[] for _ in range(self.__k)]
- #
- # for ii in range(self.__k):
- # self.__clusterLabels[ii] = np.array(self.__clusterLabels[ii], dtype=np.int)
- #
- # # compute euclidean distances of values to cluster mean
- # for ii in range(self.__k):
- # mean = self.__clusterMeans[ii]
- # obs = self.__obs[self.__clusterLabels[ii]]
- # dists = similarityMeasurement(obs, mean, self.__compare).tolist()
- # self.__centroidDists[ii] = dists
- #
- # # sort indices in ascending order using the distances
- # indices = range(len(dists))
- # indices.sort(key=dists.__getitem__)
- # self.__clusterLabels[ii] = self.__clusterLabels[ii][indices].tolist()
- # self.__centroidDists[ii].sort()
- #
- # return self.__clusterLabels, self.__centroidDists
-
-########################################################################################################################
-
-def _plugin_initialize():
- """
- optional initialization method of this module, will be called once
- :return:
- """
- pass
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def create(data, k, initMethod, distance):
- """
- By convention, this module contains a factory called create that returns the extension implementation.
- :return:
- """
- return KMeans(data, k, initMethod, distance)
-
-########################################################################################################################
-
-from timeit import default_timer as timer
-from scipy.cluster.vq import kmeans2, kmeans
-
-"""
-This is for testing the algorithm and comparing the results between this implementation and scipy's
-"""
-if __name__ == '__main__':
- from datetime import datetime
- #np.random.seed(datetime.now())
- # data = np.array([[1,2,3],[5,4,5],[3,2,2],[8,8,7],[9,6,7],[2,3,4]])
- data = np.array([1,1.1,5,8,5.2,8.3])
-
- # data = np.array([np.random.rand(2) * 5 for _ in range(10)])
- k = 3
-
- timeMine = 0
- timeTheirs = 0
- n = 10
-
- for i in range(10):
- s1 = timer()
- kMeansPlus = KMeans(data, k, 'kmeans++', 'sqeuclidean', 10)
- result1 = kMeansPlus.run()
- #print(result)
- e1 = timer()
- # labels = kMeansPlus.getDistsPerCentroid()
- # l, d = computeClusterDistances(data, labels[0])
-
- s2 = timer()
- result2 = kmeans2(data, k)
- e2 = timer()
-
- timeMine += e1 - s1
- timeTheirs += e2 - s2
-
- print(result1)
- print(result2)
- print('mine: {}'.format(timeMine / n))
- print('theirs: {}'.format(timeTheirs / n))
diff --git a/clustering_service.py b/clustering_service.py
deleted file mode 100644
index 0bed35c..0000000
--- a/clustering_service.py
+++ /dev/null
@@ -1,151 +0,0 @@
-__author__ = 'Michael Kern'
-__version__ = '0.0.1'
-__email__ = 'kernm@in.tum.de'
-
-import numpy as np
-from clustering_hierarchical import getClusters
-
-########################################################################################################################
-
-def loadData(datasetID):
- """
- Loads the genomic data with given identifier datasetID.
- :param datasetID: identifier
- :return: array of the genomic data
- """
- import caleydo_server.dataset as dt
- # obtain Caleydo dataset from ID
- dataset = dt.get(datasetID)
- # choose loaded attribute and load raw data in numpy format
- # somehow hack to get a numpy array out of the data
- try:
- arr = np.array(list(dataset.asnumpy()))
- except:
- raise Exception
- return arr
-
-########################################################################################################################
-
-def loadPlugin(pluginID, *args, **kwargs):
- """
- Loads the clustering plugin with given arguments.
- :param pluginID: identifier of plugin
- :param *args: additional caller function arguments
- :param **kwargs: additional arguments
- :return: plugin
- """
- import caleydo_server.plugin
- # obtain all plugins with 'pluginID' extension
- plugins = caleydo_server.plugin.list('clustering')
- # choose plugin with given ID
- for plugin in plugins:
- if plugin.id == pluginID:
- # load the implementation of the plugin
- return plugin.load().factory(*args, **kwargs)
-
- raise NotImplementedError
-
-
-########################################################################################################################
-
-def runKMeans(data, k, initMethod, distance):
- """
- Runs the k-Means clustering algorithm given the loaded data set, the number of clusters k and the initialization
- method.
- :param data: observation matrix
- :param k: number of clusters
- :param initMethod: initialization method
- :param distance: distance measurement
- :return: result of k-means
- """
- KMeans = loadPlugin('caleydo-clustering-kmeans', data, k, initMethod, distance)
- # and run the kmeans extension
- centroids, labels, clusterLabels = KMeans()
- # clusterLabels, clusterDists = KMeans.getDistsPerCentroid()
-
- return {'centroids': centroids, 'clusterLabels': clusterLabels}
-
-########################################################################################################################
-
-def runHierarchical(data, k, method, distance):
- """
- Runs the hierarchical clustering algorithm given the loaded data set and type of linkage method.
- :param data: observation matrix
- :param k: number of desired clusters
- :param method: linkage method
- :param distance: distance measurement
- :return: linkage matrix / dendrogram of the algorithm
- """
- Hierarchical = loadPlugin('caleydo-clustering-hierarchical', data, method, distance)
- # and use the extension
- Hierarchical()
- # obtain k-number of clusters
- centroids, clusterLabels, labels = getClusters(k, data, Hierarchical.tree, False)
-
- return {'centroids': centroids, 'clusterLabels': clusterLabels, 'dendrogram': Hierarchical.tree.json()}
- # print('\t-> creating dendrogram tree...')
- # tree = Hierarchical.generateTree(linkage)
- # print('\t-> creating json string ...')
- # dendrogram = tree.jsonify()
- # print('\t-> finished.')
-
- # return {'dendrogram': dendrogram} --> if needed later
-
-########################################################################################################################
-
-def runAffinityPropagation(data, damping, factor, preference, distance):
- """
- Runs the affinity propagation algorithm given the loaded dataset, a damping value, a certain factor and
- a preference method.
- :param data:
- :param damping:
- :param factor:
- :param preference:
- :return:
- """
- Affinity = loadPlugin('caleydo-clustering-affinity', data, damping, factor, preference, distance)
- # use this extension
- centroids, labels, clusterLabels = Affinity()
-
- return {'centroids': centroids, 'clusterLabels': clusterLabels}
-
-########################################################################################################################
-
-def runFuzzy(data, numClusters, m, threshold, distance):
- Fuzzy = loadPlugin('caleydo-clustering-fuzzy', data, numClusters, m, threshold, distance)
-
- centroids, clusterLabels, partitionMatrix, maxProb = Fuzzy()
-
- return {'centroids': centroids, 'clusterLabels': clusterLabels, 'partitionMatrix': partitionMatrix,
- 'maxProbability': maxProb}
-
-########################################################################################################################
-
-def getClusterDistances(data, labels, metric, externLabels = None, sorted = True):
- """
- Compute the cluster distances in the given data among certain rows (labels)
- :param data: genomic data
- :param labels: indices of rows
- :param metric: distance metric
- :param externLabels:
- :return: labels and distances values sorted in ascending order
- """
- from clustering_util import computeClusterInternDistances, computeClusterExternDistances
- distLabels, distValues = computeClusterInternDistances(data, labels, sorted, metric)
-
- if externLabels is not None:
- externDists = computeClusterExternDistances(data, distLabels, externLabels, metric)
- return {'labels': distLabels, 'distances': distValues, 'externDistances': externDists}
- else:
- return {'labels': distLabels, 'distances': distValues}
-
-########################################################################################################################
-
-def getClustersFromDendrogram(data, dendrogram, numClusters):
- """
-
- :param data:
- :param dendrogram:
- :param numClusters:
- :return:
- """
-
- centroids, clusterLabels, _ = getClusters(numClusters, data, dendrogram)
- return {'centroids': centroids, 'clusterLabels': clusterLabels}
diff --git a/clustering_util.py b/clustering_util.py
deleted file mode 100644
index 575a0d2..0000000
--- a/clustering_util.py
+++ /dev/null
@@ -1,470 +0,0 @@
-__author__ = "Michael Kern"
-__email__ = 'kernm@in.tum.de'
-
-########################################################################################################################
-
-import random
-import numpy as np
-
-# use scipy to compute different distance matrices
-from scipy.spatial.distance import pdist, squareform
-import scipy.stats as stats
-
-"""
-http://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python
---> good explanation of how to create weighted choices / random numbers
-"""
-def weightedChoice(weights):
-
- # compute sum of all weights
- sumTotal = sum(weights)
- # compute a random with range[0, sumTotal]
- rnd = random.random() * sumTotal
-
- for index, weight in enumerate(weights):
- # subtract current weight from random to find current index
- rnd -= weight
- if rnd < 0:
- return index
-
- # 20% faster if weights are sorted in descending order
-
-########################################################################################################################
-
-"""
-Implementation of a binary tree for hierarchical clustering
-"""
-
-"""
-Node of the tree containing information about its id in the data, its children, and its value
-"""
-class BinaryNode:
- def __init__(self, value, id, size, leftChild, rightChild):
- self.value = value
- self.left = leftChild
- self.right = rightChild
- self.size = size
- self.id = id
- self.parent = None
- self.indices = [id]
-
- # create json info on the fly
- self.json = {"id": self.id, "size": self.size, "value": self.value, "indices": [id]}
- if leftChild is not None and rightChild is not None:
- # self.json["value"] = np.mean(self.value)
- self.json["children"] = [rightChild.json, leftChild.json]
- self.indices = [] + rightChild.indices + leftChild.indices
- self.json["indices"] = self.indices
-
- def isLeave(self):
- return self.left is None and self.right is None
-
-########################################################################################################################
-
-"""
-Implementation of a hierarchical binary tree
-"""
-class BinaryTree:
- # this tree must not be empty and must have at least two children (leaves)
- def __init__(self, leftNode, rightNode, newID, newValue):
- self.__createNewRoot(leftNode, rightNode, newID, newValue)
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def addNode(self, newNode, newID, newValue):
- self.__createNewRoot(self.root, newNode, newID, newValue)
- return self
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def merge(self, tree, newID, newValue):
- self.__createNewRoot(self.root, tree.root, newID, newValue)
- return self
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def jsonify(self):
- import json
- return json.dumps(self.root.json)
- # return self.root.json
- # return self.__traverseJson(self.root)
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def json(self):
- return self.root.json
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def cutTreeByClusters(self, k):
- queue = [self.root]
-
- while len(queue) < k:
- node = queue.pop(0)
- queue.append(node.left)
- queue.append(node.right)
-
- def keyFunc(x):
- if x.isLeave():
- return 0
- else:
- return -x.value
-
- queue.sort(key=keyFunc)
-
- clusters = []
-
- for node in queue:
- clusters.append(node.indices)
-
- return clusters
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __traverseJson(self, node):
- json = {"id": node.id, "size": node.size, "value": node.value}
- if node.left is None and node.right is None:
- return json
- else:
- json["children"] = [] + [self.__traverseJson(node.left)] + [self.__traverseJson(node.right)]
-
- return json
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def getLeaves(self):
- return self.root.indices
- # return self.__traverseIDs(self.root)
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __traverseIDs(self, node):
-
- if node.left is None and node.right is None:
- return [node.id]
- else:
- return [] + self.__traverseIDs(node.right) + self.__traverseIDs(node.left)
-
- # ------------------------------------------------------------------------------------------------------------------
-
- def __createNewRoot(self, leftNode, rightNode, newID, newValue):
- newSize = leftNode.size + rightNode.size
- self.root = BinaryNode(newValue, newID, newSize, leftNode, rightNode)
- leftNode.parent = rightNode.parent = self.root
-
- # ------------------------------------------------------------------------------------------------------------------
-
-def cutJsonTreeByClusters(jsonData, k):
- # import json
- # tree = json.loads(jsonData)
- queue = [jsonData]
-
- while len(queue) < k:
- node = queue.pop(0)
- queue.append(node['children'][0])
- queue.append(node['children'][1])
-
- def keyFunc(x):
- if 'children' not in x:
- return 0
- else:
- return -x['value']
-
- queue.sort(key=keyFunc)
-
- clusters = []
-
- for node in queue:
- clusters.append(node['indices'])
-
- return clusters
-
-########################################################################################################################
-
-def euclideanDistance(matrix, vector, squared=False):
- """
- Computes the euclidean distance between a vector and the rows of a matrix in parallel.
- :param matrix: array of observations or clusters
- :param vector: cluster centroid or observation
- :return:
- """
-
- # compute distance between values in matrix and the vector
- distMat = matrix - vector
- numValues = len(matrix)
- distances = np.array([0.0 for _ in range(numValues)], dtype=np.float)
-
- for ii in range(numValues):
- distance = distMat[ii]
- # always try to use np.dot when computing euclidean distance
- # it's way faster than ** 2 and sum(..., axis=1)
- distances[ii] = np.dot(distance, distance)
-
- if squared:
- return distances
- else:
- return np.sqrt(distances)
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def correlationDistance(matrix, vector, method):
- """
-
- :param matrix:
- :param vector:
- :return:
- """
-
- numValues = len(matrix)
- distances = np.array([0.0 for _ in range(numValues)], dtype=np.float)
-
- for ii in range(numValues):
- value = matrix[ii]
-
- if method == 'pearson':
- distances[ii], _ = stats.pearsonr(value, vector)
- elif method == 'spearman':
- distances[ii], _ = stats.spearmanr(value, vector)
- elif method == 'kendall':
- distances[ii], _ = stats.kendalltau(value, vector)
- else:
- raise AttributeError
-
- return distances
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-from scipy.spatial.distance import cdist
-
-def similarityMeasurement(matrix, vector, method='euclidean'):
-
- if method == 'euclidean':
- return euclideanDistance(matrix, vector)
-
- if method == 'sqeuclidean':
- return euclideanDistance(matrix, vector, True)
-
- spatialMethods = ['cityblock', 'chebyshev', 'canberra', 'correlation', 'hamming', 'mahalanobis',]
-
- if method in spatialMethods:
- return np.nan_to_num(cdist(matrix, np.atleast_2d(vector), method).flatten())
-
- corrMethods = ['spearman', 'pearson', 'kendall']
-
- if method in corrMethods:
- return correlationDistance(matrix, vector, method)
-
- raise AttributeError
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def euclideanDistanceMatrix(matrix, squared=False):
- """
- Compute the euclidean distance matrix required for the algorithm
- :param matrix:
- :return: euclidean distance matrix (squared distances if squared is True)
- """
-
- n = np.shape(matrix)[0]
- distMat = np.zeros((n, n))
-
- # use Gram matrix and compute distances without inner products | FASTER than row-by-row method
- "Gramiam matrix to compute dot products of each pair of elements: "
- ""
- gramMat = np.zeros((n, n))
- for ii in range(n):
- for jj in range(ii, n):
- gramMat[ii, jj] = np.dot(matrix[ii], matrix[jj])
-
- # # ! This is slower than computing dot products of rows manually in python
- # # ! And we only require the upper triangle matrix of the Gram matrix
- # gramMat = np.dot(self.__obs, self.__obs.T)
-
- # make use of formula |a - b|^2 = a^2 - 2ab + b^2
- for ii in range(n):
- # self.__d[ii, ii] = self.__maxValue
- jj = np.arange(ii + 1, n)
- distMat[ii, jj] = gramMat[ii, ii] - 2 * gramMat[ii, jj] + gramMat[jj, jj]
- distMat[jj, ii] = distMat[ii, jj]
-
- # # take square root of distances to compute real euclidean distance
- # distMat = np.sqrt(distMat)
-
- "alternative version --> use scipy's fast euclidean distance implementation: FASTEST"
- # distMat = spt.distance.pdist(self.__obs, 'euclidean')
- # self.__d = spt.distance.squareform(distMat)
- # print(distMat)
-
- if squared:
- return distMat
- else:
- return np.sqrt(distMat)
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def norm1Distance(matrix, vector):
- """
- Computes the norm-1 distance between a vector and the rows of a matrix in parallel.
- :param matrix: array of observations or clusters
- :param vector: cluster centroid or observation
- :return:
- """
- distMat = np.abs(matrix - vector)
- numValues = len(vector)
-
- distances = np.sum(distMat, axis=1) / numValues
- return distances
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def pearsonCorrelationMatrix(matrix):
- """
-
- :param matrix:
- :return: distance matrix based on 1 - Pearson correlation coefficient
- """
- # TODO! other possibilities like 1 - abs(corr) | sqrt(1 - corr ** 2) | (1 - corr) / 2
- distMat = 1 - np.corrcoef(matrix)
-
- return distMat
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def statsCorrelationMatrix(matrix, method):
- if method == 'pearson':
- return pearsonCorrelationMatrix(matrix)
-
- n = np.shape(matrix)[0]
- distMat = np.zeros((n, n))
-
- for ii in range(n):
- rowI = matrix[ii]
- for jj in range(ii + 1, n):
- rowJ = matrix[jj]
- corr = 0
-
- if method == 'spearman':
- corr, _ = stats.spearmanr(rowI, rowJ)
-
- if method == 'kendall':
- corr, _ = stats.kendalltau(rowI, rowJ)
-
- # TODO! other possibilities like 1 - abs(corr) | sqrt(1 - corr ** 2) | (1 - corr) / 2
- corr = 1 - corr
-
- distMat[ii, jj] = corr
- distMat[jj, ii] = corr
-
- return distMat
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def similarityMeasurementMatrix(matrix, method):
- """
- Generic function to determine the similarity measurement for clustering
- :param matrix:
- :param method:
- :return:
- """
- if method == 'euclidean':
- return euclideanDistanceMatrix(matrix)
- # return squareform(pdist(matrix, method))
-
- if method == 'sqeuclidean':
- return euclideanDistanceMatrix(matrix, True)
- # return squareform(pdist(matrix, method))
-
- spatialMethods = ['cityblock', 'chebyshev', 'canberra', 'correlation', 'hamming', 'mahalanobis']
-
- if method in spatialMethods:
- return squareform(np.nan_to_num(pdist(matrix, method)))
-
- corrMethods = ['spearman', 'pearson', 'kendall']
-
- if method in corrMethods:
- return statsCorrelationMatrix(matrix, method)
-
- raise AttributeError
-
-
-########################################################################################################################
-# utility functions to compute distances between rows and cluster centroids
-
-def computeClusterInternDistances(matrix, labels, sorted=True, metric='euclidean'):
- """
- Computes the distances of each element in one cluster to the cluster's centroid. Returns distance values and labels
- sorted in ascending order.
- :param matrix:
- :param labels:
- :return: labels / indices of elements corresponding to distance array, distance values of cluster
- """
- clusterLabels = np.array(labels)
- if len(clusterLabels) == 0:
- return [], []
-
- subMatrix = matrix[clusterLabels]
- # compute centroid of cluster along column (as we want to average each gene separately)
- centroid = np.mean(subMatrix, axis=0)
-
- # compute distances to centroid
- dists = similarityMeasurement(subMatrix, centroid, metric)
-
- if sorted == 'true':
- # sort values
- indices = range(len(dists))
- indices.sort(key=dists.__getitem__)
- dists.sort()
-
- # reverse order if correlation coefficient is used
- # (1 means perfect correlation while -1 denotes opposite correlation)
- corrMetrics = ['pearson', 'spearman', 'kendall']
- if metric in corrMetrics:
- indices.reverse()
- dists = dists[::-1]
-
- # write back to our arrays
- distLabels = clusterLabels[indices].tolist()
- distValues = dists.tolist()
- else:
- distLabels = clusterLabels.tolist()
- distValues = dists.tolist()
-
- return distLabels, distValues
-
-# ----------------------------------------------------------------------------------------------------------------------
-
-def computeClusterExternDistances(matrix, labels, outerLabels, metric='euclidean'):
- """
- Compute the distances of patients in one cluster to the centroids of all other clusters.
- :param matrix:
- :param labels:
- :param outerLabels:
- :return:
- """
- externDists = []
- internSubMatrix = matrix[labels]
-
- for externLabels in outerLabels:
-
- if len(externLabels) == 0:
- externDists.append([])
- continue
-
- # compute centroid of external cluster
- subMatrix = matrix[externLabels]
- centroid = np.mean(subMatrix, axis=0)
-
- dists = similarityMeasurement(internSubMatrix, centroid, metric)
- externDists.append(dists.tolist())
-
- return externDists
-
-########################################################################################################################
-
-if __name__ == '__main__':
- print(cdist([[1,1,1],[3,3,3],[5,5,5]],np.atleast_2d([2,2,2]), 'sqeuclidean').flatten())
-
- from scipy.stats import spearmanr
-
- print(spearmanr([1,2,3],[2,4,1]))
diff --git a/docker_packages.txt b/docker_packages.txt
new file mode 100644
index 0000000..e69de29
diff --git a/package.json b/package.json
index 678bd7e..ab5f3a3 100644
--- a/package.json
+++ b/package.json
@@ -1,46 +1,37 @@
{
- "name": "caleydo_clustering",
- "version": "1.0.0",
- "license" : "SEE LICENSE IN LICENSE",
- "repository": "K3rn1n4tor/caleydo_clustering",
- "dependencies": {
+ "files": [
+ "phovea_clustering",
+ "__init__.py",
+ "__main__.py",
+ "build",
+ "requirements.txt",
+ "requirements_dev.txt",
+ "docker_packages.txt"
+ ],
+ "scripts": {
+ "check": "flake8",
+ "pretest": "npm run check",
+ "test": "python setup.py test",
+ "prebuild": "node -e \"process.exit(process.env.PHOVEA_SKIP_TESTS === undefined?1:0)\" || npm run test",
+ "build": "python -c \"from distutils.dir_util import mkpath ; mkpath('./build/source')\" && (tar -c ./phovea_clustering --exclude '*.pyc' | tar -xC build/source)",
+ "predist": "npm run build",
+ "dist": "python setup.py bdist_egg"
},
- "peerDependencies": {
- "caleydo_server": "*"
+ "name": "phovea_clustering",
+ "description": "",
+ "homepage": "https://phovea.caleydo.org",
+ "version": "1.0.0",
+ "author": {
+ "name": "The Caleydo Team",
+ "email": "contact@caleydo.org",
+ "url": "https://caleydo.org"
},
- "caleydo": {
- "plugins": {
- "python": [
- {
- "type": "clustering",
- "id": "caleydo-clustering-kmeans",
- "file": "clustering_kmeans"
- },
- {
- "type": "clustering",
- "id": "caleydo-clustering-hierarchical",
- "file": "clustering_hierarchical"
- },
- {
- "type": "clustering",
- "id": "caleydo-clustering-affinity",
- "file": "clustering_affinity"
- },
- {
- "type": "clustering",
- "id": "caleydo-clustering-fuzzy",
- "file": "clustering_fuzzy"
- },
- {
- "type": "namespace",
- "id": "caleydo-clustering",
- "file": "clustering_api",
- "namespace": "/api/clustering"
- }
- ]
- }
+ "license": "BSD-3-Clause",
+ "bugs": {
+ "url": "https://github.com/phovea/phovea_clustering/issues"
},
- "publishConfig": {
- "registry": "http://registry.caleydo.org/"
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/phovea/phovea_clustering.git"
}
}
diff --git a/phovea_clustering/__init__.py b/phovea_clustering/__init__.py
new file mode 100644
index 0000000..0a25671
--- /dev/null
+++ b/phovea_clustering/__init__.py
@@ -0,0 +1,36 @@
+###############################################################################
+# Caleydo - Visualization for Molecular Biology - http://caleydo.org
+# Copyright (c) The Caleydo Team. All rights reserved.
+# Licensed under the new BSD license, available at http://caleydo.org/license
+###############################################################################
+
+
+def phovea(registry):
+ """
+ register extension points
+ :param registry:
+ """
+ # generator-phovea:begin
+ registry.append('clustering', 'caleydo-clustering-kmeans', 'phovea_clustering.clustering_kmeans', {})
+
+ registry.append('clustering', 'caleydo-clustering-hierarchical', 'phovea_clustering.clustering_hierarchical', {})
+
+ registry.append('clustering', 'caleydo-clustering-affinity', 'phovea_clustering.clustering_affinity', {})
+
+ registry.append('clustering', 'caleydo-clustering-fuzzy', 'phovea_clustering.clustering_fuzzy', {})
+
+ registry.append('namespace', 'caleydo-clustering', 'phovea_clustering.clustering_api', {
+ 'namespace': '/api/clustering'
+ })
+ # generator-phovea:end
+ pass
+
+
+def phovea_config():
+ """
+ :return: file pointer to config file
+ """
+ from os import path
+ here = path.abspath(path.dirname(__file__))
+ config_file = path.join(here, 'config.json')
+ return config_file if path.exists(config_file) else None
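+
+
+# A minimal sketch (not part of the Phovea API; MockRegistry is hypothetical)
+# of how the phovea(registry) hook above could be exercised in isolation:
+#
+# class MockRegistry(object):
+#   def __init__(self):
+#     self.entries = []
+#
+#   def append(self, plugin_type, plugin_id, module, config):
+#     self.entries.append((plugin_type, plugin_id, module, config))
+#
+# registry = MockRegistry()
+# phovea(registry)
+# assert len(registry.entries) == 5  # four 'clustering' extensions plus one 'namespace'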
diff --git a/phovea_clustering/clustering_affinity.py b/phovea_clustering/clustering_affinity.py
new file mode 100644
index 0000000..b5b3283
--- /dev/null
+++ b/phovea_clustering/clustering_affinity.py
@@ -0,0 +1,307 @@
+########################################################################################################################
+# libraries
+
+# module to load own configurations
+import phovea_server.config
+import numpy as np
+from clustering_util import similarity_measurement_matrix
+from timeit import default_timer as timer
+
+# request config if needed for the future
+config = phovea_server.config.view('caleydo-clustering')
+
+__author__ = 'Michael Kern'
+__version__ = '0.0.1'
+__email__ = 'kernm@in.tum.de'
+
+
+########################################################################################################################
+
+class AffinityPropagation:
+ """
+ This is an implementation of the affinity propagation algorithm to cluster genomic data / matrices.
+ Implementation follows Frey & Dueck, 'Clustering by Passing Messages Between Data Points'
+ (Science, 2007), which also provides a reference Matlab implementation.
+ Returns the centroids and labels / stratification of each row belonging to one cluster.
+ """
+
+ def __init__(self, obs, damping=0.5, factor=1.0, pref_method='minimum', distance='euclidean'):
+ """
+ Initializes the algorithm.
+ :param obs: genomic data / matrix
+ :param damping: controls update process to dampen oscillations
+ :param factor: controls the preference value (influences number of clusters)
+ :param pref_method: all points are chosen equally with a given preference (median or minimum of similarity matrix)
+ :return:
+ """
+ self.__n = np.shape(obs)[0]
+ # observations, can be 1D array or 2D matrix with genes as rows and conditions as columns
+ # remove all NaNs in data
+ self.__obs = np.nan_to_num(obs)
+ # variables influencing output of clustering algorithm
+ self.__damping = damping
+ self.__factor = factor
+ self.__pref_method = pref_method
+
+ # similarity matrix
+ self.__S = np.zeros((self.__n, self.__n))
+ # availability matrix
+ self.__A = np.zeros((self.__n, self.__n))
+ # responsibility matrix
+ self.__R = np.zeros((self.__n, self.__n))
+
+ self.min_value = np.finfo(np.float).min
+
+ # self.__mx1 = np.full(self.__n, self.min_value)
+ # self.__mx2 = np.full(self.__n, self.min_value)
+
+ self.__idx = np.zeros(self.__n)
+
+ # set similarity computation
+ self.__distance = distance
+
+ self.__compute_similarity()
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __call__(self):
+ """
+ Caller function for server API.
+ """
+ return self.run()
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __compute_similarity(self):
+ """
+ Compute the similarity matrix from the original observation matrix and set preference of each element.
+ :return: similarity matrix
+ """
+ # compute distance matrix containing the negative sq euclidean distances -|| xi - xj ||**2
+ self.__S = -similarity_measurement_matrix(self.__obs, self.__distance)
+
+ # determine the preferences S(k,k) to control the output of clusters
+ pref = 0
+ # could be median or minimum
+ if self.__pref_method == 'median':
+ pref = float(np.median(self.__S)) * self.__factor
+ elif self.__pref_method == 'minimum':
+ pref = np.min(self.__S) * self.__factor
+ else:
+ raise AttributeError
+
+ np.fill_diagonal(self.__S, pref)
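+
+ # illustration (hypothetical numbers): if S holds similarities in [-9, -1],
+ # pref_method='minimum' sets every S(k,k) to -9 * factor, which biases the
+ # algorithm towards few exemplars; 'median' typically yields more clusters.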
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def run(self):
+ """
+ Runs the algorithm of affinity propagation. Conducts at least 100 iterations and checks if the outcome of
+ current exemplars/clusters has converged. If not, the algorithm will continue until convergence is found
+ or the maximum number of iterations (200) is reached.
+ :return:
+ """
+ max_iter = 200
+ max_conv_iter = 100
+
+ # sum all decisions for exemplars per round
+ decision_sum = np.zeros(self.__n)
+ # collect decisions for one exemplar per iteration round
+ decision_iter = np.zeros((max_conv_iter, self.__n))
+ # counter for decisions (= consider data element as exemplar in each algorithm iteration)
+ decision_counter = max_conv_iter
+ # indicates if algorithm has converged
+ is_converged = False
+
+ centroids = []
+ it = 0
+ cluster_i = []
+
+ # helpful variables (that do not need recomputation)
+ index_diag = np.arange(self.__n)
+ indices_diag = np.diag_indices_from(self.__R)
+ new_a = np.zeros((self.__n, self.__n))
+ new_r = np.zeros((self.__n, self.__n))
+
+ for it in range(1, max_iter + 1):
+
+ # ----------------------------------------------------------------------------------------------------------
+
+ # compute responsibility matrix
+ m_as = self.__A + self.__S
+
+ max_y = np.max(m_as, axis=1)
+ index_y = np.argmax(m_as, axis=1)
+
+ # mask the row maxima in the m_as matrix (set them to the smallest float value)
+ m_as[index_diag, index_y] = self.min_value
+
+ # look for second maxima
+ max_y2 = np.max(m_as, axis=1)
+
+ # perform responsibility update
+ for ii in range(self.__n):
+ # s(i, k) - max({ a(i, k') + s(i, k') })
+ new_r[ii] = self.__S[ii] - max_y[ii]
+
+ # subtract second maximum from row -> column entry with maximum value
+ new_r[index_diag, index_y] = self.__S[index_diag, index_y] - max_y2[index_diag]
+
+ # dampen values
+ # self.__R = self.__damping * self.__R + (1 - self.__damping) * new_r
+ self.__R *= self.__damping
+ self.__R += (1 - self.__damping) * new_r
+
+ # ----------------------------------------------------------------------------------------------------------
+
+ # compute availability matrix
+ # cut out negative elements
+ # TODO! slow because of copy operation
+ rp = np.maximum(self.__R, 0)
+
+ # write back all diagonal elements as self representatives
+ rp[indices_diag] = self.__R[indices_diag]
+ sum_cols = np.sum(rp, axis=0)
+
+ # apply availability update
+ new_a[:, ] = sum_cols
+ new_a -= rp
+ # for ii in range(self.__n):
+ # # r(k, k) + sum(max(0, r(i',k))
+ # new_a[:, ii] = sum_cols[ii] - Rp[:, ii]
+
+ diag_a = np.diag(new_a)
+ # take minimum of all the values in A, cut out all values above zero
+ # new_a = np.minimum(new_a, 0)
+ new_a[new_a > 0] = 0
+ new_a[indices_diag] = diag_a[index_diag]
+
+ # dampen values
+ # self.__A = self.__damping * self.__A + (1 - self.__damping) * new_a
+ self.__A *= self.__damping
+ self.__A += (1 - self.__damping) * new_a
+
+ # ----------------------------------------------------------------------------------------------------------
+
+ # find exemplars for new clusters
+ # old version which is slower
+ # E = self.__R + self.__A
+ # diag_e = np.diag(E)
+
+ # take the diagonal elements of the created matrix E
+ diag_e = np.diag(self.__R) + np.diag(self.__A)
+
+ # all elements > 0 are considered to be an appropriate exemplar for the dataset
+ cluster_i = np.argwhere(diag_e > 0).flatten()
+
+ # count the number of clusters
+ num_clusters = len(cluster_i)
+
+ # ----------------------------------------------------------------------------------------------------------
+
+ decision_counter += 1
+ if decision_counter >= max_conv_iter:
+ decision_counter = 0
+
+ # subtract outcome of previous iteration (< 100) from the total sum of the decisions
+ decision_sum -= decision_iter[decision_counter]
+
+ decision_iter[decision_counter].fill(0)
+ decision_iter[decision_counter][cluster_i] = 1
+
+ # compute sum of decisions for each element being an exemplar
+ decision_sum += decision_iter[decision_counter]
+
+ # check for convergence
+ if it >= max_conv_iter or it >= max_iter:
+ is_converged = True
+
+ for ii in range(self.__n):
+ # if an element is considered to be an exemplar in at least one iteration
+ # and total of decisions in the last 100 iterations is not 100 --> no convergence
+ if decision_sum[ii] != 0 and decision_sum[ii] != max_conv_iter:
+ is_converged = False
+ break
+
+ if is_converged and num_clusters > 0:
+ break
+
+ # --------------------------------------------------------------------------------------------------------------
+
+ # obtain centroids
+ centroids = self.__obs[cluster_i]
+
+ # find maximum columns in m_as matrix to assign elements to clusters / exemplars
+ # fill A with negative values
+ self.__A.fill(self.min_value)
+ # set values of clusters to zero (as we only want to regard these values)
+ self.__A[:, cluster_i] = 0.0
+ # fill diagonal of similarity matrix to zero (remove preferences)
+ np.fill_diagonal(self.__S, 0.0)
+
+ # compute m_as matrix
+ m_as = self.__A + self.__S
+ # since values are < 0, look for the maximum number in each row and return its column index
+ self.__idx = np.argmax(m_as, axis=1)
+
+ cluster_i = cluster_i.tolist()
+ cluster_labels = [[] for _ in range(num_clusters)]
+
+ # create labels per cluster
+ for ii in range(self.__n):
+ index = cluster_i.index(self.__idx[ii])
+ self.__idx[ii] = index
+ cluster_labels[index].append(ii)
+
+ # return sorted cluster labels (that's why we call compute cluster distances, might be redundant)
+ # for ii in range(num_clusters):
+ # cluster_labels[ii], _ = compute_cluster_intern_distances(self.__obs, cluster_labels[ii])
+
+ # if is_converged:
+ # print('Algorithm has converged after {} iterations'.format(it))
+ # else:
+ # print('Algorithm has not converged after 200 iterations')
+ #
+ # print('Number of detected clusters {}'.format(num_clusters))
+ # print('Centroids: {}'.format(centroids))
+
+ return centroids.tolist(), self.__idx.tolist(), cluster_labels
+
+
+########################################################################################################################
+
+def _plugin_initialize():
+ """
+ optional initialization method of this module, will be called once
+ :return:
+ """
+ pass
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def create(data, damping, factor, preference, distance):
+ """
+ By convention, this module contains a factory called create that returns the extension implementation.
+ :return:
+ """
+ return AffinityPropagation(data, damping, factor, preference, distance)
+
+
+########################################################################################################################
+
+# from timeit import default_timer as timer
+
+if __name__ == '__main__':
+ np.random.seed(200)
+ # data = np.array([[1,2,3],[5,4,5],[3,2,2],[8,8,7],[9,6,7],[2,3,4]])
+ # data = np.array([np.random.rand(8000) * 4 - 2 for _ in range(500)])
+ # data = np.array([[0.9],[1],[1.1],[10],[11],[12],[20],[21],[22]])
+ data = np.array([1, 1.1, 5, 8, 5.2, 8.3])
+
+ s = timer()
+ aff = AffinityPropagation(data, 0.9, 1.0, 'median', 'euclidean')
+ result = aff.run()
+ e = timer()
+ print(result)
+ print('time elapsed: {}'.format(e - s))
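+
+ # a further sketch (assumption: same toy data as above): the preference
+ # method influences the number of detected clusters; 'minimum' sets a lower
+ # self-similarity S(k,k) and therefore tends to yield fewer exemplars.
+ for pref in ('median', 'minimum'):
+ centroids, labels, cluster_labels = AffinityPropagation(data, 0.9, 1.0, pref, 'euclidean').run()
+ print('{}: {} clusters'.format(pref, len(cluster_labels)))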
diff --git a/phovea_clustering/clustering_api.py b/phovea_clustering/clustering_api.py
new file mode 100644
index 0000000..4aa1200
--- /dev/null
+++ b/phovea_clustering/clustering_api.py
@@ -0,0 +1,156 @@
+########################################################################################################################
+# libraries
+
+# use flask library for server activities
+from phovea_server import ns
+# load services (that are executed by the server when certain website is called)
+from clustering_service import get_cluster_distances, get_clusters_from_dendrogram, load_data, run_affinity_propagation, run_fuzzy, run_hierarchical, run_kmeans
+
+
+__author__ = 'Michael Kern'
+__version__ = '0.0.1'
+__email__ = 'kernm@in.tum.de'
+
+# create new flask application for hosting namespace
+app = ns.Namespace(__name__)
+
+
+########################################################################################################################
+
+@app.route('/kmeans/<k>/<init_method>/<distance>/<dataset_id>')
+def kmeans_clustering(k, init_method, distance, dataset_id):
+ """
+ Access k-means clustering plugin.
+ :param k: number of clusters
+ :param init_method: initialization method for initial clusters
+ :param distance: distance measurement
+ :param dataset_id: identifier of data set
+ :return: jsonified output
+ """
+ try:
+ data = load_data(dataset_id)
+ response = run_kmeans(data, int(k), init_method, distance)
+ return ns.jsonify(response)
+ except Exception:
+ return ns.jsonify({})
+
+
+########################################################################################################################
+
+@app.route('/hierarchical/<k>/<method>/<distance>/<dataset_id>')
+def hierarchical_clustering(k, method, distance, dataset_id):
+ """
+ Access hierarchical clustering plugin.
+ :param k: number of desired clusters
+ :param method: type of single linkage
+ :param distance: distance measurement
+ :param dataset_id: identifier of data set
+ :return: jsonified output
+ """
+ try:
+ data = load_data(dataset_id)
+ response = run_hierarchical(data, int(k), method, distance)
+ return ns.jsonify(response)
+ except Exception:
+ return ns.jsonify({})
+
+
+########################################################################################################################
+
+@app.route('/affinity/<damping>/<factor>/<preference>/<distance>/<dataset_id>')
+def affinity_propagation_clustering(damping, factor, preference, distance, dataset_id):
+ """
+ Access affinity propagation clustering plugin.
+ :param damping:
+ :param factor:
+ :param preference:
+ :param distance: distance measurement
+ :param dataset_id:
+ :return:
+ """
+ try:
+ data = load_data(dataset_id)
+ response = run_affinity_propagation(data, float(damping), float(factor), preference, distance)
+ return ns.jsonify(response)
+ except Exception:
+ return ns.jsonify({})
+
+
+########################################################################################################################
+
+@app.route('/fuzzy/<num_clusters>/<m>/<threshold>/<distance>/<dataset_id>')
+def fuzzy_clustering(num_clusters, m, threshold, distance, dataset_id):
+ """
+ :param num_clusters:
+ :param m:
+ :param threshold:
+ :param distance:
+ :param dataset_id:
+ :return:
+ """
+ try:
+ data = load_data(dataset_id)
+ response = run_fuzzy(data, int(num_clusters), float(m), float(threshold), distance)
+ return ns.jsonify(response)
+ except Exception:
+ return ns.jsonify({})
+
+
+########################################################################################################################
+
+def load_attribute(json_data, attr):
+ import json
+ data = json.loads(json_data)
+ if attr in data:
+ return data[attr]
+ else:
+ return None
+
+
+########################################################################################################################
+
+@app.route('/distances/<metric>/<dataset_id>/<sorted>', methods=['POST'])
+def get_distances(metric, dataset_id, sorted):
+ """
+ Compute the distances of the current stratification values to their centroid.
+ :param metric:
+ :param dataset_id:
+ :param sorted: 'true' to sort distances in ascending order
+ :return: distances and labels sorted in ascending order
+ """
+ data = load_data(dataset_id)
+ labels = []
+ extern_labels = None
+
+ if 'group' in ns.request.values:
+ labels = load_attribute(ns.request.values['group'], 'labels')
+ extern_labels = load_attribute(ns.request.values['group'], 'externLabels')
+ else:
+ return ''
+
+ response = get_cluster_distances(data, labels, metric, extern_labels, sorted)
+ return ns.jsonify(response)
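+
+ # Example request (hypothetical dataset id), assuming the namespace is mounted
+ # at /api/clustering as registered in phovea_clustering/__init__.py:
+ # POST /api/clustering/distances/euclidean/someDatasetId/true
+ # with a form field such as
+ # group={"labels": [0, 3, 5], "externLabels": [[1, 2], [4]]}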
+
+
+########################################################################################################################
+
+@app.route('/dendrogram/<num_clusters>/<dataset_id>', methods=['POST'])
+def dendrogram_clusters(num_clusters, dataset_id):
+ data = load_data(dataset_id)
+
+ if 'group' in ns.request.values:
+ dendrogram = load_attribute(ns.request.values['group'], 'dendrogram')
+ else:
+ return ''
+
+ response = get_clusters_from_dendrogram(data, dendrogram, int(num_clusters))
+ return ns.jsonify(response)
+
+
+########################################################################################################################
+
+def create():
+ """
+ Standard Caleydo convention for creating the service when server is initialized.
+ :return: Returns implementation of this plugin with given name
+ """
+ return app
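+
+
+# Example round trip (hypothetical host and dataset id): a client may issue
+# GET /api/clustering/kmeans/3/kmeans++/euclidean/someDatasetId
+# and receive a JSON object of the form
+# {"centroids": [...], "clusterLabels": [[...], ...]}
+# as assembled by run_kmeans in clustering_service.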
diff --git a/phovea_clustering/clustering_fuzzy.py b/phovea_clustering/clustering_fuzzy.py
new file mode 100644
index 0000000..8b95f59
--- /dev/null
+++ b/phovea_clustering/clustering_fuzzy.py
@@ -0,0 +1,225 @@
+########################################################################################################################
+# libraries
+
+# module to load own configurations
+import phovea_server.config
+# library to conduct matrix/vector calculus
+import numpy as np
+from clustering_util import similarity_measurement
+
+# request config if needed for the future
+config = phovea_server.config.view('caleydo-clustering')
+
+__author__ = 'Michael Kern'
+__version__ = '0.0.3'
+__email__ = 'kernm@in.tum.de'
+
+
+########################################################################################################################
+# class definition
+
+class Fuzzy(object):
+ """
+ Formulas: https://en.wikipedia.org/wiki/Fuzzy_clustering
+ """
+
+ def __init__(self, obs, num_clusters, m=2.0, threshold=-1, distance='euclidean', init=None, error=0.0001):
+ """
+ Initializes algorithm.
+ :param obs: observation matrix / genomic data
+ :param num_clusters: number of clusters
+ :param m: fuzzifier, controls degree of fuzziness, from [1; inf]
+ :return:
+ """
+ # observation
+ self.__obs = np.nan_to_num(obs)
+
+ self.__n = obs.shape[0]
+
+ # fuzzifier value
+ self.__m = np.float(m)
+ # number of clusters
+ self.__c = num_clusters
+
+ # matrix u containing all the weights describing the degree of membership of each patient to the centroid
+ if init is None:
+ init = np.random.rand(self.__c, self.__n)
+
+ self.__u = np.copy(init)
+
+ # TODO! scikit normalizes the values at the beginning and at each step to [0; 1]
+ self.__u /= np.ones((self.__c, 1)).dot(np.atleast_2d(np.sum(self.__u, axis=0))).astype(np.float64)
+ # remove all zero values and set them to smallest possible value
+ self.__u = np.fmax(self.__u, np.finfo(np.float64).eps)
+ # centroids
+ self.__centroids = np.zeros(self.__c)
+ # threshold for stopping criterion
+ self.__error = error
+ # distance function
+ self.__distance = distance
+
+ # threshold or minimum probability used for cluster assignments
+ if threshold == -1:
+ self.__threshold = 1.0 / num_clusters
+ else:
+ self.__threshold = threshold
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __call__(self):
+ """
+ Caller function for server API
+ :return:
+ """
+ return self.run()
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def compute_centroid(self):
+ """
+ Compute the new centroids using the computed partition matrix.
+ :return:
+ """
+ u_m = self.__u ** self.__m
+
+ sum_data_weights = np.dot(u_m, self.__obs)
+ if self.__obs.ndim == 1:
+ m = 1
+ else:
+ m = self.__obs.shape[1]
+
+ sum_weights = np.sum(u_m, axis=1)
+ # tile array (sum of weights repeated in every row)
+ sum_weights = np.ones((m, 1)).dot(np.atleast_2d(sum_weights)).T
+
+ if self.__obs.ndim == 1:
+ sum_weights = sum_weights.flatten()
+
+ # divide by total sum to get new centroids
+ self.__centroids = sum_data_weights / sum_weights
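+
+ # i.e. the textbook fuzzy c-means centroid:
+ # c_k = sum_i(u_ik ** m * x_i) / sum_i(u_ik ** m)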
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def compute_coefficients(self):
+ """
+ Compute new partition matrix / weights describing the degree of membership of each patient to all clusters.
+ :return:
+ """
+
+ # TODO you can also use cdist of scipy.spatial.distance module
+ dist_mat = np.zeros((self.__c, self.__n))
+
+ for ii in range(self.__c):
+ dist_mat[ii] = similarity_measurement(self.__obs, self.__centroids[ii], self.__distance)
+
+ # set zero values to smallest values to prevent inf results
+ dist_mat = np.fmax(dist_mat, np.finfo(np.float64).eps)
+
+ # apply coefficient formula
+ denom = np.float(self.__m - 1.0)
+ self.__u = dist_mat ** (-2.0 / denom)
+
+ sum_coeffs = np.sum(self.__u, axis=0)
+
+ self.__u /= np.ones((self.__c, 1)).dot(np.atleast_2d(sum_coeffs))
+ self.__u = np.fmax(self.__u, np.finfo(np.float64).eps)
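+
+ # note: dist_mat ** (-2 / (m - 1)) followed by the column normalisation above
+ # is algebraically equivalent to the textbook membership update
+ # u_ik = 1 / sum_j((d_ik / d_jk) ** (2 / (m - 1)))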
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def run(self):
+ """
+ Perform the c-means fuzzy clustering.
+ :return:
+ """
+ max_iter = 100
+ iter = 0
+
+ while iter < max_iter:
+ # save last partition matrix
+ u_old = np.copy(self.__u)
+ # compute centroids with given weights
+ self.compute_centroid()
+ # compute new coefficient matrix
+ self.compute_coefficients()
+
+ # normalize weight / partition matrix u
+ self.__u /= np.ones((self.__c, 1)).dot(np.atleast_2d(np.sum(self.__u, axis=0)))
+ self.__u = np.fmax(self.__u, np.finfo(np.float64).eps)
+
+ # compute the difference between the old and new matrix
+ epsilon = np.linalg.norm(self.__u - u_old)
+
+ # stop if difference (epsilon) is smaller than the user-defined threshold
+ if epsilon < self.__error:
+ break
+
+ iter += 1
+
+ self.__end()
+
+ u = self.__u.T
+ # print(self.__u.T)
+
+ return self.__centroids.tolist(), self.__cluster_labels, u.tolist(), self.__threshold
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __end(self):
+ """
+ Conducts the cluster assignments and creates the cluster_labels array.
+ :return:
+ """
+ # assign patient to clusters
+ # transpose to get a (n, c) matrix
+ u = self.__u.T
+
+ self.__labels = np.zeros(self.__n, dtype=np.int)
+ self.__cluster_labels = [[] for _ in range(self.__c)]
+ # gather all probabilities / degree of memberships of each patient to the clusters
+ # self.__cluster_probs = [[] for _ in range(self.__c)]
+ # minimum degree of membership required to assign a patient to a cluster
+ max_prob = np.float64(self.__threshold)
+
+ for ii in range(self.__n):
+ # cluster_id = np.argmax(u[ii])
+ # self.__labels[ii] = cluster_id
+ # self.__cluster_labels[cluster_id].append(ii)
+
+ for jj in range(self.__c):
+ if u[ii][jj] >= max_prob:
+ cluster_id = jj
+ self.__labels[ii] = cluster_id
+ self.__cluster_labels[cluster_id].append(int(ii))
+
+ # for ii in range(self.__c):
+ # self.__cluster_labels[ii], _ = compute_cluster_intern_distances(self.__obs, self.__cluster_labels[ii])
+
+
+########################################################################################################################
+
+def _plugin_initialize():
+ """
+ optional initialization method of this module, will be called once
+ :return:
+ """
+ pass
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def create(data, num_cluster, m, threshold, distance):
+ """
+ By convention, this module contains a factory called create that returns the extension implementation.
+ :return:
+ """
+ return Fuzzy(data, num_cluster, m, threshold, distance)
+
+
+########################################################################################################################
+
+if __name__ == '__main__':
+ data = np.array([[1, 1, 2], [5, 4, 5], [3, 2, 2], [8, 8, 7], [9, 8, 9], [2, 2, 2]])
+ # data = np.array([1,1.1,5,8,5.2,8.3])
+
+ fuz = Fuzzy(data, 3, 1.5)
+ print(fuz.run())
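+
+ # a sketch with an explicit assignment threshold (hypothetical value 0.5):
+ # higher thresholds make the soft assignments effectively exclusive, since a
+ # row rarely reaches the required membership in more than one cluster
+ fuz_strict = Fuzzy(data, 3, 1.5, threshold=0.5)
+ print(fuz_strict.run())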
diff --git a/phovea_clustering/clustering_hierarchical.py b/phovea_clustering/clustering_hierarchical.py
new file mode 100644
index 0000000..668d77b
--- /dev/null
+++ b/phovea_clustering/clustering_hierarchical.py
@@ -0,0 +1,467 @@
+########################################################################################################################
+# libraries
+
+# module to load own configurations
+import phovea_server.config
+
+
+# library to conduct matrix/vector calculus
+import numpy as np
+# fastest distance computation by scipy
+# import scipy.spatial as spt
+
+# utility functions for clustering and creating the dendrogram trees
+from clustering_util import BinaryNode, BinaryTree
+from clustering_util import similarity_measurement_matrix
+from clustering_util import compute_cluster_intern_distances
+from clustering_util import cut_json_tree_by_clusters
+
+__author__ = 'Michael Kern'
+__version__ = '0.0.3'
+__email__ = 'kernm@in.tum.de'
+# request config if needed for the future
+config = phovea_server.config.view('caleydo-clustering')
+
+
+########################################################################################################################
+
+class Hierarchical(object):
+ """
+ This is an implementation of hierarchical clustering on genomic data using the Lance-Williams dissimilarity update
+ to compute different distance metrics (single linkage, complete linkage, ...).
+ Lance-Williams explained in: http://arxiv.org/pdf/1105.0121.pdf
+ """
+
+ def __init__(self, obs, method='single', distance='euclidean'):
+ """
+ Initializes the algorithm
+ :param obs: genomic data / matrix
+ :param method: linkage method
+ :return:
+ """
+ # genomic data / matrix
+ # observations, can be 1D array or 2D matrix with genes as rows and conditions as columns
+ # remove all NaNs in data
+ self.__obs = np.nan_to_num(obs)
+
+ num_genes = np.shape(self.__obs)[0]
+ self.__n = num_genes
+
+ # check if dimension is 2D
+ # if self.__obs.ndim == 2:
+ # # obtain number of observations (rows)
+ # num_genes, _ = np.shape(self.__obs)
+ # self.__n = num_genes
+
+ # else:
+ # print("[Error]:\tdata / observations must be 2D. 1D observation arrays are not supported")
+ # raise AttributeError
+
+ # distance measurement
+ self.__distance = distance
+
+ # distance / proximity matrix
+ self.__d = []
+ self.__compute_proximity_matrix()
+ # dictionary mapping the string id (i,j,k,...) of clusters to corresponding index in matrix
+ self.__id_map = {}
+ # inverse mapping of id_map --> returns the string id given a certain index
+ self.__key_map = {}
+ # contains actual index of all clusters, old clusters are from [0, n - 1], new clusters have indices in range
+ # [n, 2n - 1]
+ self.__cluster_map = {}
+ for ii in range(self.__n):
+ self.__id_map[str(ii)] = ii
+ self.__key_map[ii] = str(ii)
+ self.__cluster_map[str(ii)] = ii
+
+ # linkage method for hierarchical clustering
+ self.__method = method
+
+ # internal dendrogram tree
+ self.__tree = None
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __call__(self):
+ """
+ Caller function for server API
+ :return:
+ """
+ return self.run()
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ @property
+ def tree(self):
+ return self.__tree
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __get_coefficients(self, cluster_i, cluster_j):
+ """
+ Compute the coefficients for the Lance-Williams algorithm
+ :param cluster_i:
+ :param cluster_j:
+ :return:
+ """
+ # TODO! use hash map for storing numbers instead of computing them every time
+ if self.__method == 'single':
+ return 0.5, 0.5, 0, -0.5
+ elif self.__method == 'complete':
+ return 0.5, 0.5, 0, 0.5
+ elif self.__method == 'weighted':
+ return 0.5, 0.5, 0, 0
+ elif self.__method == 'median':
+ return 0.5, 0.5, -0.25, 0
+
+ # TODO! ATTENTION! average method should compute the cluster centroids using the average
+ # TODO! || cluster_i - cluster_j || ** 2
+ elif self.__method == 'average':
+ n_i = np.float(cluster_i.count(',') + 1)
+ n_j = np.float(cluster_j.count(',') + 1)
+ sum_n = n_i + n_j
+ return (n_i / sum_n), (n_j / sum_n), 0, 0
+
+ # TODO! ATTENTION! centroid method should compute the cluster centroids using the mean
+ # TODO! || cluster_i - cluster_j || ** 2
+ elif self.__method == 'centroid':
+ n_i = np.float(cluster_i.count(',') + 1)
+ n_j = np.float(cluster_j.count(',') + 1)
+ sum_n = n_i + n_j
+ return (n_i / sum_n), (n_j / sum_n), -((n_i * n_j) / (sum_n ** 2)), 0
+
+ # TODO! Support ward method
+ # TODO! (|cluster_i| * |cluster_j|) / (|cluster_i| + |cluster_j) * || cluster_i - cluster_j || ** 2
+ # elif self.__method == 'ward':
+ # n_i = np.float(cluster_i.count(',') + 1)
+ # n_j = np.float(cluster_j.count(',') + 1)
+ # n_k = np.float(cluster_k.count(',') + 1)
+ # sum_n = n_i + n_j + n_k
+ # return (n_i + n_k) / sum_n, (n_j + n_k) / sum_n, -n_k / sum_n, 0
+ else:
+ raise AttributeError
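+
+ # For reference, these coefficients feed the Lance-Williams update applied in
+ # __merge_clusters:
+ # d(k, ij) = a_i * d(k, i) + a_j * d(k, j) + b * d(i, j) + y * |d(k, i) - d(k, j)|
+ # e.g. single linkage (0.5, 0.5, 0, -0.5) reduces to min(d(k, i), d(k, j)).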
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __compute_proximity_matrix(self):
+ """
+ Compute the proximity of each observation and store the results in a nxn matrix
+ :return:
+ """
+
+ # create distance matrix of size n x n
+ self.__d = np.zeros((self.__n, self.__n))
+
+    # compute the distance matrix using the configured distance measure
+ # TODO! implement generic distance functions
+ # TODO! look for an alternative proximity analysis without computing all distances
+ self.__d = similarity_measurement_matrix(self.__obs, self.__distance)
+
+    # sentinel value guaranteed to be larger than any distance in the matrix
+    self.__max_value = self.__d.max() + 1
+
+ # fill diagonals with max value to exclude them from min dist process
+ # TODO! operate only on upper triangle matrix of distance matrix
+ np.fill_diagonal(self.__d, self.__max_value)
+
+ # print('\t-> finished.')
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __get_matrix_minimum_indices(self):
+ """
+ Searches for the minimum distance in the distance matrix
+ :return: indices of both clusters having the smallest distance
+ """
+ min_dist = self.__d.min()
+ min_list = np.argwhere(self.__d == min_dist)
+
+ min_i, min_j = 0, 0
+
+ # look for indices, where i < j
+ # TODO! for the future --> use upper triangle matrix
+ for ii in range(len(min_list)):
+ min_i, min_j = min_list[ii]
+ if min_i < min_j:
+ break
+
+    if min_i == min_j:
+      print('ERROR: no off-diagonal minimum found (min_i == min_j)')
+
+ return self.__key_map[min_i], self.__key_map[min_j], min_dist
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __delete_clusters(self, i, j):
+ """
+    Reorders and reduces the matrix to insert the new cluster formed of clusters i and j
+    and its distance values, and removes the old clusters by cutting off the last row and column.
+ :param i: cluster index i
+ :param j: cluster index j
+ :return:
+ """
+ id_i = self.__id_map[str(i)]
+ id_j = self.__id_map[str(j)]
+
+ # min_id = min(id_i, id_j)
+ max_id = max(id_i, id_j)
+
+ # now set column max ID to last column -> swap last and i column
+ last_row = self.__d[self.__n - 1]
+ self.__d[max_id] = last_row
+ self.__d[:, max_id] = self.__d[:, (self.__n - 1)]
+
+ # set key of last column (cluster) to column of the cluster with index max_id
+ key = self.__key_map[self.__n - 1]
+ self.__id_map[key] = max_id
+ self.__key_map[max_id] = key
+
+ # delete entries in id and key map --> not required anymore
+ try:
+ del self.__id_map[i]
+ del self.__id_map[j]
+ del self.__key_map[self.__n - 1]
+ except KeyError:
+ print("\nERROR: Key {} not found in id_map".format(j))
+ print("ERROR: Previous key: {} in id_map".format(i))
+ print("Given keys: ")
+ for key in self.__id_map:
+ print(key)
+ return
+
+ # reduce dimension of matrix by one column and row
+ self.__n -= 1
+ self.__d = self.__d[:-1, :-1]
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __merge_clusters(self, i, j):
+ """
+ Merges cluster i and j, computes the new ID and distances of the newly formed cluster
+ and stores required information
+ :param i: cluster index i
+ :param j: cluster index j
+ :return:
+ """
+ id_i = self.__id_map[str(i)]
+ id_j = self.__id_map[str(j)]
+
+ min_id = min(id_i, id_j)
+ max_id = max(id_i, id_j)
+
+    # use the Lance-Williams formula to compute the new linkage distances:
+    # d(k, i u j) = ai * d(k, i) + aj * d(k, j) + b * d(i, j) + y * |d(k, i) - d(k, j)|
+    dki = self.__d[:, min_id]
+    dkj = self.__d[:, max_id]
+    dij = self.__d[min_id, max_id]
+    abs_diff = np.abs(dki - dkj)
+
+    # compute coefficients
+    ai, aj, b, y = self.__get_coefficients(i, j)
+
+    new_entries = ai * dki + aj * dkj + b * dij + y * abs_diff
+ new_entries[min_id] = self.__max_value
+ new_entries[max_id] = self.__max_value
+
+ # add new column and row
+ self.__d[min_id] = new_entries
+ self.__d[:, min_id] = new_entries
+
+ id_ij = min_id
+ new_key = i + ',' + j
+ self.__id_map[new_key] = id_ij
+ self.__key_map[id_ij] = new_key
+ self.__cluster_map[new_key] = len(self.__cluster_map)
+
+ # delete old clusters
+ self.__delete_clusters(i, j)
+
+ # count number of elements
+ return new_key.count(',') + 1
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def run(self):
+ """
+ Conducts the algorithm until there's only one cluster.
+ :return:
+ """
+
+ # number of the current iteration
+ m = 0
+
+ # resulting matrix containing information Z[i,x], x=0: cluster i, x=1: cluster j, x=2: dist(i,j), x=3: num(i,j)
+ runs = self.__n - 1
+ z = np.array([[0 for _ in range(4)] for _ in range(runs)], dtype=np.float)
+
+ while m < runs:
+ m += 1
+
+ i, j, dist_ij = self.__get_matrix_minimum_indices()
+ num_ij = self.__merge_clusters(i, j)
+
+ cluster_i, cluster_j = self.__cluster_map[i], self.__cluster_map[j]
+ z[m - 1] = [int(min(cluster_i, cluster_j)), int(max(cluster_i, cluster_j)), np.float(dist_ij), int(num_ij)]
+
+ # reset number n to length of first dimension (number of genes)
+ self.__n = np.shape(self.__obs)[0]
+
+ self.__tree = self.generate_tree(z)
+ return z.tolist()
+
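+  # Hedged illustration (editor's addition, assuming the default euclidean
+  # distance): for the 1D sample [1, 1.1, 5, 8, 5.2, 8.3] used in _main below,
+  # the first linkage row is [0, 1, 0.1, 2], i.e. clusters 0 and 1 are merged
+  # at distance 0.1 into a cluster of two elements.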
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def generate_tree(self, linkage_matrix):
+ """
+ Computes the dendrogram tree for a given linkage matrix.
+ :param linkage_matrix:
+ :return:
+ """
+ self.__tree = None
+
+ tree_map = {}
+ num_trees = len(linkage_matrix)
+
+ for ii in range(num_trees):
+ entry = linkage_matrix[ii]
+ current_id = self.__n + ii
+      left_index, right_index, value = int(entry[1]), int(entry[0]), entry[2]  # entry[3] (cluster size) is not needed here
+ left = right = None
+
+ if left_index < self.__n:
+ left = BinaryNode(self.__obs[left_index].tolist(), left_index, 1, None, None)
+ else:
+ left = tree_map[left_index]
+
+ if right_index < self.__n:
+ right = BinaryNode(self.__obs[right_index].tolist(), right_index, 1, None, None)
+ else:
+ right = tree_map[right_index]
+
+ if isinstance(left, BinaryNode) and isinstance(right, BinaryNode):
+ tree_map[current_id] = BinaryTree(left, right, current_id, value)
+ elif isinstance(left, BinaryNode):
+ tree_map[current_id] = right.add_node(left, current_id, value)
+ del tree_map[right_index]
+ elif isinstance(right, BinaryNode):
+ tree_map[current_id] = left.add_node(right, current_id, value)
+ del tree_map[left_index]
+ else:
+ tree_map[current_id] = left.merge(right, current_id, value)
+ del tree_map[right_index]
+ del tree_map[left_index]
+
+ self.__tree = tree_map[num_trees + self.__n - 1]
+ return self.__tree
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+
+########################################################################################################################
+
+
+def get_clusters(k, obs, dendrogram, sorted=True):
+ """
+  First implementation to cut the dendrogram tree automatically by choosing the nodes with the
+  greatest node values, i.e. the largest merge distances to the other node / potential cluster
+  :param k: number of desired clusters
+  :param obs: set of observations
+  :param dendrogram: dendrogram tree
+  :param sorted: if True, sort each cluster's labels by their distance to the cluster centroid
+  :return: centroids, sorted cluster labels and normal label list
+ """
+ obs = np.nan_to_num(obs)
+ n = obs.shape[0]
+
+ if isinstance(dendrogram, BinaryTree):
+ cluster_labels = dendrogram.cut_tree_by_clusters(k)
+ else:
+ cluster_labels = cut_json_tree_by_clusters(dendrogram, k)
+
+ cluster_centroids = []
+ labels = np.zeros(n, dtype=np.int)
+ cluster_id = 0
+
+ for ii in range(len(cluster_labels)):
+ cluster = cluster_labels[ii]
+ sub_obs = obs[cluster]
+ cluster_centroids.append(np.mean(sub_obs, axis=0).tolist())
+
+    for index in cluster:
+      labels[index] = cluster_id
+
+ # sort labels according to their distance
+ if sorted:
+ cluster_labels[ii], _ = compute_cluster_intern_distances(obs, cluster)
+
+ cluster_id += 1
+
+ return cluster_centroids, cluster_labels, labels.tolist()
+
+
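+# Hedged usage sketch (editor's addition, not part of the original API): builds
+# a dendrogram on a tiny sample (illustrative values) and cuts it into two
+# clusters; assumes the default distance of Hierarchical.
+def _example_get_clusters():
+  demo_obs = np.array([[1.0, 2.0], [1.1, 2.1], [8.0, 9.0], [8.2, 9.1]])
+  hier = Hierarchical(demo_obs, 'complete')
+  hier.run()
+  # returns (centroids, cluster labels, flat label list)
+  return get_clusters(2, demo_obs, hier.tree)
+
+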
+########################################################################################################################
+
+def _plugin_initialize():
+ """
+ optional initialization method of this module, will be called once
+ :return:
+ """
+ pass
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def create(data, method, distance):
+ """
+  By convention, this module contains a factory called create returning the extension implementation.
+ :return:
+ """
+ return Hierarchical(data, method, distance)
+
+
+########################################################################################################################
+def _main():
+ from timeit import default_timer as timer
+ # from scipy.cluster.hierarchy import linkage, leaves_list
+
+ np.random.seed(200)
+ # data = np.array([[1,2,3],[5,4,5],[3,2,2],[8,8,7],[9,6,7],[2,3,4]])
+ data = np.array([1, 1.1, 5, 8, 5.2, 8.3])
+
+ time_mine = 0
+ time_theirs = 0
+
+ n = 10
+
+ for i in range(n):
+ # data = np.array([np.random.rand(6000) * 4 - 2 for _ in range(249)])
+ # import time
+ s1 = timer()
+ hier = Hierarchical(data, 'complete')
+ # s = time.time()
+ linkage_matrix = hier.run()
+ e1 = timer()
+ print(linkage_matrix)
+ tree = hier.generate_tree(linkage_matrix)
+ # print(tree.get_leaves())
+ # print(tree.jsonify())
+ # print(hier.get_clusters(3))
+ import json
+
+ json_tree = json.loads(tree.jsonify())
+ get_clusters(3, data, json_tree)
+
+ s2 = timer()
+ # linkage_matrix2 = linkage(data, 'complete')
+ # print(leaves_list(linkage_matrix2))
+ e2 = timer()
+
+ time_mine += e1 - s1
+ time_theirs += e2 - s2
+
+ # print(linkage_matrix)
+ # print(linkage_matrix2)
+ print('mine: {}'.format(time_mine / n))
+ print('theirs: {}'.format(time_theirs / n))
+
+if __name__ == '__main__':
+ _main()
diff --git a/phovea_clustering/clustering_kmeans.py b/phovea_clustering/clustering_kmeans.py
new file mode 100644
index 0000000..8cc53b0
--- /dev/null
+++ b/phovea_clustering/clustering_kmeans.py
@@ -0,0 +1,406 @@
+########################################################################################################################
+# libraries
+
+# module to load own configurations
+import phovea_server.config
+
+# numpy important to conduct matrix/vector calculus
+import numpy as np
+# creates random numbers
+import random
+
+# contains utility functions
+from clustering_util import weighted_choice, similarity_measurement
+
+__author__ = 'Michael Kern'
+__version__ = '0.0.2'
+__email__ = 'kernm@in.tum.de'
+
+# request config if needed in the future
+config = phovea_server.config.view('caleydo-clustering')
+
+
+########################################################################################################################
+
+class KMeans:
+ """
+ This is an implementation of the k-means algorithm to cluster genomic data / matrices.
+  Returns the centroids, the labels / stratification of each row belonging to one cluster,
+  and the per-cluster lists of row labels.
+ """
+
+ def __init__(self, obs, k, init_mode='kmeans++', distance='sqeuclidean', iters=1000):
+ """
+ Initializes the algorithm with observation, number of k clusters, the initial method and
+ the maximum number of iterations.
+    Initialization method of random cluster choice can be: forgy, uniform, random, kmeans++
+ :param obs: genomic data / matrix
+ :param k: number of clusters
+ :param init_mode: initialization method
+ :param distance: distance measurement
+ :param iters: number of maximum iterations
+ :return:
+ """
+
+ # number of clusters
+ self.__k = k
+ # observations, can be 1D array or 2D matrix with genes as rows and conditions as columns
+ # remove all NaNs in data
+ self.__obs = np.nan_to_num(obs)
+ # number of observations / genes
+ self.__n = np.shape(obs)[0]
+ # maps the element ids to clusters
+ self.__label_map = np.zeros(self.__n, dtype=np.int)
+ # cluster means and number of elements
+ self.__cluster_means = np.array([obs[0] for _ in range(k)], dtype=np.float)
+ self.__cluster_nums = np.array([0 for _ in range(k)], dtype=np.int)
+ # tells if any cluster has changed or rather if any data item was moved
+ self.__changed = True
+ # number of iterations
+ self.__iters = iters
+ # initialization method
+ self.__init_mode = init_mode
+ # compare function
+ self.__distance = distance
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __call__(self):
+ """
+ Caller function for server API.
+ """
+ return self.run()
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __init(self):
+ """
+ Initialize clustering with random clusters using a user-specified method
+ :return:
+ """
+    # TODO! consider initializing the k-means algorithm with Principal Component Analysis (PCA)
+ # init cluster
+ if self.__init_mode == 'forgy':
+ self.__forgy_method()
+ elif self.__init_mode == 'uniform':
+ self.__uniform_method()
+ elif self.__init_mode == 'random':
+ self.__random_method()
+ elif self.__init_mode == 'kmeans++':
+ self.__plusplus_method()
+ else:
+ raise AttributeError
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __forgy_method(self):
+ """
+ Initialization method:
+ Randomly choose k observations from the data using a uniform random distribution.
+ :return:
+ """
+ for ii in range(self.__k):
+ self.__cluster_means[ii] = (self.__obs[random.randint(0, self.__n - 1)])
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __uniform_method(self):
+ """
+ Initialization method:
+ Randomly assign each observation to one of the k clusters using uniform random distribution
+ and compute the centroids of each cluster.
+ :return:
+ """
+ for i in range(self.__n):
+ self.__label_map[i] = random.randint(0, self.__k - 1)
+
+ self.__update()
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __random_method(self):
+ """
+ Initialization method:
+ Randomly choose k observations from the data by estimating the mean and standard deviation of the data and
+ using the gaussian random distribution.
+ :return:
+ """
+ mean = np.mean(self.__obs, axis=0)
+ std = np.std(self.__obs, axis=0)
+
+ for ii in range(self.__k):
+ self.__cluster_means[ii] = np.random.normal(mean, std)
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __plusplus_method(self):
+ """
+ Initialization method:
+ Chooses k observations by computing probabilities for each observation and using a weighted random distribution.
+    Algorithm: k-means++. This method should accelerate the algorithm by finding
+ the appropriate clusters right at the beginning and hence should make it more robust.
+ :return:
+ """
+ # 1) choose random center out of data
+ self.__cluster_means[0] = (random.choice(self.__obs))
+
+ max_value = np.max(self.__obs) + 1
+    probs = np.full(self.__n, max_value, dtype=np.float)
+
+ for i in range(1, self.__k):
+ probs.fill(max_value)
+      # 2) compute new probabilities: take the minimum of all squared distances
+ for j in range(0, i):
+ dists = similarity_measurement(self.__obs, self.__cluster_means[j], self.__distance)
+ # collect minimum squared distances to cluster centroids
+ probs = np.minimum(probs, dists)
+
+ # sum all squared distances
+ sum_probs = np.float(np.sum(probs))
+
+ if sum_probs != 0:
+ probs /= sum_probs
+ # 3) choose new center based on probabilities
+ self.__cluster_means[i] = (self.__obs[weighted_choice(probs)])
+ else:
+ print('ERROR: cannot find enough cluster centroids for given k = ' + str(self.__k))
+
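+  # Hedged illustration (editor's addition): after the first centroid is drawn,
+  # every remaining observation is weighted by its minimum squared distance to
+  # the centroids chosen so far, so far-away points are more likely to become
+  # the next centroid.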
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def get_cluster_mean(self, num):
+ """
+ Returns the centroid of the cluster with index num.
+ :param num:
+ :return:
+ """
+ if num >= self.__k:
+ return None
+ else:
+ return self.__cluster_means[num]
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def get_cluster_of_element(self, index):
+ """
+ :param index: number of element in observation array
+ :return: cluster id of observation with given index.
+ """
+ if index >= self.__n:
+ return None
+ else:
+ return self.__label_map[index]
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def print_clusters(self):
+ """
+ Print the cluster centroids and the labels.
+ :return:
+ """
+    print('Centroids: ' + str(self.__centroids) + ' | labels: ' + str(self.__labels))
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __assignment(self):
+ """
+ Assignment step:
+ Compute distance of current observation to each cluster centroid and move gene to the nearest cluster.
+ :return:
+ """
+ for i in range(self.__n):
+ value = self.__obs[i]
+
+ # compute squared distances to each mean
+ dists = similarity_measurement(self.__cluster_means, value, self.__distance)
+      # nearest cluster
+      nearest_id = np.argmin(dists)
+
+      if self.__label_map[i] != nearest_id:
+        self.__changed = True
+        self.__label_map[i] = nearest_id
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __update(self):
+ """
+ Update step:
+ Compute the new centroids of each cluster after the assignment.
+ :return:
+ """
+ self.__cluster_means.fill(0)
+ self.__cluster_nums.fill(0)
+
+ self.__cluster_labels = [[] for _ in range(self.__k)]
+
+    for ii in range(self.__n):
+      cluster_id = self.__label_map[ii]
+      self.__cluster_labels[cluster_id].append(ii)
+      self.__cluster_nums[cluster_id] += 1
+
+ for ii in range(self.__k):
+ self.__cluster_means[ii] = np.mean(self.__obs[self.__cluster_labels[ii]], axis=0)
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __end(self):
+ """
+ Writes the results to the corresponding member variables.
+ :return:
+ """
+ # returned values | have to be reinitialized in case of sequential running
+ # centroids
+ self.__centroids = np.array([self.__obs[0] for _ in range(self.__k)], dtype=np.float)
+ # labels of observations
+ self.__labels = np.array([0 for _ in range(self.__n)], dtype=np.int)
+ # distances between centroids
+ # self.__centroid_dist_mat = np.zeros((self.__k, self.__k))
+
+    # copy the cluster assignment of every observation into the label array
+    for index in range(self.__n):
+      cluster_id = self.__label_map[index]
+      self.__labels[index] = cluster_id
+
+ # collect centroids
+ for ii in range(self.__k):
+ # self.__centroids.append(self.__cluster_means[ii].tolist())
+ self.__centroids[ii] = self.__cluster_means[ii]
+
+ # compute distances between each centroids
+ # for ii in range(self.__k - 1):
+ # # compute indices of other clusters
+ # jj = range(ii + 1, self.__k)
+ # # select matrix of cluster centroids
+ # centroid_mat = self.__centroids[jj]
+ # distances = np.sqrt(self.__compare(centroid_mat, self.__centroids[ii]))
+ # self.__centroid_dist_mat[ii, jj] = distances
+ # self.__centroid_dist_mat[jj, ii] = distances
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def run(self):
+ """
+ Runs the algorithm of k-means, using the initialization method and the assignment/update step.
+    Conducts at most iters iterations and terminates if this number is exceeded or no observation
+    was moved to another cluster.
+ :return:
+ """
+ # 1) init algorithm by choosing cluster centroids
+ self.__init()
+
+ max_iters = self.__iters
+ counter = 0
+ # 2) run clustering
+ while self.__changed and counter < max_iters:
+ self.__changed = False
+
+ self.__assignment()
+ self.__update()
+
+ counter += 1
+
+ self.num_iters = counter
+
+ # write results to the class members
+ self.__end()
+ return self.__centroids.tolist(), self.__labels.tolist(), self.__cluster_labels
+ # , self.__centroid_dist_mat.tolist()
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ # def get_dists_per_centroid(self):
+ # """
+ # Compute the distances between observations belonging to one cluster and the corresponding cluster centroid.
+ # Cluster labels are sorted in ascending order using their distances
+ # :return: array of distance arrays for each cluster and ordered labels
+ # """
+ #
+ # # labels per centroid
+ # # self.__cluster_labels = [[] for _ in range(self.__k)]
+ # # distances of obs to their cluster
+ # self.__centroid_dists = [[] for _ in range(self.__k)]
+ #
+ # for ii in range(self.__k):
+ # self.__cluster_labels[ii] = np.array(self.__cluster_labels[ii], dtype=np.int)
+ #
+ # # compute euclidean distances of values to cluster mean
+ # for ii in range(self.__k):
+ # mean = self.__cluster_means[ii]
+ # obs = self.__obs[self.__cluster_labels[ii]]
+ # dists = similarity_measurement(obs, mean, self.__compare).tolist()
+ # self.__centroid_dists[ii] = dists
+ #
+ # # sort indices in ascending order using the distances
+ # indices = range(len(dists))
+ # indices.sort(key=dists.__getitem__)
+ # self.__cluster_labels[ii] = self.__cluster_labels[ii][indices].tolist()
+ # self.__centroid_dists[ii].sort()
+ #
+ # return self.__cluster_labels, self.__centroid_dists
+
+
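+# Hedged usage sketch (editor's addition, mirrors _main below; the sample
+# values are illustrative only):
+def _example_kmeans():
+  demo = np.array([1.0, 1.1, 5.0, 8.0, 5.2, 8.3])
+  k_means = KMeans(demo, 3, 'kmeans++', 'sqeuclidean', 100)
+  # returns (centroids, labels, cluster_labels)
+  return k_means.run()
+
+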
+########################################################################################################################
+
+def _plugin_initialize():
+ """
+ optional initialization method of this module, will be called once
+ :return:
+ """
+ pass
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def create(data, k, init_method, distance):
+ """
+  By convention, this module contains a factory called create returning the extension implementation.
+ :return:
+ """
+ return KMeans(data, k, init_method, distance)
+
+
+########################################################################################################################
+
+
+def _main():
+ from timeit import default_timer as timer
+ from scipy.cluster.vq import kmeans2
+ # np.random.seed(datetime.now())
+ # data = np.array([[1,2,3],[5,4,5],[3,2,2],[8,8,7],[9,6,7],[2,3,4]])
+ data = np.array([1, 1.1, 5, 8, 5.2, 8.3])
+
+ # data = np.array([np.random.rand(2) * 5 for _ in range(10)])
+ k = 3
+
+ time_mine = 0
+ time_theirs = 0
+ n = 10
+
+  for i in range(n):
+ s1 = timer()
+ k_means_plus = KMeans(data, k, 'kmeans++', 'sqeuclidean', 10)
+ result1 = k_means_plus.run()
+ # print(result)
+ e1 = timer()
+ # labels = k_means_plus.get_dists_per_centroid()
+ # l, d = compute_cluster_distances(data, labels[0])
+
+ s2 = timer()
+ result2 = kmeans2(data, k)
+ e2 = timer()
+
+ time_mine += e1 - s1
+ time_theirs += e2 - s2
+
+ print(result1)
+ print(result2)
+ print('mine: {}'.format(time_mine / n))
+ print('theirs: {}'.format(time_theirs / n))
+
+
+"""
+This is for testing the algorithm and comparing the results between this and scipy's implementation
+"""
+if __name__ == '__main__':
+ _main()
diff --git a/phovea_clustering/clustering_service.py b/phovea_clustering/clustering_service.py
new file mode 100644
index 0000000..c789b7b
--- /dev/null
+++ b/phovea_clustering/clustering_service.py
@@ -0,0 +1,158 @@
+import numpy as np
+from clustering_hierarchical import get_clusters
+
+__author__ = 'Michael Kern'
+__version__ = '0.0.1'
+__email__ = 'kernm@in.tum.de'
+
+
+########################################################################################################################
+
+def load_data(dataset_id):
+ """
+ Loads the genomic data with given identifier dataset_id.
+ :param dataset_id: identifier
+ :return: array of the genomic data
+ """
+ import phovea_server.dataset as dt
+ # obtain Caleydo dataset from ID
+ dataset = dt.get(dataset_id)
+ # choose loaded attribute and load raw data in numpy format
+  # hack to get a numpy array out of the dataset
+  try:
+    arr = np.array(list(dataset.asnumpy()))
+  except Exception:
+    raise Exception('dataset {} cannot be converted to a numpy array'.format(dataset_id))
+ return arr
+
+
+########################################################################################################################
+
+def load_plugin(plugin_id, *args, **kwargs):
+ """
+ Loads the clustering plugin with given arguments.
+ :param plugin_id: identifier of plugin
+ :param *args: additional caller function arguments
+ :param **kwargs: additional arguments
+ :return: plugin
+ """
+ import phovea_server.plugin
+ # obtain all plugins with 'plugin_id' extension
+ plugins = phovea_server.plugin.list('clustering')
+ # choose plugin with given ID
+ for plugin in plugins:
+ if plugin.id == plugin_id:
+ # load the implementation of the plugin
+ return plugin.load().factory(*args, **kwargs)
+
+ raise NotImplementedError
+
+
+########################################################################################################################
+
+def run_kmeans(data, k, init_method, distance):
+ """
+ Runs the k-Means clustering algorithm given the loaded data set, the number of clusters k and the initialization
+ method.
+ :param data: observation matrix
+ :param k: number of clusters
+  :param init_method: initialization method
+  :param distance: distance measurement
+ :return: result of k-means
+ """
+ kmeans = load_plugin('caleydo-clustering-kmeans', data, k, init_method, distance)
+ # and run the kmeans extension
+ centroids, labels, cluster_labels = kmeans()
+ # cluster_labels, clusterDists = KMeans.getDistsPerCentroid()
+
+ return {'centroids': centroids, 'clusterLabels': cluster_labels}
+
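+# Hedged note (editor's addition): run_kmeans responds with e.g.
+# {'centroids': [...], 'clusterLabels': [[...], ...]}; the flat label list
+# returned by the plugin is not part of the response.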
+
+########################################################################################################################
+
+def run_hierarchical(data, k, method, distance):
+ """
+ Runs the hierarchical clustering algorithm given the loaded data set and type of linkage method.
+ :param data: observation matrix
+  :param k: number of clusters to cut the dendrogram into
+  :param method: linkage method
+  :param distance: distance measurement
+ :return: linkage matrix / dendrogram of the algorithm
+ """
+ hierarchical = load_plugin('caleydo-clustering-hierarchical', data, method, distance)
+ # and use the extension
+ hierarchical()
+ # obtain k-number of clusters
+ centroids, cluster_labels, labels = get_clusters(k, data, hierarchical.tree, False)
+
+ return {'centroids': centroids, 'clusterLabels': cluster_labels, 'dendrogram': hierarchical.tree.json()}
+ # print('\t-> creating dendrogram tree...')
+ # tree = Hierarchical.generateTree(linkage)
+ # print('\t-> creating json string ...')
+ # dendrogram = tree.jsonify()
+ # print('\t-> finished.')
+
+ # return {'dendrogram': dendrogram} --> if needed later
+
+
+########################################################################################################################
+
+def run_affinity_propagation(data, damping, factor, preference, distance):
+ """
+ Runs the affinity propagation algorithm given the loaded dataset, a damping value, a certain factor and
+ a preference method.
+ :param data:
+ :param damping:
+ :param factor:
+ :param preference:
+ :return:
+ """
+ affinity = load_plugin('caleydo-clustering-affinity', data, damping, factor, preference, distance)
+ # use this extension
+ centroids, labels, cluster_labels = affinity()
+
+ return {'centroids': centroids, 'clusterLabels': cluster_labels}
+
+
+########################################################################################################################
+
+def run_fuzzy(data, num_clusters, m, threshold, distance):
+ fuzzy = load_plugin('caleydo-clustering-fuzzy', data, num_clusters, m, threshold, distance)
+
+ centroids, cluster_labels, partition_matrix, max_prob = fuzzy()
+
+ return {'centroids': centroids, 'clusterLabels': cluster_labels, 'partitionMatrix': partition_matrix,
+ 'maxProbability': max_prob}
+
+
+########################################################################################################################
+
+def get_cluster_distances(data, labels, metric, extern_labels=None, sorted=True):
+ """
+  Compute the distances between the given rows (labels) of the data and their cluster centroid
+  :param data: genomic data
+  :param labels: indices of rows
+  :param metric: distance metric
+  :param extern_labels: labels of the other clusters (optional)
+  :param sorted: if True, sort labels and distance values in ascending order
+ :return: labels and distances values sorted in ascending order
+ """
+ from clustering_util import compute_cluster_intern_distances, compute_cluster_extern_distances
+ dist_labels, dist_values = compute_cluster_intern_distances(data, labels, sorted, metric)
+
+ if extern_labels is not None:
+ extern_dists = compute_cluster_extern_distances(data, dist_labels, extern_labels, metric)
+ return {'labels': dist_labels, 'distances': dist_values, 'externDistances': extern_dists}
+ else:
+ return {'labels': dist_labels, 'distances': dist_values}
+
+
+########################################################################################################################
+
+def get_clusters_from_dendrogram(data, dendrogram, num_clusters):
+ """
+  Cuts a previously computed dendrogram into the desired number of clusters.
+  :param data: observation matrix
+  :param dendrogram: dendrogram tree (BinaryTree or its JSON representation)
+  :param num_clusters: number of clusters
+  :return: centroids and cluster labels
+ """
+
+ centroids, cluster_labels, _ = get_clusters(num_clusters, data, dendrogram)
+ return {'centroids': centroids, 'clusterLabels': cluster_labels}
diff --git a/phovea_clustering/clustering_util.py b/phovea_clustering/clustering_util.py
new file mode 100644
index 0000000..5470538
--- /dev/null
+++ b/phovea_clustering/clustering_util.py
@@ -0,0 +1,489 @@
+########################################################################################################################
+
+import random
+import numpy as np
+
+# use scipy to compute different distance matrices
+from scipy.spatial.distance import pdist, squareform
+import scipy.stats as stats
+
+__author__ = "Michael Kern"
+__email__ = 'kernm@in.tum.de'
+
+"""
+http://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python
+--> good explanation to create weighted choices / random numbers
+"""
+
+
+def weighted_choice(weights):
+ # compute sum of all weights
+ sum_total = sum(weights)
+ # compute a random with range[0, sum_total]
+ rnd = random.random() * sum_total
+
+ for index, weight in enumerate(weights):
+ # subtract current weight from random to find current index
+ rnd -= weight
+ if rnd < 0:
+ return index
+
+ # 20% faster if weights are sorted in descending order
+
+
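+# Hedged example (editor's addition): weighted_choice([0.5, 0.3, 0.2]) returns
+# index 0 with probability 0.5, index 1 with probability 0.3 and index 2 with
+# probability 0.2.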
+########################################################################################################################
+
+"""
+Implementation of an binary tree for hierarchical clustering
+"""
+
+"""
+Node of the tree containing information about it's id in data, children and the value
+"""
+
+
+class BinaryNode:
+ def __init__(self, value, id, size, left_child, right_child):
+ self.value = value
+ self.left = left_child
+ self.right = right_child
+ self.size = size
+ self.id = id
+ self.parent = None
+ self.indices = [id]
+
+ # create json info on the fly
+ self.json = {"id": self.id, "size": self.size, "value": self.value, "indices": [id]}
+ if left_child is not None and right_child is not None:
+ # self.json["value"] = np.mean(self.value)
+ self.json["children"] = [right_child.json, left_child.json]
+ self.indices = [] + right_child.indices + left_child.indices
+ self.json["indices"] = self.indices
+
+ def is_leave(self):
+ return self.left is None and self.right is None
+
+
+########################################################################################################################
+
+"""
+Implementation of an hierarchical binary tree
+"""
+
+
+class BinaryTree:
+ # this tree must not be empty and must have at least two children (leaves)
+ def __init__(self, left_node, right_node, new_id, new_value):
+ self.__create_new_root(left_node, right_node, new_id, new_value)
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def add_node(self, new_node, new_id, new_value):
+ self.__create_new_root(self.root, new_node, new_id, new_value)
+ return self
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def merge(self, tree, new_id, new_value):
+ self.__create_new_root(self.root, tree.root, new_id, new_value)
+ return self
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def jsonify(self):
+ import json
+ return json.dumps(self.root.json)
+ # return self.root.json
+ # return self.__traverseJson(self.root)
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def json(self):
+ return self.root.json
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def cut_tree_by_clusters(self, k):
+ queue = [self.root]
+
+ while len(queue) < k:
+ node = queue.pop(0)
+ queue.append(node.left)
+ queue.append(node.right)
+
+ def key_func(x):
+ if x.is_leave():
+ return 0
+ else:
+ return -x.value
+
+ queue.sort(key=key_func)
+
+ clusters = []
+
+ for node in queue:
+ clusters.append(node.indices)
+
+ return clusters
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __traverse_json(self, node):
+ json = {"id": node.id, "size": node.size, "value": node.value}
+ if node.left is None and node.right is None:
+ return json
+ else:
+ json["children"] = [] + [self.__traverse_json(node.left)] + [self.__traverse_json(node.right)]
+
+ return json
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def get_leaves(self):
+ return self.root.indices
+ # return self.__traverseIDs(self.root)
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __traverse_ids(self, node):
+
+ if node.left is None and node.right is None:
+ return [node.id]
+ else:
+ return [] + self.__traverse_ids(node.right) + self.__traverse_ids(node.left)
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+ def __create_new_root(self, left_node, right_node, new_id, new_value):
+ new_size = left_node.size + right_node.size
+ self.root = BinaryNode(new_value, new_id, new_size, left_node, right_node)
+ left_node.parent = right_node.parent = self.root
+
+ # ------------------------------------------------------------------------------------------------------------------
+
+
+def cut_json_tree_by_clusters(json_data, k):
+ # import json
+ # tree = json.loads(json_data)
+ queue = [json_data]
+
+ while len(queue) < k:
+ node = queue.pop(0)
+ queue.append(node['children'][0])
+ queue.append(node['children'][1])
+
+ def key_func(x):
+ if 'children' not in x:
+ return 0
+ else:
+ return -x['value']
+
+ queue.sort(key=key_func)
+
+ clusters = []
+
+ for node in queue:
+ clusters.append(node['indices'])
+
+ return clusters
+
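+# Hedged note (editor's addition): each JSON node is expected to look like
+# {'id': ..., 'value': ..., 'indices': [...], 'children': [right, left]},
+# matching the dictionaries built by BinaryNode above.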
+
+########################################################################################################################
+
+def euclidean_distance(matrix, vector, squared=False):
+ """
+ Computes the euclidean distance between a vector and the rows of a matrix in parallel.
+ :param matrix: array of observations or clusters
+ :param vector: cluster centroid or observation
+ :return:
+ """
+
+ # compute distance between values in matrix and the vector
+ dist_mat = matrix - vector
+ num_values = len(matrix)
+ distances = np.array([0.0 for _ in range(num_values)], dtype=np.float)
+
+ for ii in range(num_values):
+ distance = dist_mat[ii]
+ # always try to use np.dot when computing euclidean distance
+ # it's way faster than ** 2 and sum(..., axis=1)
+ distances[ii] = np.dot(distance, distance)
+
+ if squared:
+ return distances
+ else:
+ return np.sqrt(distances)
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def correlation_distance(matrix, vector, method):
+ """
+  Computes the correlation coefficients between a vector and the rows of a matrix.
+  :param matrix:
+  :param vector:
+  :param method: one of 'pearson', 'spearman', 'kendall'
+ :return:
+ """
+
+ num_values = len(matrix)
+ distances = np.array([0.0 for _ in range(num_values)], dtype=np.float)
+
+ for ii in range(num_values):
+ value = matrix[ii]
+
+ if method == 'pearson':
+ distances[ii], _ = stats.pearsonr(value, vector)
+ elif method == 'spearman':
+ distances[ii], _ = stats.spearmanr(value, vector)
+ elif method == 'kendall':
+ distances[ii], _ = stats.kendalltau(value, vector)
+ else:
+ raise AttributeError
+
+ return distances
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+
+def similarity_measurement(matrix, vector, method='euclidean'):
+ from scipy.spatial.distance import cdist
+
+ if method == 'euclidean':
+ return euclidean_distance(matrix, vector)
+
+ if method == 'sqeuclidean':
+ return euclidean_distance(matrix, vector, True)
+
+ spatial_methods = ['cityblock', 'chebyshev', 'canberra', 'correlation', 'hamming', 'mahalanobis', ]
+
+ if method in spatial_methods:
+ return np.nan_to_num(cdist(matrix, np.atleast_2d(vector), method).flatten())
+
+ corr_methods = ['spearman', 'pearson', 'kendall']
+
+ if method in corr_methods:
+ return correlation_distance(matrix, vector, method)
+
+ raise AttributeError
+
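+# Hedged example (editor's addition):
+# similarity_measurement(np.array([[1, 1], [4, 5]]), np.array([1, 1]))
+# --> array([0., 5.]), the euclidean distance of each row to the vector.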
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def euclidean_distance_matrix(matrix, squared=False):
+ """
+  Compute the euclidean distance matrix required for the clustering algorithms
+  :param matrix:
+  :param squared: if True, return squared euclidean distances
+ :return:
+ """
+
+ n = np.shape(matrix)[0]
+ dist_mat = np.zeros((n, n))
+
+ # use Gram matrix and compute distances without inner products | FASTER than row-by-row method
+  # Gram matrix to compute the dot products of each pair of elements:
+ gram_mat = np.zeros((n, n))
+ for ii in range(n):
+ for jj in range(ii, n):
+ gram_mat[ii, jj] = np.dot(matrix[ii], matrix[jj])
+
+ # # ! This is slower than computing dot products of rows manually in python
+ # # ! And we only require the upper triangle matrix of the Gram matrix
+ # gram_mat = np.dot(self.__obs, self.__obs.T)
+
+ # make use of formula |a - b|^2 = a^2 - 2ab + b^2
+ for ii in range(n):
+ # self.__d[ii, ii] = self.__maxValue
+ jj = np.arange(ii + 1, n)
+ dist_mat[ii, jj] = gram_mat[ii, ii] - 2 * gram_mat[ii, jj] + gram_mat[jj, jj]
+ dist_mat[jj, ii] = dist_mat[ii, jj]
+
+ # # take square root of distances to compute real euclidean distance
+ # dist_mat = np.sqrt(dist_mat)
+
+  # alternative version --> use scipy's fast euclidean distance implementation: FASTEST
+ # dist_mat = spt.distance.pdist(self.__obs, 'euclidean')
+ # self.__d = spt.distance.squareform(dist_mat)
+ # print(dist_mat)
+
+ if squared:
+ return dist_mat
+ else:
+ return np.sqrt(dist_mat)
+
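+# Hedged check (editor's addition): for the rows a = [0, 0] and b = [3, 4] the
+# Gram entries are a.a = 0, a.b = 0 and b.b = 25, so |a - b|^2 = 0 - 2 * 0 + 25
+# and euclidean_distance_matrix(np.array([[0, 0], [3, 4]]))[0, 1] == 5.0.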
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def norm1_distance(matrix, vector):
+ """
+  Computes the averaged norm-1 (mean absolute) distance between a vector and the rows of a matrix in parallel.
+ :param matrix: array of observations or clusters
+ :param vector: cluster centroid or observation
+ :return:
+ """
+ dist_mat = np.abs(matrix - vector)
+ num_values = len(vector)
+
+ distances = np.sum(dist_mat, axis=1) / num_values
+ return distances
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def pearson_correlation_matrix(matrix):
+ """
+  Computes a distance matrix based on Pearson correlation coefficients (1 - corr).
+  :param matrix:
+ :return:
+ """
+  # TODO! other possibilities like 1 - abs(corr) | sqrt(1 - corr ** 2) | (1 - corr) / 2
+ dist_mat = 1 - np.corrcoef(matrix)
+
+ return dist_mat
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def stats_correlation_matrix(matrix, method):
+ if method == 'pearson':
+ return pearson_correlation_matrix(matrix)
+
+ n = np.shape(matrix)[0]
+ dist_mat = np.zeros((n, n))
+
+ for ii in range(n):
+ row_i = matrix[ii]
+ for jj in range(ii + 1, n):
+ row_j = matrix[jj]
+ corr = 0
+
+ if method == 'spearman':
+ corr, _ = stats.spearmanr(row_i, row_j)
+
+ if method == 'kendall':
+ corr, _ = stats.kendalltau(row_i, row_j)
+
+      # TODO! other possibilities like 1 - abs(corr) | sqrt(1 - corr ** 2) | (1 - corr) / 2
+ corr = 1 - corr
+
+ dist_mat[ii, jj] = corr
+ dist_mat[jj, ii] = corr
+
+ return dist_mat
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def similarity_measurement_matrix(matrix, method):
+ """
+ Generic function to determine the similarity measurement for clustering
+ :param matrix:
+ :param method:
+ :return:
+ """
+ if method == 'euclidean':
+ return euclidean_distance_matrix(matrix)
+ # return squareform(pdist(matrix, method))
+
+ if method == 'sqeuclidean':
+ return euclidean_distance_matrix(matrix, True)
+ # return squareform(pdist(matrix, method))
+
+ spatial_methods = ['cityblock', 'chebyshev', 'canberra', 'correlation', 'hamming', 'mahalanobis']
+
+ if method in spatial_methods:
+ return squareform(np.nan_to_num(pdist(matrix, method)))
+
+ corr_methods = ['spearman', 'pearson', 'kendall']
+
+ if method in corr_methods:
+ return stats_correlation_matrix(matrix, method)
+
+ raise AttributeError
+
+
+########################################################################################################################
+# utility functions to compute distances between rows and cluster centroids
+
+def compute_cluster_intern_distances(matrix, labels, sorted=True, metric='euclidean'):
+ """
+ Computes the distances of each element in one cluster to the cluster's centroid. Returns distance values and labels
+ sorted in ascending order.
+ :param matrix:
+  :param labels:
+  :param sorted: if True, sort labels and distances in ascending order
+  :param metric: distance metric
+ :return: labels / indices of elements corresponding to distance array, distance values of cluster
+ """
+ cluster_labels = np.array(labels)
+ if len(cluster_labels) == 0:
+ return [], []
+
+ sub_matrix = matrix[cluster_labels]
+ # compute centroid of cluster along column (as we want to average each gene separately)
+ centroid = np.mean(sub_matrix, axis=0)
+
+ # compute distances to centroid
+ dists = similarity_measurement(sub_matrix, centroid, metric)
+
+  if sorted:
+    # sort values
+    indices = list(range(len(dists)))
+    indices.sort(key=dists.__getitem__)
+ dists.sort()
+
+ # reverse order if correlation coefficient is used
+ # (1 means perfect correlation while -1 denotes opposite correlation)
+ corr_metrics = ['pearson', 'spearman', 'kendall']
+ if metric in corr_metrics:
+ indices.reverse()
+ dists = dists[::-1]
+
+ # write back to our arrays
+ dist_labels = cluster_labels[indices].tolist()
+ dist_values = dists.tolist()
+ else:
+ dist_labels = cluster_labels.tolist()
+ dist_values = dists.tolist()
+
+ return dist_labels, dist_values
+
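+# Hedged example (editor's addition): for matrix rows [[0], [1], [5]] and
+# labels [0, 1], the centroid is [0.5], both euclidean distances are 0.5 and
+# the labels are returned in their original order.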
+
+# ----------------------------------------------------------------------------------------------------------------------
+
+def compute_cluster_extern_distances(matrix, labels, outer_labels, metric='euclidean'):
+ """
+ Compute the distances of patients in one cluster to the centroids of all other clusters.
+ :param matrix:
+ :param labels:
+ :param outer_labels:
+ :return:
+ """
+ extern_dists = []
+ intern_sub_matrix = matrix[labels]
+
+ for extern_labels in outer_labels:
+
+    if len(extern_labels) == 0:
+      extern_dists.append([])
+      continue
+
+ # compute centroid of external cluster
+ sub_matrix = matrix[extern_labels]
+ centroid = np.mean(sub_matrix, axis=0)
+
+ dists = similarity_measurement(intern_sub_matrix, centroid, metric)
+ extern_dists.append(dists.tolist())
+
+ return extern_dists
+
+
+########################################################################################################################
+
+if __name__ == '__main__':
+ from scipy.spatial.distance import cdist
+ print(cdist([[1, 1, 1], [3, 3, 3], [5, 5, 5]], np.atleast_2d([2, 2, 2]), 'sqeuclidean').flatten())
+
+ from scipy.stats import spearmanr
+
+ print(spearmanr([1, 2, 3], [2, 4, 1]))
diff --git a/phovea_clustering/config.json b/phovea_clustering/config.json
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/phovea_clustering/config.json
@@ -0,0 +1 @@
+{}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f74d796
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+-e git+https://github.com/phovea/phovea_server.git#egg=phovea_server
\ No newline at end of file
diff --git a/requirements_dev.txt b/requirements_dev.txt
new file mode 100644
index 0000000..0e2902f
--- /dev/null
+++ b/requirements_dev.txt
@@ -0,0 +1,4 @@
+flake8==3.0.4
+pep8-naming==0.4.1
+pytest==3.0.3
+pytest-runner==2.9
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..5519f85
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,14 @@
+###############################################################################
+# Caleydo - Visualization for Molecular Biology - http://caleydo.org
+# Copyright (c) The Caleydo Team. All rights reserved.
+# Licensed under the new BSD license, available at http://caleydo.org/license
+###############################################################################
+
+[bdist_wheel]
+# This flag says that the code is written to work on both Python 2 and Python
+# 3. If at all possible, it is good practice to do this. If you cannot, you
+# will need to generate wheels for each Python version that you support.
+universal=1
+
+[aliases]
+test=pytest
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..2f7a0a1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,81 @@
+###############################################################################
+# Caleydo - Visualization for Molecular Biology - http://caleydo.org
+# Copyright (c) The Caleydo Team. All rights reserved.
+# Licensed under the new BSD license, available at http://caleydo.org/license
+###############################################################################
+from __future__ import with_statement, print_function
+from setuptools import setup
+from codecs import open
+from os import path
+
+here = path.abspath(path.dirname(__file__))
+
+
+def read_it(name):
+ with open(path.join(here, name), encoding='utf-8') as f:
+ return f.read()
+
+
+# read package.json information
+with open(path.join(here, 'package.json'), encoding='utf-8') as json_data:
+ import json
+
+ pkg = json.load(json_data)
+
+
+def packaged(*files):
+ r = {}
+ global pkg
+ r[pkg['name'].encode('ascii')] = list(files)
+ return r
+
+
+setup(
+ name=pkg['name'],
+ version=pkg['version'],
+ description=pkg['description'],
+ long_description=read_it('README.md'),
+ keywords=pkg.get('keywords', ''),
+ author=pkg['author']['name'],
+ author_email=pkg['author']['email'],
+ license=pkg['license'],
+ zip_safe=False,
+
+ entry_points={
+ 'phovea.registry': ['{0} = {0}:phovea'.format(pkg['name'])],
+ 'phovea.config': ['{0} = {0}:phovea_config'.format(pkg['name'])]
+ },
+
+ # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
+ classifiers=[
+ 'Intended Audience :: Developers',
+ 'Operating System :: OS Independent',
+ # Pick your license as you wish (should match "license" above)
+ 'License :: OSI Approved :: ' + pkg['license'],
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3.4'
+ ],
+
+ # You can just specify the packages manually here if your project is
+ # simple. Or you can use find_packages().
+ py_modules=[pkg['name']],
+
+ # List run-time dependencies here. These will be installed by pip when
+ # your project is installed. For an analysis of "install_requires" vs pip's
+ # requirements files see:
+ # https://packaging.python.org/en/latest/requirements.html
+ install_requires=[r for r in read_it('requirements.txt').split('\n') if not r.startswith('-e git+https://')],
+ tests_require=read_it('requirements_dev.txt').split('\n'),
+
+ # If there are data files included in your packages that need to be
+ # installed, specify them here. If using Python 2.6 or less, then these
+ # have to be included in MANIFEST.in as well.
+ package_data=packaged('config.json'),
+
+ # Although 'package_data' is the preferred approach, in some case you may
+ # need to place data files outside of your packages. See:
+ # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
+ # In this case, 'data_file' will be installed into '/my_data'
+ data_files=[] # [('my_data', ['data/data_file'])],
+)
diff --git a/tests/test_dummy.py b/tests/test_dummy.py
new file mode 100644
index 0000000..4b8fe97
--- /dev/null
+++ b/tests/test_dummy.py
@@ -0,0 +1,4 @@
+
+
+def test_dummy():
+ assert 1 == 1
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..f2734b2
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,28 @@
+###############################################################################
+# Caleydo - Visualization for Molecular Biology - http://caleydo.org
+# Copyright (c) The Caleydo Team. All rights reserved.
+# Licensed under the new BSD license, available at http://caleydo.org/license
+###############################################################################
+
+[tox]
+envlist = py{27,34}
+
+[testenv]
+basepython =
+ py27: python2.7
+ py34: python3.4
+deps =
+ flake8
+ pytest
+commands =
+ check-manifest --ignore tox.ini,tests*
+ python setup.py check -m -r -s
+ flake8 .
+ py.test tests
+
+[flake8]
+ignore=E111,E114,E501
+exclude = .tox,*.egg,build,data,.git,__pycache__,docs,node_modules
+
+[pytest]
+testpaths = tests