|
| 1 | +# coding: utf-8 |
| 2 | + |
| 3 | +""" |
| 4 | + description: Utility functions |
| 5 | + author: Suraj Iyer |
| 6 | +""" |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import numba as nb |
| 10 | + |
| 11 | + |
def rowwise_dissimilarity(values):
    """
    Compare every row with each other row and count the number
    of columns in which the two rows differ.

    Parameters:
    ------------
    values: array-like
        2D data of shape (n_rows, n_columns). Anything accepted by
        np.asarray (e.g. a nested list) is allowed.

    Returns:
    --------
    np.ndarray
        Integer matrix of shape (n_rows, n_rows); entry [i, j] is the
        number of columns where row i and row j hold different values.

    Example:
        input:  [[1, 2, 3],
                 [1, 3, 1],
                 [2, 2, 2]]
        output: [[0, 2, 2],
                 [2, 0, 3],
                 [2, 3, 0]]
    """
    # Plain (nested) lists do not support the [:, None] indexing used below.
    values = np.asarray(values)
    # values[:, None] inserts a length-1 axis after the row axis, so the
    # comparison broadcasts every row against every other row; summing the
    # boolean result over the last (column) axis counts the mismatches.
    return np.sum(values != values[:, None], axis=-1)
| 26 | + |
| 27 | + |
def rowwise_cosine_similarity(values):
    """
    Using every pair of rows in :values: as input, compute
    pairwise cosine similarity between each row.

    Parameters:
    ------------
    values: array-like
        2D data of shape (n_rows, n_features); each row is one vector.

    Returns:
    --------
    np.ndarray
        Matrix of shape (n_rows, n_rows); entry [i, j] is the cosine
        similarity between row i and row j. A row whose norm is zero
        is divided by zero, so its entries come out as NaN.

    URL: https://stackoverflow.com/questions/41905029/create-cosine-similarity-matrix-numpy
    """
    values = np.asarray(values)
    # Normalise every ROW to unit length (axis=1). Summing over axis 0 and
    # returning values.T @ values would instead compare the columns.
    norm = np.linalg.norm(values, axis=1, keepdims=True)
    unit_rows = values / norm
    # Dot products of unit-length rows are exactly their cosine similarities.
    return unit_rows @ unit_rows.T
| 38 | + |
| 39 | + |
@nb.njit(parallel=True, fastmath=True)
def convert_to_ultrametric(values):
    """
    Fix triangular inequality within distance matrix (values)
    by converting to ultra-metric by ensuring the following
    condition: d_{ij} = min(d_{ij}, max(d_{ik}, d_{kj}))

    The minimum is taken over every intermediate index k, always
    reading distances from the input matrix; the input itself is
    not modified. The diagonal of the input is carried over
    unchanged.

    NOTE(review): a single pass only considers one intermediate
    point k per pair; repeated application may be needed before the
    result satisfies the condition for all triples -- confirm with
    callers.

    Parameters:
    ------------
    values: np.ndarray
        2D square distance matrix.

    Returns:
    --------
    np.ndarray
        Ultrametrified distance matrix (floating point), symmetric
        in its off-diagonal entries.
    """
    values = np.atleast_2d(values)
    result = np.full(values.shape, 1.)
    # Every unordered pair (i, j) with i < j is handled by exactly one
    # outer iteration, so the parallel iterations write disjoint cells.
    for i in nb.prange(values.shape[0]):
        # Keep the self-distance from the input; without this the
        # diagonal would retain the fill value 1.0.
        result[i, i] = values[i, i]
        for j in range(i + 1, values.shape[0]):
            # min over k of max(d_ik, d_jk), capped by the direct distance.
            result[i, j] = result[j, i] = min(np.min(
                np.fmax(values[i], values[j])), values[i, j])
    return result
0 commit comments