Skip to content

Commit 81d2c6f

Browse files
authored
Create utils.py
1 parent 9b919b1 commit 81d2c6f

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed

Diff for: utils.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# coding: utf-8
2+
3+
"""
4+
description: Utility functions
5+
author: Suraj Iyer
6+
"""
7+
8+
import numpy as np
9+
import numba as nb
10+
11+
12+
def rowwise_dissimilarity(values):
    """
    Compare every row with every other row and count the number
    of differing entries along the column axis for each row pair.

    Parameters:
    ------------
    values: array-like
        2D data with one observation per row. Anything accepted by
        ``np.asarray`` (e.g. a nested list) is allowed.

    Returns:
    --------
    np.ndarray
        Square integer matrix whose entry (i, j) is the number of
        columns in which row i and row j differ.

    Example:
        input:  [[1, 2, 3],
                 [1, 3, 1],
                 [2, 2, 2]]
        output: [[0, 2, 2],
                 [2, 0, 3],
                 [2, 3, 0]]
    """
    # Nested lists do not support ``values[:, None]``; convert up front so
    # the documented list example actually works.
    values = np.asarray(values)
    # Broadcasting (n, 1, m) against (n, m) compares all row pairs at once;
    # summing the boolean mismatches over the last axis counts them.
    return np.sum(values != values[:, None], axis=-1)
26+
27+
28+
def rowwise_cosine_similarity(values):
    """
    Using every pair of rows in :values: as input, compute
    pairwise cosine similarity between each row.

    Parameters:
    ------------
    values: array-like
        2D data with one vector per row.

    Returns:
    --------
    np.ndarray
        Square matrix of shape (n_rows, n_rows) whose entry (i, j) is
        the cosine of the angle between row i and row j. A row with
        zero norm produces NaN entries, because it is divided by 0.

    URL: https://stackoverflow.com/questions/41905029/create-cosine-similarity-matrix-numpy
    """
    values = np.asarray(values)
    # Euclidean length of each ROW (axis 1). Normalising along axis 0 and
    # multiplying ``values.T @ values`` would compare columns instead.
    norm = (values * values).sum(1, keepdims=True) ** .5
    unit_rows = values / norm
    # Dot products between unit-length rows are the cosine similarities.
    return unit_rows @ unit_rows.T
38+
39+
40+
@nb.njit(parallel=True, fastmath=True)
def convert_to_ultrametric(values):
    """
    Fix triangular inequality within distance matrix (values)
    by converting to ultra-metric by ensuring the following
    condition: d_{ij} = min(d_{ij}, max(d_{ik}, d_{kj}))

    A single pass is made: every output entry is computed from the
    unmodified input, not from already-updated entries. Row j is used
    in place of column j for d_{kj}, so the input is assumed to be
    symmetric.

    Parameters:
    ------------
    values: np.ndarray
        2D square distance matrix.

    Returns:
    --------
    np.ndarray
        Ultrametrified distance matrix. The diagonal (self-distances)
        is copied unchanged from the input.
    """
    values = np.atleast_2d(values)
    result = np.full(values.shape, 1.)
    for i in nb.prange(values.shape[0]):
        # Keep the self-distance from the input; without this the diagonal
        # would retain the fill value 1.0 instead of a real distance.
        result[i, i] = values[i, i]
        for j in range(i + 1, values.shape[0]):
            # Smallest "worst leg" over all intermediate points k, capped
            # by the direct distance between i and j.
            bottleneck = min(np.min(
                np.fmax(values[i], values[j])), values[i, j])
            # Each unordered pair (i, j) is written by exactly one outer
            # iteration, so parallel iterations never touch the same cell.
            result[i, j] = bottleneck
            result[j, i] = bottleneck
    return result

0 commit comments

Comments
 (0)