Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit e9d08d7

Browse files
authored
use mambabuild in CI and fix rmsd computation in clustering (#1531)
1 parent ea3439b commit e9d08d7

File tree

4 files changed

+108
-15
lines changed

4 files changed

+108
-15
lines changed

.circleci/config.yml

+6-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,12 @@ jobs:
4646
- run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
4747
- attach_workspace:
4848
at: workspace
49-
- run: conda build devtools/conda-recipe --python=3.9 --numpy=1.19 --test --output-folder workspace
49+
- run:
50+
name: Compile and test
51+
no_output_timeout: 30m
52+
command: |
53+
export PYTHONUNBUFFERED=1
54+
conda build devtools/conda-recipe --python=3.9 --numpy=1.21 --test --output-folder workspace
5055
- run: bash <(curl -s https://codecov.io/bash) -f $HOME/coverage.xml
5156
- store_test_results:
5257
path: /tmp/circleci-test-results

devtools/conda-setup+build.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ steps:
55
conda config --set quiet true
66
displayName: Configure conda
77
- bash: |
8-
python -m pip install --upgrade pip
9-
conda update --all
8+
conda install mamba
9+
mamba update --all
1010
displayName: Update conda
1111
- bash: |
12-
conda install conda-build conda-verify pip
12+
mamba install boa conda-build conda-verify pip
1313
displayName: 'Install dependencies'
1414
continueOnError: false
1515
- bash: |
16-
conda build devtools/conda-recipe
16+
conda mambabuild devtools/conda-recipe
1717
displayName: 'Build and test'
1818
continueOnError: false

pyemma/coordinates/clustering/src/clustering_module.cpp

+11-7
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,16 @@
88

99
struct RMSDMetric {
1010
template<typename T>
11-
static T compute(const T* xs, const T* ys, std::size_t dim) {
11+
static T compute_squared(const T* xs, const T* ys, std::size_t dim) {
1212
if (dim % 3 != 0) {
13-
throw std::range_error("RMSDMetric is only implemented for input data with a dimension dividable by 3.");
13+
throw std::range_error("RMSDMetric is only implemented for input data with a dimension divisible by 3.");
1414
}
15+
1516
float trace_a, trace_b;
1617
auto dim3 = static_cast<const int>(dim / 3);
17-
std::vector<float> buffer_b (ys, ys + dim);
1818
std::vector<float> buffer_a (xs, xs + dim);
19+
std::vector<float> buffer_b (ys, ys + dim);
20+
1921
inplace_center_and_trace_atom_major(buffer_a.data(), &trace_a, 1, dim3);
2022
inplace_center_and_trace_atom_major(buffer_b.data(), &trace_b, 1, dim3);
2123

@@ -27,14 +29,16 @@ struct RMSDMetric {
2729
}
2830
}
2931

30-
template<typename dtype>
31-
static dtype compute_squared(const dtype* xs, const dtype* ys, std::size_t dim) {
32-
auto d = compute(xs, ys, dim);
33-
return d*d;
32+
template<typename T>
33+
static T compute(const T* xs, const T* ys, std::size_t dim) {
34+
return std::sqrt(compute_squared(xs, ys, dim));
3435
}
3536
};
3637

3738
PYBIND11_MODULE(_ext, m) {
3839
auto rmsdModule = m.def_submodule("rmsd");
3940
deeptime::clustering::registerClusteringImplementation<RMSDMetric>(rmsdModule);
41+
rmsdModule.def("compute_metric", [](py::array_t<float> x, py::array_t<float> y) {
42+
return RMSDMetric::compute<float>(x.data(), y.data(), x.size());
43+
});
4044
}

pyemma/coordinates/clustering/tests/test_kmeans.py

+87-3
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,14 @@
2424
import os
2525
import random
2626
import unittest
27+
28+
import deeptime.clustering
29+
import mdtraj
2730
import numpy as np
31+
from deeptime.clustering import ClusterModel
2832

2933
from pyemma.coordinates.api import cluster_kmeans
34+
from pyemma.coordinates.clustering import KmeansClustering
3035
from pyemma.util.files import TemporaryDirectory
3136
from pyemma.util.contexts import settings, Capturing
3237

@@ -227,9 +232,88 @@ def test_kmeans_convex_hull(self):
227232
self.assertGreaterEqual(np.inner(np.array([0, -144337500, -102061250], dtype=float), res) + 353560531, 0)
228233
self.assertGreaterEqual(np.inner(np.array([0, 0, -10000], dtype=float), res) + 17321, 0)
229234

230-
def test_with_n_jobs_minrmsd(self):
231-
kmeans = cluster_kmeans(np.random.rand(500, 3), 10, metric='minRMSD')
232-
kmeans.dtrajs
235+
def test_minrmsd_assignment(self):
236+
state = np.random.RandomState(123)
237+
data = state.uniform(-50, 50, size=(500, 3 * 15))
238+
n_clusters = 15
239+
kmeans = cluster_kmeans([data], n_clusters, metric='minRMSD', max_iter=0,
240+
fixed_seed=32, init_strategy='kmeans++', n_jobs=1)
241+
kmeans2 = cluster_kmeans([data], n_clusters, metric='minRMSD', max_iter=0,
242+
fixed_seed=32, init_strategy='kmeans++', n_jobs=1)
243+
np.testing.assert_array_equal(kmeans.dtrajs[0], kmeans2.dtrajs[0])
244+
np.testing.assert_array_almost_equal(kmeans.clustercenters, kmeans2.clustercenters)
245+
np.testing.assert_equal(kmeans.metric, 'minRMSD')
246+
247+
impl = deeptime.clustering.metrics['minRMSD']
248+
dtraj_manual = []
249+
for frame in data:
250+
dists_to_cc = [impl.compute_metric(frame, cc) for cc in kmeans.clustercenters]
251+
dtraj_manual.append(np.argmin(dists_to_cc))
252+
np.testing.assert_array_equal(dtraj_manual, kmeans.dtrajs[0])
253+
254+
def test_minrmsd_metric(self):
255+
# make sure impl is registered
256+
_ = KmeansClustering(n_clusters=5)
257+
# now we can import the impl
258+
impl = deeptime.clustering.metrics['minRMSD']
259+
target = np.random.uniform(size=(1, 3 * 15))
260+
reference = np.random.uniform(size=(1, 3 * 15))
261+
x = mdtraj.rmsd(mdtraj.Trajectory(target.reshape(1, -1, 3), None),
262+
mdtraj.Trajectory(reference.reshape(1, -1, 3), None))
263+
y = impl.compute_metric(target, reference)
264+
np.testing.assert_almost_equal(x[0], y)
265+
266+
def test_minrmsd_assignments(self):
267+
# make sure impl is registered
268+
_ = KmeansClustering(n_clusters=5)
269+
# now we can import the impl
270+
impl = deeptime.clustering.metrics['minRMSD']
271+
272+
from scipy.linalg import expm, norm
273+
n_clusters = 5
274+
n_particles = 3
275+
n_frames_per_cluster = 25
276+
277+
def rotation_matrix(axis, theta):
278+
""" rotation matrix
279+
:param axis: np.ndarray, axis around which to rotate
280+
:param theta: float, angle in radians
281+
:return: rotation matrix
282+
"""
283+
return expm(np.cross(np.eye(3), axis/norm(axis)*theta))
284+
285+
out = np.zeros((n_clusters*n_frames_per_cluster, 3*n_particles))
286+
for i in range(n_clusters):
287+
# define `n_particles` random particle xyz positions,
288+
# repeat `n_frames_per_cluster` frames and add noise
289+
_pos = np.random.choice(np.arange(3*n_particles), size=3*n_particles)
290+
pos = np.repeat(_pos[None], n_frames_per_cluster, axis=0).astype(float)
291+
pos += np.random.normal(size=pos.shape, scale=.1)
292+
293+
# add random rotation and translation for each frame
294+
rand_rot_trans = np.zeros_like(pos)
295+
for n, _pos in enumerate(pos):
296+
r = rotation_matrix(np.array([0, 1, 0]), np.pi*np.random.rand())
297+
t = np.array([np.random.normal(), np.random.normal(), np.random.normal()])
298+
299+
for m in range(n_particles):
300+
rand_rot_trans[n, 3*m:3*(m+1)] = np.dot(r, _pos[3*m:3*(m+1)]) - t
301+
302+
out[n_frames_per_cluster*i:n_frames_per_cluster*(i+1)] = rand_rot_trans
303+
304+
cc = impl.kmeans.init_centers_kmpp(out, k=n_clusters, random_seed=-1, n_threads=1, callback=None)
305+
cl = ClusterModel(cc, metric='minRMSD', converged=True)
306+
assignments = cl.transform(out)
307+
unique = []
308+
for i in range(n_clusters):
309+
unique_in_inverval = np.unique(
310+
assignments[n_frames_per_cluster*i:n_frames_per_cluster*(i+1)])
311+
# assert that each interval is assigned correctly
312+
self.assertEqual(unique_in_inverval.shape[0], 1)
313+
unique.append(unique_in_inverval[0])
314+
315+
# assign that all integers are assigned
316+
# self.assertSetEqual(set(unique), set(range(n_clusters)))
233317

234318
def test_skip(self):
235319
cl = cluster_kmeans(np.random.rand(100, 3), skip=42)

0 commit comments

Comments
 (0)