Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit 62bab56

Browse files
authored
Merge pull request #1252 from clonker/patches
[coordinates] Disable random access optimization in patches
2 parents 78dae90 + 317dbce commit 62bab56

File tree

4 files changed

+85
-12
lines changed

4 files changed

+85
-12
lines changed

doc/source/CHANGELOG.rst

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
Changelog
22
=========
33

4-
2.5.1 (02-14-2018)
4+
2.5.1 (02-17-2018)
55
------------------
66

77
Quick fix release to repair chunking in the coordinates package.
88

99
**Fixes**:
1010

1111
- msm: fix bug in ImpliedTimescales, which happened when an estimation failed for a given lag time. #1248
12-
- coordinates: fixed handling of default chunksize. #1247, #1251
12+
- coordinates: fixed handling of default chunksize. #1247, #1251, #1252
1313
- base: updated pybind to 2.2.2. #1249
1414

1515

pyemma/coordinates/tests/test_coordinates_iterator.py

+72-3
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
1+
import tempfile
12
import unittest
3+
import shutil
4+
import os
5+
from glob import glob
26
import numpy as np
37

48
from pyemma.coordinates.data import DataInMemory
59
from pyemma.util.contexts import settings
610
from pyemma.util.files import TemporaryDirectory
7-
import os
8-
from glob import glob
9-
1011

1112

1213
class TestCoordinatesIterator(unittest.TestCase):
@@ -15,6 +16,12 @@ class TestCoordinatesIterator(unittest.TestCase):
1516
def setUpClass(cls):
1617
cls.d = [np.random.random((100, 3)) for _ in range(3)]
1718

19+
def setUp(self):
20+
self.tempdir = tempfile.mktemp()
21+
22+
def tearDown(self):
23+
shutil.rmtree(self.tempdir, ignore_errors=True)
24+
1825
def test_current_trajindex(self):
1926
r = DataInMemory(self.d)
2027
expected_itraj = 0
@@ -273,5 +280,67 @@ def test_invalid_data_in_input_inf(self):
273280
for itraj, X in it:
274281
pass
275282

283+
def test_lagged_iterator(self):
284+
import pyemma.coordinates as coor
285+
from pyemma.coordinates.tests.util import create_traj, get_top
286+
287+
trajectory_length = 4720
288+
lagtime = 1000
289+
n_trajs = 15
290+
291+
top = get_top()
292+
trajs_data = [create_traj(top=top, length=trajectory_length) for _ in range(n_trajs)]
293+
trajs = [t[0] for t in trajs_data]
294+
xyzs = [t[1].reshape(-1, 9) for t in trajs_data]
295+
296+
reader = coor.source(trajs, top=top, chunksize=5000)
297+
298+
for chunk in [None, 0, trajectory_length, trajectory_length+1, trajectory_length+1000]:
299+
it = reader.iterator(lag=lagtime, chunk=chunk, return_trajindex=True)
300+
with it:
301+
for itraj, X, Y in it:
302+
np.testing.assert_equal(X.shape, Y.shape)
303+
np.testing.assert_equal(X.shape[0], trajectory_length - lagtime)
304+
np.testing.assert_array_almost_equal(X, xyzs[itraj][:trajectory_length-lagtime])
305+
np.testing.assert_array_almost_equal(Y, xyzs[itraj][lagtime:])
306+
307+
def test_lagged_iterator_optimized(self):
308+
import pyemma.coordinates as coor
309+
from pyemma.coordinates.tests.util import create_traj, get_top
310+
from pyemma.coordinates.util.patches import iterload
311+
312+
trajectory_length = 4720
313+
lagtime = 20
314+
n_trajs = 15
315+
stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1
316+
317+
top = get_top()
318+
trajs_data = [create_traj(top=top, length=trajectory_length) for _ in range(n_trajs)]
319+
trajs = [t[0] for t in trajs_data]
320+
xyzs = [t[1].reshape(-1, 9)[::stride] for t in trajs_data]
321+
xyzs_lagged = [t[1].reshape(-1, 9)[lagtime::stride] for t in trajs_data]
322+
323+
reader = coor.source(trajs, stride=stride, top=top, chunksize=5000)
324+
325+
memory_cutoff = iterload.MEMORY_CUTOFF
326+
try:
327+
iterload.MEMORY_CUTOFF = 8
328+
it = reader.iterator(stride=stride, lag=lagtime, chunk=5000, return_trajindex=True)
329+
with it:
330+
curr_itraj = 0
331+
t = 0
332+
for itraj, X, Y in it:
333+
if itraj != curr_itraj:
334+
curr_itraj = itraj
335+
t = 0
336+
np.testing.assert_equal(X.shape, Y.shape)
337+
l = len(X)
338+
np.testing.assert_array_almost_equal(X, xyzs[itraj][t:t+l])
339+
np.testing.assert_array_almost_equal(Y, xyzs_lagged[itraj][t:t+l])
340+
t += l
341+
finally:
342+
iterload.MEMORY_CUTOFF = memory_cutoff
343+
344+
276345
if __name__ == '__main__':
277346
unittest.main()

pyemma/coordinates/tests/test_random_access_stride.py

+6-5
Original file line number | Diff line number | Diff line change
@@ -461,6 +461,8 @@ def test_fragmented_reader_random_access1(self):
461461

462462
def test_RA_high_stride(self):
463463
""" ensure we use a random access pattern for high strides chunksize combinations to avoid memory issues."""
464+
from pyemma.coordinates.util.patches import iterload
465+
464466
n=int(1e5)
465467
n_bytes = 3*3*8*n # ~8Mb
466468
savable_formats_mdtra_18 = (
@@ -475,24 +477,23 @@ def test_RA_high_stride(self):
475477
r = coor.source(traj, top=get_top())
476478
it = r.iterator(stride=1000, chunk=100000)
477479
next(it)
478-
assert it._mditer.is_ra_iter
480+
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or it._mditer.is_ra_iter
479481

480482
out_ra = r.get_output(stride=1000, chunk=10000)
481483
it = r.iterator(stride=1)
482484
next(it)
483-
assert not it._mditer.is_ra_iter
485+
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or not it._mditer.is_ra_iter
484486
out = r.get_output(stride=1000)
485487
np.testing.assert_equal(out_ra, out)
486488

487489
# check max stride exceeding
488-
from pyemma.coordinates.util.patches import iterload
489490
it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA+1)
490491
next(it)
491-
assert it._mditer.is_ra_iter
492+
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or it._mditer.is_ra_iter
492493

493494
it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA)
494495
next(it)
495-
assert not it._mditer.is_ra_iter
496+
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or not it._mditer.is_ra_iter
496497

497498
if __name__ == '__main__':
498499
unittest.main()

pyemma/coordinates/util/patches.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,8 @@ class iterload(object):
7575
MEMORY_CUTOFF = int(128 * 1024**2) # 128 MB
7676
MAX_STRIDE_SWITCH_TO_RA = 20
7777

78+
_DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION = True
79+
7880
def __init__(self, filename, trajlen, chunk=1000, **kwargs):
7981
"""An iterator over a trajectory from one or more files on disk, in fragments
8082
@@ -148,9 +150,10 @@ def __init__(self, filename, trajlen, chunk=1000, **kwargs):
148150
else:
149151
n_atoms = self._topology.n_atoms
150152

151-
if (self.is_ra_iter or
153+
# temporarily(?) disable RA mode, test_lagged_iterator_optimized fails otherwise
154+
if self.is_ra_iter or (not self._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION and (self.is_ra_iter or
152155
self._stride > iterload.MAX_STRIDE_SWITCH_TO_RA or
153-
(8 * self._chunksize * self._stride * n_atoms > iterload.MEMORY_CUTOFF)):
156+
(8 * self._chunksize * self._stride * n_atoms > iterload.MEMORY_CUTOFF))):
154157
self._mode = 'random_access'
155158
self._f = (lambda x:
156159
md_open(x, n_atoms=self._topology.n_atoms)

0 commit comments

Comments
 (0)