Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Commit

Permalink
[coordinates] introduced new config var "coordinates_check_output" to…
Browse files Browse the repository at this point in the history
… check for inf and NaN in iterator output.

Can be enabled by config.coordinates_check_output=True. The exception message contains position information.
  • Loading branch information
marscher committed Oct 19, 2016
1 parent 600688c commit 1904268
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 1 deletion.
11 changes: 11 additions & 0 deletions pyemma/coordinates/data/_base/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,14 @@ def next(self):
(not self.return_traj_index and len(X) == 0) or (self.return_traj_index and len(X[1]) == 0)
):
X = self._it_next()
if config.coordinates_check_output:
array = X if not self.return_traj_index else X[1]
if not np.all(np.isfinite(array)):
# determine position
start = self.pos
msg = "Found invalid values in chunk in trajectory index {itraj} at chunk [{start}, {stop}]" \
.format(itraj=self.current_trajindex, start=start, stop=start+len(array))
raise InvalidDataInStreamException(msg)
return X

def __iter__(self):
Expand All @@ -683,3 +691,6 @@ def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
return False


class InvalidDataInStreamException(Exception):
    """Raised when a data stream yields non-finite values.

    Non-finite means NaN or positive/negative infinity.
    """
21 changes: 21 additions & 0 deletions pyemma/coordinates/tests/test_coordinates_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np

from pyemma.coordinates.data import DataInMemory
from pyemma.util.contexts import settings
from pyemma.util.files import TemporaryDirectory
import os
from glob import glob
Expand Down Expand Up @@ -153,5 +154,25 @@ def test_write_to_csv_propagate_filenames(self):
for a, e in zip(actual, expected):
np.testing.assert_allclose(a, e)

def test_invalid_data_in_input_nan(self):
    """Iterating over data containing NaN raises when the output check is on."""
    from pyemma.coordinates.data._base.datasource import InvalidDataInStreamException

    # poison the last frame of the first trajectory
    self.d[0][-1] = np.nan
    reader = DataInMemory(self.d)
    chunks = reader.iterator()
    with settings(coordinates_check_output=True), \
            self.assertRaises(InvalidDataInStreamException):
        # exhaust the iterator; the check fires on the chunk holding the NaN
        for _traj_index, _chunk in chunks:
            pass

def test_invalid_data_in_input_inf(self):
    """Iterating over data containing inf raises when the output check is on.

    Uses a chunk size of 5 so the check is exercised on chunked iteration,
    and verifies that the raised exception carries a descriptive message.
    """
    # poison the last frame of the second trajectory
    self.d[1][-1] = np.inf
    r = DataInMemory(self.d, chunksize=5)
    it = r.iterator()
    from pyemma.coordinates.data._base.datasource import InvalidDataInStreamException
    with settings(coordinates_check_output=True):
        with self.assertRaises(InvalidDataInStreamException) as cm:
            for itraj, X in it:
                pass
    # the captured context was previously unused; check the message so the
    # "exception contains position information" contract is actually tested
    self.assertIn("Found invalid values in chunk", str(cm.exception))

if __name__ == '__main__':
unittest.main()
5 changes: 4 additions & 1 deletion pyemma/pyemma.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,7 @@ use_trajectory_lengths_cache = True
# maximum entries in database
traj_info_max_entries = 50000
# max size in MB
traj_info_max_size = 500
traj_info_max_size = 500

# check output of iterators in pyemma.coordinates for infinity and NaN, useful for debug purposes.
coordinates_check_output = False
9 changes: 9 additions & 0 deletions pyemma/util/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
'use_trajectory_lengths_cache',
'traj_info_max_entries',
'traj_info_max_size',
'coordinates_check_output',
)

if six.PY2:
Expand Down Expand Up @@ -364,6 +365,14 @@ def show_config_notification(self):
def show_config_notification(self, val):
self._conf_values.set('pyemma', 'show_config_notification', str(val))

@property
def coordinates_check_output(self):
    """bool: check output of iterators in pyemma.coordinates for infinity
    and NaN (useful for debug purposes)."""
    section, option = 'pyemma', 'coordinates_check_output'
    return self._conf_values.getboolean(section, option)

@coordinates_check_output.setter
def coordinates_check_output(self, val):
    # the underlying parser stores strings, so serialise the value first
    serialised = str(val)
    self._conf_values.set('pyemma', 'coordinates_check_output', serialised)

### FILE HANDLING

def __copy_default_files_to_cfg_dir(self, target_dir):
Expand Down

0 comments on commit 1904268

Please sign in to comment.