Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Commit

Permalink
Merge pull request #730 from marscher/minor_fixes
Browse files Browse the repository at this point in the history
Minor fixes
  • Loading branch information
marscher committed Mar 11, 2016
2 parents dba6ab3 + c73ce0f commit 2a3753b
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 31 deletions.
2 changes: 1 addition & 1 deletion pyemma/coordinates/data/_base/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def filenames(self, filename_list):
if len(filename_list) > 3:
self._progress_register(len(filename_list), 'Obtaining file info')
for filename in filename_list:
if config['use_trajectory_lengths_cache'] == 'True':
if config.use_trajectory_lengths_cache:
info = TrajectoryInfoCache.instance()[filename, self]
else:
info = self._get_traj_info(filename)
Expand Down
67 changes: 42 additions & 25 deletions pyemma/coordinates/data/util/traj_info_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,21 @@

from __future__ import absolute_import

from six import PY2
from threading import Semaphore
from io import BytesIO
from logging import getLogger
import os
from threading import Semaphore

from pyemma.util.config import conf_values
from logging import getLogger
from pyemma.util import config
import six
import numpy as np
if six.PY2:
import dumbdbm
else:
from dbm import dumb as dumbdbm

logger = getLogger(__name__)

if PY2:
import anydbm
else:
import dbm as anydbm

try:
from cStringIO import StringIO as BytesIO
except ImportError:
from io import BytesIO

__all__ = ('TrajectoryInfoCache', 'TrajInfo')

Expand Down Expand Up @@ -86,9 +82,27 @@ def offsets(self, value):
def hash_value(self):
return self._hash

@hash_value.setter
def hash_value(self, val):
    # Write side of the hash_value property: the value is kept in the
    # private ``_hash`` attribute that the getter returns.
    self._hash = val

def __eq__(self, other):
    """Return True when `other` describes the same trajectory information.

    Two objects are equal if `other` is an instance of this object's class
    and version, hash_value, ndim, length and offsets all match.

    Parameters
    ----------
    other : object
        Object to compare against.

    Returns
    -------
    bool
        True if every compared field matches, False otherwise.
    """
    if not isinstance(other, self.__class__):
        return False

    if (self.version != other.version
            or self.hash_value != other.hash_value
            or self.ndim != other.ndim
            or self.length != other.length):
        return False

    # Compare offsets with array_equal rather than ``np.all(a == b)``:
    # the elementwise form broadcasts, so an empty offsets array compared
    # with a non-empty one yields ``np.all([])`` which is True, and a
    # length-1 array can match a longer one. array_equal checks the shapes
    # first and reports False on any mismatch.
    return bool(np.array_equal(self.offsets, other.offsets))


def create_traj_info(db_val):
fh = BytesIO(str.encode(db_val))
assert isinstance(db_val, (six.string_types, bytes))
if six.PY3 and isinstance(db_val, six.string_types):
logger.warn("db_val is str, converting to utf8 bytes")
db_val = bytes(db_val.encode('utf-8', errors='ignore'))
fh = BytesIO(db_val)

try:
arr = np.load(fh)['data']
info = TrajInfo()
Expand Down Expand Up @@ -131,31 +145,32 @@ class TrajectoryInfoCache(object):
def instance():
if TrajectoryInfoCache._instance is None:
# singleton pattern
cfg_dir = conf_values['pyemma']['cfg_dir']
filename = os.path.join(cfg_dir, "trajlen_cache")
filename = os.path.join(config.cfg_dir, "trajlen_cache")
TrajectoryInfoCache._instance = TrajectoryInfoCache(filename)
import atexit

@atexit.register
def write_at_exit():
if hasattr(TrajectoryInfoCache._instance._database, 'sync'):
# sync db to hard drive at exit.
if hasattr(TrajectoryInfoCache._instance._database, 'sync'):
import atexit
@atexit.register
def write_at_exit():
TrajectoryInfoCache._instance._database.sync()

return TrajectoryInfoCache._instance

def __init__(self, database_filename=None):
self.database_filename = database_filename
if database_filename is not None:
try:
self._database = anydbm.open(database_filename, flag="c")
except anydbm.error as e:
self._database = dumbdbm.open(database_filename, flag="c")
except dumbdbm.error as e:
try:
os.unlink(database_filename)
self._database = anydbm.open(database_filename, flag="n")
self._database = dumbdbm.open(database_filename, flag="n")
# persist file right now, since it was broken
self._set_curr_db_version(TrajectoryInfoCache.DB_VERSION)
# close and re-open to ensure file exists
self._database.close()
self._database = anydbm.open(database_filename, flag="w")
self._database = dumbdbm.open(database_filename, flag="w")
except OSError:
raise RuntimeError('corrupted database in "%s" could not be deleted'
% os.path.abspath(database_filename))
Expand All @@ -178,12 +193,13 @@ def __getitem__(self, filename_reader_tuple):
key = self._get_file_hash(filename)
result = None
try:
result = self._database[key]
result = str(self._database[key])
info = create_traj_info(result)
# handle cache misses and uninterpretable results by re-computation.
# Note: this also handles UnknownDBFormatExceptions!
except KeyError:
info = reader._get_traj_info(filename)
info.hash_value = key
# store info in db
result = self.__setitem__(filename, info)

Expand All @@ -195,6 +211,7 @@ def __getitem__(self, filename_reader_tuple):
return info

def __format_value(self, traj_info):
assert traj_info.hash_value != -1
fh = BytesIO()

header = {'data_format_version': 1,
Expand Down
3 changes: 2 additions & 1 deletion pyemma/coordinates/tests/test_featurereader_and_tica.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ def test_partial_fit(self):
partial.partial_fit(traj)

np.testing.assert_allclose(partial.eigenvalues, ref.eigenvalues)
np.testing.assert_allclose(np.abs(partial.eigenvectors), np.abs(ref.eigenvectors), atol=1e-8)
# only compare first two eigenvectors, because we only have two metastable processes
np.testing.assert_allclose(np.abs(partial.eigenvectors[:2]), np.abs(ref.eigenvectors[:2]), atol=1e-8)

if __name__ == "__main__":
unittest.main()
38 changes: 34 additions & 4 deletions pyemma/coordinates/tests/test_traj_info_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,36 @@
from __future__ import absolute_import

from tempfile import NamedTemporaryFile

try:
import bsddb
have_bsddb = True
except ImportError:
have_bsddb = False

import os
import six
import tempfile
import unittest

from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
from pyemma.coordinates.data.py_csv_reader import PyCSVReader
from pyemma.coordinates.data.util.traj_info_cache import TrajectoryInfoCache
from pyemma.coordinates.tests.test_featurereader import create_traj
from pyemma.datasets import get_bpti_test_data
from pyemma.util import config
from pyemma.util.files import TemporaryDirectory
import mdtraj
import pkg_resources
import pyemma

from pyemma.coordinates.data.util.traj_info_cache import TrajectoryInfoCache
import numpy as np

if six.PY2:
import dumbdbm
else:
from dbm import dumb as dumbdbm

xtcfiles = get_bpti_test_data()['trajs']
pdbfile = get_bpti_test_data()['top']
Expand All @@ -51,7 +62,6 @@ class TestTrajectoryInfoCache(unittest.TestCase):

@classmethod
def setUpClass(cls):
super(TestTrajectoryInfoCache, cls).setUpClass()
cls.work_dir = tempfile.mkdtemp("traj_cache_test")

def setUp(self):
Expand All @@ -67,10 +77,30 @@ def tearDown(self):

@classmethod
def tearDownClass(cls):
super(TestTrajectoryInfoCache, cls).tearDownClass()
import shutil
shutil.rmtree(cls.work_dir, ignore_errors=True)

def test_get_instance(self):
    """Smoke test: creating the singleton must not raise."""
    cache = TrajectoryInfoCache.instance()
    # Only the attribute access matters here; the value is not checked.
    cache.current_db_version

def test_store_load_traj_info(self):
    """Check that a stored entry reads back equal after reopening the database.

    The info object for a freshly written data file is requested once from
    ``self.db``. The underlying database is then closed and reopened with
    flag 'r', and the same entry is requested again. Both results must
    compare equal.
    """
    x = np.random.random((10, 3))
    # Read the old setting before entering the try block. If this lookup
    # failed inside it, the finally clause would raise a NameError on the
    # unbound name and hide the real error.
    old_val = config.conf_values['pyemma']['cfg_dir']
    try:
        config.conf_values['pyemma']['cfg_dir'] = self.work_dir
        # delete=False keeps the file around after the handle is closed.
        # Creating it inside work_dir means tearDownClass removes it along
        # with the directory instead of leaking it into the system temp dir.
        with NamedTemporaryFile(delete=False, dir=self.work_dir) as fh:
            np.savetxt(fh.name, x)
            reader = api.source(fh.name)
            info = self.db[fh.name, reader]
            # Reopen the database so the second lookup is served from what
            # was actually written to disk.
            self.db._database.close()
            self.db._database = dumbdbm.open(self.db.database_filename, 'r')
            info2 = self.db[fh.name, reader]
            self.assertEqual(info2, info)
    finally:
        config.conf_values['pyemma']['cfg_dir'] = old_val

def test_exceptions(self):
# inaccessible files
not_existant = ''.join(
Expand Down
1 change: 1 addition & 0 deletions pyemma/util/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def default_config_file(self):
def default_logging_file(self):
return pkg_resources.resource_filename('pyemma', Wrapper.DEFAULT_LOGGING_FILE_NAME)

@property
@deprecated("do not use this!")
def conf_values(self):
    # Deprecated read-only accessor: returns the object stored in the
    # private ``_conf_values`` attribute as-is.
    return self._conf_values
Expand Down

0 comments on commit 2a3753b

Please sign in to comment.