diff --git a/doc/source/CHANGELOG.rst b/doc/source/CHANGELOG.rst index 3240069f6..0bd0846fe 100644 --- a/doc/source/CHANGELOG.rst +++ b/doc/source/CHANGELOG.rst @@ -7,11 +7,16 @@ Changelog **New features**: - thermo: added MBAR estimation +**Fixes**: +- coordinates: In case a configuration directory has not been created yet, the LRU cache + of the TrajInfo database was failed to be created. + 2.2.2 (7-14-16) --------------- **New features**: + - coordinates: SQLite backend for trajectory info data. This enables fast access to this data on parallel filesystems where multiple processes are writing to the database. This greatly speeds ups reader construction and enables fast random access for formats which usually do not diff --git a/pyemma/coordinates/data/util/traj_info_backends.py b/pyemma/coordinates/data/util/traj_info_backends.py index 9cfce0c08..da9679ec7 100644 --- a/pyemma/coordinates/data/util/traj_info_backends.py +++ b/pyemma/coordinates/data/util/traj_info_backends.py @@ -205,15 +205,16 @@ def set(self, traj_info): self._update_time_stamp(hash_value=traj_info.hash_value) - current_size = os.stat(self.filename).st_size - if (self.num_entries >= config.traj_info_max_entries or - # current_size is in bytes, while traj_info_max_size is in MB - 1.*current_size / 1024**2 >= config.traj_info_max_size): - logger.info("Cleaning database because it has too much entries or is too large.\n" - "Entries: %s. Size: %.2fMB. Configured max_entires: %s. Max_size: %sMB" - % (self.num_entries, (current_size*1.0 / 1024**2), - config.traj_info_max_entries, config.traj_info_max_size)) - self._clean(n=self.clean_n_entries) + if self.filename is not None: + current_size = os.stat(self.filename).st_size + if (self.num_entries >= config.traj_info_max_entries or + # current_size is in bytes, while traj_info_max_size is in MB + 1.*current_size / 1024**2 >= config.traj_info_max_size): + logger.info("Cleaning database because it has too much entries or is too large.\n" + "Entries: %s. 
Size: %.2fMB. Configured max_entries
The timestamp is a time.time() snapshot (float), which are seconds since epoch.""" db_name = self._database_from_key(hash_value) + if not db_name: + db_name=':memory:' + import sqlite3 with sqlite3.connect(db_name) as conn: diff --git a/pyemma/coordinates/tests/test_traj_info_cache.py b/pyemma/coordinates/tests/test_traj_info_cache.py index 1e5634729..251e60a68 100644 --- a/pyemma/coordinates/tests/test_traj_info_cache.py +++ b/pyemma/coordinates/tests/test_traj_info_cache.py @@ -35,6 +35,7 @@ from pyemma.coordinates.data.feature_reader import FeatureReader from pyemma.coordinates.data.numpy_filereader import NumPyFileReader from pyemma.coordinates.data.py_csv_reader import PyCSVReader +from pyemma.coordinates.data.util.traj_info_backends import SqliteDB from pyemma.coordinates.data.util.traj_info_cache import TrajectoryInfoCache from pyemma.coordinates.tests.util import create_traj from pyemma.datasets import get_bpti_test_data @@ -276,6 +277,15 @@ def test_max_size(self): self.assertLessEqual(os.stat(self.db.database_filename).st_size / 1024, config.traj_info_max_size) self.assertGreater(self.db.num_entries, 0) + def test_no_working_directory(self): + import sqlite3 + # this is the case as long as the user has not yet created a config directory via config.save() + self.db._database = SqliteDB(filename=None) + + + # trigger caching + pyemma.coordinates.source(xtcfiles, top=pdbfile) + @unittest.skip("not yet functional") def test_no_sqlite(self): def import_mock(name, *args):