This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Merge pull request #879 from marscher/fix_lru_db_creation
[coor/traj_info_cache] fix a bug in creating the LRU cache

This failed if the user had not yet created a config directory
by invoking pyemma.config.save(), e.g. in fairly new installations,
since the LRU db name is derived from the main db, which is held
in memory when no such directory exists.
marscher authored Jul 26, 2016
2 parents e1bd9b2 + 7d61161 commit 49e4a3d
Showing 3 changed files with 32 additions and 10 deletions.
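
Before the file-by-file diff, a minimal sketch of the behaviour the fix introduces may help: the LRU database path is derived from the main TrajInfo database file, and when the main database is only held in memory (no configuration directory yet), the code now falls back to an in-memory SQLite database instead of failing. The helper name `lru_db_path`, the `n_buckets` parameter, and the file naming below are hypothetical illustrations, not PyEMMA's actual code.

```python
import os

def lru_db_path(main_db_filename, key, n_buckets=10):
    """Hypothetical helper mirroring the fixed logic: derive the location of an
    LRU usage database from the main TrajInfo database file, or return None
    when the main database is not file-backed (no config directory yet)."""
    if main_db_filename is None:
        return None  # main db lives in memory, so there is nowhere to put LRU files
    bucket = int(key, 16) % n_buckets  # bin the hex hash into one of n_buckets databases
    lru_dir = os.path.join(os.path.dirname(main_db_filename), "traj_info_usage")
    return os.path.join(lru_dir, "db_%i.sqlite" % bucket)

# callers fall back to an in-memory SQLite database when no path is available
db_name = lru_db_path(None, "deadbeef") or ":memory:"
```
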
5 changes: 5 additions & 0 deletions doc/source/CHANGELOG.rst
@@ -7,11 +7,16 @@ Changelog
**New features**:
- thermo: added MBAR estimation

**Fixes**:
- coordinates: In case a configuration directory has not been created yet, the LRU cache
of the TrajInfo database could not be created.


2.2.2 (7-14-16)
---------------

**New features**:

- coordinates: SQLite backend for trajectory info data. This enables fast access to this data
on parallel filesystems where multiple processes are writing to the database. This greatly
speeds ups reader construction and enables fast random access for formats which usually do not
27 changes: 17 additions & 10 deletions pyemma/coordinates/data/util/traj_info_backends.py
@@ -205,15 +205,16 @@ def set(self, traj_info):

self._update_time_stamp(hash_value=traj_info.hash_value)

current_size = os.stat(self.filename).st_size
if (self.num_entries >= config.traj_info_max_entries or
# current_size is in bytes, while traj_info_max_size is in MB
1.*current_size / 1024**2 >= config.traj_info_max_size):
logger.info("Cleaning database because it has too much entries or is too large.\n"
"Entries: %s. Size: %.2fMB. Configured max_entires: %s. Max_size: %sMB"
% (self.num_entries, (current_size*1.0 / 1024**2),
config.traj_info_max_entries, config.traj_info_max_size))
self._clean(n=self.clean_n_entries)
if self.filename is not None:
current_size = os.stat(self.filename).st_size
if (self.num_entries >= config.traj_info_max_entries or
# current_size is in bytes, while traj_info_max_size is in MB
1.*current_size / 1024**2 >= config.traj_info_max_size):
logger.info("Cleaning database because it has too much entries or is too large.\n"
"Entries: %s. Size: %.2fMB. Configured max_entires: %s. Max_size: %sMB"
% (self.num_entries, (current_size*1.0 / 1024**2),
config.traj_info_max_entries, config.traj_info_max_size))
self._clean(n=self.clean_n_entries)

def get(self, key):
cursor = self._database.execute("SELECT * FROM traj_info WHERE hash=?", (key,))
@@ -231,12 +232,15 @@ def _database_from_key(self, key):
database has to be locked for updates and multiple processes want to write,
each process has to wait until the lock has been released.
By default the LRU databases will be stored in a sub directory "tra_info_usage"
By default the LRU databases will be stored in a sub directory "traj_info_usage"
lying next to the main database.
:param key: hash of the TrajInfo instance
:return: str, database path
"""
if not self.filename:
return None

from pyemma.util.files import mkdir_p
hash_value_long = int(key, 16)
# bin hash to one of either 10 different databases
@@ -250,6 +254,9 @@ def _update_time_stamp(self, hash_value):
""" timestamps are being stored distributed over several lru databases.
The timestamp is a time.time() snapshot (float), which are seconds since epoch."""
db_name = self._database_from_key(hash_value)
if not db_name:
db_name=':memory:'

import sqlite3

with sqlite3.connect(db_name) as conn:
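
For context, the fallback added in `_update_time_stamp` relies on SQLite's special `:memory:` database name. The snippet below is a standalone illustration of that mechanism using only the standard library; the table name and columns are made up for the example, since the actual schema is not shown in the truncated hunk above.

```python
import sqlite3
import time

# ':memory:' gives a throwaway database that supports the same operations as a
# file-backed one, so the timestamp bookkeeping keeps working without a config dir.
with sqlite3.connect(":memory:") as conn:
    conn.execute("CREATE TABLE IF NOT EXISTS usage (hash TEXT PRIMARY KEY, last_read FLOAT)")
    conn.execute("INSERT INTO usage VALUES (?, ?)", ("deadbeef", time.time()))
    print(conn.execute("SELECT last_read FROM usage WHERE hash=?", ("deadbeef",)).fetchall())
```
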
10 changes: 10 additions & 0 deletions pyemma/coordinates/tests/test_traj_info_cache.py
@@ -35,6 +35,7 @@
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
from pyemma.coordinates.data.py_csv_reader import PyCSVReader
from pyemma.coordinates.data.util.traj_info_backends import SqliteDB
from pyemma.coordinates.data.util.traj_info_cache import TrajectoryInfoCache
from pyemma.coordinates.tests.util import create_traj
from pyemma.datasets import get_bpti_test_data
@@ -276,6 +277,15 @@ def test_max_size(self):
self.assertLessEqual(os.stat(self.db.database_filename).st_size / 1024, config.traj_info_max_size)
self.assertGreater(self.db.num_entries, 0)

def test_no_working_directory(self):
import sqlite3
# this is the case as long as the user has not yet created a config directory via config.save()
self.db._database = SqliteDB(filename=None)


# trigger caching
pyemma.coordinates.source(xtcfiles, top=pdbfile)

@unittest.skip("not yet functional")
def test_no_sqlite(self):
def import_mock(name, *args):
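
The new test only checks that reader construction succeeds when the database is not file-backed. A slightly more direct assertion of the fallback could look like the following sketch; it assumes `SqliteDB(filename=None)` is constructible on its own and that hash keys are hex strings, as the diff above suggests.

```python
from pyemma.coordinates.data.util.traj_info_backends import SqliteDB

db = SqliteDB(filename=None)
# without a file-backed main database there is no place for LRU db files ...
assert db._database_from_key("deadbeef") is None
# ... and updating the usage timestamp should silently fall back to ':memory:'
db._update_time_stamp("deadbeef")
```
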

