Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Commit

Permalink
[coor/pca|tica] set defaults for variance cutoff. Fixes #472
Browse files Browse the repository at this point in the history
  • Loading branch information
marscher committed Aug 10, 2015
1 parent fda2ee6 commit 1969211
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 8 deletions.
8 changes: 3 additions & 5 deletions pyemma/coordinates/transform/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

class PCA(Transformer):

def __init__(self, dim=-1, var_cutoff=1.0, mean=None):
def __init__(self, dim=-1, var_cutoff=0.95, mean=None):
r""" Principal component analysis.
Given a sequence of multivariate data :math:`X_t`,
Expand Down Expand Up @@ -63,7 +63,7 @@ def __init__(self, dim=-1, var_cutoff=1.0, mean=None):
-1 means all numerically available dimensions will be used unless reduced by var_cutoff.
Setting dim to a positive value is exclusive with var_cutoff.
var_cutoff : float in the range [0,1], optional, default 1
var_cutoff : float in the range [0,1], optional, default 0.95
Determines the number of output dimensions by including dimensions until their cumulative kinetic variance
exceeds the fraction var_cutoff. var_cutoff=1.0 means all numerically available dimensions
(see epsilon) will be used, unless set by dim. Setting var_cutoff smaller than 1.0 is exclusive with dim.
Expand Down Expand Up @@ -169,8 +169,7 @@ def _param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj,
if ipass == 0:
if t == 0:
if self._given_mean:
raise SkipPassException()
self._logger.debug("start to calculate mean for traj nr %i" % itraj)
raise SkipPassException(stride=stride)
self._sum_tmp = np.empty(X.shape[1])
np.sum(X, axis=0, out=self._sum_tmp)
self.mu += self._sum_tmp
Expand All @@ -195,7 +194,6 @@ def _param_add_data(self, X, itraj, t, first_chunk, last_chunk_in_traj,

if last_chunk:
self.cov /= self._N - 1
self._logger.debug("finished")
return True # finished!

# by default, continue
Expand Down
6 changes: 3 additions & 3 deletions pyemma/coordinates/transform/tica.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class MeaningOfLagWithStrideWarning(UserWarning):

class TICA(Transformer):

def __init__(self, lag, dim=-1, var_cutoff=1.0, kinetic_map=False, epsilon=1e-6,
def __init__(self, lag, dim=-1, var_cutoff=0.95, kinetic_map=True, epsilon=1e-6,
force_eigenvalues_le_one=False, mean=None):
r""" Time-lagged independent component analysis (TICA) [1]_, [2]_, [3]_.
Expand All @@ -55,11 +55,11 @@ def __init__(self, lag, dim=-1, var_cutoff=1.0, kinetic_map=False, epsilon=1e-6,
Maximum number of significant independent components to use to reduce dimension of input data. -1 means
all numerically available dimensions (see epsilon) will be used unless reduced by var_cutoff.
Setting dim to a positive value is exclusive with var_cutoff.
var_cutoff : float in the range [0,1], optional, default 1
var_cutoff : float in the range [0,1], optional, default 0.95
Determines the number of output dimensions by including dimensions until their cumulative kinetic variance
exceeds the fraction var_cutoff. var_cutoff=1.0 means all numerically available dimensions
(see epsilon) will be used, unless set by dim. Setting var_cutoff smaller than 1.0 is exclusive with dim.
kinetic_map : bool, optional, default False
kinetic_map : bool, optional, default True
Eigenvectors will be scaled by eigenvalues. As a result, Euclidean distances in the transformed data
approximate kinetic distances [4]_. This is a good choice when the data is further processed by clustering.
epsilon : float
Expand Down

0 comments on commit 1969211

Please sign in to comment.