@@ -51,7 +51,7 @@ class PCA(sklearn.decomposition.PCA):
51
51
ignored
52
52
53
53
whiten : bool, optional (default False)
54
- When True (False by default) the `components_` vectors are multiplied
54
+ When True (False by default) the ``components_`` vectors are multiplied
55
55
by the square root of n_samples and then divided by the singular values
56
56
to ensure uncorrelated outputs with unit component-wise variances.
57
57
@@ -60,25 +60,10 @@ class PCA(sklearn.decomposition.PCA):
60
60
improve the predictive accuracy of the downstream estimators by
61
61
making their data respect some hard-wired assumptions.
62
62
63
- center : bool, optional (default True)
64
- When False (True by default), the underlying data gets centered at zero
65
- by subtracting the mean of the data from the data itself.
66
-
67
- PCA is performed on centered data due to its being a regression model,
68
- without an intercept. As such, its pricipal components originate at the
69
- origin of the transformed space.
70
-
71
- `center` set to False may be employed when performing PCA on already
72
- centered data.
73
-
74
- Since centering is a required step as part of whitening, `center` set
75
- to False and `whiten` set to True is a combination which may result in
76
- unexpected behavior, if performed on not previously centered data.
77
-
78
63
svd_solver : string {'auto', 'full', 'tsqr', 'randomized'}
79
64
auto :
80
- the solver is selected by a default policy based on `X.shape` and
81
- `n_components`: if the input data is larger than 500x500 and the
65
+ the solver is selected by a default policy based on ``X.shape`` and
66
+ ``n_components``: if the input data is larger than 500x500 and the
82
67
number of components to extract is lower than 80% of the smallest
83
68
dimension of the data, then the more efficient 'randomized'
84
69
method is enabled. Otherwise the exact full SVD is computed and
@@ -99,7 +84,22 @@ class PCA(sklearn.decomposition.PCA):
99
84
If int, random_state is the seed used by the random number generator;
100
85
If RandomState instance, random_state is the random number generator;
101
86
If None, the random number generator is the RandomState instance used
102
- by `da.random`. Used when ``svd_solver`` == 'randomized'.
87
+ by ``da.random``. Used when ``svd_solver`` == 'randomized'.
88
+
89
+ center : bool, optional (default True)
90
+ When True (the default), the underlying data gets centered at zero
91
+ by subtracting the mean of the data from the data itself.
92
+
93
+ PCA is performed on centered data due to its being a regression model,
94
+ without an intercept. As such, its principal components originate at the
95
+ origin of the transformed space.
96
+
97
+ ``center=False`` may be employed when performing PCA on already
98
+ centered data.
99
+
100
+ Since centering is a required step as part of whitening, ``center`` set
101
+ to False and ``whiten`` set to True is a combination which may result in
102
+ unexpected behavior, if performed on not previously centered data.
103
103
104
104
Attributes
105
105
----------
@@ -128,7 +128,7 @@ class PCA(sklearn.decomposition.PCA):
128
128
mean_ : array, shape (n_features,)
129
129
Per-feature empirical mean, estimated from the training set.
130
130
131
- Equal to `X.mean(axis=0)`.
131
+ Equal to ``X.mean(axis=0)``.
132
132
133
133
n_components_ : int
134
134
The estimated number of components. When n_components is set
@@ -197,20 +197,22 @@ class PCA(sklearn.decomposition.PCA):
197
197
``dask.linalg.svd_compressed``.
198
198
* n_components : ``n_components='mle'`` is not allowed.
199
199
Fractional ``n_components`` between 0 and 1 is not allowed.
200
- * center : defaults to ``True`` and enables control over whether centering
201
- gets implicitly performed as part of the PCA model steps.
200
+ * center : if ``True`` (the default), automatically center input data before
201
+ performing PCA.
202
+ Set this parameter to ``False`` if the input data have already been
203
+ centered before running ``fit()``.
202
204
"""
203
205
204
206
def __init__ (
205
207
self ,
206
208
n_components = None ,
207
209
copy = True ,
208
210
whiten = False ,
209
- center = True ,
210
211
svd_solver = "auto" ,
211
212
tol = 0.0 ,
212
213
iterated_power = 0 ,
213
214
random_state = None ,
215
+ center = True ,
214
216
):
215
217
self .n_components = n_components
216
218
self .copy = copy
@@ -221,14 +223,10 @@ def __init__(
221
223
self .iterated_power = iterated_power
222
224
self .random_state = random_state
223
225
224
- def _check_params (self ):
225
- pass
226
-
227
226
def fit (self , X , y = None ):
228
227
if not dask .is_dask_collection (X ):
229
228
raise TypeError (_TYPE_MSG .format (type (X )))
230
229
231
- self ._check_params ()
232
230
self ._fit (X )
233
231
self .n_features_in_ = X .shape [1 ]
234
232
return self
0 commit comments