fr/fr_env/lib/python3.8/site-packages/sklearn/decomposition/_base.py

159 lines
5.4 KiB
Python

"""Principal Component Analysis Base Classes"""
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Olivier Grisel <olivier.grisel@ensta.org>
# Mathieu Blondel <mathieu@mblondel.org>
# Denis A. Engemann <denis-alexander.engemann@inria.fr>
# Kyle Kastner <kastnerkyle@gmail.com>
#
# License: BSD 3 clause
import numpy as np
from scipy import linalg
from ..base import BaseEstimator, TransformerMixin
from ..utils.validation import check_is_fitted
from abc import ABCMeta, abstractmethod
class _BasePCA(TransformerMixin, BaseEstimator, metaclass=ABCMeta):
"""Base class for PCA methods.
Warning: This class should not be used directly.
Use derived classes instead.
"""
def get_covariance(self):
"""Compute data covariance with the generative model.
``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``
where S**2 contains the explained variances, and sigma2 contains the
noise variances.
Returns
-------
cov : array, shape=(n_features, n_features)
Estimated covariance of data.
"""
components_ = self.components_
exp_var = self.explained_variance_
if self.whiten:
components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
cov = np.dot(components_.T * exp_var_diff, components_)
cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace
return cov
def get_precision(self):
"""Compute data precision matrix with the generative model.
Equals the inverse of the covariance but computed with
the matrix inversion lemma for efficiency.
Returns
-------
precision : array, shape=(n_features, n_features)
Estimated precision of data.
"""
n_features = self.components_.shape[1]
# handle corner cases first
if self.n_components_ == 0:
return np.eye(n_features) / self.noise_variance_
if self.n_components_ == n_features:
return linalg.inv(self.get_covariance())
# Get precision using matrix inversion lemma
components_ = self.components_
exp_var = self.explained_variance_
if self.whiten:
components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
precision = np.dot(components_, components_.T) / self.noise_variance_
precision.flat[::len(precision) + 1] += 1. / exp_var_diff
precision = np.dot(components_.T,
np.dot(linalg.inv(precision), components_))
precision /= -(self.noise_variance_ ** 2)
precision.flat[::len(precision) + 1] += 1. / self.noise_variance_
return precision
@abstractmethod
def fit(self, X, y=None):
"""Placeholder for fit. Subclasses should implement this method!
Fit the model with X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data, where n_samples is the number of samples and
n_features is the number of features.
Returns
-------
self : object
Returns the instance itself.
"""
def transform(self, X):
"""Apply dimensionality reduction to X.
X is projected on the first principal components previously extracted
from a training set.
Parameters
----------
X : array-like, shape (n_samples, n_features)
New data, where n_samples is the number of samples
and n_features is the number of features.
Returns
-------
X_new : array-like, shape (n_samples, n_components)
Examples
--------
>>> import numpy as np
>>> from sklearn.decomposition import IncrementalPCA
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
>>> ipca = IncrementalPCA(n_components=2, batch_size=3)
>>> ipca.fit(X)
IncrementalPCA(batch_size=3, n_components=2)
>>> ipca.transform(X) # doctest: +SKIP
"""
check_is_fitted(self)
X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)
if self.mean_ is not None:
X = X - self.mean_
X_transformed = np.dot(X, self.components_.T)
if self.whiten:
X_transformed /= np.sqrt(self.explained_variance_)
return X_transformed
def inverse_transform(self, X):
"""Transform data back to its original space.
In other words, return an input X_original whose transform would be X.
Parameters
----------
X : array-like, shape (n_samples, n_components)
New data, where n_samples is the number of samples
and n_components is the number of components.
Returns
-------
X_original array-like, shape (n_samples, n_features)
Notes
-----
If whitening is enabled, inverse_transform will compute the
exact inverse operation, which includes reversing whitening.
"""
if self.whiten:
return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
self.components_) + self.mean_
else:
return np.dot(X, self.components_) + self.mean_