551 lines
19 KiB
Python
551 lines
19 KiB
Python
"""
|
|
The :mod:`sklearn.feature_extraction.image` submodule gathers utilities to
|
|
extract features from images.
|
|
"""
|
|
|
|
# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
|
|
# Gael Varoquaux <gael.varoquaux@normalesup.org>
|
|
# Olivier Grisel
|
|
# Vlad Niculae
|
|
# License: BSD 3 clause
|
|
|
|
from itertools import product
|
|
import numbers
|
|
import numpy as np
|
|
from scipy import sparse
|
|
from numpy.lib.stride_tricks import as_strided
|
|
|
|
from ..utils import check_array, check_random_state
|
|
from ..utils.validation import _deprecate_positional_args
|
|
from ..base import BaseEstimator
|
|
|
|
# Names exported as the public API of this module.
__all__ = ['PatchExtractor',
           'extract_patches_2d',
           'grid_to_graph',
           'img_to_graph',
           'reconstruct_from_patches_2d']
|
|
|
|
###############################################################################
|
|
# From an image to a graph
|
|
|
|
|
|
def _make_edges_3d(n_x, n_y, n_z=1):
|
|
"""Returns a list of edges for a 3D image.
|
|
|
|
Parameters
|
|
----------
|
|
n_x : int
|
|
The size of the grid in the x direction.
|
|
n_y : int
|
|
The size of the grid in the y direction.
|
|
n_z : integer, default=1
|
|
The size of the grid in the z direction, defaults to 1
|
|
"""
|
|
vertices = np.arange(n_x * n_y * n_z).reshape((n_x, n_y, n_z))
|
|
edges_deep = np.vstack((vertices[:, :, :-1].ravel(),
|
|
vertices[:, :, 1:].ravel()))
|
|
edges_right = np.vstack((vertices[:, :-1].ravel(),
|
|
vertices[:, 1:].ravel()))
|
|
edges_down = np.vstack((vertices[:-1].ravel(), vertices[1:].ravel()))
|
|
edges = np.hstack((edges_deep, edges_right, edges_down))
|
|
return edges
|
|
|
|
|
|
def _compute_gradient_3d(edges, img):
|
|
_, n_y, n_z = img.shape
|
|
gradient = np.abs(img[edges[0] // (n_y * n_z),
|
|
(edges[0] % (n_y * n_z)) // n_z,
|
|
(edges[0] % (n_y * n_z)) % n_z] -
|
|
img[edges[1] // (n_y * n_z),
|
|
(edges[1] % (n_y * n_z)) // n_z,
|
|
(edges[1] % (n_y * n_z)) % n_z])
|
|
return gradient
|
|
|
|
|
|
# XXX: Why mask the image after computing the weights?
|
|
|
|
def _mask_edges_weights(mask, edges, weights=None):
|
|
"""Apply a mask to edges (weighted or not)"""
|
|
inds = np.arange(mask.size)
|
|
inds = inds[mask.ravel()]
|
|
ind_mask = np.logical_and(np.in1d(edges[0], inds),
|
|
np.in1d(edges[1], inds))
|
|
edges = edges[:, ind_mask]
|
|
if weights is not None:
|
|
weights = weights[ind_mask]
|
|
if len(edges.ravel()):
|
|
maxval = edges.max()
|
|
else:
|
|
maxval = 0
|
|
order = np.searchsorted(np.unique(edges.ravel()), np.arange(maxval + 1))
|
|
edges = order[edges]
|
|
if weights is None:
|
|
return edges
|
|
else:
|
|
return edges, weights
|
|
|
|
|
|
def _to_graph(n_x, n_y, n_z, mask=None, img=None,
              return_as=sparse.coo_matrix, dtype=None):
    """Auxiliary function for img_to_graph and grid_to_graph.

    Builds the symmetric voxel adjacency matrix of an ``(n_x, n_y, n_z)``
    grid. With ``img``, edges are weighted by the intensity gradient and the
    diagonal holds the voxel values; without it, every entry is 1.

    Parameters
    ----------
    n_x, n_y, n_z : int
        Size of the grid along each axis.
    mask : array-like, default=None
        Optional mask selecting the voxels to keep. It is converted to a
        boolean array.
    img : array-like, default=None
        Optional image providing edge weights and diagonal values.
    return_as : np.ndarray or a sparse matrix class, \
        default=sparse.coo_matrix
        The class used to build the returned adjacency matrix.
    dtype : dtype, default=None
        Data type of the result. Defaults to ``img.dtype`` when ``img`` is
        given and to ``int`` otherwise.

    Returns
    -------
    graph : ndarray or sparse matrix of shape (n_voxels, n_voxels)
        The adjacency matrix, as an instance of ``return_as``.
    """
    edges = _make_edges_3d(n_x, n_y, n_z)

    if img is not None:
        img = np.atleast_3d(img)
    if mask is not None:
        # Coerce once, for both branches below: an integer 0/1 mask would
        # otherwise be interpreted as fancy indices instead of a selection.
        mask = np.asarray(mask, dtype=bool)

    if dtype is None:
        dtype = int if img is None else img.dtype

    if img is not None:
        weights = _compute_gradient_3d(edges, img)
        if mask is not None:
            edges, weights = _mask_edges_weights(mask, edges, weights)
            # Align the mask with the 3D image instead of squeezing the
            # image, so length-1 axes on either side cannot cause a shape
            # mismatch.
            diag = img[mask.reshape(img.shape)]
        else:
            diag = img.ravel()
        n_voxels = diag.size
    else:
        if mask is not None:
            edges = _mask_edges_weights(mask, edges)
            n_voxels = np.count_nonzero(mask)
        else:
            n_voxels = n_x * n_y * n_z
        weights = np.ones(edges.shape[1], dtype=dtype)
        diag = np.ones(n_voxels, dtype=dtype)

    # Every edge is stored in both directions, followed by the diagonal.
    diag_idx = np.arange(n_voxels)
    i_idx = np.hstack((edges[0], edges[1]))
    j_idx = np.hstack((edges[1], edges[0]))
    graph = sparse.coo_matrix((np.hstack((weights, weights, diag)),
                               (np.hstack((i_idx, diag_idx)),
                                np.hstack((j_idx, diag_idx)))),
                              (n_voxels, n_voxels),
                              dtype=dtype)
    if return_as is np.ndarray:
        return graph.toarray()
    return return_as(graph)
|
|
|
|
|
|
@_deprecate_positional_args
def img_to_graph(img, *, mask=None, return_as=sparse.coo_matrix, dtype=None):
    """Build the graph of pixel-to-pixel gradient connections of an image.

    Each edge between neighbouring pixels is weighted by the gradient value.

    Read more in the :ref:`User Guide <image_feature_extraction>`.

    Parameters
    ----------
    img : ndarray of shape (height, width) or (height, width, channel)
        2D or 3D image.
    mask : ndarray of shape (height, width) or \
            (height, width, channel), dtype=bool, default=None
        Optional mask of the image, used to restrict the graph to a subset
        of the pixels.
    return_as : np.ndarray or a sparse matrix class, \
            default=sparse.coo_matrix
        The class used to build the returned adjacency matrix.
    dtype : dtype, default=None
        Data type of the returned matrix. Defaults to the dtype of ``img``.

    Notes
    -----
    In scikit-learn 0.14.1 and earlier, ``return_as=np.ndarray`` produced a
    dense ``np.matrix`` instance. It now produces an ``np.ndarray``, as
    expected.

    For compatibility, user code relying on this function should wrap its
    calls in ``np.asarray`` to avoid type issues.
    """
    volume = np.atleast_3d(img)
    n_x, n_y, n_z = volume.shape
    return _to_graph(n_x, n_y, n_z, mask=mask, img=volume,
                     return_as=return_as, dtype=dtype)
|
|
|
|
|
|
@_deprecate_positional_args
def grid_to_graph(n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix,
                  dtype=int):
    """Build the graph of pixel-to-pixel connections of a regular grid.

    An edge exists between two voxels when they are connected.

    Parameters
    ----------
    n_x : int
        Dimension in x axis.
    n_y : int
        Dimension in y axis.
    n_z : int, default=1
        Dimension in z axis.
    mask : ndarray of shape (n_x, n_y, n_z), dtype=bool, default=None
        Optional mask of the image, used to restrict the graph to a subset
        of the pixels.
    return_as : np.ndarray or a sparse matrix class, \
            default=sparse.coo_matrix
        The class used to build the returned adjacency matrix.
    dtype : dtype, default=int
        Data type of the returned matrix.

    Notes
    -----
    In scikit-learn 0.14.1 and earlier, ``return_as=np.ndarray`` produced a
    dense ``np.matrix`` instance. It now produces an ``np.ndarray``, as
    expected.

    For compatibility, user code relying on this function should wrap its
    calls in ``np.asarray`` to avoid type issues.
    """
    graph_options = dict(mask=mask, return_as=return_as, dtype=dtype)
    return _to_graph(n_x, n_y, n_z, **graph_options)
|
|
|
|
|
|
###############################################################################
|
|
# From an image to a set of small image patches
|
|
|
|
def _compute_n_patches(i_h, i_w, p_h, p_w, max_patches=None):
|
|
"""Compute the number of patches that will be extracted in an image.
|
|
|
|
Read more in the :ref:`User Guide <image_feature_extraction>`.
|
|
|
|
Parameters
|
|
----------
|
|
i_h : int
|
|
The image height
|
|
i_w : int
|
|
The image with
|
|
p_h : int
|
|
The height of a patch
|
|
p_w : int
|
|
The width of a patch
|
|
max_patches : int or float, default=None
|
|
The maximum number of patches to extract. If max_patches is a float
|
|
between 0 and 1, it is taken to be a proportion of the total number
|
|
of patches.
|
|
"""
|
|
n_h = i_h - p_h + 1
|
|
n_w = i_w - p_w + 1
|
|
all_patches = n_h * n_w
|
|
|
|
if max_patches:
|
|
if (isinstance(max_patches, (numbers.Integral))
|
|
and max_patches < all_patches):
|
|
return max_patches
|
|
elif (isinstance(max_patches, (numbers.Integral))
|
|
and max_patches >= all_patches):
|
|
return all_patches
|
|
elif (isinstance(max_patches, (numbers.Real))
|
|
and 0 < max_patches < 1):
|
|
return int(max_patches * all_patches)
|
|
else:
|
|
raise ValueError("Invalid value for max_patches: %r" % max_patches)
|
|
else:
|
|
return all_patches
|
|
|
|
|
|
def _extract_patches(arr, patch_shape=8, extraction_step=1):
|
|
"""Extracts patches of any n-dimensional array in place using strides.
|
|
|
|
Given an n-dimensional array it will return a 2n-dimensional array with
|
|
the first n dimensions indexing patch position and the last n indexing
|
|
the patch content. This operation is immediate (O(1)). A reshape
|
|
performed on the first n dimensions will cause numpy to copy data, leading
|
|
to a list of extracted patches.
|
|
|
|
Read more in the :ref:`User Guide <image_feature_extraction>`.
|
|
|
|
Parameters
|
|
----------
|
|
arr : ndarray
|
|
n-dimensional array of which patches are to be extracted
|
|
|
|
patch_shape : int or tuple of length arr.ndim.default=8
|
|
Indicates the shape of the patches to be extracted. If an
|
|
integer is given, the shape will be a hypercube of
|
|
sidelength given by its value.
|
|
|
|
extraction_step : int or tuple of length arr.ndim, default=1
|
|
Indicates step size at which extraction shall be performed.
|
|
If integer is given, then the step is uniform in all dimensions.
|
|
|
|
|
|
Returns
|
|
-------
|
|
patches : strided ndarray
|
|
2n-dimensional array indexing patches on first n dimensions and
|
|
containing patches on the last n dimensions. These dimensions
|
|
are fake, but this way no data is copied. A simple reshape invokes
|
|
a copying operation to obtain a list of patches:
|
|
result.reshape([-1] + list(patch_shape))
|
|
"""
|
|
|
|
arr_ndim = arr.ndim
|
|
|
|
if isinstance(patch_shape, numbers.Number):
|
|
patch_shape = tuple([patch_shape] * arr_ndim)
|
|
if isinstance(extraction_step, numbers.Number):
|
|
extraction_step = tuple([extraction_step] * arr_ndim)
|
|
|
|
patch_strides = arr.strides
|
|
|
|
slices = tuple(slice(None, None, st) for st in extraction_step)
|
|
indexing_strides = arr[slices].strides
|
|
|
|
patch_indices_shape = ((np.array(arr.shape) - np.array(patch_shape)) //
|
|
np.array(extraction_step)) + 1
|
|
|
|
shape = tuple(list(patch_indices_shape) + list(patch_shape))
|
|
strides = tuple(list(indexing_strides) + list(patch_strides))
|
|
|
|
patches = as_strided(arr, shape=shape, strides=strides)
|
|
return patches
|
|
|
|
|
|
@_deprecate_positional_args
def extract_patches_2d(image, patch_size, *, max_patches=None,
                       random_state=None):
    """Reshape a 2D image into a collection of patches

    The returned patches live in their own, dedicated array.

    Read more in the :ref:`User Guide <image_feature_extraction>`.

    Parameters
    ----------
    image : ndarray of shape (image_height, image_width) or \
        (image_height, image_width, n_channels)
        The original image data. For color images, the last dimension
        specifies the channel: a RGB image would have `n_channels=3`.

    patch_size : tuple of int (patch_height, patch_width)
        The dimensions of one patch.

    max_patches : int or float, default=None
        The maximum number of patches to extract. If `max_patches` is a float
        between 0 and 1, it is taken to be a proportion of the total number
        of patches.

    random_state : int, RandomState instance, default=None
        Determines the random number generator used for random sampling when
        `max_patches` is not None. Use an int to make the randomness
        deterministic.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    patches : array of shape (n_patches, patch_height, patch_width) or \
        (n_patches, patch_height, patch_width, n_channels)
        The collection of patches extracted from the image, where `n_patches`
        is either `max_patches` or the total number of patches that can be
        extracted.

    Examples
    --------
    >>> from sklearn.datasets import load_sample_image
    >>> from sklearn.feature_extraction import image
    >>> # Use the array data from the first image in this dataset:
    >>> one_image = load_sample_image("china.jpg")
    >>> print('Image shape: {}'.format(one_image.shape))
    Image shape: (427, 640, 3)
    >>> patches = image.extract_patches_2d(one_image, (2, 2))
    >>> print('Patches shape: {}'.format(patches.shape))
    Patches shape: (272214, 2, 2, 3)
    >>> # Here are just two of these patches:
    >>> print(patches[1])
    [[[174 201 231]
      [174 201 231]]
     [[173 200 230]
      [173 200 230]]]
    >>> print(patches[800])
    [[[187 214 243]
      [188 215 244]]
     [[187 214 243]
      [188 215 244]]]
    """
    img_height, img_width = image.shape[:2]
    patch_height, patch_width = patch_size

    if patch_height > img_height:
        raise ValueError("Height of the patch should be less than the height"
                         " of the image.")
    if patch_width > img_width:
        raise ValueError("Width of the patch should be less than the width"
                         " of the image.")

    # Always work on a (height, width, channels) array, adding a singleton
    # channel axis for grayscale input.
    image = check_array(image, allow_nd=True)
    image = image.reshape((img_height, img_width, -1))
    n_colors = image.shape[-1]

    every_patch = _extract_patches(
        image,
        patch_shape=(patch_height, patch_width, n_colors),
        extraction_step=1)

    n_patches = _compute_n_patches(img_height, img_width,
                                   patch_height, patch_width, max_patches)
    if max_patches:
        # Draw random top-left corners for the requested number of patches.
        rng = check_random_state(random_state)
        rows = rng.randint(img_height - patch_height + 1, size=n_patches)
        cols = rng.randint(img_width - patch_width + 1, size=n_patches)
        patches = every_patch[rows, cols, 0]
    else:
        patches = every_patch

    patches = patches.reshape(-1, patch_height, patch_width, n_colors)
    if patches.shape[-1] != 1:
        return patches
    # Drop the channel axis again when it carries no information.
    return patches.reshape((n_patches, patch_height, patch_width))
|
|
|
|
|
|
def reconstruct_from_patches_2d(patches, image_size):
    """Reconstruct the image from all of its patches.

    Patches are assumed to overlap and the image is constructed by filling in
    the patches from left to right, top to bottom, averaging the overlapping
    regions.

    Read more in the :ref:`User Guide <image_feature_extraction>`.

    Parameters
    ----------
    patches : ndarray of shape (n_patches, patch_height, patch_width) or \
        (n_patches, patch_height, patch_width, n_channels)
        The complete set of patches. If the patches contain colour
        information, channels are indexed along the last dimension: RGB
        patches would have `n_channels=3`.

    image_size : tuple of int (image_height, image_width) or \
        (image_height, image_width, n_channels)
        The size of the image that will be reconstructed.

    Returns
    -------
    image : ndarray of shape image_size
        The reconstructed image.
    """
    i_h, i_w = image_size[:2]
    p_h, p_w = patches.shape[1:3]
    img = np.zeros(image_size)

    # Number of patch positions along each axis.
    n_h = i_h - p_h + 1
    n_w = i_w - p_w + 1

    # Accumulate every patch at its position, in row-major order.
    for patch, (i, j) in zip(patches, product(range(n_h), range(n_w))):
        img[i:i + p_h, j:j + p_w] += patch

    # Divide each pixel by the number of patches covering it. Coverage is
    # separable: (patches covering the row) * (patches covering the column).
    rows = np.arange(i_h)
    cols = np.arange(i_w)
    row_cover = np.minimum(np.minimum(rows + 1, p_h), i_h - rows)
    col_cover = np.minimum(np.minimum(cols + 1, p_w), i_w - cols)
    overlap = np.outer(row_cover, col_cover).astype(float)
    # Add trailing singleton axes so the division broadcasts over channels.
    overlap = overlap.reshape(overlap.shape + (1,) * (img.ndim - 2))
    img /= overlap
    return img
|
|
|
|
|
|
class PatchExtractor(BaseEstimator):
    """Extracts patches from a collection of images

    Read more in the :ref:`User Guide <image_feature_extraction>`.

    .. versionadded:: 0.9

    Parameters
    ----------
    patch_size : tuple of int (patch_height, patch_width), default=None
        The dimensions of one patch. If None, a tenth of the image height
        and width is used.

    max_patches : int or float, default=None
        The maximum number of patches per image to extract. If max_patches is
        a float in (0, 1), it is taken to mean a proportion of the total
        number of patches.

    random_state : int, RandomState instance, default=None
        Determines the random number generator used for random sampling when
        `max_patches` is not None. Use an int to make the randomness
        deterministic.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> from sklearn.datasets import load_sample_images
    >>> from sklearn.feature_extraction import image
    >>> # Use the array data from the second image in this dataset:
    >>> X = load_sample_images().images[1]
    >>> # transform expects a batch of images, so add a leading axis:
    >>> X = X[None, ...]
    >>> print('Image shape: {}'.format(X.shape))
    Image shape: (1, 427, 640, 3)
    >>> pe = image.PatchExtractor(patch_size=(2, 2))
    >>> pe_fit = pe.fit(X)
    >>> pe_trans = pe.transform(X)
    >>> print('Patches shape: {}'.format(pe_trans.shape))
    Patches shape: (272214, 2, 2, 3)
    """
    @_deprecate_positional_args
    def __init__(self, *, patch_size=None, max_patches=None,
                 random_state=None):
        self.patch_size = patch_size
        self.max_patches = max_patches
        self.random_state = random_state

    def fit(self, X, y=None):
        """Do nothing and return the estimator unchanged.

        This method is just there to implement the usual API and hence
        work in pipelines.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : Ignored
            Not used, present for API consistency.

        Returns
        -------
        self : object
            The estimator itself.
        """
        return self

    def transform(self, X):
        """Transforms the image samples in X into a matrix of patch data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, image_height, image_width) or \
            (n_samples, image_height, image_width, n_channels)
            Array of images from which to extract patches. For color images,
            the last dimension specifies the channel: a RGB image would have
            `n_channels=3`.

        Returns
        -------
        patches : array of shape (n_patches, patch_height, patch_width) or \
             (n_patches, patch_height, patch_width, n_channels)
             The collection of patches extracted from the images, where
             `n_patches` is either `n_samples * max_patches` or the total
             number of patches that can be extracted.
        """
        # Resolve the generator into a local: constructor parameters must
        # not be overwritten, so the estimator stays unchanged by transform.
        random_state = check_random_state(self.random_state)

        n_images, i_h, i_w = X.shape[:3]
        # Give every image an explicit channel axis.
        X = np.reshape(X, (n_images, i_h, i_w, -1))
        n_channels = X.shape[-1]

        if self.patch_size is None:
            patch_size = i_h // 10, i_w // 10
        else:
            # Accept any 2-sequence (e.g. a list); a tuple is needed for the
            # shape concatenation below.
            patch_size = tuple(self.patch_size)

        # compute the dimensions of the patches array
        p_h, p_w = patch_size
        n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, self.max_patches)
        patches_shape = (n_images * n_patches,) + patch_size
        if n_channels > 1:
            patches_shape += (n_channels,)

        # extract the patches, image by image, into consecutive slices
        patches = np.empty(patches_shape)
        for ii, image in enumerate(X):
            patches[ii * n_patches:(ii + 1) * n_patches] = extract_patches_2d(
                image, patch_size, max_patches=self.max_patches,
                random_state=random_state)
        return patches

    def _more_tags(self):
        # Estimator tag: the input is a 3D array (a batch of images).
        return {'X_types': ['3darray']}
|