-# -*- coding: utf-8 -*-
 """
-=========================================================
+===========================
 Vector Quantization Example
-=========================================================
-
-Face, a 1024 x 768 size image of a raccoon face,
-is used here to illustrate how `k`-means is
-used for vector quantization.
+===========================
 
+This example shows how one can use :class:`~sklearn.preprocessing.KBinsDiscretizer`
+to perform vector quantization on a toy image, the raccoon face.
 """
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
+# Authors: Gael Varoquaux
+#          Jaques Grobler
 # License: BSD 3 clause
 
-import numpy as np
-import matplotlib.pyplot as plt
-
-from sklearn import cluster
-
+# %%
+# Original image
+# --------------
+#
+# We start by loading the raccoon face image from SciPy. We will additionally check
+# some information about the image, such as its shape and the data type used to
+# store it.
+#
+# Note that depending on the SciPy version, we have to adapt the import since the
+# function returning the image is not located in the same module. Also, SciPy >= 1.10
+# requires the package `pooch` to be installed.
 try:  # Scipy >= 1.10
     from scipy.datasets import face
 except ImportError:
     from scipy.misc import face
 
 raccoon_face = face(gray=True)
 
-n_clusters = 5
-np.random.seed(0)
-
-X = raccoon_face.reshape((-1, 1))  # We need an (n_sample, n_feature) array
-k_means = cluster.KMeans(n_clusters=n_clusters, n_init=4)
-k_means.fit(X)
-values = k_means.cluster_centers_.squeeze()
-labels = k_means.labels_
-
-# create an array from labels and values
-raccoon_face_compressed = np.choose(labels, values)
-raccoon_face_compressed.shape = raccoon_face.shape
-
-vmin = raccoon_face.min()
-vmax = raccoon_face.max()
-
-# original raccoon_face
-plt.figure(1, figsize=(3, 2.2))
-plt.imshow(raccoon_face, cmap=plt.cm.gray, vmin=vmin, vmax=256)
-
-# compressed raccoon_face
-plt.figure(2, figsize=(3, 2.2))
-plt.imshow(raccoon_face_compressed, cmap=plt.cm.gray, vmin=vmin, vmax=vmax)
-
-# equal bins raccoon_face
-regular_values = np.linspace(0, 256, n_clusters + 1)
-regular_labels = np.searchsorted(regular_values, raccoon_face) - 1
-regular_values = 0.5 * (regular_values[1:] + regular_values[:-1])  # mean
-regular_raccoon_face = np.choose(regular_labels.ravel(), regular_values, mode="clip")
-regular_raccoon_face.shape = raccoon_face.shape
-plt.figure(3, figsize=(3, 2.2))
-plt.imshow(regular_raccoon_face, cmap=plt.cm.gray, vmin=vmin, vmax=vmax)
-
-# histogram
-plt.figure(4, figsize=(3, 2.2))
-plt.clf()
-plt.axes([0.01, 0.01, 0.98, 0.98])
-plt.hist(X, bins=256, color=".5", edgecolor=".5")
-plt.yticks(())
-plt.xticks(regular_values)
-values = np.sort(values)
-for center_1, center_2 in zip(values[:-1], values[1:]):
-    plt.axvline(0.5 * (center_1 + center_2), color="b")
-
-for center_1, center_2 in zip(regular_values[:-1], regular_values[1:]):
-    plt.axvline(0.5 * (center_1 + center_2), color="b", linestyle="--")
-
-plt.show()
+print(f"The dimension of the image is {raccoon_face.shape}")
+print(f"The data used to encode the image is of type {raccoon_face.dtype}")
+print(f"The number of bytes taken in RAM is {raccoon_face.nbytes}")
+
+# %%
+# Thus the image is a 2D array of 768 pixels in height and 1024 pixels in width. Each
+# value is an 8-bit unsigned integer, which means that the image is encoded using 8
+# bits per pixel. The total memory usage of the image is about 786 kilobytes (1 byte
+# equals 8 bits).
+#
+# Using 8-bit unsigned integers means that the image is encoded using at most 256
+# different shades of gray. We can check the distribution of these values.
+import matplotlib.pyplot as plt
+
+fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
+
+ax[0].imshow(raccoon_face, cmap=plt.cm.gray)
+ax[0].axis("off")
+ax[0].set_title("Rendering of the image")
+ax[1].hist(raccoon_face.ravel(), bins=256)
+ax[1].set_xlabel("Pixel value")
+ax[1].set_ylabel("Count of pixels")
+ax[1].set_title("Distribution of the pixel values")
+_ = fig.suptitle("Original image of a raccoon face")
+
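As a quick back-of-the-envelope check of the memory figures quoted above, the short
snippet below recomputes the expected footprint from the image shape and dtype
(illustrative only; it merely reuses the `raccoon_face` array loaded in the example):

import numpy as np

# 768 * 1024 pixels, each stored as one 8-bit unsigned integer (1 byte),
# gives 786432 bytes, i.e. roughly 786 kB.
expected_nbytes = 768 * 1024 * np.dtype(np.uint8).itemsize
print(expected_nbytes)                         # 786432
print(expected_nbytes == raccoon_face.nbytes)  # True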
+# %%
+# Compression via vector quantization
+# -----------------------------------
+#
+# The idea behind compression via vector quantization is to reduce the number of
+# gray levels used to represent an image. For instance, we can use 8 values instead
+# of 256 values. Therefore, we could use 3 bits instead of 8 bits to encode a
+# single pixel and thus reduce the memory usage by a factor of roughly 2.7 (see
+# the short check after this text block). We will come back to this memory usage
+# later.
+#
+# Encoding strategy
+# """""""""""""""""
+#
+# The compression can be done using a
+# :class:`~sklearn.preprocessing.KBinsDiscretizer`. We need to choose a strategy
+# to define the 8 gray values to sub-sample. The simplest strategy is to define
+# them equally spaced, which corresponds to setting `strategy="uniform"`. From
+# the previous histogram, we know that this strategy is certainly not optimal.
+
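A minimal numeric check of the bit arithmetic above (illustrative only):

import numpy as np

# 8 gray levels can be indexed with log2(8) = 3 bits, so the best-case
# compression factor with respect to 8 bits per pixel is 8 / 3, about 2.7.
n_bits_needed = int(np.log2(8))
print(n_bits_needed)      # 3
print(8 / n_bits_needed)  # 2.666...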
+from sklearn.preprocessing import KBinsDiscretizer
+
+n_bins = 8
+encoder = KBinsDiscretizer(
+    n_bins=n_bins, encode="ordinal", strategy="uniform", random_state=0
+)
+compressed_raccoon_uniform = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
+    raccoon_face.shape
+)
+
+fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
+ax[0].imshow(compressed_raccoon_uniform, cmap=plt.cm.gray)
+ax[0].axis("off")
+ax[0].set_title("Rendering of the image")
+ax[1].hist(compressed_raccoon_uniform.ravel(), bins=256)
+ax[1].set_xlabel("Pixel value")
+ax[1].set_ylabel("Count of pixels")
+ax[1].set_title("Sub-sampled distribution of the pixel values")
+_ = fig.suptitle("Raccoon face compressed using 3 bits and a uniform strategy")
+
+# %%
+# Qualitatively, we can spot some small regions where we see the effect of the
+# compression (e.g. the leaves in the bottom right corner). Overall, though, the
+# resulting image still looks good.
+#
+# We observe that the distribution of pixel values has been mapped to 8 different
+# values. We can check the correspondence between these values and the original
+# pixel values.
+
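Before looking at the bin edges, here is a quick illustrative check that the
compressed image indeed contains exactly 8 distinct codes (it only reuses the
`compressed_raccoon_uniform` array computed above):

import numpy as np

# The ordinal encoding maps each pixel to one of the bin indices 0, 1, ..., 7.
print(np.unique(compressed_raccoon_uniform))  # [0. 1. 2. 3. 4. 5. 6. 7.]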
+bin_edges = encoder.bin_edges_[0]
+bin_center = bin_edges[:-1] + (bin_edges[1:] - bin_edges[:-1]) / 2
+bin_center
+
+# %%
+_, ax = plt.subplots()
+ax.hist(raccoon_face.ravel(), bins=256)
+color = "tab:orange"
+for center in bin_center:
+    ax.axvline(center, color=color)
+    ax.text(center - 10, ax.get_ybound()[1] + 100, f"{center:.1f}", color=color)
+
+# %%
+# As previously stated, the uniform sampling strategy is not optimal. Notice, for
+# instance, that the bin mapped to the value 7 contains only a small number of
+# pixels, whereas the bin mapped to the value 3 accounts for a large number of
+# them. We can instead use a clustering strategy such as k-means to find a better
+# mapping.
+
+encoder = KBinsDiscretizer(
+    n_bins=n_bins, encode="ordinal", strategy="kmeans", random_state=0
+)
+compressed_raccoon_kmeans = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
+    raccoon_face.shape
+)
+
+fig, ax = plt.subplots(ncols=2, figsize=(12, 4))
+ax[0].imshow(compressed_raccoon_kmeans, cmap=plt.cm.gray)
+ax[0].axis("off")
+ax[0].set_title("Rendering of the image")
+ax[1].hist(compressed_raccoon_kmeans.ravel(), bins=256)
+ax[1].set_xlabel("Pixel value")
+ax[1].set_ylabel("Number of pixels")
+ax[1].set_title("Distribution of the pixel values")
+_ = fig.suptitle("Raccoon face compressed using 3 bits and a K-means strategy")
+
+# %%
+bin_edges = encoder.bin_edges_[0]
+bin_center = bin_edges[:-1] + (bin_edges[1:] - bin_edges[:-1]) / 2
+bin_center
+
+# %%
+_, ax = plt.subplots()
+ax.hist(raccoon_face.ravel(), bins=256)
+color = "tab:orange"
+for center in bin_center:
+    ax.axvline(center, color=color)
+    ax.text(center - 10, ax.get_ybound()[1] + 100, f"{center:.1f}", color=color)
+
+# %%
+# The counts in the bins are now more balanced and their centers are no longer
+# equally spaced. Note that we could enforce the same number of pixels per bin
+# by using `strategy="quantile"` instead of `strategy="kmeans"`.
+#
+# Memory footprint
+# """"""""""""""""
+#
+# We previously stated that we should reduce the memory usage by a factor of
+# roughly 2.7. Let's verify it.
+
+print(f"The number of bytes taken in RAM is {compressed_raccoon_kmeans.nbytes}")
+print(f"Compression ratio: {compressed_raccoon_kmeans.nbytes / raccoon_face.nbytes}")
+
+# %%
+# It is quite surprising to see that our compressed image takes 8 times more
+# memory than the original image. This is indeed the opposite of what we
+# expected. The reason is mainly the type of data used to encode the image.
+
+print(f"Type of the compressed image: {compressed_raccoon_kmeans.dtype}")
+
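To make the factor of 8 concrete, here is a small illustrative comparison of the two
dtypes involved (again, only reusing names already defined in the example):

import numpy as np

# float64 uses 8 bytes per value, uint8 uses 1 byte per value, hence the x8 blow-up.
print(np.dtype(np.float64).itemsize / np.dtype(raccoon_face.dtype).itemsize)  # 8.0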
+# %%
+# Indeed, the output of the :class:`~sklearn.preprocessing.KBinsDiscretizer` is an
+# array of 64-bit floats, which takes 8 times more memory per pixel. However, we
+# use this 64-bit float representation to encode only 8 values. We would save
+# memory only if we cast the compressed image into an array of 3-bit integers,
+# for instance with the method `numpy.ndarray.astype`. However, a 3-bit integer
+# representation does not exist, so to encode the 8 values we would need to use
+# the 8-bit unsigned integer representation as well.
+#
+# In practice, observing a memory gain would require the original image to be in
+# a 64-bit float representation.
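A minimal sketch of the casting mentioned above (illustrative only; it reuses the
`compressed_raccoon_kmeans` array computed earlier):

import numpy as np

# Cast the ordinal codes (0 to 7) to 8-bit unsigned integers: since no 3-bit dtype
# exists, uint8 is the smallest representation that can hold them.
compressed_uint8 = compressed_raccoon_kmeans.astype(np.uint8)
print(compressed_uint8.nbytes)                        # 786432, same as the original
print(compressed_uint8.nbytes / raccoon_face.nbytes)  # 1.0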