
Commit de7c9ca

Pushing the docs to dev/ for branch: main, commit 6a4a6afbc4afc827cb2ed7a9158142569ae0a7db
1 parent a552444 commit de7c9ca

1,209 files changed: +4605 -4471 lines


dev/_downloads/0af0092c704518874f82d38d725bb97f/plot_dict_face_patches.ipynb

Lines changed: 44 additions & 1 deletion
@@ -18,6 +18,49 @@
 "\n# Online learning of a dictionary of parts of faces\n\nThis example uses a large dataset of faces to learn a set of 20 x 20\nimages patches that constitute faces.\n\nFrom the programming standpoint, it is interesting because it shows how\nto use the online API of the scikit-learn to process a very large\ndataset by chunks. The way we proceed is that we load an image at a time\nand extract randomly 50 patches from this image. Once we have accumulated\n500 of these patches (using 10 images), we run the\n:func:`~sklearn.cluster.MiniBatchKMeans.partial_fit` method\nof the online KMeans object, MiniBatchKMeans.\n\nThe verbose setting on the MiniBatchKMeans enables us to see that some\nclusters are reassigned during the successive calls to\npartial-fit. This is because the number of patches that they represent\nhas become too low, and it is better to choose a random new\ncluster.\n"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Load the data\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"from sklearn import datasets\n\nfaces = datasets.fetch_olivetti_faces()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Learn the dictionary of images\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import time\n\nimport numpy as np\n\nfrom sklearn.cluster import MiniBatchKMeans\nfrom sklearn.feature_extraction.image import extract_patches_2d\n\nprint(\"Learning the dictionary... \")\nrng = np.random.RandomState(0)\nkmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)\npatch_size = (20, 20)\n\nbuffer = []\nt0 = time.time()\n\n# The online learning part: cycle over the whole dataset 6 times\nindex = 0\nfor _ in range(6):\n    for img in faces.images:\n        data = extract_patches_2d(img, patch_size, max_patches=50, random_state=rng)\n        data = np.reshape(data, (len(data), -1))\n        buffer.append(data)\n        index += 1\n        if index % 10 == 0:\n            data = np.concatenate(buffer, axis=0)\n            data -= np.mean(data, axis=0)\n            data /= np.std(data, axis=0)\n            kmeans.partial_fit(data)\n            buffer = []\n        if index % 100 == 0:\n            print(\"Partial fit of %4i out of %i\" % (index, 6 * len(faces.images)))\n\ndt = time.time() - t0\nprint(\"done in %.2fs.\" % dt)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Plot the results\n\n"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -26,7 +69,7 @@
 },
 "outputs": [],
 "source": [
-"import time\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\n\nfrom sklearn import datasets\nfrom sklearn.cluster import MiniBatchKMeans\nfrom sklearn.feature_extraction.image import extract_patches_2d\n\nfaces = datasets.fetch_olivetti_faces()\n\n# #############################################################################\n# Learn the dictionary of images\n\nprint(\"Learning the dictionary... \")\nrng = np.random.RandomState(0)\nkmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)\npatch_size = (20, 20)\n\nbuffer = []\nt0 = time.time()\n\n# The online learning part: cycle over the whole dataset 6 times\nindex = 0\nfor _ in range(6):\n    for img in faces.images:\n        data = extract_patches_2d(img, patch_size, max_patches=50, random_state=rng)\n        data = np.reshape(data, (len(data), -1))\n        buffer.append(data)\n        index += 1\n        if index % 10 == 0:\n            data = np.concatenate(buffer, axis=0)\n            data -= np.mean(data, axis=0)\n            data /= np.std(data, axis=0)\n            kmeans.partial_fit(data)\n            buffer = []\n        if index % 100 == 0:\n            print(\"Partial fit of %4i out of %i\" % (index, 6 * len(faces.images)))\n\ndt = time.time() - t0\nprint(\"done in %.2fs.\" % dt)\n\n# #############################################################################\n# Plot the results\nplt.figure(figsize=(4.2, 4))\nfor i, patch in enumerate(kmeans.cluster_centers_):\n    plt.subplot(9, 9, i + 1)\n    plt.imshow(patch.reshape(patch_size), cmap=plt.cm.gray, interpolation=\"nearest\")\n    plt.xticks(())\n    plt.yticks(())\n\n\nplt.suptitle(\n    \"Patches of faces\\nTrain time %.1fs on %d patches\" % (dt, 8 * len(faces.images)),\n    fontsize=16,\n)\nplt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)\n\nplt.show()"
+"import matplotlib.pyplot as plt\n\nplt.figure(figsize=(4.2, 4))\nfor i, patch in enumerate(kmeans.cluster_centers_):\n    plt.subplot(9, 9, i + 1)\n    plt.imshow(patch.reshape(patch_size), cmap=plt.cm.gray, interpolation=\"nearest\")\n    plt.xticks(())\n    plt.yticks(())\n\n\nplt.suptitle(\n    \"Patches of faces\\nTrain time %.1fs on %d patches\" % (dt, 8 * len(faces.images)),\n    fontsize=16,\n)\nplt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)\n\nplt.show()"
 ]
 }
 ],
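
The docstring in the notebook above spells out the online pattern this example relies on: extract 50 random patches from each image, buffer them until 500 have accumulated (10 images), then pass the standardized chunk to MiniBatchKMeans.partial_fit. A minimal sketch of that loop, condensed here to a single pass over the Olivetti faces (the shipped example cycles over the dataset 6 times and adds timing and progress output):

import numpy as np

from sklearn.cluster import MiniBatchKMeans
from sklearn.datasets import fetch_olivetti_faces
from sklearn.feature_extraction.image import extract_patches_2d

rng = np.random.RandomState(0)
faces = fetch_olivetti_faces()
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)
patch_size = (20, 20)

buffer = []
for index, img in enumerate(faces.images, start=1):
    # 50 random patches per image; 10 images make one 500-patch chunk.
    patches = extract_patches_2d(img, patch_size, max_patches=50, random_state=rng)
    buffer.append(patches.reshape(len(patches), -1))
    if index % 10 == 0:
        chunk = np.concatenate(buffer, axis=0)
        # Standardize the chunk before the incremental k-means update.
        chunk = (chunk - chunk.mean(axis=0)) / chunk.std(axis=0)
        kmeans.partial_fit(chunk)
        buffer = []

With verbose=True, MiniBatchKMeans reports when sparsely populated clusters are reassigned to random new centers across successive partial_fit calls, which is the behaviour the docstring points out.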

dev/_downloads/7f205ae026a8c21fcab1e6a86cfadb7d/plot_dict_face_patches.py

Lines changed: 18 additions & 10 deletions
@@ -1,6 +1,6 @@
 """
 Online learning of a dictionary of parts of faces
-==================================================
+=================================================

 This example uses a large dataset of faces to learn a set of 20 x 20
 images patches that constitute faces.
@@ -21,20 +21,24 @@

 """

-import time
-
-import matplotlib.pyplot as plt
-import numpy as np
-
+# %%
+# Load the data
+# -------------

 from sklearn import datasets
-from sklearn.cluster import MiniBatchKMeans
-from sklearn.feature_extraction.image import extract_patches_2d

 faces = datasets.fetch_olivetti_faces()

-# #############################################################################
+# %%
 # Learn the dictionary of images
+# ------------------------------
+
+import time
+
+import numpy as np
+
+from sklearn.cluster import MiniBatchKMeans
+from sklearn.feature_extraction.image import extract_patches_2d

 print("Learning the dictionary... ")
 rng = np.random.RandomState(0)
@@ -64,8 +68,12 @@
 dt = time.time() - t0
 print("done in %.2fs." % dt)

-# #############################################################################
+# %%
 # Plot the results
+# ----------------
+
+import matplotlib.pyplot as plt
+
 plt.figure(figsize=(4.2, 4))
 for i, patch in enumerate(kmeans.cluster_centers_):
     plt.subplot(9, 9, i + 1)
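
The .py restructuring above follows sphinx-gallery's notebook-style layout: each "# %%" separator with a commented, underlined title becomes one of the markdown cells seen in the .ipynb diff earlier, and each import now sits in the first code block that uses it. A schematic of the resulting script, with the section bodies elided (the full code is in the hunks above and in the notebook diff):

"""
Online learning of a dictionary of parts of faces
=================================================

(docstring body elided; it is rendered as the example's introduction)
"""

# %%
# Load the data
# -------------
# Comment lines that follow a "# %%" marker become a text (markdown) cell.

from sklearn import datasets

faces = datasets.fetch_olivetti_faces()

# %%
# Learn the dictionary of images
# ------------------------------

# ... MiniBatchKMeans partial_fit loop, as shown in the diff above ...

# %%
# Plot the results
# ----------------

# ... plotting of kmeans.cluster_centers_, as shown in the diff above ...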

dev/_downloads/scikit-learn-docs.zip

-5.48 KB
Binary file not shown.

0 commit comments
