
Commit de7c9ca

Pushing the docs to dev/ for branch: main, commit 6a4a6afbc4afc827cb2ed7a9158142569ae0a7db
1 parent a552444 commit de7c9ca

1,209 files changed: +4605 -4471 lines


dev/_downloads/0af0092c704518874f82d38d725bb97f/plot_dict_face_patches.ipynb

Lines changed: 44 additions & 1 deletion
@@ -18,6 +18,49 @@
 "\n# Online learning of a dictionary of parts of faces\n\nThis example uses a large dataset of faces to learn a set of 20 x 20\nimages patches that constitute faces.\n\nFrom the programming standpoint, it is interesting because it shows how\nto use the online API of the scikit-learn to process a very large\ndataset by chunks. The way we proceed is that we load an image at a time\nand extract randomly 50 patches from this image. Once we have accumulated\n500 of these patches (using 10 images), we run the\n:func:`~sklearn.cluster.MiniBatchKMeans.partial_fit` method\nof the online KMeans object, MiniBatchKMeans.\n\nThe verbose setting on the MiniBatchKMeans enables us to see that some\nclusters are reassigned during the successive calls to\npartial-fit. This is because the number of patches that they represent\nhas become too low, and it is better to choose a random new\ncluster.\n"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Load the data\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"from sklearn import datasets\n\nfaces = datasets.fetch_olivetti_faces()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Learn the dictionary of images\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import time\n\nimport numpy as np\n\nfrom sklearn.cluster import MiniBatchKMeans\nfrom sklearn.feature_extraction.image import extract_patches_2d\n\nprint(\"Learning the dictionary... \")\nrng = np.random.RandomState(0)\nkmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)\npatch_size = (20, 20)\n\nbuffer = []\nt0 = time.time()\n\n# The online learning part: cycle over the whole dataset 6 times\nindex = 0\nfor _ in range(6):\n    for img in faces.images:\n        data = extract_patches_2d(img, patch_size, max_patches=50, random_state=rng)\n        data = np.reshape(data, (len(data), -1))\n        buffer.append(data)\n        index += 1\n        if index % 10 == 0:\n            data = np.concatenate(buffer, axis=0)\n            data -= np.mean(data, axis=0)\n            data /= np.std(data, axis=0)\n            kmeans.partial_fit(data)\n            buffer = []\n        if index % 100 == 0:\n            print(\"Partial fit of %4i out of %i\" % (index, 6 * len(faces.images)))\n\ndt = time.time() - t0\nprint(\"done in %.2fs.\" % dt)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Plot the results\n\n"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -26,7 +69,7 @@
 },
 "outputs": [],
 "source": [
-"import time\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\n\nfrom sklearn import datasets\nfrom sklearn.cluster import MiniBatchKMeans\nfrom sklearn.feature_extraction.image import extract_patches_2d\n\nfaces = datasets.fetch_olivetti_faces()\n\n# #############################################################################\n# Learn the dictionary of images\n\nprint(\"Learning the dictionary... \")\nrng = np.random.RandomState(0)\nkmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)\npatch_size = (20, 20)\n\nbuffer = []\nt0 = time.time()\n\n# The online learning part: cycle over the whole dataset 6 times\nindex = 0\nfor _ in range(6):\n    for img in faces.images:\n        data = extract_patches_2d(img, patch_size, max_patches=50, random_state=rng)\n        data = np.reshape(data, (len(data), -1))\n        buffer.append(data)\n        index += 1\n        if index % 10 == 0:\n            data = np.concatenate(buffer, axis=0)\n            data -= np.mean(data, axis=0)\n            data /= np.std(data, axis=0)\n            kmeans.partial_fit(data)\n            buffer = []\n        if index % 100 == 0:\n            print(\"Partial fit of %4i out of %i\" % (index, 6 * len(faces.images)))\n\ndt = time.time() - t0\nprint(\"done in %.2fs.\" % dt)\n\n# #############################################################################\n# Plot the results\nplt.figure(figsize=(4.2, 4))\nfor i, patch in enumerate(kmeans.cluster_centers_):\n    plt.subplot(9, 9, i + 1)\n    plt.imshow(patch.reshape(patch_size), cmap=plt.cm.gray, interpolation=\"nearest\")\n    plt.xticks(())\n    plt.yticks(())\n\n\nplt.suptitle(\n    \"Patches of faces\\nTrain time %.1fs on %d patches\" % (dt, 8 * len(faces.images)),\n    fontsize=16,\n)\nplt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)\n\nplt.show()"
+"import matplotlib.pyplot as plt\n\nplt.figure(figsize=(4.2, 4))\nfor i, patch in enumerate(kmeans.cluster_centers_):\n    plt.subplot(9, 9, i + 1)\n    plt.imshow(patch.reshape(patch_size), cmap=plt.cm.gray, interpolation=\"nearest\")\n    plt.xticks(())\n    plt.yticks(())\n\n\nplt.suptitle(\n    \"Patches of faces\\nTrain time %.1fs on %d patches\" % (dt, 8 * len(faces.images)),\n    fontsize=16,\n)\nplt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)\n\nplt.show()"
 ]
 }
 ],
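
The docstring in the notebook above spells out the online pattern this example relies on: extract 50 random patches from each image, buffer them until 500 have accumulated (10 images), then pass the standardized chunk to MiniBatchKMeans.partial_fit. A minimal sketch of that loop, condensed here to a single pass over the Olivetti faces (the shipped example cycles over the dataset 6 times and adds timing and progress output):

import numpy as np

from sklearn.cluster import MiniBatchKMeans
from sklearn.datasets import fetch_olivetti_faces
from sklearn.feature_extraction.image import extract_patches_2d

rng = np.random.RandomState(0)
faces = fetch_olivetti_faces()
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)
patch_size = (20, 20)

buffer = []
for index, img in enumerate(faces.images, start=1):
    # 50 random patches per image; 10 images make one 500-patch chunk.
    patches = extract_patches_2d(img, patch_size, max_patches=50, random_state=rng)
    buffer.append(patches.reshape(len(patches), -1))
    if index % 10 == 0:
        chunk = np.concatenate(buffer, axis=0)
        # Standardize the chunk before the incremental k-means update.
        chunk = (chunk - chunk.mean(axis=0)) / chunk.std(axis=0)
        kmeans.partial_fit(chunk)
        buffer = []

With verbose=True, MiniBatchKMeans reports when sparsely populated clusters are reassigned to random new centers across successive partial_fit calls, which is the behaviour the docstring points out.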

dev/_downloads/7f205ae026a8c21fcab1e6a86cfadb7d/plot_dict_face_patches.py

Lines changed: 18 additions & 10 deletions
@@ -1,6 +1,6 @@
 """
 Online learning of a dictionary of parts of faces
-==================================================
+=================================================

 This example uses a large dataset of faces to learn a set of 20 x 20
 images patches that constitute faces.
@@ -21,20 +21,24 @@

 """

-import time
-
-import matplotlib.pyplot as plt
-import numpy as np
-
+# %%
+# Load the data
+# -------------

 from sklearn import datasets
-from sklearn.cluster import MiniBatchKMeans
-from sklearn.feature_extraction.image import extract_patches_2d

 faces = datasets.fetch_olivetti_faces()

-# #############################################################################
+# %%
 # Learn the dictionary of images
+# ------------------------------
+
+import time
+
+import numpy as np
+
+from sklearn.cluster import MiniBatchKMeans
+from sklearn.feature_extraction.image import extract_patches_2d

 print("Learning the dictionary... ")
 rng = np.random.RandomState(0)
@@ -64,8 +68,12 @@
 dt = time.time() - t0
 print("done in %.2fs." % dt)

-# #############################################################################
+# %%
 # Plot the results
+# ----------------
+
+import matplotlib.pyplot as plt
+
 plt.figure(figsize=(4.2, 4))
 for i, patch in enumerate(kmeans.cluster_centers_):
     plt.subplot(9, 9, i + 1)
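
The .py restructuring above follows sphinx-gallery's notebook-style layout: each "# %%" separator with a commented, underlined title becomes one of the markdown cells seen in the .ipynb diff earlier, and each import now sits in the first code block that uses it. A schematic of the resulting script, with the section bodies elided (the full code is in the hunks above and in the notebook diff):

"""
Online learning of a dictionary of parts of faces
=================================================

(docstring body elided; it is rendered as the example's introduction)
"""

# %%
# Load the data
# -------------
# Comment lines that follow a "# %%" marker become a text (markdown) cell.

from sklearn import datasets

faces = datasets.fetch_olivetti_faces()

# %%
# Learn the dictionary of images
# ------------------------------

# ... MiniBatchKMeans partial_fit loop, as shown in the diff above ...

# %%
# Plot the results
# ----------------

# ... plotting of kmeans.cluster_centers_, as shown in the diff above ...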

dev/_downloads/scikit-learn-docs.zip

-5.48 KB
Binary file not shown.

0 commit comments
