scikit-learn
diff --git a/‎dev/_downloads/006fc185672e58b056a5c134db26935c/plot_coin_segmentation.ipynb
Lines changed: 4 additions & 4 deletions b/‎dev/_downloads/006fc185672e58b056a5c134db26935c/plot_coin_segmentation.ipynb
Lines changed: 4 additions & 4 deletions
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
1.32 KB b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
1.32 KB
diff --git a/‎dev/_downloads/2e86a4838807f09bbbb529d9643d45ab/plot_coin_segmentation.py
Lines changed: 42 additions & 16 deletions b/‎dev/_downloads/2e86a4838807f09bbbb529d9643d45ab/plot_coin_segmentation.py
Lines changed: 42 additions & 16 deletions
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
1.35 KB b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
1.35 KB
diff --git a/‎dev/_downloads/scikit-learn-docs.zip
67.1 KB b/‎dev/_downloads/scikit-learn-docs.zip
67.1 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
19 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
19 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
372 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
372 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
100 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
100 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
335 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
335 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
13 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
13 Bytes
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\n# Segmenting the picture of greek coins in regions\n\nThis example uses `spectral_clustering` on a graph created from\nvoxel-to-voxel difference on an image to break this image into multiple\npartly-homogeneous regions.\n\nThis procedure (spectral clustering on an image) is an efficient\napproximate solution for finding normalized graph cuts.\n\nThere are two options to assign labels:\n\n* with 'kmeans' spectral clustering will cluster samples in the embedding space\n  using a kmeans algorithm\n* whereas 'discrete' will iteratively search for the closest partition\n  space to the embedding space.\n"
+        "\n# Segmenting the picture of greek coins in regions\n\nThis example uses `spectral_clustering` on a graph created from\nvoxel-to-voxel difference on an image to break this image into multiple\npartly-homogeneous regions.\n\nThis procedure (spectral clustering on an image) is an efficient\napproximate solution for finding normalized graph cuts.\n\nThere are three options to assign labels:\n\n* 'kmeans' spectral clustering clusters samples in the embedding space\n  using a kmeans algorithm\n* 'discretize' iteratively searches for the closest partition\n  space to the embedding space of spectral clustering.\n* 'cluster_qr' assigns labels using the QR factorization with pivoting\n  that directly determines the partition in the embedding space.\n"
       ]
     },
     {
@@ -26,14 +26,14 @@
       },
       "outputs": [],
       "source": [
-        "# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version(\"0.14\"):\n    rescale_params = {\"anti_aliasing\": False, \"multichannel\": False}\nelse:\n    rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\", **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
+        "# Author: Gael Varoquaux <[email protected]>\n#         Brian Cheung\n#         Andrew Knyazev <[email protected]>\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version(\"0.14\"):\n    rescale_params = {\"anti_aliasing\": False, \"multichannel\": False}\nelse:\n    rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\", **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# The number of segmented regions to display needs to be chosen manually.\n# The current version of 'spectral_clustering' does not support determining\n# the number of good quality clusters automatically.\nn_regions = 26"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Visualize the resulting regions\n\n"
+        "Compute and visualize the resulting regions\n\n"
       ]
     },
     {
@@ -44,7 +44,7 @@
       },
       "outputs": [],
       "source": [
-        "for assign_labels in (\"kmeans\", \"discretize\"):\n    t0 = time.time()\n    labels = spectral_clustering(\n        graph, n_clusters=N_REGIONS, assign_labels=assign_labels, random_state=42\n    )\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n    for l in range(N_REGIONS):\n        plt.contour(labels == l, colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n    plt.xticks(())\n    plt.yticks(())\n    title = \"Spectral clustering: %s, %.2fs\" % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\nplt.show()"
+        "# Computing a few extra eigenvectors may speed up the eigen_solver.\n# The spectral clustering quality may also benefit from requesting\n# extra regions for segmentation.\nn_regions_plus = 3\n\n# Apply spectral clustering using the default eigen_solver='arpack'.\n# Any implemented solver can be used: eigen_solver='arpack', 'lobpcg', or 'amg'.\n# Choosing eigen_solver='amg' requires an extra package called 'pyamg'.\n# The quality of segmentation and the speed of calculations are mostly determined\n# by the choice of the solver and the value of the tolerance 'eigen_tol'.\n# TODO: varying eigen_tol seems to have no effect for 'lobpcg' and 'amg' #21243.\nfor assign_labels in (\"kmeans\", \"discretize\", \"cluster_qr\"):\n    t0 = time.time()\n    labels = spectral_clustering(\n        graph,\n        n_clusters=(n_regions + n_regions_plus),\n        eigen_tol=1e-7,\n        assign_labels=assign_labels,\n        random_state=42,\n    )\n\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n\n    plt.xticks(())\n    plt.yticks(())\n    title = \"Spectral clustering: %s, %.2fs\" % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\n    for l in range(n_regions):\n        colors = [plt.cm.nipy_spectral((l + 4) / float(n_regions + 4))]\n        plt.contour(labels == l, colors=colors)\n        # To view individual segments as they appear, insert plt.pause(0.5) here\nplt.show()\n\n# TODO: After #21194 is merged and #21243 is fixed, check which eigen_solver\n# is the best and set eigen_solver='arpack', 'lobpcg', or 'amg' and eigen_tol\n# explicitly in this example."
       ]
     }
   ],
 
@@ -10,16 +10,19 @@
 This procedure (spectral clustering on an image) is an efficient
 approximate solution for finding normalized graph cuts.
 
-There are two options to assign labels:
+There are three options to assign labels:
 
-* with 'kmeans' spectral clustering will cluster samples in the embedding space
+* 'kmeans' spectral clustering clusters samples in the embedding space
   using a kmeans algorithm
-* whereas 'discrete' will iteratively search for the closest partition
-  space to the embedding space.
-
+* 'discretize' iteratively searches for the closest partition
+  space to the embedding space of spectral clustering.
+* 'cluster_qr' assigns labels using the QR factorization with pivoting
+  that directly determines the partition in the embedding space.
 """
 
-# Author: Gael Varoquaux <[email protected]>, Brian Cheung
+# Author: Gael Varoquaux <[email protected]>
+#         Brian Cheung
+#         Andrew Knyazev <[email protected]>
 # License: BSD 3 clause
 
 import time
@@ -61,28 +64,51 @@
 eps = 1e-6
 graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps
 
-# Apply spectral clustering (this step goes much faster if you have pyamg
-# installed)
-N_REGIONS = 25
+# The number of segmented regions to display needs to be chosen manually.
+# The current version of 'spectral_clustering' does not support determining
+# the number of good quality clusters automatically.
+n_regions = 26
 
 # %%
-# Visualize the resulting regions
-
-for assign_labels in ("kmeans", "discretize"):
+# Compute and visualize the resulting regions
+
+# Computing a few extra eigenvectors may speed up the eigen_solver.
+# The spectral clustering quality may also benefit from requesting
+# extra regions for segmentation.
+n_regions_plus = 3
+
+# Apply spectral clustering using the default eigen_solver='arpack'.
+# Any implemented solver can be used: eigen_solver='arpack', 'lobpcg', or 'amg'.
+# Choosing eigen_solver='amg' requires an extra package called 'pyamg'.
+# The quality of segmentation and the speed of calculations are mostly determined
+# by the choice of the solver and the value of the tolerance 'eigen_tol'.
+# TODO: varying eigen_tol seems to have no effect for 'lobpcg' and 'amg' #21243.
+for assign_labels in ("kmeans", "discretize", "cluster_qr"):
     t0 = time.time()
     labels = spectral_clustering(
-        graph, n_clusters=N_REGIONS, assign_labels=assign_labels, random_state=42
+        graph,
+        n_clusters=(n_regions + n_regions_plus),
+        eigen_tol=1e-7,
+        assign_labels=assign_labels,
+        random_state=42,
     )
+
     t1 = time.time()
     labels = labels.reshape(rescaled_coins.shape)
-
     plt.figure(figsize=(5, 5))
     plt.imshow(rescaled_coins, cmap=plt.cm.gray)
-    for l in range(N_REGIONS):
-        plt.contour(labels == l, colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])
+
     plt.xticks(())
     plt.yticks(())
     title = "Spectral clustering: %s, %.2fs" % (assign_labels, (t1 - t0))
     print(title)
     plt.title(title)
+    for l in range(n_regions):
+        colors = [plt.cm.nipy_spectral((l + 4) / float(n_regions + 4))]
+        plt.contour(labels == l, colors=colors)
+        # To view individual segments as they appear, insert plt.pause(0.5) here
 plt.show()
+
+# TODO: After #21194 is merged and #21243 is fixed, check which eigen_solver
+# is the best and set eigen_solver='arpack', 'lobpcg', or 'amg' and eigen_tol
+# explicitly in this example.
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@`
`15`	`15`	`"cell_type": "markdown",`
`16`	`16`	`"metadata": {},`
`17`	`17`	`"source": [`
`18`		- "\n# Segmenting the picture of greek coins in regions\n\nThis example uses `spectral_clustering` on a graph created from\nvoxel-to-voxel difference on an image to break this image into multiple\npartly-homogeneous regions.\n\nThis procedure (spectral clustering on an image) is an efficient\napproximate solution for finding normalized graph cuts.\n\nThere are two options to assign labels:\n\n* with 'kmeans' spectral clustering will cluster samples in the embedding space\n using a kmeans algorithm\n* whereas 'discrete' will iteratively search for the closest partition\n space to the embedding space.\n"
	`18`	+ "\n# Segmenting the picture of greek coins in regions\n\nThis example uses `spectral_clustering` on a graph created from\nvoxel-to-voxel difference on an image to break this image into multiple\npartly-homogeneous regions.\n\nThis procedure (spectral clustering on an image) is an efficient\napproximate solution for finding normalized graph cuts.\n\nThere are three options to assign labels:\n\n* 'kmeans' spectral clustering clusters samples in the embedding space\n using a kmeans algorithm\n* 'discretize' iteratively searches for the closest partition\n space to the embedding space of spectral clustering.\n* 'cluster_qr' assigns labels using the QR factorization with pivoting\n that directly determines the partition in the embedding space.\n"
`19`	`19`	`]`
`20`	`20`	`},`
`21`	`21`	`{`
`@@ -26,14 +26,14 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version(\"0.14\"):\n rescale_params = {\"anti_aliasing\": False, \"multichannel\": False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\", **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
	`29`	+ "# Author: Gael Varoquaux <[email protected]>\n# Brian Cheung\n# Andrew Knyazev <[email protected]>\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version(\"0.14\"):\n rescale_params = {\"anti_aliasing\": False, \"multichannel\": False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\", **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# The number of segmented regions to display needs to be chosen manually.\n# The current version of 'spectral_clustering' does not support determining\n# the number of good quality clusters automatically.\nn_regions = 26"
`30`	`30`	`]`
`31`	`31`	`},`
`32`	`32`	`{`
`33`	`33`	`"cell_type": "markdown",`
`34`	`34`	`"metadata": {},`
`35`	`35`	`"source": [`
`36`		`- "Visualize the resulting regions\n\n"`
	`36`	`+ "Compute and visualize the resulting regions\n\n"`
`37`	`37`	`]`
`38`	`38`	`},`
`39`	`39`	`{`
`@@ -44,7 +44,7 @@`
`44`	`44`	`},`
`45`	`45`	`"outputs": [],`
`46`	`46`	`"source": [`
`47`		- "for assign_labels in (\"kmeans\", \"discretize\"):\n t0 = time.time()\n labels = spectral_clustering(\n graph, n_clusters=N_REGIONS, assign_labels=assign_labels, random_state=42\n )\n t1 = time.time()\n labels = labels.reshape(rescaled_coins.shape)\n\n plt.figure(figsize=(5, 5))\n plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n for l in range(N_REGIONS):\n plt.contour(labels == l, colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n plt.xticks(())\n plt.yticks(())\n title = \"Spectral clustering: %s, %.2fs\" % (assign_labels, (t1 - t0))\n print(title)\n plt.title(title)\nplt.show()"
	`47`	+ "# Computing a few extra eigenvectors may speed up the eigen_solver.\n# The spectral clustering quality may also benefit from requesting\n# extra regions for segmentation.\nn_regions_plus = 3\n\n# Apply spectral clustering using the default eigen_solver='arpack'.\n# Any implemented solver can be used: eigen_solver='arpack', 'lobpcg', or 'amg'.\n# Choosing eigen_solver='amg' requires an extra package called 'pyamg'.\n# The quality of segmentation and the speed of calculations are mostly determined\n# by the choice of the solver and the value of the tolerance 'eigen_tol'.\n# TODO: varying eigen_tol seems to have no effect for 'lobpcg' and 'amg' #21243.\nfor assign_labels in (\"kmeans\", \"discretize\", \"cluster_qr\"):\n t0 = time.time()\n labels = spectral_clustering(\n graph,\n n_clusters=(n_regions + n_regions_plus),\n eigen_tol=1e-7,\n assign_labels=assign_labels,\n random_state=42,\n )\n\n t1 = time.time()\n labels = labels.reshape(rescaled_coins.shape)\n plt.figure(figsize=(5, 5))\n plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n\n plt.xticks(())\n plt.yticks(())\n title = \"Spectral clustering: %s, %.2fs\" % (assign_labels, (t1 - t0))\n print(title)\n plt.title(title)\n for l in range(n_regions):\n colors = [plt.cm.nipy_spectral((l + 4) / float(n_regions + 4))]\n plt.contour(labels == l, colors=colors)\n # To view individual segments as they appear, insert plt.pause(0.5) here\nplt.show()\n\n# TODO: After #21194 is merged and #21243 is fixed, check which eigen_solver\n# is the best and set eigen_solver='arpack', 'lobpcg', or 'amg' and eigen_tol\n# explicitly in this example."
`48`	`48`	`]`
`49`	`49`	`}`
`50`	`50`	`],`