Skip to content

Commit ea917eb

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 13480ff627ccbcded0cad339108dcfc90076451a
1 parent 066d925 commit ea917eb

File tree

1,241 files changed

+4246
-4276
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,241 files changed

+4246
-4276
lines changed
Binary file not shown.

dev/_downloads/21ba44171c2107e5285a530bdf3dd0f6/plot_digits_linkage.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"cell_type": "markdown",
1616
"metadata": {},
1717
"source": [
18-
"\n# Various Agglomerative Clustering on a 2D embedding of digits\n\nAn illustration of various linkage option for agglomerative clustering on\na 2D embedding of the digits dataset.\n\nThe goal of this example is to show intuitively how the metrics behave, and\nnot to find good clusters for the digits. This is why the example works on a\n2D embedding.\n\nWhat this example shows us is the behavior \"rich getting richer\" of\nagglomerative clustering that tends to create uneven cluster sizes.\nThis behavior is pronounced for the average linkage strategy,\nthat ends up with a couple of singleton clusters, while in the case\nof single linkage we get a single central cluster with all other clusters\nbeing drawn from noise points around the fringes.\n"
18+
"\n# Various Agglomerative Clustering on a 2D embedding of digits\n\nAn illustration of various linkage option for agglomerative clustering on\na 2D embedding of the digits dataset.\n\nThe goal of this example is to show intuitively how the metrics behave, and\nnot to find good clusters for the digits. This is why the example works on a\n2D embedding.\n\nWhat this example shows us is the behavior \"rich getting richer\" of\nagglomerative clustering that tends to create uneven cluster sizes.\n\nThis behavior is pronounced for the average linkage strategy,\nthat ends up with a couple of clusters with few datapoints.\n\nThe case of single linkage is even more pathologic with a very\nlarge cluster covering most digits, an intermediate size (clean)\ncluster with most zero digits and all other clusters being drawn\nfrom noise points around the fringes.\n\nThe other linkage strategies lead to more evenly distributed\nclusters that are therefore likely to be less sensible to a\nrandom resampling of the dataset.\n"
1919
]
2020
},
2121
{
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Authors: Gael Varoquaux\n# License: BSD 3 clause (C) INRIA 2014\n\nfrom time import time\n\nimport numpy as np\nfrom scipy import ndimage\nfrom matplotlib import pyplot as plt\n\nfrom sklearn import manifold, datasets\n\nX, y = datasets.load_digits(return_X_y=True)\nn_samples, n_features = X.shape\n\nnp.random.seed(0)\n\n\ndef nudge_images(X, y):\n # Having a larger dataset shows more clearly the behavior of the\n # methods, but we multiply the size of the dataset only by 2, as the\n # cost of the hierarchical clustering methods are strongly\n # super-linear in n_samples\n shift = lambda x: ndimage.shift(\n x.reshape((8, 8)), 0.3 * np.random.normal(size=2), mode=\"constant\"\n ).ravel()\n X = np.concatenate([X, np.apply_along_axis(shift, 1, X)])\n Y = np.concatenate([y, y], axis=0)\n return X, Y\n\n\nX, y = nudge_images(X, y)\n\n\n# ----------------------------------------------------------------------\n# Visualize the clustering\ndef plot_clustering(X_red, labels, title=None):\n x_min, x_max = np.min(X_red, axis=0), np.max(X_red, axis=0)\n X_red = (X_red - x_min) / (x_max - x_min)\n\n plt.figure(figsize=(6, 4))\n for i in range(X_red.shape[0]):\n plt.text(\n X_red[i, 0],\n X_red[i, 1],\n str(y[i]),\n color=plt.cm.nipy_spectral(labels[i] / 10.0),\n fontdict={\"weight\": \"bold\", \"size\": 9},\n )\n\n plt.xticks([])\n plt.yticks([])\n if title is not None:\n plt.title(title, size=17)\n plt.axis(\"off\")\n plt.tight_layout(rect=[0, 0.03, 1, 0.95])\n\n\n# ----------------------------------------------------------------------\n# 2D embedding of the digits dataset\nprint(\"Computing embedding\")\nX_red = manifold.SpectralEmbedding(n_components=2).fit_transform(X)\nprint(\"Done.\")\n\nfrom sklearn.cluster import AgglomerativeClustering\n\nfor linkage in (\"ward\", \"average\", \"complete\", \"single\"):\n clustering = AgglomerativeClustering(linkage=linkage, n_clusters=10)\n t0 = time()\n clustering.fit(X_red)\n print(\"%s :\\t%.2fs\" % (linkage, time() - t0))\n\n plot_clustering(X_red, clustering.labels_, \"%s linkage\" % linkage)\n\n\nplt.show()"
29+
"# Authors: Gael Varoquaux\n# License: BSD 3 clause (C) INRIA 2014\n\nfrom time import time\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nfrom sklearn import manifold, datasets\n\nX, y = datasets.load_digits(return_X_y=True)\nn_samples, n_features = X.shape\n\nnp.random.seed(0)\n\n\n# ----------------------------------------------------------------------\n# Visualize the clustering\ndef plot_clustering(X_red, labels, title=None):\n x_min, x_max = np.min(X_red, axis=0), np.max(X_red, axis=0)\n X_red = (X_red - x_min) / (x_max - x_min)\n\n plt.figure(figsize=(6, 4))\n for i in range(X_red.shape[0]):\n plt.text(\n X_red[i, 0],\n X_red[i, 1],\n str(y[i]),\n color=plt.cm.nipy_spectral(labels[i] / 10.0),\n fontdict={\"weight\": \"bold\", \"size\": 9},\n )\n\n plt.xticks([])\n plt.yticks([])\n if title is not None:\n plt.title(title, size=17)\n plt.axis(\"off\")\n plt.tight_layout(rect=[0, 0.03, 1, 0.95])\n\n\n# ----------------------------------------------------------------------\n# 2D embedding of the digits dataset\nprint(\"Computing embedding\")\nX_red = manifold.SpectralEmbedding(n_components=2).fit_transform(X)\nprint(\"Done.\")\n\nfrom sklearn.cluster import AgglomerativeClustering\n\nfor linkage in (\"ward\", \"average\", \"complete\", \"single\"):\n clustering = AgglomerativeClustering(linkage=linkage, n_clusters=10)\n t0 = time()\n clustering.fit(X_red)\n print(\"%s :\\t%.2fs\" % (linkage, time() - t0))\n\n plot_clustering(X_red, clustering.labels_, \"%s linkage\" % linkage)\n\n\nplt.show()"
3030
]
3131
}
3232
],

0 commit comments

Comments
 (0)