Skip to content

Commit 2dd9b40

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 87460e95a76f297dce4106e81e3e897b211070c9
1 parent af77ee5 commit 2dd9b40

File tree

1,264 files changed

+4489
-4590
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,264 files changed

+4489
-4590
lines changed
Binary file not shown.

dev/_downloads/2434f000f4405168e6285a3e410c709f/plot_kmeans_silhouette_analysis.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\nimport matplotlib.pyplot as plt\nimport matplotlib.cm as cm\nimport numpy as np\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n n_samples=500,\n n_features=2,\n centers=4,\n cluster_std=1,\n center_box=(-10.0, 10.0),\n shuffle=True,\n random_state=1,\n) # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n # Create a subplot with 1 row and 2 columns\n fig, (ax1, ax2) = plt.subplots(1, 2)\n fig.set_size_inches(18, 7)\n\n # The 1st subplot is the silhouette plot\n # The silhouette coefficient can range from -1, 1 but in this example all\n # lie within [-0.1, 1]\n ax1.set_xlim([-0.1, 1])\n # The (n_clusters+1)*10 is for inserting blank space between silhouette\n # plots of individual clusters, to demarcate them clearly.\n ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])\n\n # Initialize the clusterer with n_clusters value and a random generator\n # seed of 10 for reproducibility.\n clusterer = KMeans(n_clusters=n_clusters, random_state=10)\n cluster_labels = clusterer.fit_predict(X)\n\n # The silhouette_score gives the average value for all the samples.\n # This gives a perspective into the density and separation of the formed\n # clusters\n silhouette_avg = silhouette_score(X, cluster_labels)\n print(\n \"For n_clusters =\",\n n_clusters,\n \"The average silhouette_score is :\",\n silhouette_avg,\n )\n\n # Compute the silhouette scores for each sample\n sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n y_lower = 10\n for i in range(n_clusters):\n # Aggregate the silhouette scores for samples belonging to\n # cluster i, and sort them\n ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n 
ith_cluster_silhouette_values.sort()\n\n size_cluster_i = ith_cluster_silhouette_values.shape[0]\n y_upper = y_lower + size_cluster_i\n\n color = cm.nipy_spectral(float(i) / n_clusters)\n ax1.fill_betweenx(\n np.arange(y_lower, y_upper),\n 0,\n ith_cluster_silhouette_values,\n facecolor=color,\n edgecolor=color,\n alpha=0.7,\n )\n\n # Label the silhouette plots with their cluster numbers at the middle\n ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n # Compute the new y_lower for next plot\n y_lower = y_upper + 10 # 10 for the 0 samples\n\n ax1.set_title(\"The silhouette plot for the various clusters.\")\n ax1.set_xlabel(\"The silhouette coefficient values\")\n ax1.set_ylabel(\"Cluster label\")\n\n # The vertical line for average silhouette score of all the values\n ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n ax1.set_yticks([]) # Clear the yaxis labels / ticks\n ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n # 2nd Plot showing the actual clusters formed\n colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n ax2.scatter(\n X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n )\n\n # Labeling the clusters\n centers = clusterer.cluster_centers_\n # Draw white circles at cluster centers\n ax2.scatter(\n centers[:, 0],\n centers[:, 1],\n marker=\"o\",\n c=\"white\",\n alpha=1,\n s=200,\n edgecolor=\"k\",\n )\n\n for i, c in enumerate(centers):\n ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n ax2.set_title(\"The visualization of the clustered data.\")\n ax2.set_xlabel(\"Feature space for the 1st feature\")\n ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n plt.suptitle(\n \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n % n_clusters,\n fontsize=14,\n fontweight=\"bold\",\n )\n\nplt.show()"
29+
"from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\nimport matplotlib.pyplot as plt\nimport matplotlib.cm as cm\nimport numpy as np\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n n_samples=500,\n n_features=2,\n centers=4,\n cluster_std=1,\n center_box=(-10.0, 10.0),\n shuffle=True,\n random_state=1,\n) # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n # Create a subplot with 1 row and 2 columns\n fig, (ax1, ax2) = plt.subplots(1, 2)\n fig.set_size_inches(18, 7)\n\n # The 1st subplot is the silhouette plot\n # The silhouette coefficient can range from -1, 1 but in this example all\n # lie within [-0.1, 1]\n ax1.set_xlim([-0.1, 1])\n # The (n_clusters+1)*10 is for inserting blank space between silhouette\n # plots of individual clusters, to demarcate them clearly.\n ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])\n\n # Initialize the clusterer with n_clusters value and a random generator\n # seed of 10 for reproducibility.\n clusterer = KMeans(n_clusters=n_clusters, n_init=\"auto\", random_state=10)\n cluster_labels = clusterer.fit_predict(X)\n\n # The silhouette_score gives the average value for all the samples.\n # This gives a perspective into the density and separation of the formed\n # clusters\n silhouette_avg = silhouette_score(X, cluster_labels)\n print(\n \"For n_clusters =\",\n n_clusters,\n \"The average silhouette_score is :\",\n silhouette_avg,\n )\n\n # Compute the silhouette scores for each sample\n sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n y_lower = 10\n for i in range(n_clusters):\n # Aggregate the silhouette scores for samples belonging to\n # cluster i, and sort them\n ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n 
ith_cluster_silhouette_values.sort()\n\n size_cluster_i = ith_cluster_silhouette_values.shape[0]\n y_upper = y_lower + size_cluster_i\n\n color = cm.nipy_spectral(float(i) / n_clusters)\n ax1.fill_betweenx(\n np.arange(y_lower, y_upper),\n 0,\n ith_cluster_silhouette_values,\n facecolor=color,\n edgecolor=color,\n alpha=0.7,\n )\n\n # Label the silhouette plots with their cluster numbers at the middle\n ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n # Compute the new y_lower for next plot\n y_lower = y_upper + 10 # 10 for the 0 samples\n\n ax1.set_title(\"The silhouette plot for the various clusters.\")\n ax1.set_xlabel(\"The silhouette coefficient values\")\n ax1.set_ylabel(\"Cluster label\")\n\n # The vertical line for average silhouette score of all the values\n ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n ax1.set_yticks([]) # Clear the yaxis labels / ticks\n ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n # 2nd Plot showing the actual clusters formed\n colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n ax2.scatter(\n X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n )\n\n # Labeling the clusters\n centers = clusterer.cluster_centers_\n # Draw white circles at cluster centers\n ax2.scatter(\n centers[:, 0],\n centers[:, 1],\n marker=\"o\",\n c=\"white\",\n alpha=1,\n s=200,\n edgecolor=\"k\",\n )\n\n for i, c in enumerate(centers):\n ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n ax2.set_title(\"The visualization of the clustered data.\")\n ax2.set_xlabel(\"Feature space for the 1st feature\")\n ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n plt.suptitle(\n \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n % n_clusters,\n fontsize=14,\n fontweight=\"bold\",\n )\n\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/586f6cb589cefcd68d55348630efbfa0/plot_kmeans_silhouette_analysis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969

7070
# Initialize the clusterer with n_clusters value and a random generator
7171
# seed of 10 for reproducibility.
72-
clusterer = KMeans(n_clusters=n_clusters, random_state=10)
72+
clusterer = KMeans(n_clusters=n_clusters, n_init="auto", random_state=10)
7373
cluster_labels = clusterer.fit_predict(X)
7474

7575
# The silhouette_score gives the average value for all the samples.
Binary file not shown.

0 commit comments

Comments
 (0)