scikit-learn
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
30 Bytes b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
30 Bytes
diff --git a/‎dev/_downloads/2434f000f4405168e6285a3e410c709f/plot_kmeans_silhouette_analysis.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/2434f000f4405168e6285a3e410c709f/plot_kmeans_silhouette_analysis.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/586f6cb589cefcd68d55348630efbfa0/plot_kmeans_silhouette_analysis.py
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/586f6cb589cefcd68d55348630efbfa0/plot_kmeans_silhouette_analysis.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
34 Bytes b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
34 Bytes
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\nimport matplotlib.pyplot as plt\nimport matplotlib.cm as cm\nimport numpy as np\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n    n_samples=500,\n    n_features=2,\n    centers=4,\n    cluster_std=1,\n    center_box=(-10.0, 10.0),\n    shuffle=True,\n    random_state=1,\n)  # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n    # Create a subplot with 1 row and 2 columns\n    fig, (ax1, ax2) = plt.subplots(1, 2)\n    fig.set_size_inches(18, 7)\n\n    # The 1st subplot is the silhouette plot\n    # The silhouette coefficient can range from -1, 1 but in this example all\n    # lie within [-0.1, 1]\n    ax1.set_xlim([-0.1, 1])\n    # The (n_clusters+1)*10 is for inserting blank space between silhouette\n    # plots of individual clusters, to demarcate them clearly.\n    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])\n\n    # Initialize the clusterer with n_clusters value and a random generator\n    # seed of 10 for reproducibility.\n    clusterer = KMeans(n_clusters=n_clusters, random_state=10)\n    cluster_labels = clusterer.fit_predict(X)\n\n    # The silhouette_score gives the average value for all the samples.\n    # This gives a perspective into the density and separation of the formed\n    # clusters\n    silhouette_avg = silhouette_score(X, cluster_labels)\n    print(\n        \"For n_clusters =\",\n        n_clusters,\n        \"The average silhouette_score is :\",\n        silhouette_avg,\n    )\n\n    # Compute the silhouette scores for each sample\n    sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n    y_lower = 10\n    for i in range(n_clusters):\n        # Aggregate the silhouette scores for samples belonging to\n        # cluster i, and sort them\n        ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n        ith_cluster_silhouette_values.sort()\n\n        size_cluster_i = ith_cluster_silhouette_values.shape[0]\n        y_upper = y_lower + size_cluster_i\n\n        color = cm.nipy_spectral(float(i) / n_clusters)\n        ax1.fill_betweenx(\n            np.arange(y_lower, y_upper),\n            0,\n            ith_cluster_silhouette_values,\n            facecolor=color,\n            edgecolor=color,\n            alpha=0.7,\n        )\n\n        # Label the silhouette plots with their cluster numbers at the middle\n        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n        # Compute the new y_lower for next plot\n        y_lower = y_upper + 10  # 10 for the 0 samples\n\n    ax1.set_title(\"The silhouette plot for the various clusters.\")\n    ax1.set_xlabel(\"The silhouette coefficient values\")\n    ax1.set_ylabel(\"Cluster label\")\n\n    # The vertical line for average silhouette score of all the values\n    ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n    ax1.set_yticks([])  # Clear the yaxis labels / ticks\n    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n    # 2nd Plot showing the actual clusters formed\n    colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n    ax2.scatter(\n        X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n    )\n\n    # Labeling the clusters\n    centers = clusterer.cluster_centers_\n    # Draw white circles at cluster centers\n    ax2.scatter(\n        centers[:, 0],\n        centers[:, 1],\n        marker=\"o\",\n        c=\"white\",\n        alpha=1,\n        s=200,\n        edgecolor=\"k\",\n    )\n\n    for i, c in enumerate(centers):\n        ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n    ax2.set_title(\"The visualization of the clustered data.\")\n    ax2.set_xlabel(\"Feature space for the 1st feature\")\n    ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n    plt.suptitle(\n        \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n        % n_clusters,\n        fontsize=14,\n        fontweight=\"bold\",\n    )\n\nplt.show()"
+        "from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\nimport matplotlib.pyplot as plt\nimport matplotlib.cm as cm\nimport numpy as np\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n    n_samples=500,\n    n_features=2,\n    centers=4,\n    cluster_std=1,\n    center_box=(-10.0, 10.0),\n    shuffle=True,\n    random_state=1,\n)  # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n    # Create a subplot with 1 row and 2 columns\n    fig, (ax1, ax2) = plt.subplots(1, 2)\n    fig.set_size_inches(18, 7)\n\n    # The 1st subplot is the silhouette plot\n    # The silhouette coefficient can range from -1, 1 but in this example all\n    # lie within [-0.1, 1]\n    ax1.set_xlim([-0.1, 1])\n    # The (n_clusters+1)*10 is for inserting blank space between silhouette\n    # plots of individual clusters, to demarcate them clearly.\n    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])\n\n    # Initialize the clusterer with n_clusters value and a random generator\n    # seed of 10 for reproducibility.\n    clusterer = KMeans(n_clusters=n_clusters, n_init=\"auto\", random_state=10)\n    cluster_labels = clusterer.fit_predict(X)\n\n    # The silhouette_score gives the average value for all the samples.\n    # This gives a perspective into the density and separation of the formed\n    # clusters\n    silhouette_avg = silhouette_score(X, cluster_labels)\n    print(\n        \"For n_clusters =\",\n        n_clusters,\n        \"The average silhouette_score is :\",\n        silhouette_avg,\n    )\n\n    # Compute the silhouette scores for each sample\n    sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n    y_lower = 10\n    for i in range(n_clusters):\n        # Aggregate the silhouette scores for samples belonging to\n        # cluster i, and sort them\n        ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n        ith_cluster_silhouette_values.sort()\n\n        size_cluster_i = ith_cluster_silhouette_values.shape[0]\n        y_upper = y_lower + size_cluster_i\n\n        color = cm.nipy_spectral(float(i) / n_clusters)\n        ax1.fill_betweenx(\n            np.arange(y_lower, y_upper),\n            0,\n            ith_cluster_silhouette_values,\n            facecolor=color,\n            edgecolor=color,\n            alpha=0.7,\n        )\n\n        # Label the silhouette plots with their cluster numbers at the middle\n        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n        # Compute the new y_lower for next plot\n        y_lower = y_upper + 10  # 10 for the 0 samples\n\n    ax1.set_title(\"The silhouette plot for the various clusters.\")\n    ax1.set_xlabel(\"The silhouette coefficient values\")\n    ax1.set_ylabel(\"Cluster label\")\n\n    # The vertical line for average silhouette score of all the values\n    ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n    ax1.set_yticks([])  # Clear the yaxis labels / ticks\n    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n    # 2nd Plot showing the actual clusters formed\n    colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n    ax2.scatter(\n        X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n    )\n\n    # Labeling the clusters\n    centers = clusterer.cluster_centers_\n    # Draw white circles at cluster centers\n    ax2.scatter(\n        centers[:, 0],\n        centers[:, 1],\n        marker=\"o\",\n        c=\"white\",\n        alpha=1,\n        s=200,\n        edgecolor=\"k\",\n    )\n\n    for i, c in enumerate(centers):\n        ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n    ax2.set_title(\"The visualization of the clustered data.\")\n    ax2.set_xlabel(\"Feature space for the 1st feature\")\n    ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n    plt.suptitle(\n        \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n        % n_clusters,\n        fontsize=14,\n        fontweight=\"bold\",\n    )\n\nplt.show()"
       ]
     }
   ],
 
@@ -69,7 +69,7 @@
 
     # Initialize the clusterer with n_clusters value and a random generator
     # seed of 10 for reproducibility.
-    clusterer = KMeans(n_clusters=n_clusters, random_state=10)
+    clusterer = KMeans(n_clusters=n_clusters, n_init="auto", random_state=10)
     cluster_labels = clusterer.fit_predict(X)
 
     # The silhouette_score gives the average value for all the samples.
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\nimport matplotlib.pyplot as plt\nimport matplotlib.cm as cm\nimport numpy as np\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n n_samples=500,\n n_features=2,\n centers=4,\n cluster_std=1,\n center_box=(-10.0, 10.0),\n shuffle=True,\n random_state=1,\n) # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n # Create a subplot with 1 row and 2 columns\n fig, (ax1, ax2) = plt.subplots(1, 2)\n fig.set_size_inches(18, 7)\n\n # The 1st subplot is the silhouette plot\n # The silhouette coefficient can range from -1, 1 but in this example all\n # lie within [-0.1, 1]\n ax1.set_xlim([-0.1, 1])\n # The (n_clusters+1)10 is for inserting blank space between silhouette\n # plots of individual clusters, to demarcate them clearly.\n ax1.set_ylim([0, len(X) + (n_clusters + 1) 10])\n\n # Initialize the clusterer with n_clusters value and a random generator\n # seed of 10 for reproducibility.\n clusterer = KMeans(n_clusters=n_clusters, random_state=10)\n cluster_labels = clusterer.fit_predict(X)\n\n # The silhouette_score gives the average value for all the samples.\n # This gives a perspective into the density and separation of the formed\n # clusters\n silhouette_avg = silhouette_score(X, cluster_labels)\n print(\n \"For n_clusters =\",\n n_clusters,\n \"The average silhouette_score is :\",\n silhouette_avg,\n )\n\n # Compute the silhouette scores for each sample\n sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n y_lower = 10\n for i in range(n_clusters):\n # Aggregate the silhouette scores for samples belonging to\n # cluster i, and sort them\n ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n ith_cluster_silhouette_values.sort()\n\n size_cluster_i = ith_cluster_silhouette_values.shape[0]\n y_upper = y_lower + size_cluster_i\n\n color = cm.nipy_spectral(float(i) / n_clusters)\n ax1.fill_betweenx(\n np.arange(y_lower, y_upper),\n 0,\n ith_cluster_silhouette_values,\n facecolor=color,\n edgecolor=color,\n alpha=0.7,\n )\n\n # Label the silhouette plots with their cluster numbers at the middle\n ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n # Compute the new y_lower for next plot\n y_lower = y_upper + 10 # 10 for the 0 samples\n\n ax1.set_title(\"The silhouette plot for the various clusters.\")\n ax1.set_xlabel(\"The silhouette coefficient values\")\n ax1.set_ylabel(\"Cluster label\")\n\n # The vertical line for average silhouette score of all the values\n ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n ax1.set_yticks([]) # Clear the yaxis labels / ticks\n ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n # 2nd Plot showing the actual clusters formed\n colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n ax2.scatter(\n X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n )\n\n # Labeling the clusters\n centers = clusterer.cluster_centers_\n # Draw white circles at cluster centers\n ax2.scatter(\n centers[:, 0],\n centers[:, 1],\n marker=\"o\",\n c=\"white\",\n alpha=1,\n s=200,\n edgecolor=\"k\",\n )\n\n for i, c in enumerate(centers):\n ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n ax2.set_title(\"The visualization of the clustered data.\")\n ax2.set_xlabel(\"Feature space for the 1st feature\")\n ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n plt.suptitle(\n \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n % n_clusters,\n fontsize=14,\n fontweight=\"bold\",\n )\n\nplt.show()"
	`29`	+ "from sklearn.datasets import make_blobs\nfrom sklearn.cluster import KMeans\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\nimport matplotlib.pyplot as plt\nimport matplotlib.cm as cm\nimport numpy as np\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n n_samples=500,\n n_features=2,\n centers=4,\n cluster_std=1,\n center_box=(-10.0, 10.0),\n shuffle=True,\n random_state=1,\n) # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n # Create a subplot with 1 row and 2 columns\n fig, (ax1, ax2) = plt.subplots(1, 2)\n fig.set_size_inches(18, 7)\n\n # The 1st subplot is the silhouette plot\n # The silhouette coefficient can range from -1, 1 but in this example all\n # lie within [-0.1, 1]\n ax1.set_xlim([-0.1, 1])\n # The (n_clusters+1)10 is for inserting blank space between silhouette\n # plots of individual clusters, to demarcate them clearly.\n ax1.set_ylim([0, len(X) + (n_clusters + 1) 10])\n\n # Initialize the clusterer with n_clusters value and a random generator\n # seed of 10 for reproducibility.\n clusterer = KMeans(n_clusters=n_clusters, n_init=\"auto\", random_state=10)\n cluster_labels = clusterer.fit_predict(X)\n\n # The silhouette_score gives the average value for all the samples.\n # This gives a perspective into the density and separation of the formed\n # clusters\n silhouette_avg = silhouette_score(X, cluster_labels)\n print(\n \"For n_clusters =\",\n n_clusters,\n \"The average silhouette_score is :\",\n silhouette_avg,\n )\n\n # Compute the silhouette scores for each sample\n sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n y_lower = 10\n for i in range(n_clusters):\n # Aggregate the silhouette scores for samples belonging to\n # cluster i, and sort them\n ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n ith_cluster_silhouette_values.sort()\n\n size_cluster_i = ith_cluster_silhouette_values.shape[0]\n y_upper = y_lower + size_cluster_i\n\n color = cm.nipy_spectral(float(i) / n_clusters)\n ax1.fill_betweenx(\n np.arange(y_lower, y_upper),\n 0,\n ith_cluster_silhouette_values,\n facecolor=color,\n edgecolor=color,\n alpha=0.7,\n )\n\n # Label the silhouette plots with their cluster numbers at the middle\n ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n # Compute the new y_lower for next plot\n y_lower = y_upper + 10 # 10 for the 0 samples\n\n ax1.set_title(\"The silhouette plot for the various clusters.\")\n ax1.set_xlabel(\"The silhouette coefficient values\")\n ax1.set_ylabel(\"Cluster label\")\n\n # The vertical line for average silhouette score of all the values\n ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n ax1.set_yticks([]) # Clear the yaxis labels / ticks\n ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n # 2nd Plot showing the actual clusters formed\n colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n ax2.scatter(\n X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n )\n\n # Labeling the clusters\n centers = clusterer.cluster_centers_\n # Draw white circles at cluster centers\n ax2.scatter(\n centers[:, 0],\n centers[:, 1],\n marker=\"o\",\n c=\"white\",\n alpha=1,\n s=200,\n edgecolor=\"k\",\n )\n\n for i, c in enumerate(centers):\n ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n ax2.set_title(\"The visualization of the clustered data.\")\n ax2.set_xlabel(\"Feature space for the 1st feature\")\n ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n plt.suptitle(\n \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n % n_clusters,\n fontsize=14,\n fontweight=\"bold\",\n )\n\nplt.show()"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`