Skip to content

Commit 6654af3

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 7166ae8ecec24adf7fd844b3a3733e980e47e231
1 parent 249810b commit 6654af3

File tree

1,092 files changed

+3327
-3357
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,092 files changed

+3327
-3357
lines changed
-300 Bytes
Binary file not shown.
-296 Bytes
Binary file not shown.

dev/_downloads/plot_adjusted_for_chance_measures.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\n# Author: Olivier Grisel <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom time import time\nfrom sklearn import metrics\n\ndef uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=None, n_runs=5, seed=42):\n \"\"\"Compute score for 2 random uniform cluster labelings.\n\n Both random labelings have the same number of clusters for each value\n possible value in ``n_clusters_range``.\n\n When fixed_n_classes is not None the first labeling is considered a ground\n truth class assignment with fixed number of classes.\n \"\"\"\n random_labels = np.random.RandomState(seed).randint\n scores = np.zeros((len(n_clusters_range), n_runs))\n\n if fixed_n_classes is not None:\n labels_a = random_labels(low=0, high=fixed_n_classes, size=n_samples)\n\n for i, k in enumerate(n_clusters_range):\n for j in range(n_runs):\n if fixed_n_classes is None:\n labels_a = random_labels(low=0, high=k, size=n_samples)\n labels_b = random_labels(low=0, high=k, size=n_samples)\n scores[i, j] = score_func(labels_a, labels_b)\n return scores\n\n\ndef ami_score(U, V):\n return metrics.adjusted_mutual_info_score(U, V,\n average_method='arithmetic')\n\nscore_funcs = [\n metrics.adjusted_rand_score,\n metrics.v_measure_score,\n ami_score,\n metrics.mutual_info_score,\n]\n\n# 2 independent random clusterings with equal cluster number\n\nn_samples = 100\nn_clusters_range = np.linspace(2, n_samples, 10).astype(np.int)\n\nplt.figure(1)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, np.median(scores, axis=1), scores.std(axis=1))[0])\n 
names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for 2 random uniform labelings\\n\"\n \"with equal number of clusters\")\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.legend(plots, names)\nplt.ylim(bottom=-0.05, top=1.05)\n\n\n# Random labeling with varying n_clusters against ground class labels\n# with fixed number of clusters\n\nn_samples = 1000\nn_clusters_range = np.linspace(2, 100, 10).astype(np.int)\nn_classes = 10\n\nplt.figure(2)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=n_classes)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, scores.mean(axis=1), scores.std(axis=1))[0])\n names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for random uniform labeling\\n\"\n \"against reference assignment with %d classes\" % n_classes)\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.ylim(bottom=-0.05, top=1.05)\nplt.legend(plots, names)\nplt.show()"
29+
"print(__doc__)\n\n# Author: Olivier Grisel <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom time import time\nfrom sklearn import metrics\n\ndef uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=None, n_runs=5, seed=42):\n \"\"\"Compute score for 2 random uniform cluster labelings.\n\n Both random labelings have the same number of clusters for each value\n possible value in ``n_clusters_range``.\n\n When fixed_n_classes is not None the first labeling is considered a ground\n truth class assignment with fixed number of classes.\n \"\"\"\n random_labels = np.random.RandomState(seed).randint\n scores = np.zeros((len(n_clusters_range), n_runs))\n\n if fixed_n_classes is not None:\n labels_a = random_labels(low=0, high=fixed_n_classes, size=n_samples)\n\n for i, k in enumerate(n_clusters_range):\n for j in range(n_runs):\n if fixed_n_classes is None:\n labels_a = random_labels(low=0, high=k, size=n_samples)\n labels_b = random_labels(low=0, high=k, size=n_samples)\n scores[i, j] = score_func(labels_a, labels_b)\n return scores\n\n\ndef ami_score(U, V):\n return metrics.adjusted_mutual_info_score(U, V)\n\nscore_funcs = [\n metrics.adjusted_rand_score,\n metrics.v_measure_score,\n ami_score,\n metrics.mutual_info_score,\n]\n\n# 2 independent random clusterings with equal cluster number\n\nn_samples = 100\nn_clusters_range = np.linspace(2, n_samples, 10).astype(np.int)\n\nplt.figure(1)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, np.median(scores, axis=1), scores.std(axis=1))[0])\n names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for 2 
random uniform labelings\\n\"\n \"with equal number of clusters\")\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.legend(plots, names)\nplt.ylim(bottom=-0.05, top=1.05)\n\n\n# Random labeling with varying n_clusters against ground class labels\n# with fixed number of clusters\n\nn_samples = 1000\nn_clusters_range = np.linspace(2, 100, 10).astype(np.int)\nn_classes = 10\n\nplt.figure(2)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=n_classes)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, scores.mean(axis=1), scores.std(axis=1))[0])\n names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for random uniform labeling\\n\"\n \"against reference assignment with %d classes\" % n_classes)\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.ylim(bottom=-0.05, top=1.05)\nplt.legend(plots, names)\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_adjusted_for_chance_measures.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ def uniform_labelings_scores(score_func, n_samples, n_clusters_range,
5656

5757

5858
def ami_score(U, V):
59-
return metrics.adjusted_mutual_info_score(U, V,
60-
average_method='arithmetic')
59+
return metrics.adjusted_mutual_info_score(U, V)
6160

6261
score_funcs = [
6362
metrics.adjusted_rand_score,

dev/_downloads/plot_affinity_propagation.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\nfrom sklearn.cluster import AffinityPropagation\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5,\n random_state=0)\n\n# #############################################################################\n# Compute Affinity Propagation\naf = AffinityPropagation(preference=-50).fit(X)\ncluster_centers_indices = af.cluster_centers_indices_\nlabels = af.labels_\n\nn_clusters_ = len(cluster_centers_indices)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels,\n average_method='arithmetic'))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels, metric='sqeuclidean'))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nplt.close('all')\nplt.figure(1)\nplt.clf()\n\ncolors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')\nfor k, col in zip(range(n_clusters_), colors):\n class_members = labels == k\n cluster_center = X[cluster_centers_indices[k]]\n plt.plot(X[class_members, 0], X[class_members, 1], col + '.')\n plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=14)\n for x in X[class_members]:\n plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)\n\nplt.title('Estimated 
number of clusters: %d' % n_clusters_)\nplt.show()"
29+
"print(__doc__)\n\nfrom sklearn.cluster import AffinityPropagation\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5,\n random_state=0)\n\n# #############################################################################\n# Compute Affinity Propagation\naf = AffinityPropagation(preference=-50).fit(X)\ncluster_centers_indices = af.cluster_centers_indices_\nlabels = af.labels_\n\nn_clusters_ = len(cluster_centers_indices)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels, metric='sqeuclidean'))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nplt.close('all')\nplt.figure(1)\nplt.clf()\n\ncolors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')\nfor k, col in zip(range(n_clusters_), colors):\n class_members = labels == k\n cluster_center = X[cluster_centers_indices[k]]\n plt.plot(X[class_members, 0], X[class_members, 1], col + '.')\n plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=14)\n for x in X[class_members]:\n plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)\n\nplt.title('Estimated number of clusters: %d' % 
n_clusters_)\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_affinity_propagation.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@
3535
print("Adjusted Rand Index: %0.3f"
3636
% metrics.adjusted_rand_score(labels_true, labels))
3737
print("Adjusted Mutual Information: %0.3f"
38-
% metrics.adjusted_mutual_info_score(labels_true, labels,
39-
average_method='arithmetic'))
38+
% metrics.adjusted_mutual_info_score(labels_true, labels))
4039
print("Silhouette Coefficient: %0.3f"
4140
% metrics.silhouette_score(X, labels, metric='sqeuclidean'))
4241

dev/_downloads/plot_dbscan.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\nimport numpy as np\n\nfrom sklearn.cluster import DBSCAN\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\nfrom sklearn.preprocessing import StandardScaler\n\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,\n random_state=0)\n\nX = StandardScaler().fit_transform(X)\n\n# #############################################################################\n# Compute DBSCAN\ndb = DBSCAN(eps=0.3, min_samples=10).fit(X)\ncore_samples_mask = np.zeros_like(db.labels_, dtype=bool)\ncore_samples_mask[db.core_sample_indices_] = True\nlabels = db.labels_\n\n# Number of clusters in labels, ignoring noise if present.\nn_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\nn_noise_ = list(labels).count(-1)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint('Estimated number of noise points: %d' % n_noise_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels,\n average_method='arithmetic'))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\n\n# Black removed and is used for noise instead.\nunique_labels = set(labels)\ncolors = [plt.cm.Spectral(each)\n for each in np.linspace(0, 1, len(unique_labels))]\nfor k, col in zip(unique_labels, colors):\n if k == -1:\n # Black used for noise.\n col = [0, 0, 0, 
1]\n\n class_member_mask = (labels == k)\n\n xy = X[class_member_mask & core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=14)\n\n xy = X[class_member_mask & ~core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=6)\n\nplt.title('Estimated number of clusters: %d' % n_clusters_)\nplt.show()"
29+
"print(__doc__)\n\nimport numpy as np\n\nfrom sklearn.cluster import DBSCAN\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\nfrom sklearn.preprocessing import StandardScaler\n\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,\n random_state=0)\n\nX = StandardScaler().fit_transform(X)\n\n# #############################################################################\n# Compute DBSCAN\ndb = DBSCAN(eps=0.3, min_samples=10).fit(X)\ncore_samples_mask = np.zeros_like(db.labels_, dtype=bool)\ncore_samples_mask[db.core_sample_indices_] = True\nlabels = db.labels_\n\n# Number of clusters in labels, ignoring noise if present.\nn_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\nn_noise_ = list(labels).count(-1)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint('Estimated number of noise points: %d' % n_noise_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\n\n# Black removed and is used for noise instead.\nunique_labels = set(labels)\ncolors = [plt.cm.Spectral(each)\n for each in np.linspace(0, 1, len(unique_labels))]\nfor k, col in zip(unique_labels, colors):\n if k == -1:\n # Black used for noise.\n col = [0, 0, 0, 1]\n\n class_member_mask = 
(labels == k)\n\n xy = X[class_member_mask & core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=14)\n\n xy = X[class_member_mask & ~core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=6)\n\nplt.title('Estimated number of clusters: %d' % n_clusters_)\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_dbscan.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@
4444
print("Adjusted Rand Index: %0.3f"
4545
% metrics.adjusted_rand_score(labels_true, labels))
4646
print("Adjusted Mutual Information: %0.3f"
47-
% metrics.adjusted_mutual_info_score(labels_true, labels,
48-
average_method='arithmetic'))
47+
% metrics.adjusted_mutual_info_score(labels_true, labels))
4948
print("Silhouette Coefficient: %0.3f"
5049
% metrics.silhouette_score(X, labels))
5150

0 commit comments

Comments (0)