Skip to content

Commit 6654af3

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 7166ae8ecec24adf7fd844b3a3733e980e47e231
1 parent 249810b commit 6654af3

File tree

1,092 files changed

+3327
-3357
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,092 files changed

+3327
-3357
lines changed
-300 Bytes
Binary file not shown.
-296 Bytes
Binary file not shown.

dev/_downloads/plot_adjusted_for_chance_measures.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\n# Author: Olivier Grisel <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom time import time\nfrom sklearn import metrics\n\ndef uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=None, n_runs=5, seed=42):\n \"\"\"Compute score for 2 random uniform cluster labelings.\n\n Both random labelings have the same number of clusters for each value\n possible value in ``n_clusters_range``.\n\n When fixed_n_classes is not None the first labeling is considered a ground\n truth class assignment with fixed number of classes.\n \"\"\"\n random_labels = np.random.RandomState(seed).randint\n scores = np.zeros((len(n_clusters_range), n_runs))\n\n if fixed_n_classes is not None:\n labels_a = random_labels(low=0, high=fixed_n_classes, size=n_samples)\n\n for i, k in enumerate(n_clusters_range):\n for j in range(n_runs):\n if fixed_n_classes is None:\n labels_a = random_labels(low=0, high=k, size=n_samples)\n labels_b = random_labels(low=0, high=k, size=n_samples)\n scores[i, j] = score_func(labels_a, labels_b)\n return scores\n\n\ndef ami_score(U, V):\n return metrics.adjusted_mutual_info_score(U, V,\n average_method='arithmetic')\n\nscore_funcs = [\n metrics.adjusted_rand_score,\n metrics.v_measure_score,\n ami_score,\n metrics.mutual_info_score,\n]\n\n# 2 independent random clusterings with equal cluster number\n\nn_samples = 100\nn_clusters_range = np.linspace(2, n_samples, 10).astype(np.int)\n\nplt.figure(1)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, np.median(scores, axis=1), scores.std(axis=1))[0])\n 
names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for 2 random uniform labelings\\n\"\n \"with equal number of clusters\")\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.legend(plots, names)\nplt.ylim(bottom=-0.05, top=1.05)\n\n\n# Random labeling with varying n_clusters against ground class labels\n# with fixed number of clusters\n\nn_samples = 1000\nn_clusters_range = np.linspace(2, 100, 10).astype(np.int)\nn_classes = 10\n\nplt.figure(2)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=n_classes)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, scores.mean(axis=1), scores.std(axis=1))[0])\n names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for random uniform labeling\\n\"\n \"against reference assignment with %d classes\" % n_classes)\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.ylim(bottom=-0.05, top=1.05)\nplt.legend(plots, names)\nplt.show()"
29+
"print(__doc__)\n\n# Author: Olivier Grisel <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom time import time\nfrom sklearn import metrics\n\ndef uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=None, n_runs=5, seed=42):\n \"\"\"Compute score for 2 random uniform cluster labelings.\n\n Both random labelings have the same number of clusters for each value\n possible value in ``n_clusters_range``.\n\n When fixed_n_classes is not None the first labeling is considered a ground\n truth class assignment with fixed number of classes.\n \"\"\"\n random_labels = np.random.RandomState(seed).randint\n scores = np.zeros((len(n_clusters_range), n_runs))\n\n if fixed_n_classes is not None:\n labels_a = random_labels(low=0, high=fixed_n_classes, size=n_samples)\n\n for i, k in enumerate(n_clusters_range):\n for j in range(n_runs):\n if fixed_n_classes is None:\n labels_a = random_labels(low=0, high=k, size=n_samples)\n labels_b = random_labels(low=0, high=k, size=n_samples)\n scores[i, j] = score_func(labels_a, labels_b)\n return scores\n\n\ndef ami_score(U, V):\n return metrics.adjusted_mutual_info_score(U, V)\n\nscore_funcs = [\n metrics.adjusted_rand_score,\n metrics.v_measure_score,\n ami_score,\n metrics.mutual_info_score,\n]\n\n# 2 independent random clusterings with equal cluster number\n\nn_samples = 100\nn_clusters_range = np.linspace(2, n_samples, 10).astype(np.int)\n\nplt.figure(1)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, np.median(scores, axis=1), scores.std(axis=1))[0])\n names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for 2 
random uniform labelings\\n\"\n \"with equal number of clusters\")\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.legend(plots, names)\nplt.ylim(bottom=-0.05, top=1.05)\n\n\n# Random labeling with varying n_clusters against ground class labels\n# with fixed number of clusters\n\nn_samples = 1000\nn_clusters_range = np.linspace(2, 100, 10).astype(np.int)\nn_classes = 10\n\nplt.figure(2)\n\nplots = []\nnames = []\nfor score_func in score_funcs:\n print(\"Computing %s for %d values of n_clusters and n_samples=%d\"\n % (score_func.__name__, len(n_clusters_range), n_samples))\n\n t0 = time()\n scores = uniform_labelings_scores(score_func, n_samples, n_clusters_range,\n fixed_n_classes=n_classes)\n print(\"done in %0.3fs\" % (time() - t0))\n plots.append(plt.errorbar(\n n_clusters_range, scores.mean(axis=1), scores.std(axis=1))[0])\n names.append(score_func.__name__)\n\nplt.title(\"Clustering measures for random uniform labeling\\n\"\n \"against reference assignment with %d classes\" % n_classes)\nplt.xlabel('Number of clusters (Number of samples is fixed to %d)' % n_samples)\nplt.ylabel('Score value')\nplt.ylim(bottom=-0.05, top=1.05)\nplt.legend(plots, names)\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_adjusted_for_chance_measures.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ def uniform_labelings_scores(score_func, n_samples, n_clusters_range,
5656

5757

5858
def ami_score(U, V):
59-
return metrics.adjusted_mutual_info_score(U, V,
60-
average_method='arithmetic')
59+
return metrics.adjusted_mutual_info_score(U, V)
6160

6261
score_funcs = [
6362
metrics.adjusted_rand_score,

dev/_downloads/plot_affinity_propagation.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\nfrom sklearn.cluster import AffinityPropagation\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5,\n random_state=0)\n\n# #############################################################################\n# Compute Affinity Propagation\naf = AffinityPropagation(preference=-50).fit(X)\ncluster_centers_indices = af.cluster_centers_indices_\nlabels = af.labels_\n\nn_clusters_ = len(cluster_centers_indices)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels,\n average_method='arithmetic'))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels, metric='sqeuclidean'))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nplt.close('all')\nplt.figure(1)\nplt.clf()\n\ncolors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')\nfor k, col in zip(range(n_clusters_), colors):\n class_members = labels == k\n cluster_center = X[cluster_centers_indices[k]]\n plt.plot(X[class_members, 0], X[class_members, 1], col + '.')\n plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=14)\n for x in X[class_members]:\n plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)\n\nplt.title('Estimated 
number of clusters: %d' % n_clusters_)\nplt.show()"
29+
"print(__doc__)\n\nfrom sklearn.cluster import AffinityPropagation\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5,\n random_state=0)\n\n# #############################################################################\n# Compute Affinity Propagation\naf = AffinityPropagation(preference=-50).fit(X)\ncluster_centers_indices = af.cluster_centers_indices_\nlabels = af.labels_\n\nn_clusters_ = len(cluster_centers_indices)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels, metric='sqeuclidean'))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nplt.close('all')\nplt.figure(1)\nplt.clf()\n\ncolors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')\nfor k, col in zip(range(n_clusters_), colors):\n class_members = labels == k\n cluster_center = X[cluster_centers_indices[k]]\n plt.plot(X[class_members, 0], X[class_members, 1], col + '.')\n plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=14)\n for x in X[class_members]:\n plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)\n\nplt.title('Estimated number of clusters: %d' % 
n_clusters_)\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_affinity_propagation.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@
3535
print("Adjusted Rand Index: %0.3f"
3636
% metrics.adjusted_rand_score(labels_true, labels))
3737
print("Adjusted Mutual Information: %0.3f"
38-
% metrics.adjusted_mutual_info_score(labels_true, labels,
39-
average_method='arithmetic'))
38+
% metrics.adjusted_mutual_info_score(labels_true, labels))
4039
print("Silhouette Coefficient: %0.3f"
4140
% metrics.silhouette_score(X, labels, metric='sqeuclidean'))
4241

dev/_downloads/plot_dbscan.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\nimport numpy as np\n\nfrom sklearn.cluster import DBSCAN\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\nfrom sklearn.preprocessing import StandardScaler\n\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,\n random_state=0)\n\nX = StandardScaler().fit_transform(X)\n\n# #############################################################################\n# Compute DBSCAN\ndb = DBSCAN(eps=0.3, min_samples=10).fit(X)\ncore_samples_mask = np.zeros_like(db.labels_, dtype=bool)\ncore_samples_mask[db.core_sample_indices_] = True\nlabels = db.labels_\n\n# Number of clusters in labels, ignoring noise if present.\nn_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\nn_noise_ = list(labels).count(-1)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint('Estimated number of noise points: %d' % n_noise_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels,\n average_method='arithmetic'))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\n\n# Black removed and is used for noise instead.\nunique_labels = set(labels)\ncolors = [plt.cm.Spectral(each)\n for each in np.linspace(0, 1, len(unique_labels))]\nfor k, col in zip(unique_labels, colors):\n if k == -1:\n # Black used for noise.\n col = [0, 0, 0, 
1]\n\n class_member_mask = (labels == k)\n\n xy = X[class_member_mask & core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=14)\n\n xy = X[class_member_mask & ~core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=6)\n\nplt.title('Estimated number of clusters: %d' % n_clusters_)\nplt.show()"
29+
"print(__doc__)\n\nimport numpy as np\n\nfrom sklearn.cluster import DBSCAN\nfrom sklearn import metrics\nfrom sklearn.datasets.samples_generator import make_blobs\nfrom sklearn.preprocessing import StandardScaler\n\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,\n random_state=0)\n\nX = StandardScaler().fit_transform(X)\n\n# #############################################################################\n# Compute DBSCAN\ndb = DBSCAN(eps=0.3, min_samples=10).fit(X)\ncore_samples_mask = np.zeros_like(db.labels_, dtype=bool)\ncore_samples_mask[db.core_sample_indices_] = True\nlabels = db.labels_\n\n# Number of clusters in labels, ignoring noise if present.\nn_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\nn_noise_ = list(labels).count(-1)\n\nprint('Estimated number of clusters: %d' % n_clusters_)\nprint('Estimated number of noise points: %d' % n_noise_)\nprint(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\nprint(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\nprint(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\nprint(\"Adjusted Rand Index: %0.3f\"\n % metrics.adjusted_rand_score(labels_true, labels))\nprint(\"Adjusted Mutual Information: %0.3f\"\n % metrics.adjusted_mutual_info_score(labels_true, labels))\nprint(\"Silhouette Coefficient: %0.3f\"\n % metrics.silhouette_score(X, labels))\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\n\n# Black removed and is used for noise instead.\nunique_labels = set(labels)\ncolors = [plt.cm.Spectral(each)\n for each in np.linspace(0, 1, len(unique_labels))]\nfor k, col in zip(unique_labels, colors):\n if k == -1:\n # Black used for noise.\n col = [0, 0, 0, 1]\n\n class_member_mask = 
(labels == k)\n\n xy = X[class_member_mask & core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=14)\n\n xy = X[class_member_mask & ~core_samples_mask]\n plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n markeredgecolor='k', markersize=6)\n\nplt.title('Estimated number of clusters: %d' % n_clusters_)\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_dbscan.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@
4444
print("Adjusted Rand Index: %0.3f"
4545
% metrics.adjusted_rand_score(labels_true, labels))
4646
print("Adjusted Mutual Information: %0.3f"
47-
% metrics.adjusted_mutual_info_score(labels_true, labels,
48-
average_method='arithmetic'))
47+
% metrics.adjusted_mutual_info_score(labels_true, labels))
4948
print("Silhouette Coefficient: %0.3f"
5049
% metrics.silhouette_score(X, labels))
5150

0 commit comments

Comments (0)