Skip to content

Commit af66861

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 935f7e66068fc130971646225a95ade649f57928
1 parent 50c2e6d commit af66861

File tree

1,370 files changed

+4733
-4699
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,370 files changed

+4733
-4699
lines changed
Binary file not shown.
Binary file not shown.

dev/_downloads/bc88e7ec572d6d2d2ff19cf0d75265c9/plot_agglomerative_clustering_metrics.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
# License: BSD 3-Clause or CC-0
3939

4040
import matplotlib.pyplot as plt
41+
import matplotlib.patheffects as PathEffects
4142
import numpy as np
4243

4344
from sklearn.cluster import AgglomerativeClustering
@@ -80,18 +81,20 @@ def sqr(x):
8081

8182
labels = ("Waveform 1", "Waveform 2", "Waveform 3")
8283

84+
colors = ["#f7bd01", "#377eb8", "#f781bf"]
85+
8386
# Plot the ground-truth labelling
8487
plt.figure()
8588
plt.axes([0, 0, 1, 1])
86-
for l, c, n in zip(range(n_clusters), "rgb", labels):
87-
lines = plt.plot(X[y == l].T, c=c, alpha=0.5)
89+
for l, color, n in zip(range(n_clusters), colors, labels):
90+
lines = plt.plot(X[y == l].T, c=color, alpha=0.5)
8891
lines[0].set_label(n)
8992

9093
plt.legend(loc="best")
9194

9295
plt.axis("tight")
9396
plt.axis("off")
94-
plt.suptitle("Ground truth", size=20)
97+
plt.suptitle("Ground truth", size=20, y=1)
9598

9699

97100
# Plot the distances
@@ -106,19 +109,22 @@ def sqr(x):
106109
avg_dist /= avg_dist.max()
107110
for i in range(n_clusters):
108111
for j in range(n_clusters):
109-
plt.text(
112+
t = plt.text(
110113
i,
111114
j,
112115
"%5.3f" % avg_dist[i, j],
113116
verticalalignment="center",
114117
horizontalalignment="center",
115118
)
119+
t.set_path_effects(
120+
[PathEffects.withStroke(linewidth=5, foreground="w", alpha=0.5)]
121+
)
116122

117-
plt.imshow(avg_dist, interpolation="nearest", cmap=plt.cm.gnuplot2, vmin=0)
123+
plt.imshow(avg_dist, interpolation="nearest", cmap="cividis", vmin=0)
118124
plt.xticks(range(n_clusters), labels, rotation=45)
119125
plt.yticks(range(n_clusters), labels)
120126
plt.colorbar()
121-
plt.suptitle("Interclass %s distances" % metric, size=18)
127+
plt.suptitle("Interclass %s distances" % metric, size=18, y=1)
122128
plt.tight_layout()
123129

124130

@@ -130,11 +136,11 @@ def sqr(x):
130136
model.fit(X)
131137
plt.figure()
132138
plt.axes([0, 0, 1, 1])
133-
for l, c in zip(np.arange(model.n_clusters), "rgbk"):
134-
plt.plot(X[model.labels_ == l].T, c=c, alpha=0.5)
139+
for l, color in zip(np.arange(model.n_clusters), colors):
140+
plt.plot(X[model.labels_ == l].T, c=color, alpha=0.5)
135141
plt.axis("tight")
136142
plt.axis("off")
137-
plt.suptitle("AgglomerativeClustering(metric=%s)" % metric, size=20)
143+
plt.suptitle("AgglomerativeClustering(metric=%s)" % metric, size=20, y=1)
138144

139145

140146
plt.show()

dev/_downloads/e7d5500e87a046a110ca0daebd702588/plot_agglomerative_clustering_metrics.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Author: Gael Varoquaux\n# License: BSD 3-Clause or CC-0\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.cluster import AgglomerativeClustering\nfrom sklearn.metrics import pairwise_distances\n\nnp.random.seed(0)\n\n# Generate waveform data\nn_features = 2000\nt = np.pi * np.linspace(0, 1, n_features)\n\n\ndef sqr(x):\n return np.sign(np.cos(x))\n\n\nX = list()\ny = list()\nfor i, (phi, a) in enumerate([(0.5, 0.15), (0.5, 0.6), (0.3, 0.2)]):\n for _ in range(30):\n phase_noise = 0.01 * np.random.normal()\n amplitude_noise = 0.04 * np.random.normal()\n additional_noise = 1 - 2 * np.random.rand(n_features)\n # Make the noise sparse\n additional_noise[np.abs(additional_noise) < 0.997] = 0\n\n X.append(\n 12\n * (\n (a + amplitude_noise) * (sqr(6 * (t + phi + phase_noise)))\n + additional_noise\n )\n )\n y.append(i)\n\nX = np.array(X)\ny = np.array(y)\n\nn_clusters = 3\n\nlabels = (\"Waveform 1\", \"Waveform 2\", \"Waveform 3\")\n\n# Plot the ground-truth labelling\nplt.figure()\nplt.axes([0, 0, 1, 1])\nfor l, c, n in zip(range(n_clusters), \"rgb\", labels):\n lines = plt.plot(X[y == l].T, c=c, alpha=0.5)\n lines[0].set_label(n)\n\nplt.legend(loc=\"best\")\n\nplt.axis(\"tight\")\nplt.axis(\"off\")\nplt.suptitle(\"Ground truth\", size=20)\n\n\n# Plot the distances\nfor index, metric in enumerate([\"cosine\", \"euclidean\", \"cityblock\"]):\n avg_dist = np.zeros((n_clusters, n_clusters))\n plt.figure(figsize=(5, 4.5))\n for i in range(n_clusters):\n for j in range(n_clusters):\n avg_dist[i, j] = pairwise_distances(\n X[y == i], X[y == j], metric=metric\n ).mean()\n avg_dist /= avg_dist.max()\n for i in range(n_clusters):\n for j in range(n_clusters):\n plt.text(\n i,\n j,\n \"%5.3f\" % avg_dist[i, j],\n verticalalignment=\"center\",\n horizontalalignment=\"center\",\n )\n\n plt.imshow(avg_dist, interpolation=\"nearest\", cmap=plt.cm.gnuplot2, vmin=0)\n plt.xticks(range(n_clusters), labels, rotation=45)\n plt.yticks(range(n_clusters), labels)\n plt.colorbar()\n plt.suptitle(\"Interclass %s distances\" % metric, size=18)\n plt.tight_layout()\n\n\n# Plot clustering results\nfor index, metric in enumerate([\"cosine\", \"euclidean\", \"cityblock\"]):\n model = AgglomerativeClustering(\n n_clusters=n_clusters, linkage=\"average\", metric=metric\n )\n model.fit(X)\n plt.figure()\n plt.axes([0, 0, 1, 1])\n for l, c in zip(np.arange(model.n_clusters), \"rgbk\"):\n plt.plot(X[model.labels_ == l].T, c=c, alpha=0.5)\n plt.axis(\"tight\")\n plt.axis(\"off\")\n plt.suptitle(\"AgglomerativeClustering(metric=%s)\" % metric, size=20)\n\n\nplt.show()"
29+
"# Author: Gael Varoquaux\n# License: BSD 3-Clause or CC-0\n\nimport matplotlib.pyplot as plt\nimport matplotlib.patheffects as PathEffects\nimport numpy as np\n\nfrom sklearn.cluster import AgglomerativeClustering\nfrom sklearn.metrics import pairwise_distances\n\nnp.random.seed(0)\n\n# Generate waveform data\nn_features = 2000\nt = np.pi * np.linspace(0, 1, n_features)\n\n\ndef sqr(x):\n return np.sign(np.cos(x))\n\n\nX = list()\ny = list()\nfor i, (phi, a) in enumerate([(0.5, 0.15), (0.5, 0.6), (0.3, 0.2)]):\n for _ in range(30):\n phase_noise = 0.01 * np.random.normal()\n amplitude_noise = 0.04 * np.random.normal()\n additional_noise = 1 - 2 * np.random.rand(n_features)\n # Make the noise sparse\n additional_noise[np.abs(additional_noise) < 0.997] = 0\n\n X.append(\n 12\n * (\n (a + amplitude_noise) * (sqr(6 * (t + phi + phase_noise)))\n + additional_noise\n )\n )\n y.append(i)\n\nX = np.array(X)\ny = np.array(y)\n\nn_clusters = 3\n\nlabels = (\"Waveform 1\", \"Waveform 2\", \"Waveform 3\")\n\ncolors = [\"#f7bd01\", \"#377eb8\", \"#f781bf\"]\n\n# Plot the ground-truth labelling\nplt.figure()\nplt.axes([0, 0, 1, 1])\nfor l, color, n in zip(range(n_clusters), colors, labels):\n lines = plt.plot(X[y == l].T, c=color, alpha=0.5)\n lines[0].set_label(n)\n\nplt.legend(loc=\"best\")\n\nplt.axis(\"tight\")\nplt.axis(\"off\")\nplt.suptitle(\"Ground truth\", size=20, y=1)\n\n\n# Plot the distances\nfor index, metric in enumerate([\"cosine\", \"euclidean\", \"cityblock\"]):\n avg_dist = np.zeros((n_clusters, n_clusters))\n plt.figure(figsize=(5, 4.5))\n for i in range(n_clusters):\n for j in range(n_clusters):\n avg_dist[i, j] = pairwise_distances(\n X[y == i], X[y == j], metric=metric\n ).mean()\n avg_dist /= avg_dist.max()\n for i in range(n_clusters):\n for j in range(n_clusters):\n t = plt.text(\n i,\n j,\n \"%5.3f\" % avg_dist[i, j],\n verticalalignment=\"center\",\n horizontalalignment=\"center\",\n )\n t.set_path_effects(\n [PathEffects.withStroke(linewidth=5, foreground=\"w\", alpha=0.5)]\n )\n\n plt.imshow(avg_dist, interpolation=\"nearest\", cmap=\"cividis\", vmin=0)\n plt.xticks(range(n_clusters), labels, rotation=45)\n plt.yticks(range(n_clusters), labels)\n plt.colorbar()\n plt.suptitle(\"Interclass %s distances\" % metric, size=18, y=1)\n plt.tight_layout()\n\n\n# Plot clustering results\nfor index, metric in enumerate([\"cosine\", \"euclidean\", \"cityblock\"]):\n model = AgglomerativeClustering(\n n_clusters=n_clusters, linkage=\"average\", metric=metric\n )\n model.fit(X)\n plt.figure()\n plt.axes([0, 0, 1, 1])\n for l, color in zip(np.arange(model.n_clusters), colors):\n plt.plot(X[model.labels_ == l].T, c=color, alpha=0.5)\n plt.axis(\"tight\")\n plt.axis(\"off\")\n plt.suptitle(\"AgglomerativeClustering(metric=%s)\" % metric, size=20, y=1)\n\n\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/scikit-learn-docs.zip

-3.94 KB
Binary file not shown.
22 Bytes
48 Bytes
21 Bytes
-99 Bytes
-6.54 KB

0 commit comments

Comments
 (0)