
Commit 078b4cc

Pushing the docs to dev/ for branch: master, commit 6680bffb15b523c8beaa0bc90719239dab080b27

1 parent a29f85e · commit 078b4cc

1,176 files changed: +3900 / -3808 lines


dev/_downloads/52c5954be0a0672abf5902b09fa6997d/plot_nca_illustration.py

31 additions, 28 deletions
@@ -3,10 +3,10 @@
 Neighborhood Components Analysis Illustration
 =============================================
 
-An example illustrating the goal of learning a distance metric that maximizes
-the nearest neighbors classification accuracy. The example is solely for
-illustration purposes. Please refer to the :ref:`User Guide <nca>` for
-more information.
+This example illustrates a learned distance metric that maximizes
+the nearest neighbors classification accuracy. It provides a visual
+representation of this metric compared to the original point
+space. Please refer to the :ref:`User Guide <nca>` for more information.
 """
 
 # License: BSD 3 clause
@@ -20,23 +20,31 @@
 
 print(__doc__)
 
-random_state = 0
+##############################################################################
+# Original points
+# ---------------
+# First we create a data set of 9 samples from 3 classes, and plot the points
+# in the original space. For this example, we focus on the classification of
+# point no. 3. The thickness of a link between point no. 3 and another point
+# is proportional to their distance.
 
-# Create a tiny data set of 9 samples from 3 classes
 X, y = make_classification(n_samples=9, n_features=2, n_informative=2,
                            n_redundant=0, n_classes=3, n_clusters_per_class=1,
-                           class_sep=1.0, random_state=random_state)
+                           class_sep=1.0, random_state=0)
 
-# Plot the points in the original space
-plt.figure()
+plt.figure(1)
 ax = plt.gca()
-
-# Draw the graph nodes
 for i in range(X.shape[0]):
     ax.text(X[i, 0], X[i, 1], str(i), va='center', ha='center')
     ax.scatter(X[i, 0], X[i, 1], s=300, c=cm.Set1(y[[i]]), alpha=0.4)
 
-def p_i(X, i):
+ax.set_title("Original points")
+ax.axes.get_xaxis().set_visible(False)
+ax.axes.get_yaxis().set_visible(False)
+ax.axis('equal')  # so that boundaries are displayed correctly as circles
+
+
+def link_thickness_i(X, i):
     diff_embedded = X[i] - X
     dist_embedded = np.einsum('ij,ij->i', diff_embedded,
                               diff_embedded)
@@ -52,34 +60,30 @@ def p_i(X, i):
 def relate_point(X, i, ax):
     pt_i = X[i]
     for j, pt_j in enumerate(X):
-        thickness = p_i(X, i)
+        thickness = link_thickness_i(X, i)
         if i != j:
             line = ([pt_i[0], pt_j[0]], [pt_i[1], pt_j[1]])
             ax.plot(*line, c=cm.Set1(y[j]),
                     linewidth=5*thickness[j])
 
 
-# we consider only point 3
 i = 3
-
-# Plot bonds linked to sample i in the original space
 relate_point(X, i, ax)
-ax.set_title("Original points")
-ax.axes.get_xaxis().set_visible(False)
-ax.axes.get_yaxis().set_visible(False)
-ax.axis('equal')
+plt.show()
 
-# Learn an embedding with NeighborhoodComponentsAnalysis
-nca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=random_state)
+##############################################################################
+# Learning an embedding
+# ---------------------
+# We use :class:`~sklearn.neighbors.NeighborhoodComponentsAnalysis` to learn an
+# embedding and plot the points after the transformation. We then take the
+# embedding and find the nearest neighbors.
+
+nca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=0)
 nca = nca.fit(X, y)
 
-# Plot the points after transformation with NeighborhoodComponentsAnalysis
-plt.figure()
+plt.figure(2)
 ax2 = plt.gca()
-
-# Get the embedding and find the new nearest neighbors
 X_embedded = nca.transform(X)
-
 relate_point(X_embedded, i, ax2)
 
 for i in range(len(X)):
@@ -88,7 +92,6 @@ def relate_point(X, i, ax):
     ax2.scatter(X_embedded[i, 0], X_embedded[i, 1], s=300, c=cm.Set1(y[[i]]),
                 alpha=0.4)
 
-# Make axes equal so that boundaries are displayed correctly as circles
 ax2.set_title("NCA embedding")
 ax2.axes.get_xaxis().set_visible(False)
 ax2.axes.get_yaxis().set_visible(False)
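
The renamed link_thickness_i helper computes a softmax over negative squared distances, which is the weight NCA assigns to each neighbor of point i. Below is a minimal standalone sketch of that computation. It uses scipy.special.logsumexp directly rather than the sklearn.utils.fixes re-export imported in the diff (an assumption that scipy is available), and the three points are made up for illustration.

    import numpy as np
    from scipy.special import logsumexp

    X = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 2.0]])  # toy points (illustrative)
    i = 0  # the reference point

    diff = X[i] - X                              # differences to every point
    sq_dist = np.einsum('ij,ij->i', diff, diff)  # squared Euclidean distances
    sq_dist[i] = np.inf                          # a point is never its own neighbor

    # Softmax over negative squared distances: subtracting the log-sum-exp in
    # log space avoids under/overflow before exponentiating.
    weights = np.exp(-sq_dist - logsumexp(-sq_dist))
    print(weights)  # -> [0.     0.9526 0.0474]; the weights sum to 1

Closer points get exponentially larger weights, which is why the links drawn by relate_point get thicker as points move nearer in the learned embedding.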

dev/_downloads/953f706a40d84ccda2146f9de7386779/plot_nca_illustration.ipynb

38 additions, 2 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Neighborhood Components Analysis Illustration\n\n\nAn example illustrating the goal of learning a distance metric that maximizes\nthe nearest neighbors classification accuracy. The example is solely for\nillustration purposes. Please refer to the `User Guide <nca>` for\nmore information.\n"
+"\n# Neighborhood Components Analysis Illustration\n\n\nThis example illustrates a learned distance metric that maximizes\nthe nearest neighbors classification accuracy. It provides a visual\nrepresentation of this metric compared to the original point\nspace. Please refer to the `User Guide <nca>` for more information.\n"
 ]
 },
 {
@@ -26,7 +26,43 @@
 },
 "outputs": [],
 "source": [
-"# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import make_classification\nfrom sklearn.neighbors import NeighborhoodComponentsAnalysis\nfrom matplotlib import cm\nfrom sklearn.utils.fixes import logsumexp\n\nprint(__doc__)\n\nrandom_state = 0\n\n# Create a tiny data set of 9 samples from 3 classes\nX, y = make_classification(n_samples=9, n_features=2, n_informative=2,\n                           n_redundant=0, n_classes=3, n_clusters_per_class=1,\n                           class_sep=1.0, random_state=random_state)\n\n# Plot the points in the original space\nplt.figure()\nax = plt.gca()\n\n# Draw the graph nodes\nfor i in range(X.shape[0]):\n    ax.text(X[i, 0], X[i, 1], str(i), va='center', ha='center')\n    ax.scatter(X[i, 0], X[i, 1], s=300, c=cm.Set1(y[[i]]), alpha=0.4)\n\ndef p_i(X, i):\n    diff_embedded = X[i] - X\n    dist_embedded = np.einsum('ij,ij->i', diff_embedded,\n                              diff_embedded)\n    dist_embedded[i] = np.inf\n\n    # compute exponentiated distances (use the log-sum-exp trick to\n    # avoid numerical instabilities\n    exp_dist_embedded = np.exp(-dist_embedded -\n                               logsumexp(-dist_embedded))\n    return exp_dist_embedded\n\n\ndef relate_point(X, i, ax):\n    pt_i = X[i]\n    for j, pt_j in enumerate(X):\n        thickness = p_i(X, i)\n        if i != j:\n            line = ([pt_i[0], pt_j[0]], [pt_i[1], pt_j[1]])\n            ax.plot(*line, c=cm.Set1(y[j]),\n                    linewidth=5*thickness[j])\n\n\n# we consider only point 3\ni = 3\n\n# Plot bonds linked to sample i in the original space\nrelate_point(X, i, ax)\nax.set_title(\"Original points\")\nax.axes.get_xaxis().set_visible(False)\nax.axes.get_yaxis().set_visible(False)\nax.axis('equal')\n\n# Learn an embedding with NeighborhoodComponentsAnalysis\nnca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=random_state)\nnca = nca.fit(X, y)\n\n# Plot the points after transformation with NeighborhoodComponentsAnalysis\nplt.figure()\nax2 = plt.gca()\n\n# Get the embedding and find the new nearest neighbors\nX_embedded = nca.transform(X)\n\nrelate_point(X_embedded, i, ax2)\n\nfor i in range(len(X)):\n    ax2.text(X_embedded[i, 0], X_embedded[i, 1], str(i),\n             va='center', ha='center')\n    ax2.scatter(X_embedded[i, 0], X_embedded[i, 1], s=300, c=cm.Set1(y[[i]]),\n                alpha=0.4)\n\n# Make axes equal so that boundaries are displayed correctly as circles\nax2.set_title(\"NCA embedding\")\nax2.axes.get_xaxis().set_visible(False)\nax2.axes.get_yaxis().set_visible(False)\nax2.axis('equal')\nplt.show()"
+"# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import make_classification\nfrom sklearn.neighbors import NeighborhoodComponentsAnalysis\nfrom matplotlib import cm\nfrom sklearn.utils.fixes import logsumexp\n\nprint(__doc__)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Original points\n---------------\nFirst we create a data set of 9 samples from 3 classes, and plot the points\nin the original space. For this example, we focus on the classification of\npoint no. 3. The thickness of a link between point no. 3 and another point\nis proportional to their distance.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"X, y = make_classification(n_samples=9, n_features=2, n_informative=2,\n                           n_redundant=0, n_classes=3, n_clusters_per_class=1,\n                           class_sep=1.0, random_state=0)\n\nplt.figure(1)\nax = plt.gca()\nfor i in range(X.shape[0]):\n    ax.text(X[i, 0], X[i, 1], str(i), va='center', ha='center')\n    ax.scatter(X[i, 0], X[i, 1], s=300, c=cm.Set1(y[[i]]), alpha=0.4)\n\nax.set_title(\"Original points\")\nax.axes.get_xaxis().set_visible(False)\nax.axes.get_yaxis().set_visible(False)\nax.axis('equal')  # so that boundaries are displayed correctly as circles\n\n\ndef link_thickness_i(X, i):\n    diff_embedded = X[i] - X\n    dist_embedded = np.einsum('ij,ij->i', diff_embedded,\n                              diff_embedded)\n    dist_embedded[i] = np.inf\n\n    # compute exponentiated distances (use the log-sum-exp trick to\n    # avoid numerical instabilities\n    exp_dist_embedded = np.exp(-dist_embedded -\n                               logsumexp(-dist_embedded))\n    return exp_dist_embedded\n\n\ndef relate_point(X, i, ax):\n    pt_i = X[i]\n    for j, pt_j in enumerate(X):\n        thickness = link_thickness_i(X, i)\n        if i != j:\n            line = ([pt_i[0], pt_j[0]], [pt_i[1], pt_j[1]])\n            ax.plot(*line, c=cm.Set1(y[j]),\n                    linewidth=5*thickness[j])\n\n\ni = 3\nrelate_point(X, i, ax)\nplt.show()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Learning an embedding\n---------------------\nWe use :class:`~sklearn.neighbors.NeighborhoodComponentsAnalysis` to learn an\nembedding and plot the points after the transformation. We then take the\nembedding and find the nearest neighbors.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"nca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=0)\nnca = nca.fit(X, y)\n\nplt.figure(2)\nax2 = plt.gca()\nX_embedded = nca.transform(X)\nrelate_point(X_embedded, i, ax2)\n\nfor i in range(len(X)):\n    ax2.text(X_embedded[i, 0], X_embedded[i, 1], str(i),\n             va='center', ha='center')\n    ax2.scatter(X_embedded[i, 0], X_embedded[i, 1], s=300, c=cm.Set1(y[[i]]),\n                alpha=0.4)\n\nax2.set_title(\"NCA embedding\")\nax2.axes.get_xaxis().set_visible(False)\nax2.axes.get_yaxis().set_visible(False)\nax2.axis('equal')\nplt.show()"
 ]
 }
 ],
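
Since the notebook is regenerated from the .py example, a quick sanity check on the new cell layout is to load it with the standard-library json module and list the cell types. A minimal sketch, assuming the notebook path from this commit is available in your local checkout:

    import json

    path = "dev/_downloads/953f706a40d84ccda2146f9de7386779/plot_nca_illustration.ipynb"
    with open(path) as f:
        nb = json.load(f)

    for k, cell in enumerate(nb["cells"]):
        # sphinx-gallery stores each cell's source as one string in a list
        src = cell["source"][0].strip() if cell["source"] else ""
        first_line = src.splitlines()[0][:60] if src else ""
        print(k, cell["cell_type"], first_line)

After this commit the expected sequence is: markdown (title), code (imports), markdown ("Original points"), code, markdown ("Learning an embedding"), code.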
Binary files changed (contents not shown):

dev/_downloads/scikit-learn-docs.pdf  (-2.16 KB)
dev/_images/iris.png and other image files  (0 Bytes, -3 Bytes, -411 Bytes)
