
Commit 078b4cc

Pushing the docs to dev/ for branch: master, commit 6680bffb15b523c8beaa0bc90719239dab080b27

1 parent a29f85e · commit 078b4cc

1,176 files changed: +3900 / -3808 lines


dev/_downloads/52c5954be0a0672abf5902b09fa6997d/plot_nca_illustration.py

31 additions, 28 deletions
@@ -3,10 +3,10 @@
 Neighborhood Components Analysis Illustration
 =============================================
 
-An example illustrating the goal of learning a distance metric that maximizes
-the nearest neighbors classification accuracy. The example is solely for
-illustration purposes. Please refer to the :ref:`User Guide <nca>` for
-more information.
+This example illustrates a learned distance metric that maximizes
+the nearest neighbors classification accuracy. It provides a visual
+representation of this metric compared to the original point
+space. Please refer to the :ref:`User Guide <nca>` for more information.
 """
 
 # License: BSD 3 clause
@@ -20,23 +20,31 @@
 
 print(__doc__)
 
-random_state = 0
+##############################################################################
+# Original points
+# ---------------
+# First we create a data set of 9 samples from 3 classes, and plot the points
+# in the original space. For this example, we focus on the classification of
+# point no. 3. The thickness of a link between point no. 3 and another point
+# is proportional to their distance.
 
-# Create a tiny data set of 9 samples from 3 classes
 X, y = make_classification(n_samples=9, n_features=2, n_informative=2,
                            n_redundant=0, n_classes=3, n_clusters_per_class=1,
-                           class_sep=1.0, random_state=random_state)
+                           class_sep=1.0, random_state=0)
 
-# Plot the points in the original space
-plt.figure()
+plt.figure(1)
 ax = plt.gca()
-
-# Draw the graph nodes
 for i in range(X.shape[0]):
     ax.text(X[i, 0], X[i, 1], str(i), va='center', ha='center')
     ax.scatter(X[i, 0], X[i, 1], s=300, c=cm.Set1(y[[i]]), alpha=0.4)
 
-def p_i(X, i):
+ax.set_title("Original points")
+ax.axes.get_xaxis().set_visible(False)
+ax.axes.get_yaxis().set_visible(False)
+ax.axis('equal')  # so that boundaries are displayed correctly as circles
+
+
+def link_thickness_i(X, i):
     diff_embedded = X[i] - X
     dist_embedded = np.einsum('ij,ij->i', diff_embedded,
                               diff_embedded)
@@ -52,34 +60,30 @@ def p_i(X, i):
 def relate_point(X, i, ax):
     pt_i = X[i]
     for j, pt_j in enumerate(X):
-        thickness = p_i(X, i)
+        thickness = link_thickness_i(X, i)
         if i != j:
             line = ([pt_i[0], pt_j[0]], [pt_i[1], pt_j[1]])
             ax.plot(*line, c=cm.Set1(y[j]),
                     linewidth=5*thickness[j])
 
 
-# we consider only point 3
 i = 3
-
-# Plot bonds linked to sample i in the original space
 relate_point(X, i, ax)
-ax.set_title("Original points")
-ax.axes.get_xaxis().set_visible(False)
-ax.axes.get_yaxis().set_visible(False)
-ax.axis('equal')
+plt.show()
 
-# Learn an embedding with NeighborhoodComponentsAnalysis
-nca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=random_state)
+##############################################################################
+# Learning an embedding
+# ---------------------
+# We use :class:`~sklearn.neighbors.NeighborhoodComponentsAnalysis` to learn an
+# embedding and plot the points after the transformation. We then take the
+# embedding and find the nearest neighbors.
+
+nca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=0)
 nca = nca.fit(X, y)
 
-# Plot the points after transformation with NeighborhoodComponentsAnalysis
-plt.figure()
+plt.figure(2)
 ax2 = plt.gca()
-
-# Get the embedding and find the new nearest neighbors
 X_embedded = nca.transform(X)
-
 relate_point(X_embedded, i, ax2)
 
 for i in range(len(X)):
@@ -88,7 +92,6 @@ def relate_point(X, i, ax):
     ax2.scatter(X_embedded[i, 0], X_embedded[i, 1], s=300, c=cm.Set1(y[[i]]),
                 alpha=0.4)
 
-# Make axes equal so that boundaries are displayed correctly as circles
 ax2.set_title("NCA embedding")
 ax2.axes.get_xaxis().set_visible(False)
 ax2.axes.get_yaxis().set_visible(False)
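
The renamed link_thickness_i helper computes a softmax over negative squared distances, which is the weight NCA assigns to each neighbor of point i. Below is a minimal standalone sketch of that computation. It uses scipy.special.logsumexp directly rather than the sklearn.utils.fixes re-export imported in the diff (an assumption that scipy is available), and the three points are made up for illustration.

    import numpy as np
    from scipy.special import logsumexp

    X = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 2.0]])  # toy points (illustrative)
    i = 0  # the reference point

    diff = X[i] - X                              # differences to every point
    sq_dist = np.einsum('ij,ij->i', diff, diff)  # squared Euclidean distances
    sq_dist[i] = np.inf                          # a point is never its own neighbor

    # Softmax over negative squared distances: subtracting the log-sum-exp in
    # log space avoids under/overflow before exponentiating.
    weights = np.exp(-sq_dist - logsumexp(-sq_dist))
    print(weights)  # -> [0.     0.9526 0.0474]; the weights sum to 1

Closer points get exponentially larger weights, which is why the links drawn by relate_point get thicker as points move nearer in the learned embedding.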

dev/_downloads/953f706a40d84ccda2146f9de7386779/plot_nca_illustration.ipynb

38 additions, 2 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Neighborhood Components Analysis Illustration\n\n\nAn example illustrating the goal of learning a distance metric that maximizes\nthe nearest neighbors classification accuracy. The example is solely for\nillustration purposes. Please refer to the `User Guide <nca>` for\nmore information.\n"
+"\n# Neighborhood Components Analysis Illustration\n\n\nThis example illustrates a learned distance metric that maximizes\nthe nearest neighbors classification accuracy. It provides a visual\nrepresentation of this metric compared to the original point\nspace. Please refer to the `User Guide <nca>` for more information.\n"
 ]
 },
 {
@@ -26,7 +26,43 @@
 },
 "outputs": [],
 "source": [
-"# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import make_classification\nfrom sklearn.neighbors import NeighborhoodComponentsAnalysis\nfrom matplotlib import cm\nfrom sklearn.utils.fixes import logsumexp\n\nprint(__doc__)\n\nrandom_state = 0\n\n# Create a tiny data set of 9 samples from 3 classes\nX, y = make_classification(n_samples=9, n_features=2, n_informative=2,\n                           n_redundant=0, n_classes=3, n_clusters_per_class=1,\n                           class_sep=1.0, random_state=random_state)\n\n# Plot the points in the original space\nplt.figure()\nax = plt.gca()\n\n# Draw the graph nodes\nfor i in range(X.shape[0]):\n    ax.text(X[i, 0], X[i, 1], str(i), va='center', ha='center')\n    ax.scatter(X[i, 0], X[i, 1], s=300, c=cm.Set1(y[[i]]), alpha=0.4)\n\ndef p_i(X, i):\n    diff_embedded = X[i] - X\n    dist_embedded = np.einsum('ij,ij->i', diff_embedded,\n                              diff_embedded)\n    dist_embedded[i] = np.inf\n\n    # compute exponentiated distances (use the log-sum-exp trick to\n    # avoid numerical instabilities\n    exp_dist_embedded = np.exp(-dist_embedded -\n                               logsumexp(-dist_embedded))\n    return exp_dist_embedded\n\n\ndef relate_point(X, i, ax):\n    pt_i = X[i]\n    for j, pt_j in enumerate(X):\n        thickness = p_i(X, i)\n        if i != j:\n            line = ([pt_i[0], pt_j[0]], [pt_i[1], pt_j[1]])\n            ax.plot(*line, c=cm.Set1(y[j]),\n                    linewidth=5*thickness[j])\n\n\n# we consider only point 3\ni = 3\n\n# Plot bonds linked to sample i in the original space\nrelate_point(X, i, ax)\nax.set_title(\"Original points\")\nax.axes.get_xaxis().set_visible(False)\nax.axes.get_yaxis().set_visible(False)\nax.axis('equal')\n\n# Learn an embedding with NeighborhoodComponentsAnalysis\nnca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=random_state)\nnca = nca.fit(X, y)\n\n# Plot the points after transformation with NeighborhoodComponentsAnalysis\nplt.figure()\nax2 = plt.gca()\n\n# Get the embedding and find the new nearest neighbors\nX_embedded = nca.transform(X)\n\nrelate_point(X_embedded, i, ax2)\n\nfor i in range(len(X)):\n    ax2.text(X_embedded[i, 0], X_embedded[i, 1], str(i),\n             va='center', ha='center')\n    ax2.scatter(X_embedded[i, 0], X_embedded[i, 1], s=300, c=cm.Set1(y[[i]]),\n                alpha=0.4)\n\n# Make axes equal so that boundaries are displayed correctly as circles\nax2.set_title(\"NCA embedding\")\nax2.axes.get_xaxis().set_visible(False)\nax2.axes.get_yaxis().set_visible(False)\nax2.axis('equal')\nplt.show()"
+"# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import make_classification\nfrom sklearn.neighbors import NeighborhoodComponentsAnalysis\nfrom matplotlib import cm\nfrom sklearn.utils.fixes import logsumexp\n\nprint(__doc__)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Original points\n---------------\nFirst we create a data set of 9 samples from 3 classes, and plot the points\nin the original space. For this example, we focus on the classification of\npoint no. 3. The thickness of a link between point no. 3 and another point\nis proportional to their distance.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"X, y = make_classification(n_samples=9, n_features=2, n_informative=2,\n                           n_redundant=0, n_classes=3, n_clusters_per_class=1,\n                           class_sep=1.0, random_state=0)\n\nplt.figure(1)\nax = plt.gca()\nfor i in range(X.shape[0]):\n    ax.text(X[i, 0], X[i, 1], str(i), va='center', ha='center')\n    ax.scatter(X[i, 0], X[i, 1], s=300, c=cm.Set1(y[[i]]), alpha=0.4)\n\nax.set_title(\"Original points\")\nax.axes.get_xaxis().set_visible(False)\nax.axes.get_yaxis().set_visible(False)\nax.axis('equal')  # so that boundaries are displayed correctly as circles\n\n\ndef link_thickness_i(X, i):\n    diff_embedded = X[i] - X\n    dist_embedded = np.einsum('ij,ij->i', diff_embedded,\n                              diff_embedded)\n    dist_embedded[i] = np.inf\n\n    # compute exponentiated distances (use the log-sum-exp trick to\n    # avoid numerical instabilities\n    exp_dist_embedded = np.exp(-dist_embedded -\n                               logsumexp(-dist_embedded))\n    return exp_dist_embedded\n\n\ndef relate_point(X, i, ax):\n    pt_i = X[i]\n    for j, pt_j in enumerate(X):\n        thickness = link_thickness_i(X, i)\n        if i != j:\n            line = ([pt_i[0], pt_j[0]], [pt_i[1], pt_j[1]])\n            ax.plot(*line, c=cm.Set1(y[j]),\n                    linewidth=5*thickness[j])\n\n\ni = 3\nrelate_point(X, i, ax)\nplt.show()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Learning an embedding\n---------------------\nWe use :class:`~sklearn.neighbors.NeighborhoodComponentsAnalysis` to learn an\nembedding and plot the points after the transformation. We then take the\nembedding and find the nearest neighbors.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"nca = NeighborhoodComponentsAnalysis(max_iter=30, random_state=0)\nnca = nca.fit(X, y)\n\nplt.figure(2)\nax2 = plt.gca()\nX_embedded = nca.transform(X)\nrelate_point(X_embedded, i, ax2)\n\nfor i in range(len(X)):\n    ax2.text(X_embedded[i, 0], X_embedded[i, 1], str(i),\n             va='center', ha='center')\n    ax2.scatter(X_embedded[i, 0], X_embedded[i, 1], s=300, c=cm.Set1(y[[i]]),\n                alpha=0.4)\n\nax2.set_title(\"NCA embedding\")\nax2.axes.get_xaxis().set_visible(False)\nax2.axes.get_yaxis().set_visible(False)\nax2.axis('equal')\nplt.show()"
 ]
 }
 ],
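
Since the notebook is regenerated from the .py example, a quick sanity check on the new cell layout is to load it with the standard-library json module and list the cell types. A minimal sketch, assuming the notebook path from this commit is available in your local checkout:

    import json

    path = "dev/_downloads/953f706a40d84ccda2146f9de7386779/plot_nca_illustration.ipynb"
    with open(path) as f:
        nb = json.load(f)

    for k, cell in enumerate(nb["cells"]):
        # sphinx-gallery stores each cell's source as one string in a list
        src = cell["source"][0].strip() if cell["source"] else ""
        first_line = src.splitlines()[0][:60] if src else ""
        print(k, cell["cell_type"], first_line)

After this commit the expected sequence is: markdown (title), code (imports), markdown ("Original points"), code, markdown ("Learning an embedding"), code.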
Binary files changed (contents not shown):

dev/_downloads/scikit-learn-docs.pdf  (-2.16 KB)
dev/_images/iris.png and other image files  (0 Bytes, -3 Bytes, -411 Bytes)
