Kernel PCA
==========

-This example shows that Kernel PCA is able to find a projection of the data
-that makes data linearly separable.
+This example shows the difference between Principal Component Analysis
+(:class:`~sklearn.decomposition.PCA`) and its kernelized version
+(:class:`~sklearn.decomposition.KernelPCA`).

+First, we show that :class:`~sklearn.decomposition.KernelPCA` is able to find
+a projection of the data which makes it linearly separable, which is not the
+case with :class:`~sklearn.decomposition.PCA`.
+
+Then, we show that inverting this projection is only an approximation with
+:class:`~sklearn.decomposition.KernelPCA`, while it is exact with
+:class:`~sklearn.decomposition.PCA`.
"""

# Authors: Mathieu Blondel
#          Andreas Mueller
+#          Guillaume Lemaitre
# License: BSD 3 clause

-import numpy as np
+# %%
+# Projecting data: `PCA` vs. `KernelPCA`
+# --------------------------------------
+#
+# In this section, we show the advantages of using a kernel when
+# projecting data using a Principal Component Analysis (PCA). We create a
+# dataset made of two nested circles.
+from sklearn.datasets import make_circles
+from sklearn.model_selection import train_test_split
+
+X, y = make_circles(n_samples=1_000, factor=0.3, noise=0.05, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)
+
+# %%
+# Let's have a quick first look at the generated dataset.
import matplotlib.pyplot as plt

+_, (train_ax, test_ax) = plt.subplots(ncols=2, sharex=True, sharey=True, figsize=(8, 4))
+
+train_ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
+train_ax.set_ylabel("Feature #1")
+train_ax.set_xlabel("Feature #0")
+train_ax.set_title("Training data")
+
+test_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+test_ax.set_xlabel("Feature #0")
+_ = test_ax.set_title("Testing data")
+
+# %%
+# The samples from each class cannot be linearly separated: there is no
+# straight line that can split the samples of the inner set from the outer
+# set.
+#
+# Now, we will use PCA with and without a kernel to see the effect of using
+# such a kernel. The kernel used here is a radial basis function (RBF)
+# kernel.
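+#
+# As a reminder, an RBF kernel measures the similarity between two samples
+# :math:`x` and :math:`y` as :math:`k(x, y) = \exp(-\gamma \|x - y\|^2)`,
+# so that two samples which are far apart in the original space have a
+# similarity close to zero; the `gamma` parameter passed below controls how
+# fast this similarity decays with the distance.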
from sklearn.decomposition import PCA, KernelPCA
-from sklearn.datasets import make_circles

-np.random.seed(0)
-
-X, y = make_circles(n_samples=400, factor=0.3, noise=0.05)
-
-kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
-X_kpca = kpca.fit_transform(X)
-X_back = kpca.inverse_transform(X_kpca)
-pca = PCA()
-X_pca = pca.fit_transform(X)
-
-# Plot results
-
-plt.figure()
-plt.subplot(2, 2, 1, aspect="equal")
-plt.title("Original space")
-reds = y == 0
-blues = y == 1
-
-plt.scatter(X[reds, 0], X[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=20, edgecolor="k")
-plt.xlabel("$x_1$")
-plt.ylabel("$x_2$")
-
-X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50), np.linspace(-1.5, 1.5, 50))
-X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
-# projection on the first principal component (in the phi space)
-Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
-plt.contour(X1, X2, Z_grid, colors="grey", linewidths=1, origin="lower")
-
-plt.subplot(2, 2, 2, aspect="equal")
-plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Projection by PCA")
-plt.xlabel("1st principal component")
-plt.ylabel("2nd component")
-
-plt.subplot(2, 2, 3, aspect="equal")
-plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Projection by KPCA")
-plt.xlabel(r"1st principal component in space induced by $\phi$")
-plt.ylabel("2nd component")
-
-plt.subplot(2, 2, 4, aspect="equal")
-plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Original space after inverse transform")
-plt.xlabel("$x_1$")
-plt.ylabel("$x_2$")
-
-plt.tight_layout()
-plt.show()
+pca = PCA(n_components=2)
+kernel_pca = KernelPCA(
+    n_components=None, kernel="rbf", gamma=10, fit_inverse_transform=True, alpha=0.1
+)
+
+X_test_pca = pca.fit(X_train).transform(X_test)
+X_test_kernel_pca = kernel_pca.fit(X_train).transform(X_test)
+
+# %%
+fig, (orig_data_ax, pca_proj_ax, kernel_pca_proj_ax) = plt.subplots(
+    ncols=3, figsize=(14, 4)
+)
+
+orig_data_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+orig_data_ax.set_ylabel("Feature #1")
+orig_data_ax.set_xlabel("Feature #0")
+orig_data_ax.set_title("Testing data")
+
+pca_proj_ax.scatter(X_test_pca[:, 0], X_test_pca[:, 1], c=y_test)
+pca_proj_ax.set_ylabel("Principal component #1")
+pca_proj_ax.set_xlabel("Principal component #0")
+pca_proj_ax.set_title("Projection of testing data\n using PCA")
+
+kernel_pca_proj_ax.scatter(X_test_kernel_pca[:, 0], X_test_kernel_pca[:, 1], c=y_test)
+kernel_pca_proj_ax.set_ylabel("Principal component #1")
+kernel_pca_proj_ax.set_xlabel("Principal component #0")
+_ = kernel_pca_proj_ax.set_title("Projection of testing data\n using KernelPCA")
+
+# %%
+# We recall that PCA transforms the data linearly. Intuitively, it means that
+# the coordinate system will be centered, rescaled on each component
+# with respect to its variance and finally rotated.
+# The data obtained from this transformation is isotropic and can now be
+# projected onto its *principal components*.
+#
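+# For the non-whitened PCA fitted here, this boils down to centering the data
+# and rotating it with the orthonormal `components_` matrix, as the small
+# sketch below illustrates.
+import numpy as np
+
+# Re-build the PCA projection by hand: subtract the mean learned on the
+# training data, then rotate with the matrix of principal components.
+manual_projection = (X_test - pca.mean_) @ pca.components_.T
+print(np.allclose(manual_projection, X_test_pca))
+
+# %%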
+# Thus, looking at the projection made using PCA (i.e. the middle figure), we
+# see that there is no change regarding the scaling; indeed, the data being two
+# concentric circles centered at zero, the original data is already isotropic.
+# However, we can see that the data have been rotated. As a conclusion, such a
+# projection would not help if we were to define a linear classifier to
+# distinguish samples from both classes.
+#
+# Using a kernel makes it possible to apply a non-linear projection. Here, by
+# using an RBF kernel, we expect that the projection will unfold the dataset
+# while approximately preserving the relative distances of pairs of data points
+# that are close to one another in the original space.
+#
+# We observe such behaviour in the figure on the right: the samples of a given
+# class are closer to each other than the samples from the opposite class,
+# untangling both sample sets. Now, we can use a linear classifier to separate
+# the samples from the two classes.
+#
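+# For instance, one could fit a plain
+# :class:`~sklearn.linear_model.LogisticRegression` on both projections of the
+# training data and compare its accuracy on the projected test data; the
+# snippet below is only a quick sketch of such a check.
+from sklearn.linear_model import LogisticRegression
+
+# Train the same linear classifier on the PCA and the kernel PCA projections.
+clf_pca = LogisticRegression(max_iter=1_000).fit(pca.transform(X_train), y_train)
+clf_kernel_pca = LogisticRegression(max_iter=1_000).fit(
+    kernel_pca.transform(X_train), y_train
+)
+print(f"Accuracy with the PCA projection: {clf_pca.score(X_test_pca, y_test):.2f}")
+print(
+    "Accuracy with the kernel PCA projection: "
+    f"{clf_kernel_pca.score(X_test_kernel_pca, y_test):.2f}"
+)
+
+# %%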
+# Projecting into the original feature space
+# ------------------------------------------
+#
+# One particularity to keep in mind when using
+# :class:`~sklearn.decomposition.KernelPCA` is related to the reconstruction
+# (i.e. the back projection in the original feature space). With
+# :class:`~sklearn.decomposition.PCA`, the reconstruction will be exact if
+# `n_components` is the same as the number of original features.
+# This is the case in this example.
+#
+# We can investigate whether we get the original dataset when back projecting
+# with :class:`~sklearn.decomposition.KernelPCA`.
+X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test))
+X_reconstructed_kernel_pca = kernel_pca.inverse_transform(kernel_pca.transform(X_test))
+
+# %%
+fig, (orig_data_ax, pca_back_proj_ax, kernel_pca_back_proj_ax) = plt.subplots(
+    ncols=3, sharex=True, sharey=True, figsize=(13, 4)
+)
+
+orig_data_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+orig_data_ax.set_ylabel("Feature #1")
+orig_data_ax.set_xlabel("Feature #0")
+orig_data_ax.set_title("Original test data")
+
+pca_back_proj_ax.scatter(X_reconstructed_pca[:, 0], X_reconstructed_pca[:, 1], c=y_test)
+pca_back_proj_ax.set_xlabel("Feature #0")
+pca_back_proj_ax.set_title("Reconstruction via PCA")
+
+kernel_pca_back_proj_ax.scatter(
+    X_reconstructed_kernel_pca[:, 0], X_reconstructed_kernel_pca[:, 1], c=y_test
+)
+kernel_pca_back_proj_ax.set_xlabel("Feature #0")
+_ = kernel_pca_back_proj_ax.set_title("Reconstruction via KernelPCA")
+
+# %%
+# While we see a perfect reconstruction with
+# :class:`~sklearn.decomposition.PCA`, we observe a different result for
+# :class:`~sklearn.decomposition.KernelPCA`.
+#
+# Indeed, :meth:`~sklearn.decomposition.KernelPCA.inverse_transform` cannot
+# rely on an analytical back-projection, and thus cannot provide an exact
+# reconstruction. Instead, a :class:`~sklearn.kernel_ridge.KernelRidge` is
+# internally trained to learn a mapping from the kernelized PCA basis to the
+# original feature space. This method therefore comes with an approximation
+# that introduces small differences when back projecting into the original
+# feature space.
+#
+# To improve the reconstruction using
+# :meth:`~sklearn.decomposition.KernelPCA.inverse_transform`, one can tune
+# `alpha` in :class:`~sklearn.decomposition.KernelPCA`, the regularization term
+# that controls how strongly the learned mapping relies on the training data.
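+
+# %%
+# As a rough way to quantify this, one can for instance compare the mean
+# squared reconstruction error of both methods; the check below is only an
+# illustrative sketch.
+from sklearn.metrics import mean_squared_error
+
+# The PCA error is essentially zero (exact reconstruction up to numerical
+# precision), while the kernel PCA error stays small but non-zero.
+error_pca = mean_squared_error(X_test, X_reconstructed_pca)
+error_kernel_pca = mean_squared_error(X_test, X_reconstructed_kernel_pca)
+print(f"PCA reconstruction error (MSE): {error_pca:.2e}")
+print(f"Kernel PCA reconstruction error (MSE): {error_kernel_pca:.2e}")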