
Commit b4b604d

Pushing the docs to 1.0/ for branch: 1.0.X, commit 7e1e6d09bcc2eaeba98f7e737aac2ac782f0e5f1
1 parent e3b2464 commit b4b604d

File tree: 2,608 files changed (+23,823 additions, -14,265 deletions)


1.0/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 2eb390e0f69446f70670632283d7ce9d
+config: 7b04e98bf471141a6eaf08dcf325cbf7
 tags: 645f666f9bcd5a90fca523b33c5a78b7

1.0/_downloads/006fc185672e58b056a5c134db26935c/plot_coin_segmentation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/00ae629d652473137a3905a5e08ea815/plot_iris_dtc.py

Lines changed: 20 additions & 10 deletions
@@ -1,7 +1,7 @@
 """
-================================================================
-Plot the decision surface of a decision tree on the iris dataset
-================================================================
+=======================================================================
+Plot the decision surface of decision trees trained on the iris dataset
+=======================================================================
 
 Plot the decision surface of a decision tree trained on pairs
 of features of the iris dataset.
@@ -14,20 +14,24 @@
 
 We also show the tree structure of a model built on all of the features.
 """
+# %%
+# First load the copy of the Iris dataset shipped with scikit-learn:
+from sklearn.datasets import load_iris
+
+iris = load_iris()
+
 
+# %%
+# Display the decision functions of trees trained on all pairs of features.
 import numpy as np
 import matplotlib.pyplot as plt
-
-from sklearn.datasets import load_iris
-from sklearn.tree import DecisionTreeClassifier, plot_tree
+from sklearn.tree import DecisionTreeClassifier
 
 # Parameters
 n_classes = 3
 plot_colors = "ryb"
 plot_step = 0.02
 
-# Load data
-iris = load_iris()
 
 for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
     # We only take the two corresponding features
@@ -67,11 +71,17 @@
         s=15,
     )
 
-plt.suptitle("Decision surface of a decision tree using paired features")
+plt.suptitle("Decision surface of decision trees trained on pairs of features")
 plt.legend(loc="lower right", borderpad=0, handletextpad=0)
-plt.axis("tight")
+_ = plt.axis("tight")
+
+# %%
+# Display the structure of a single decision tree trained on all the features
+# together.
+from sklearn.tree import plot_tree
 
 plt.figure()
 clf = DecisionTreeClassifier().fit(iris.data, iris.target)
 plot_tree(clf, filled=True)
+plt.title("Decision tree trained on all the iris features")
 plt.show()
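
The updated example loads the iris data up front, plots a decision surface per pair of features, and then plots the tree structure in a separate step. As a minimal, self-contained sketch of the same idea (not part of the commit; the feature pair [0, 2] and the grid step are illustrative choices), one could reproduce a single decision surface plus the full tree like this:

# Minimal sketch: decision surface for one pair of iris features, then the
# structure of a tree trained on all four features.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

iris = load_iris()
X, y = iris.data[:, [0, 2]], iris.target  # sepal length vs. petal length

clf = DecisionTreeClassifier().fit(X, y)

# Evaluate the classifier on a grid covering the two selected features.
xx, yy = np.meshgrid(
    np.arange(X[:, 0].min() - 1, X[:, 0].max() + 1, 0.02),
    np.arange(X[:, 1].min() - 1, X[:, 1].max() + 1, 0.02),
)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu, alpha=0.5)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu, edgecolor="black", s=15)
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[2])
plt.title("Decision surface for one pair of features")

# Tree structure learned from all the iris features.
plt.figure()
plot_tree(DecisionTreeClassifier().fit(iris.data, iris.target), filled=True)
plt.show()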

1.0/_downloads/023324c27491610e7c0ccff87c59abf9/plot_kernel_pca.py

Lines changed: 149 additions & 56 deletions
@@ -3,70 +3,163 @@
 Kernel PCA
 ==========
 
-This example shows that Kernel PCA is able to find a projection of the data
-that makes data linearly separable.
+This example shows the difference between the Principal Components Analysis
+(:class:`~sklearn.decomposition.PCA`) and its kernalized version
+(:class:`~sklearn.decomposition.KernelPCA`).
 
+On the one hand, we show that :class:`~sklearn.decomposition.KernelPCA` is able
+to find a projection of the data which linearly separates them while it is not the case
+with :class:`~sklearn.decomposition.PCA`.
+
+Finally, we show that inverting this projection is an approximation with
+:class:`~sklearn.decomposition.KernelPCA`, while it is exact with
+:class:`~sklearn.decomposition.PCA`.
 """
 
 # Authors: Mathieu Blondel
 #          Andreas Mueller
+#          Guillaume Lemaitre
 # License: BSD 3 clause
 
-import numpy as np
+# %%
+# Projecting data: `PCA` vs. `KernelPCA`
+# --------------------------------------
+#
+# In this section, we show the advantages of using a kernel when
+# projecting data using a Principal Component Analysis (PCA). We create a
+# dataset made of two nested circles.
+from sklearn.datasets import make_circles
+from sklearn.model_selection import train_test_split
+
+X, y = make_circles(n_samples=1_000, factor=0.3, noise=0.05, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)
+
+# %%
+# Let's have a quick first look at the generated dataset.
 import matplotlib.pyplot as plt
 
+_, (train_ax, test_ax) = plt.subplots(ncols=2, sharex=True, sharey=True, figsize=(8, 4))
+
+train_ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
+train_ax.set_ylabel("Feature #1")
+train_ax.set_xlabel("Feature #0")
+train_ax.set_title("Training data")
+
+test_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+test_ax.set_xlabel("Feature #0")
+_ = test_ax.set_title("Testing data")
+
+# %%
+# The samples from each class cannot be linearly separated: there is no
+# straight line that can split the samples of the inner set from the outer
+# set.
+#
+# Now, we will use PCA with and without a kernel to see what is the effect of
+# using such a kernel. The kernel used here is a radial basis function (RBF)
+# kernel.
 from sklearn.decomposition import PCA, KernelPCA
-from sklearn.datasets import make_circles
 
-np.random.seed(0)
-
-X, y = make_circles(n_samples=400, factor=0.3, noise=0.05)
-
-kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
-X_kpca = kpca.fit_transform(X)
-X_back = kpca.inverse_transform(X_kpca)
-pca = PCA()
-X_pca = pca.fit_transform(X)
-
-# Plot results
-
-plt.figure()
-plt.subplot(2, 2, 1, aspect="equal")
-plt.title("Original space")
-reds = y == 0
-blues = y == 1
-
-plt.scatter(X[reds, 0], X[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=20, edgecolor="k")
-plt.xlabel("$x_1$")
-plt.ylabel("$x_2$")
-
-X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50), np.linspace(-1.5, 1.5, 50))
-X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
-# projection on the first principal component (in the phi space)
-Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
-plt.contour(X1, X2, Z_grid, colors="grey", linewidths=1, origin="lower")
-
-plt.subplot(2, 2, 2, aspect="equal")
-plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Projection by PCA")
-plt.xlabel("1st principal component")
-plt.ylabel("2nd component")
-
-plt.subplot(2, 2, 3, aspect="equal")
-plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Projection by KPCA")
-plt.xlabel(r"1st principal component in space induced by $\phi$")
-plt.ylabel("2nd component")
-
-plt.subplot(2, 2, 4, aspect="equal")
-plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Original space after inverse transform")
-plt.xlabel("$x_1$")
-plt.ylabel("$x_2$")
-
-plt.tight_layout()
-plt.show()
+pca = PCA(n_components=2)
+kernel_pca = KernelPCA(
+    n_components=None, kernel="rbf", gamma=10, fit_inverse_transform=True, alpha=0.1
+)
+
+X_test_pca = pca.fit(X_train).transform(X_test)
+X_test_kernel_pca = kernel_pca.fit(X_train).transform(X_test)
+
+# %%
+fig, (orig_data_ax, pca_proj_ax, kernel_pca_proj_ax) = plt.subplots(
+    ncols=3, figsize=(14, 4)
+)
+
+orig_data_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+orig_data_ax.set_ylabel("Feature #1")
+orig_data_ax.set_xlabel("Feature #0")
+orig_data_ax.set_title("Testing data")
+
+pca_proj_ax.scatter(X_test_pca[:, 0], X_test_pca[:, 1], c=y_test)
+pca_proj_ax.set_ylabel("Principal component #1")
+pca_proj_ax.set_xlabel("Principal component #0")
+pca_proj_ax.set_title("Projection of testing data\n using PCA")
+
+kernel_pca_proj_ax.scatter(X_test_kernel_pca[:, 0], X_test_kernel_pca[:, 1], c=y_test)
+kernel_pca_proj_ax.set_ylabel("Principal component #1")
+kernel_pca_proj_ax.set_xlabel("Principal component #0")
+_ = kernel_pca_proj_ax.set_title("Projection of testing data\n using KernelPCA")
+
+# %%
+# We recall that PCA transforms the data linearly. Intuitively, it means that
+# the coordinate system will be centered, rescaled on each component
+# with respected to its variance and finally be rotated.
+# The obtained data from this transformation is isotropic and can now be
+# projected on its _principal components_.
+#
+# Thus, looking at the projection made using PCA (i.e. the middle figure), we
+# see that there is no change regarding the scaling; indeed the data being two
+# concentric circles centered in zero, the original data is already isotropic.
+# However, we can see that the data have been rotated. As a
+# conclusion, we see that such a projection would not help if define a linear
+# classifier to distinguish samples from both classes.
+#
+# Using a kernel allows to make a non-linear projection. Here, by using an RBF
+# kernel, we expect that the projection will unfold the dataset while keeping
+# approximately preserving the relative distances of pairs of data points that
+# are close to one another in the original space.
+#
+# We observe such behaviour in the figure on the right: the samples of a given
+# class are closer to each other than the samples from the opposite class,
+# untangling both sample sets. Now, we can use a linear classifier to separate
+# the samples from the two classes.
+#
+# Projecting into the original feature space
+# ------------------------------------------
+#
+# One particularity to have in mind when using
+# :class:`~sklearn.decomposition.KernelPCA` is related to the reconstruction
+# (i.e. the back projection in the original feature space). With
+# :class:`~sklearn.decomposition.PCA`, the reconstruction will be exact if
+# `n_components` is the same than the number of original features.
+# This is the case in this example.
+#
+# We can investigate if we get the original dataset when back projecting with
+# :class:`~sklearn.decomposition.KernelPCA`.
+X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test))
+X_reconstructed_kernel_pca = kernel_pca.inverse_transform(kernel_pca.transform(X_test))
+
+# %%
+fig, (orig_data_ax, pca_back_proj_ax, kernel_pca_back_proj_ax) = plt.subplots(
+    ncols=3, sharex=True, sharey=True, figsize=(13, 4)
+)
+
+orig_data_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+orig_data_ax.set_ylabel("Feature #1")
+orig_data_ax.set_xlabel("Feature #0")
+orig_data_ax.set_title("Original test data")
+
+pca_back_proj_ax.scatter(X_reconstructed_pca[:, 0], X_reconstructed_pca[:, 1], c=y_test)
+pca_back_proj_ax.set_xlabel("Feature #0")
+pca_back_proj_ax.set_title("Reconstruction via PCA")
+
+kernel_pca_back_proj_ax.scatter(
+    X_reconstructed_kernel_pca[:, 0], X_reconstructed_kernel_pca[:, 1], c=y_test
+)
+kernel_pca_back_proj_ax.set_xlabel("Feature #0")
+_ = kernel_pca_back_proj_ax.set_title("Reconstruction via KernelPCA")
+
+# %%
+# While we see a perfect reconstruction with
+# :class:`~sklearn.decomposition.PCA` we observe a different result for
+# :class:`~sklearn.decomposition.KernelPCA`.
+#
+# Indeed, :meth:`~sklearn.decomposition.KernelPCA.inverse_transform` cannot
+# rely on an analytical back-projection and thus an extact reconstruction.
+# Instead, a :class:`~sklearn.kernel_ridge.KernelRidge` is internally trained
+# to learn a mapping from the kernalized PCA basis to the original feature
+# space. This method therefore comes with an approximation introducing small
+# differences when back projecting in the original feature space.
+#
+# To improve the reconstruction using
+# :meth:`~sklearn.decomposition.KernelPCA.inverse_transform`, one can tune
+# `alpha` in :class:`~sklearn.decomposition.KernelPCA`, the regularization term
+# which controls the reliance on the training data during the training of
+# the mapping.
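
The new narrative above contrasts the exact PCA round-trip with the approximate KernelPCA one. As a minimal sketch of that point (not part of the commit; the mean-squared-error check is an illustrative addition), assuming the same dataset and estimator settings as the rewritten example:

# Minimal sketch: compare the exact PCA reconstruction with the approximate
# KernelPCA reconstruction (learned internally via kernel ridge regression).
import numpy as np
from sklearn.datasets import make_circles
from sklearn.decomposition import PCA, KernelPCA
from sklearn.model_selection import train_test_split

X, y = make_circles(n_samples=1_000, factor=0.3, noise=0.05, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)

pca = PCA(n_components=2).fit(X_train)
kernel_pca = KernelPCA(
    n_components=None, kernel="rbf", gamma=10, fit_inverse_transform=True, alpha=0.1
).fit(X_train)

X_back_pca = pca.inverse_transform(pca.transform(X_test))
X_back_kpca = kernel_pca.inverse_transform(kernel_pca.transform(X_test))

# PCA keeps as many components as features here, so its round-trip is exact
# up to floating point; the KernelPCA round-trip is only an approximation.
print("PCA reconstruction MSE:      ", np.mean((X_test - X_back_pca) ** 2))
print("KernelPCA reconstruction MSE:", np.mean((X_test - X_back_kpca) ** 2))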

1.0/_downloads/02f111fb3dd79805b161e14c564184fc/plot_sgd_comparison.ipynb

Lines changed: 3 additions & 3 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Comparing various online solvers\n\nAn example showing how different online solvers perform\non the hand-written digits dataset.\n"
+"\n# Comparing various online solvers\nAn example showing how different online solvers perform\non the hand-written digits dataset.\n"
 ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=100)),\n (\"ASGD\", SGDClassifier(average=True)),\n (\"Perceptron\", Perceptron()),\n (\n \"Passive-Aggressive I\",\n PassiveAggressiveClassifier(loss=\"hinge\", C=1.0, tol=1e-4),\n ),\n (\n \"Passive-Aggressive II\",\n PassiveAggressiveClassifier(loss=\"squared_hinge\", C=1.0, tol=1e-4),\n ),\n (\"SAG\", LogisticRegression(solver=\"sag\", tol=1e-1, C=1.0e4 / X.shape[0])),\n]\n\nxx = 1.0 - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=i, random_state=rng\n )\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
+"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\n# Number of rounds to fit and evaluate an estimator.\nrounds = 10\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=110)),\n (\"ASGD\", SGDClassifier(max_iter=110, average=True)),\n (\"Perceptron\", Perceptron(max_iter=110)),\n (\n \"Passive-Aggressive I\",\n PassiveAggressiveClassifier(max_iter=110, loss=\"hinge\", C=1.0, tol=1e-4),\n ),\n (\n \"Passive-Aggressive II\",\n PassiveAggressiveClassifier(\n max_iter=110, loss=\"squared_hinge\", C=1.0, tol=1e-4\n ),\n ),\n (\n \"SAG\",\n LogisticRegression(max_iter=110, solver=\"sag\", tol=1e-1, C=1.0e4 / X.shape[0]),\n ),\n]\n\nxx = 1.0 - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=i, random_state=rng\n )\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
 ]
 }
 ],
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/0486bf9e537e44cedd2a236d034bcd90/plot_pcr_vs_pls.ipynb

Lines changed: 1 addition & 1 deletion
@@ -125,7 +125,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/055e8313e28f2f3b5fd508054dfe5fe0/plot_roc_crossval.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/05ca8a4e90b4cc2acd69f9e24b4a1f3a/plot_classifier_chain_yeast.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/061854726c268bcdae5cd1c330cf8c75/plot_sgd_penalties.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/067cd5d39b097d2c49dd98f563dac13a/plot_iterative_imputer_variants_comparison.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,
