
Commit b4b604d

Pushing the docs to 1.0/ for branch: 1.0.X, commit 7e1e6d09bcc2eaeba98f7e737aac2ac782f0e5f1
1 parent e3b2464 commit b4b604d

File tree: 2,608 files changed (+23,823 additions, -14,265 deletions)


1.0/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 2eb390e0f69446f70670632283d7ce9d
+config: 7b04e98bf471141a6eaf08dcf325cbf7
 tags: 645f666f9bcd5a90fca523b33c5a78b7

1.0/_downloads/006fc185672e58b056a5c134db26935c/plot_coin_segmentation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/00ae629d652473137a3905a5e08ea815/plot_iris_dtc.py

Lines changed: 20 additions & 10 deletions
@@ -1,7 +1,7 @@
 """
-================================================================
-Plot the decision surface of a decision tree on the iris dataset
-================================================================
+=======================================================================
+Plot the decision surface of decision trees trained on the iris dataset
+=======================================================================
 
 Plot the decision surface of a decision tree trained on pairs
 of features of the iris dataset.
@@ -14,20 +14,24 @@
 
 We also show the tree structure of a model built on all of the features.
 """
+# %%
+# First load the copy of the Iris dataset shipped with scikit-learn:
+from sklearn.datasets import load_iris
+
+iris = load_iris()
+
 
+# %%
+# Display the decision functions of trees trained on all pairs of features.
 import numpy as np
 import matplotlib.pyplot as plt
-
-from sklearn.datasets import load_iris
-from sklearn.tree import DecisionTreeClassifier, plot_tree
+from sklearn.tree import DecisionTreeClassifier
 
 # Parameters
 n_classes = 3
 plot_colors = "ryb"
 plot_step = 0.02
 
-# Load data
-iris = load_iris()
 
 for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
     # We only take the two corresponding features
@@ -67,11 +71,17 @@
         s=15,
     )
 
-plt.suptitle("Decision surface of a decision tree using paired features")
+plt.suptitle("Decision surface of decision trees trained on pairs of features")
 plt.legend(loc="lower right", borderpad=0, handletextpad=0)
-plt.axis("tight")
+_ = plt.axis("tight")
+
+# %%
+# Display the structure of a single decision tree trained on all the features
+# together.
+from sklearn.tree import plot_tree
 
 plt.figure()
 clf = DecisionTreeClassifier().fit(iris.data, iris.target)
 plot_tree(clf, filled=True)
+plt.title("Decision tree trained on all the iris features")
 plt.show()
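
The updated example loads the iris data up front, plots a decision surface per pair of features, and then plots the tree structure in a separate step. As a minimal, self-contained sketch of the same idea (not part of the commit; the feature pair [0, 2] and the grid step are illustrative choices), one could reproduce a single decision surface plus the full tree like this:

# Minimal sketch: decision surface for one pair of iris features, then the
# structure of a tree trained on all four features.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

iris = load_iris()
X, y = iris.data[:, [0, 2]], iris.target  # sepal length vs. petal length

clf = DecisionTreeClassifier().fit(X, y)

# Evaluate the classifier on a grid covering the two selected features.
xx, yy = np.meshgrid(
    np.arange(X[:, 0].min() - 1, X[:, 0].max() + 1, 0.02),
    np.arange(X[:, 1].min() - 1, X[:, 1].max() + 1, 0.02),
)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu, alpha=0.5)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu, edgecolor="black", s=15)
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[2])
plt.title("Decision surface for one pair of features")

# Tree structure learned from all the iris features.
plt.figure()
plot_tree(DecisionTreeClassifier().fit(iris.data, iris.target), filled=True)
plt.show()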

1.0/_downloads/023324c27491610e7c0ccff87c59abf9/plot_kernel_pca.py

Lines changed: 149 additions & 56 deletions
@@ -3,70 +3,163 @@
 Kernel PCA
 ==========
 
-This example shows that Kernel PCA is able to find a projection of the data
-that makes data linearly separable.
+This example shows the difference between the Principal Components Analysis
+(:class:`~sklearn.decomposition.PCA`) and its kernalized version
+(:class:`~sklearn.decomposition.KernelPCA`).
 
+On the one hand, we show that :class:`~sklearn.decomposition.KernelPCA` is able
+to find a projection of the data which linearly separates them while it is not the case
+with :class:`~sklearn.decomposition.PCA`.
+
+Finally, we show that inverting this projection is an approximation with
+:class:`~sklearn.decomposition.KernelPCA`, while it is exact with
+:class:`~sklearn.decomposition.PCA`.
 """
 
 # Authors: Mathieu Blondel
 #          Andreas Mueller
+#          Guillaume Lemaitre
 # License: BSD 3 clause
 
-import numpy as np
+# %%
+# Projecting data: `PCA` vs. `KernelPCA`
+# --------------------------------------
+#
+# In this section, we show the advantages of using a kernel when
+# projecting data using a Principal Component Analysis (PCA). We create a
+# dataset made of two nested circles.
+from sklearn.datasets import make_circles
+from sklearn.model_selection import train_test_split
+
+X, y = make_circles(n_samples=1_000, factor=0.3, noise=0.05, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)
+
+# %%
+# Let's have a quick first look at the generated dataset.
 import matplotlib.pyplot as plt
 
+_, (train_ax, test_ax) = plt.subplots(ncols=2, sharex=True, sharey=True, figsize=(8, 4))
+
+train_ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
+train_ax.set_ylabel("Feature #1")
+train_ax.set_xlabel("Feature #0")
+train_ax.set_title("Training data")
+
+test_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+test_ax.set_xlabel("Feature #0")
+_ = test_ax.set_title("Testing data")
+
+# %%
+# The samples from each class cannot be linearly separated: there is no
+# straight line that can split the samples of the inner set from the outer
+# set.
+#
+# Now, we will use PCA with and without a kernel to see what is the effect of
+# using such a kernel. The kernel used here is a radial basis function (RBF)
+# kernel.
 from sklearn.decomposition import PCA, KernelPCA
-from sklearn.datasets import make_circles
 
-np.random.seed(0)
-
-X, y = make_circles(n_samples=400, factor=0.3, noise=0.05)
-
-kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
-X_kpca = kpca.fit_transform(X)
-X_back = kpca.inverse_transform(X_kpca)
-pca = PCA()
-X_pca = pca.fit_transform(X)
-
-# Plot results
-
-plt.figure()
-plt.subplot(2, 2, 1, aspect="equal")
-plt.title("Original space")
-reds = y == 0
-blues = y == 1
-
-plt.scatter(X[reds, 0], X[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=20, edgecolor="k")
-plt.xlabel("$x_1$")
-plt.ylabel("$x_2$")
-
-X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50), np.linspace(-1.5, 1.5, 50))
-X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
-# projection on the first principal component (in the phi space)
-Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
-plt.contour(X1, X2, Z_grid, colors="grey", linewidths=1, origin="lower")
-
-plt.subplot(2, 2, 2, aspect="equal")
-plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Projection by PCA")
-plt.xlabel("1st principal component")
-plt.ylabel("2nd component")
-
-plt.subplot(2, 2, 3, aspect="equal")
-plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Projection by KPCA")
-plt.xlabel(r"1st principal component in space induced by $\phi$")
-plt.ylabel("2nd component")
-
-plt.subplot(2, 2, 4, aspect="equal")
-plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red", s=20, edgecolor="k")
-plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue", s=20, edgecolor="k")
-plt.title("Original space after inverse transform")
-plt.xlabel("$x_1$")
-plt.ylabel("$x_2$")
-
-plt.tight_layout()
-plt.show()
+pca = PCA(n_components=2)
+kernel_pca = KernelPCA(
+    n_components=None, kernel="rbf", gamma=10, fit_inverse_transform=True, alpha=0.1
+)
+
+X_test_pca = pca.fit(X_train).transform(X_test)
+X_test_kernel_pca = kernel_pca.fit(X_train).transform(X_test)
+
+# %%
+fig, (orig_data_ax, pca_proj_ax, kernel_pca_proj_ax) = plt.subplots(
+    ncols=3, figsize=(14, 4)
+)
+
+orig_data_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+orig_data_ax.set_ylabel("Feature #1")
+orig_data_ax.set_xlabel("Feature #0")
+orig_data_ax.set_title("Testing data")
+
+pca_proj_ax.scatter(X_test_pca[:, 0], X_test_pca[:, 1], c=y_test)
+pca_proj_ax.set_ylabel("Principal component #1")
+pca_proj_ax.set_xlabel("Principal component #0")
+pca_proj_ax.set_title("Projection of testing data\n using PCA")
+
+kernel_pca_proj_ax.scatter(X_test_kernel_pca[:, 0], X_test_kernel_pca[:, 1], c=y_test)
+kernel_pca_proj_ax.set_ylabel("Principal component #1")
+kernel_pca_proj_ax.set_xlabel("Principal component #0")
+_ = kernel_pca_proj_ax.set_title("Projection of testing data\n using KernelPCA")
+
+# %%
+# We recall that PCA transforms the data linearly. Intuitively, it means that
+# the coordinate system will be centered, rescaled on each component
+# with respected to its variance and finally be rotated.
+# The obtained data from this transformation is isotropic and can now be
+# projected on its _principal components_.
+#
+# Thus, looking at the projection made using PCA (i.e. the middle figure), we
+# see that there is no change regarding the scaling; indeed the data being two
+# concentric circles centered in zero, the original data is already isotropic.
+# However, we can see that the data have been rotated. As a
+# conclusion, we see that such a projection would not help if define a linear
+# classifier to distinguish samples from both classes.
+#
+# Using a kernel allows to make a non-linear projection. Here, by using an RBF
+# kernel, we expect that the projection will unfold the dataset while keeping
+# approximately preserving the relative distances of pairs of data points that
+# are close to one another in the original space.
+#
+# We observe such behaviour in the figure on the right: the samples of a given
+# class are closer to each other than the samples from the opposite class,
+# untangling both sample sets. Now, we can use a linear classifier to separate
+# the samples from the two classes.
+#
+# Projecting into the original feature space
+# ------------------------------------------
+#
+# One particularity to have in mind when using
+# :class:`~sklearn.decomposition.KernelPCA` is related to the reconstruction
+# (i.e. the back projection in the original feature space). With
+# :class:`~sklearn.decomposition.PCA`, the reconstruction will be exact if
+# `n_components` is the same than the number of original features.
+# This is the case in this example.
+#
+# We can investigate if we get the original dataset when back projecting with
+# :class:`~sklearn.decomposition.KernelPCA`.
+X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test))
+X_reconstructed_kernel_pca = kernel_pca.inverse_transform(kernel_pca.transform(X_test))
+
+# %%
+fig, (orig_data_ax, pca_back_proj_ax, kernel_pca_back_proj_ax) = plt.subplots(
+    ncols=3, sharex=True, sharey=True, figsize=(13, 4)
+)
+
+orig_data_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
+orig_data_ax.set_ylabel("Feature #1")
+orig_data_ax.set_xlabel("Feature #0")
+orig_data_ax.set_title("Original test data")
+
+pca_back_proj_ax.scatter(X_reconstructed_pca[:, 0], X_reconstructed_pca[:, 1], c=y_test)
+pca_back_proj_ax.set_xlabel("Feature #0")
+pca_back_proj_ax.set_title("Reconstruction via PCA")
+
+kernel_pca_back_proj_ax.scatter(
+    X_reconstructed_kernel_pca[:, 0], X_reconstructed_kernel_pca[:, 1], c=y_test
+)
+kernel_pca_back_proj_ax.set_xlabel("Feature #0")
+_ = kernel_pca_back_proj_ax.set_title("Reconstruction via KernelPCA")
+
+# %%
+# While we see a perfect reconstruction with
+# :class:`~sklearn.decomposition.PCA` we observe a different result for
+# :class:`~sklearn.decomposition.KernelPCA`.
+#
+# Indeed, :meth:`~sklearn.decomposition.KernelPCA.inverse_transform` cannot
+# rely on an analytical back-projection and thus an extact reconstruction.
+# Instead, a :class:`~sklearn.kernel_ridge.KernelRidge` is internally trained
+# to learn a mapping from the kernalized PCA basis to the original feature
+# space. This method therefore comes with an approximation introducing small
+# differences when back projecting in the original feature space.
+#
+# To improve the reconstruction using
+# :meth:`~sklearn.decomposition.KernelPCA.inverse_transform`, one can tune
+# `alpha` in :class:`~sklearn.decomposition.KernelPCA`, the regularization term
+# which controls the reliance on the training data during the training of
+# the mapping.
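
The new narrative above contrasts the exact PCA round-trip with the approximate KernelPCA one. As a minimal sketch of that point (not part of the commit; the mean-squared-error check is an illustrative addition), assuming the same dataset and estimator settings as the rewritten example:

# Minimal sketch: compare the exact PCA reconstruction with the approximate
# KernelPCA reconstruction (learned internally via kernel ridge regression).
import numpy as np
from sklearn.datasets import make_circles
from sklearn.decomposition import PCA, KernelPCA
from sklearn.model_selection import train_test_split

X, y = make_circles(n_samples=1_000, factor=0.3, noise=0.05, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)

pca = PCA(n_components=2).fit(X_train)
kernel_pca = KernelPCA(
    n_components=None, kernel="rbf", gamma=10, fit_inverse_transform=True, alpha=0.1
).fit(X_train)

X_back_pca = pca.inverse_transform(pca.transform(X_test))
X_back_kpca = kernel_pca.inverse_transform(kernel_pca.transform(X_test))

# PCA keeps as many components as features here, so its round-trip is exact
# up to floating point; the KernelPCA round-trip is only an approximation.
print("PCA reconstruction MSE:      ", np.mean((X_test - X_back_pca) ** 2))
print("KernelPCA reconstruction MSE:", np.mean((X_test - X_back_kpca) ** 2))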

1.0/_downloads/02f111fb3dd79805b161e14c564184fc/plot_sgd_comparison.ipynb

Lines changed: 3 additions & 3 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Comparing various online solvers\n\nAn example showing how different online solvers perform\non the hand-written digits dataset.\n"
+"\n# Comparing various online solvers\nAn example showing how different online solvers perform\non the hand-written digits dataset.\n"
 ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=100)),\n (\"ASGD\", SGDClassifier(average=True)),\n (\"Perceptron\", Perceptron()),\n (\n \"Passive-Aggressive I\",\n PassiveAggressiveClassifier(loss=\"hinge\", C=1.0, tol=1e-4),\n ),\n (\n \"Passive-Aggressive II\",\n PassiveAggressiveClassifier(loss=\"squared_hinge\", C=1.0, tol=1e-4),\n ),\n (\"SAG\", LogisticRegression(solver=\"sag\", tol=1e-1, C=1.0e4 / X.shape[0])),\n]\n\nxx = 1.0 - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=i, random_state=rng\n )\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
+"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\n# Number of rounds to fit and evaluate an estimator.\nrounds = 10\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=110)),\n (\"ASGD\", SGDClassifier(max_iter=110, average=True)),\n (\"Perceptron\", Perceptron(max_iter=110)),\n (\n \"Passive-Aggressive I\",\n PassiveAggressiveClassifier(max_iter=110, loss=\"hinge\", C=1.0, tol=1e-4),\n ),\n (\n \"Passive-Aggressive II\",\n PassiveAggressiveClassifier(\n max_iter=110, loss=\"squared_hinge\", C=1.0, tol=1e-4\n ),\n ),\n (\n \"SAG\",\n LogisticRegression(max_iter=110, solver=\"sag\", tol=1e-1, C=1.0e4 / X.shape[0]),\n ),\n]\n\nxx = 1.0 - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=i, random_state=rng\n )\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
 ]
 }
 ],
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/0486bf9e537e44cedd2a236d034bcd90/plot_pcr_vs_pls.ipynb

Lines changed: 1 addition & 1 deletion
@@ -125,7 +125,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/055e8313e28f2f3b5fd508054dfe5fe0/plot_roc_crossval.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/05ca8a4e90b4cc2acd69f9e24b4a1f3a/plot_classifier_chain_yeast.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/061854726c268bcdae5cd1c330cf8c75/plot_sgd_penalties.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,

1.0/_downloads/067cd5d39b097d2c49dd98f563dac13a/plot_iterative_imputer_variants_comparison.ipynb

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.9.9"
 }
 },
 "nbformat": 4,
