Skip to content

Commit 39f3fb1

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 1393a63c6c95e7015b2c85f919679182366335fe
1 parent 7905c0d commit 39f3fb1

File tree

1,228 files changed

+4774
-4407
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,228 files changed

+4774
-4407
lines changed
Binary file not shown.

dev/_downloads/1dcd684ce26b8c407ec2c2d2101c5c73/plot_kernel_ridge_regression.py

Lines changed: 65 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -14,41 +14,20 @@
1414
1515
This example illustrates both methods on an artificial dataset, which
1616
consists of a sinusoidal target function and strong noise added to every fifth
17-
datapoint. The first figure compares the learned model of KRR and SVR when both
18-
complexity/regularization and bandwidth of the RBF kernel are optimized using
19-
grid-search. The learned functions are very similar; however, fitting KRR is
20-
approx. seven times faster than fitting SVR (both with grid-search). However,
21-
prediction of 100000 target values is more than three times faster with SVR
22-
since it has learned a sparse model using only approx. 1/3 of the 100 training
23-
datapoints as support vectors.
24-
25-
The next figure compares the time for fitting and prediction of KRR and SVR for
26-
different sizes of the training set. Fitting KRR is faster than SVR for medium-
27-
sized training sets (less than 1000 samples); however, for larger training sets
28-
SVR scales better. With regard to prediction time, SVR is faster than
29-
KRR for all sizes of the training set because of the learned sparse
30-
solution. Note that the degree of sparsity and thus the prediction time depends
31-
on the parameters epsilon and C of the SVR.
17+
datapoint.
3218
3319
"""
34-
20+
# %%
3521
# Authors: Jan Hendrik Metzen <[email protected]>
3622
# License: BSD 3 clause
3723

38-
import time
39-
24+
# %%
25+
# Generate sample data
26+
# --------------------
4027
import numpy as np
4128

42-
from sklearn.svm import SVR
43-
from sklearn.model_selection import GridSearchCV
44-
from sklearn.model_selection import learning_curve
45-
from sklearn.kernel_ridge import KernelRidge
46-
import matplotlib.pyplot as plt
47-
4829
rng = np.random.RandomState(42)
4930

50-
# #############################################################################
51-
# Generate sample data
5231
X = 5 * rng.rand(10000, 1)
5332
y = np.sin(X).ravel()
5433

@@ -57,9 +36,16 @@
5736

5837
X_plot = np.linspace(0, 5, 100000)[:, None]
5938

60-
# #############################################################################
61-
# Fit regression model
39+
# %%
40+
# Construct the kernel-based regression models
41+
# --------------------------------------------
42+
43+
from sklearn.model_selection import GridSearchCV
44+
from sklearn.svm import SVR
45+
from sklearn.kernel_ridge import KernelRidge
46+
6247
train_size = 100
48+
6349
svr = GridSearchCV(
6450
SVR(kernel="rbf", gamma=0.1),
6551
param_grid={"C": [1e0, 1e1, 1e2, 1e3], "gamma": np.logspace(-2, 2, 5)},
@@ -70,14 +56,22 @@
7056
param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3], "gamma": np.logspace(-2, 2, 5)},
7157
)
7258

59+
# %%
60+
# Compare times of SVR and Kernel Ridge Regression
61+
# ------------------------------------------------
62+
63+
import time
64+
7365
t0 = time.time()
7466
svr.fit(X[:train_size], y[:train_size])
7567
svr_fit = time.time() - t0
68+
print(f"Best SVR with params: {svr.best_params_} and R2 score: {svr.best_score_:.3f}")
7669
print("SVR complexity and bandwidth selected and model fitted in %.3f s" % svr_fit)
7770

7871
t0 = time.time()
7972
kr.fit(X[:train_size], y[:train_size])
8073
kr_fit = time.time() - t0
74+
print(f"Best KRR with params: {kr.best_params_} and R2 score: {kr.best_score_:.3f}")
8175
print("KRR complexity and bandwidth selected and model fitted in %.3f s" % kr_fit)
8276

8377
sv_ratio = svr.best_estimator_.support_.shape[0] / train_size
@@ -93,9 +87,12 @@
9387
kr_predict = time.time() - t0
9488
print("KRR prediction for %d inputs in %.3f s" % (X_plot.shape[0], kr_predict))
9589

96-
97-
# #############################################################################
90+
# %%
9891
# Look at the results
92+
# -------------------
93+
94+
import matplotlib.pyplot as plt
95+
9996
sv_ind = svr.best_estimator_.support_
10097
plt.scatter(
10198
X[sv_ind],
@@ -119,15 +116,28 @@
119116
plt.xlabel("data")
120117
plt.ylabel("target")
121118
plt.title("SVR versus Kernel Ridge")
122-
plt.legend()
119+
_ = plt.legend()
120+
121+
# %%
122+
# The previous figure compares the learned model of KRR and SVR when both
123+
# complexity/regularization and bandwidth of the RBF kernel are optimized using
124+
# grid-search. The learned functions are very similar; however, fitting KRR is
125+
# approximately 3-4 times faster than fitting SVR (both with grid-search).
126+
#
127+
# Prediction of 100000 target values could be in theory approximately three
128+
# times faster with SVR since it has learned a sparse model using only
129+
# approximately 1/3 of the training datapoints as support vectors. However, in
130+
# practice, this is not necessarily the case because of implementation details
131+
# in the way the kernel function is computed for each model that can make the
132+
# KRR model as fast or even faster despite computing more arithmetic
133+
# operations.
134+
135+
# %%
136+
# Visualize training and prediction times
137+
# ---------------------------------------
123138

124-
# Visualize training and prediction time
125139
plt.figure()
126140

127-
# Generate sample data
128-
X = 5 * rng.rand(10000, 1)
129-
y = np.sin(X).ravel()
130-
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
131141
sizes = np.logspace(1, 3.8, 7).astype(int)
132142
for name, estimator in {
133143
"KRR": KernelRidge(kernel="rbf", alpha=0.01, gamma=10),
@@ -164,9 +174,24 @@
164174
plt.xlabel("Train size")
165175
plt.ylabel("Time (seconds)")
166176
plt.title("Execution Time")
167-
plt.legend(loc="best")
177+
_ = plt.legend(loc="best")
178+
179+
# %%
180+
# This figure compares the time for fitting and prediction of KRR and SVR for
181+
# different sizes of the training set. Fitting KRR is faster than SVR for
182+
# medium-sized training sets (less than a few thousand samples); however, for
183+
# larger training sets SVR scales better. With regard to prediction time, SVR
184+
# should be faster than KRR for all sizes of the training set because of the
185+
# learned sparse solution, however this is not necessarily the case in practice
186+
# because of implementation details. Note that the degree of sparsity and thus
187+
# the prediction time depends on the parameters epsilon and C of the SVR.
188+
189+
# %%
190+
# Visualize the learning curves
191+
# -----------------------------
192+
193+
from sklearn.model_selection import learning_curve
168194

169-
# Visualize learning curves
170195
plt.figure()
171196

172197
svr = SVR(kernel="rbf", C=1e1, gamma=0.1)
@@ -188,8 +213,8 @@
188213
cv=10,
189214
)
190215

191-
plt.plot(train_sizes, -test_scores_svr.mean(1), "o-", color="r", label="SVR")
192-
plt.plot(train_sizes, -test_scores_kr.mean(1), "o-", color="g", label="KRR")
216+
plt.plot(train_sizes, -test_scores_kr.mean(1), "o--", color="g", label="KRR")
217+
plt.plot(train_sizes, -test_scores_svr.mean(1), "o--", color="r", label="SVR")
193218
plt.xlabel("Train size")
194219
plt.ylabel("Mean Squared Error")
195220
plt.title("Learning curves")
Binary file not shown.

0 commit comments

Comments
 (0)