Commit 7d116c2

Pushing the docs to dev/ for branch: main, commit d1a72af8a6d6c4350ed5bb027af794a8141a251b

1 parent fb4c9ef commit 7d116c2

1,227 files changed: +4798 -4494 lines changed

dev/_downloads/1dcd684ce26b8c407ec2c2d2101c5c73/plot_kernel_ridge_regression.py

Lines changed: 4 additions & 4 deletions

@@ -45,7 +45,7 @@
 from sklearn.kernel_ridge import KernelRidge
 import matplotlib.pyplot as plt
 
-rng = np.random.RandomState(0)
+rng = np.random.RandomState(42)
 
 # #############################################################################
 # Generate sample data
@@ -128,10 +128,10 @@
 X = 5 * rng.rand(10000, 1)
 y = np.sin(X).ravel()
 y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
-sizes = np.logspace(1, 4, 7).astype(int)
+sizes = np.logspace(1, 3.8, 7).astype(int)
 for name, estimator in {
-    "KRR": KernelRidge(kernel="rbf", alpha=0.1, gamma=10),
-    "SVR": SVR(kernel="rbf", C=1e1, gamma=10),
+    "KRR": KernelRidge(kernel="rbf", alpha=0.01, gamma=10),
+    "SVR": SVR(kernel="rbf", C=1e2, gamma=10),
 }.items():
     train_time = []
     test_time = []
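For reference, a standalone NumPy sketch (not part of the commit) of what the new size grid evaluates to: astype(int) truncates, so lowering the top exponent from 4 to 3.8 caps the largest benchmark size at roughly 6309 samples instead of the full 10000-sample dataset, which presumably keeps the most expensive fits in the timing loop affordable.

import numpy as np

# Old grid, exponents 1 .. 4: sizes [10, 31, 100, 316, 1000, 3162, 10000],
# i.e. the largest run trains on the full 10000-sample dataset.
old_sizes = np.logspace(1, 4, 7).astype(int)

# New grid, exponents 1 .. 3.8: sizes approximately
# [10, 29, 85, 251, 735, 2154, 6309], so the top size stays well below 10000.
new_sizes = np.logspace(1, 3.8, 7).astype(int)

print(old_sizes)
print(new_sizes)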

dev/_downloads/788b8c55a85f84a55e652c6048c4f623/plot_bayesian_ridge.py

Lines changed: 29 additions & 15 deletions
@@ -24,14 +24,12 @@
 
 """
 
+# %%
+# Generate simulated data with Gaussian weights
+# ---------------------------------------------
 import numpy as np
-import matplotlib.pyplot as plt
 from scipy import stats
 
-from sklearn.linear_model import BayesianRidge, LinearRegression
-
-# #############################################################################
-# Generating simulated data with Gaussian weights
 np.random.seed(0)
 n_samples, n_features = 100, 100
 X = np.random.randn(n_samples, n_features)  # Create Gaussian data
@@ -40,6 +38,7 @@
 w = np.zeros(n_features)
 # Only keep 10 weights of interest
 relevant_features = np.random.randint(0, n_features, 10)
+
 for i in relevant_features:
     w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))
 # Create noise with a precision alpha of 50.
@@ -48,17 +47,22 @@
 # Create the target
 y = np.dot(X, w) + noise
 
-# #############################################################################
+# %%
 # Fit the Bayesian Ridge Regression and an OLS for comparison
+# -----------------------------------------------------------
+from sklearn.linear_model import BayesianRidge, LinearRegression
+
 clf = BayesianRidge(compute_score=True)
 clf.fit(X, y)
 
 ols = LinearRegression()
 ols.fit(X, y)
 
-# #############################################################################
-# Plot true weights, estimated weights, histogram of the weights, and
-# predictions with standard deviations
+# %%
+# Plot true weights and estimated weights
+# ---------------------------------------
+import matplotlib.pyplot as plt
+
 lw = 2
 plt.figure(figsize=(6, 5))
 plt.title("Weights of the model")
@@ -67,7 +71,11 @@
 plt.plot(ols.coef_, color="navy", linestyle="--", label="OLS estimate")
 plt.xlabel("Features")
 plt.ylabel("Values of the weights")
-plt.legend(loc="best", prop=dict(size=12))
+_ = plt.legend(loc="best", prop=dict(size=12))
+
+# %%
+# Plot histogram of the weights
+# -----------------------------
 
 plt.figure(figsize=(6, 5))
 plt.title("Histogram of the weights")
@@ -80,16 +88,23 @@
 )
 plt.ylabel("Features")
 plt.xlabel("Values of the weights")
-plt.legend(loc="upper left")
+_ = plt.legend(loc="upper left")
+
+# %%
+# Plot marginal log-likelihood
+# ----------------------------
 
 plt.figure(figsize=(6, 5))
 plt.title("Marginal log-likelihood")
 plt.plot(clf.scores_, color="navy", linewidth=lw)
 plt.ylabel("Score")
-plt.xlabel("Iterations")
+_ = plt.xlabel("Iterations")
+
+# %%
+# Plot some predictions for polynomial regression with standard deviations
+# ------------------------------------------------------------------------
 
 
-# Plotting some predictions for polynomial regression
 def f(x, noise_amount):
     y = np.sqrt(x) * np.sin(x)
     noise = np.random.normal(0, 1, len(x))
@@ -117,5 +132,4 @@ def f(x, noise_amount):
 plt.plot(X_plot, y_plot, color="gold", linewidth=lw, label="Ground Truth")
 plt.ylabel("Output y")
 plt.xlabel("Feature X")
-plt.legend(loc="lower left")
-plt.show()
+_ = plt.legend(loc="lower left")
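The recurring change in this file is structural: the script's "# ####..." banner comments become sphinx-gallery notebook-style cells. A minimal sketch of that convention as this diff applies it (illustrative figure only, not the example's code): a "# %%" line opens a new cell, the comment lines directly beneath it are rendered as reST (the dashes underline a section title), and a trailing expression is assigned to _ so its repr does not appear as an output block in the rendered page.

# %%
# A section title for the rendered page
# -------------------------------------
# Comment lines directly under the "# %%" marker become narrative text;
# the code below is the cell's executable body.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1], label="identity")

# Assigning the last expression to _ suppresses its repr (here a
# matplotlib Legend object) in the captured output.
_ = ax.legend(loc="best")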

dev/_downloads/9d2f119ab4a1b6f1454c43b796f2c6a6/plot_kernel_ridge_regression.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Authors: Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\n\nfrom sklearn.svm import SVR\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import learning_curve\nfrom sklearn.kernel_ridge import KernelRidge\nimport matplotlib.pyplot as plt\n\nrng = np.random.RandomState(0)\n\n# #############################################################################\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\n\n# Add noise to targets\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\n\nX_plot = np.linspace(0, 5, 100000)[:, None]\n\n# #############################################################################\n# Fit regression model\ntrain_size = 100\nsvr = GridSearchCV(\n    SVR(kernel=\"rbf\", gamma=0.1),\n    param_grid={\"C\": [1e0, 1e1, 1e2, 1e3], \"gamma\": np.logspace(-2, 2, 5)},\n)\n\nkr = GridSearchCV(\n    KernelRidge(kernel=\"rbf\", gamma=0.1),\n    param_grid={\"alpha\": [1e0, 0.1, 1e-2, 1e-3], \"gamma\": np.logspace(-2, 2, 5)},\n)\n\nt0 = time.time()\nsvr.fit(X[:train_size], y[:train_size])\nsvr_fit = time.time() - t0\nprint(\"SVR complexity and bandwidth selected and model fitted in %.3f s\" % svr_fit)\n\nt0 = time.time()\nkr.fit(X[:train_size], y[:train_size])\nkr_fit = time.time() - t0\nprint(\"KRR complexity and bandwidth selected and model fitted in %.3f s\" % kr_fit)\n\nsv_ratio = svr.best_estimator_.support_.shape[0] / train_size\nprint(\"Support vector ratio: %.3f\" % sv_ratio)\n\nt0 = time.time()\ny_svr = svr.predict(X_plot)\nsvr_predict = time.time() - t0\nprint(\"SVR prediction for %d inputs in %.3f s\" % (X_plot.shape[0], svr_predict))\n\nt0 = time.time()\ny_kr = kr.predict(X_plot)\nkr_predict = time.time() - t0\nprint(\"KRR prediction for %d inputs in %.3f s\" % (X_plot.shape[0], kr_predict))\n\n\n# #############################################################################\n# Look at the results\nsv_ind = svr.best_estimator_.support_\nplt.scatter(\n    X[sv_ind],\n    y[sv_ind],\n    c=\"r\",\n    s=50,\n    label=\"SVR support vectors\",\n    zorder=2,\n    edgecolors=(0, 0, 0),\n)\nplt.scatter(X[:100], y[:100], c=\"k\", label=\"data\", zorder=1, edgecolors=(0, 0, 0))\nplt.plot(\n    X_plot,\n    y_svr,\n    c=\"r\",\n    label=\"SVR (fit: %.3fs, predict: %.3fs)\" % (svr_fit, svr_predict),\n)\nplt.plot(\n    X_plot, y_kr, c=\"g\", label=\"KRR (fit: %.3fs, predict: %.3fs)\" % (kr_fit, kr_predict)\n)\nplt.xlabel(\"data\")\nplt.ylabel(\"target\")\nplt.title(\"SVR versus Kernel Ridge\")\nplt.legend()\n\n# Visualize training and prediction time\nplt.figure()\n\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\nsizes = np.logspace(1, 4, 7).astype(int)\nfor name, estimator in {\n    \"KRR\": KernelRidge(kernel=\"rbf\", alpha=0.1, gamma=10),\n    \"SVR\": SVR(kernel=\"rbf\", C=1e1, gamma=10),\n}.items():\n    train_time = []\n    test_time = []\n    for train_test_size in sizes:\n        t0 = time.time()\n        estimator.fit(X[:train_test_size], y[:train_test_size])\n        train_time.append(time.time() - t0)\n\n        t0 = time.time()\n        estimator.predict(X_plot[:1000])\n        test_time.append(time.time() - t0)\n\n    plt.plot(\n        sizes,\n        train_time,\n        \"o-\",\n        color=\"r\" if name == \"SVR\" else \"g\",\n        label=\"%s (train)\" % name,\n    )\n    plt.plot(\n        sizes,\n        test_time,\n        \"o--\",\n        color=\"r\" if name == \"SVR\" else \"g\",\n        label=\"%s (test)\" % name,\n    )\n\nplt.xscale(\"log\")\nplt.yscale(\"log\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Time (seconds)\")\nplt.title(\"Execution Time\")\nplt.legend(loc=\"best\")\n\n# Visualize learning curves\nplt.figure()\n\nsvr = SVR(kernel=\"rbf\", C=1e1, gamma=0.1)\nkr = KernelRidge(kernel=\"rbf\", alpha=0.1, gamma=0.1)\ntrain_sizes, train_scores_svr, test_scores_svr = learning_curve(\n    svr,\n    X[:100],\n    y[:100],\n    train_sizes=np.linspace(0.1, 1, 10),\n    scoring=\"neg_mean_squared_error\",\n    cv=10,\n)\ntrain_sizes_abs, train_scores_kr, test_scores_kr = learning_curve(\n    kr,\n    X[:100],\n    y[:100],\n    train_sizes=np.linspace(0.1, 1, 10),\n    scoring=\"neg_mean_squared_error\",\n    cv=10,\n)\n\nplt.plot(train_sizes, -test_scores_svr.mean(1), \"o-\", color=\"r\", label=\"SVR\")\nplt.plot(train_sizes, -test_scores_kr.mean(1), \"o-\", color=\"g\", label=\"KRR\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Mean Squared Error\")\nplt.title(\"Learning curves\")\nplt.legend(loc=\"best\")\n\nplt.show()"
+"# Authors: Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\n\nfrom sklearn.svm import SVR\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import learning_curve\nfrom sklearn.kernel_ridge import KernelRidge\nimport matplotlib.pyplot as plt\n\nrng = np.random.RandomState(42)\n\n# #############################################################################\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\n\n# Add noise to targets\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\n\nX_plot = np.linspace(0, 5, 100000)[:, None]\n\n# #############################################################################\n# Fit regression model\ntrain_size = 100\nsvr = GridSearchCV(\n    SVR(kernel=\"rbf\", gamma=0.1),\n    param_grid={\"C\": [1e0, 1e1, 1e2, 1e3], \"gamma\": np.logspace(-2, 2, 5)},\n)\n\nkr = GridSearchCV(\n    KernelRidge(kernel=\"rbf\", gamma=0.1),\n    param_grid={\"alpha\": [1e0, 0.1, 1e-2, 1e-3], \"gamma\": np.logspace(-2, 2, 5)},\n)\n\nt0 = time.time()\nsvr.fit(X[:train_size], y[:train_size])\nsvr_fit = time.time() - t0\nprint(\"SVR complexity and bandwidth selected and model fitted in %.3f s\" % svr_fit)\n\nt0 = time.time()\nkr.fit(X[:train_size], y[:train_size])\nkr_fit = time.time() - t0\nprint(\"KRR complexity and bandwidth selected and model fitted in %.3f s\" % kr_fit)\n\nsv_ratio = svr.best_estimator_.support_.shape[0] / train_size\nprint(\"Support vector ratio: %.3f\" % sv_ratio)\n\nt0 = time.time()\ny_svr = svr.predict(X_plot)\nsvr_predict = time.time() - t0\nprint(\"SVR prediction for %d inputs in %.3f s\" % (X_plot.shape[0], svr_predict))\n\nt0 = time.time()\ny_kr = kr.predict(X_plot)\nkr_predict = time.time() - t0\nprint(\"KRR prediction for %d inputs in %.3f s\" % (X_plot.shape[0], kr_predict))\n\n\n# #############################################################################\n# Look at the results\nsv_ind = svr.best_estimator_.support_\nplt.scatter(\n    X[sv_ind],\n    y[sv_ind],\n    c=\"r\",\n    s=50,\n    label=\"SVR support vectors\",\n    zorder=2,\n    edgecolors=(0, 0, 0),\n)\nplt.scatter(X[:100], y[:100], c=\"k\", label=\"data\", zorder=1, edgecolors=(0, 0, 0))\nplt.plot(\n    X_plot,\n    y_svr,\n    c=\"r\",\n    label=\"SVR (fit: %.3fs, predict: %.3fs)\" % (svr_fit, svr_predict),\n)\nplt.plot(\n    X_plot, y_kr, c=\"g\", label=\"KRR (fit: %.3fs, predict: %.3fs)\" % (kr_fit, kr_predict)\n)\nplt.xlabel(\"data\")\nplt.ylabel(\"target\")\nplt.title(\"SVR versus Kernel Ridge\")\nplt.legend()\n\n# Visualize training and prediction time\nplt.figure()\n\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\nsizes = np.logspace(1, 3.8, 7).astype(int)\nfor name, estimator in {\n    \"KRR\": KernelRidge(kernel=\"rbf\", alpha=0.01, gamma=10),\n    \"SVR\": SVR(kernel=\"rbf\", C=1e2, gamma=10),\n}.items():\n    train_time = []\n    test_time = []\n    for train_test_size in sizes:\n        t0 = time.time()\n        estimator.fit(X[:train_test_size], y[:train_test_size])\n        train_time.append(time.time() - t0)\n\n        t0 = time.time()\n        estimator.predict(X_plot[:1000])\n        test_time.append(time.time() - t0)\n\n    plt.plot(\n        sizes,\n        train_time,\n        \"o-\",\n        color=\"r\" if name == \"SVR\" else \"g\",\n        label=\"%s (train)\" % name,\n    )\n    plt.plot(\n        sizes,\n        test_time,\n        \"o--\",\n        color=\"r\" if name == \"SVR\" else \"g\",\n        label=\"%s (test)\" % name,\n    )\n\nplt.xscale(\"log\")\nplt.yscale(\"log\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Time (seconds)\")\nplt.title(\"Execution Time\")\nplt.legend(loc=\"best\")\n\n# Visualize learning curves\nplt.figure()\n\nsvr = SVR(kernel=\"rbf\", C=1e1, gamma=0.1)\nkr = KernelRidge(kernel=\"rbf\", alpha=0.1, gamma=0.1)\ntrain_sizes, train_scores_svr, test_scores_svr = learning_curve(\n    svr,\n    X[:100],\n    y[:100],\n    train_sizes=np.linspace(0.1, 1, 10),\n    scoring=\"neg_mean_squared_error\",\n    cv=10,\n)\ntrain_sizes_abs, train_scores_kr, test_scores_kr = learning_curve(\n    kr,\n    X[:100],\n    y[:100],\n    train_sizes=np.linspace(0.1, 1, 10),\n    scoring=\"neg_mean_squared_error\",\n    cv=10,\n)\n\nplt.plot(train_sizes, -test_scores_svr.mean(1), \"o-\", color=\"r\", label=\"SVR\")\nplt.plot(train_sizes, -test_scores_kr.mean(1), \"o-\", color=\"g\", label=\"KRR\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Mean Squared Error\")\nplt.title(\"Learning curves\")\nplt.legend(loc=\"best\")\n\nplt.show()"
 ]
 }
 ],

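One idiom that is unchanged by the commit but easy to misread is the noise-injection line shared by both versions of the example. A standalone sketch with small illustrative sizes (20 samples here, not the example's 10000):

import numpy as np

rng = np.random.RandomState(42)
X = 5 * rng.rand(20, 1)  # 20 samples in [0, 5)
y = np.sin(X).ravel()

# rng.rand(...) is uniform on [0, 1), so 3 * (0.5 - u) lies in (-1.5, 1.5]:
# every 5th target (X.shape[0] // 5 == 4 of them here) gets that noise added.
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))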