Skip to content

Commit c8ce22b

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 257e0a2cffd97e0b797cd0f6aa53d33e13ce21fc
1 parent 7d116c2 commit c8ce22b

File tree

700 files changed

+2340
-2644
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

700 files changed

+2340
-2644
lines changed
Binary file not shown.
Binary file not shown.

dev/_downloads/788b8c55a85f84a55e652c6048c4f623/plot_bayesian_ridge.py

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@
2424
2525
"""
2626

27-
# %%
28-
# Generate simulated data with Gaussian weights
29-
# ---------------------------------------------
3027
import numpy as np
28+
import matplotlib.pyplot as plt
3129
from scipy import stats
3230

31+
from sklearn.linear_model import BayesianRidge, LinearRegression
32+
33+
# #############################################################################
34+
# Generating simulated data with Gaussian weights
3335
np.random.seed(0)
3436
n_samples, n_features = 100, 100
3537
X = np.random.randn(n_samples, n_features) # Create Gaussian data
@@ -38,7 +40,6 @@
3840
w = np.zeros(n_features)
3941
# Only keep 10 weights of interest
4042
relevant_features = np.random.randint(0, n_features, 10)
41-
4243
for i in relevant_features:
4344
w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))
4445
# Create noise with a precision alpha of 50.
@@ -47,22 +48,17 @@
4748
# Create the target
4849
y = np.dot(X, w) + noise
4950

50-
# %%
51+
# #############################################################################
5152
# Fit the Bayesian Ridge Regression and an OLS for comparison
52-
# -----------------------------------------------------------
53-
from sklearn.linear_model import BayesianRidge, LinearRegression
54-
5553
clf = BayesianRidge(compute_score=True)
5654
clf.fit(X, y)
5755

5856
ols = LinearRegression()
5957
ols.fit(X, y)
6058

61-
# %%
62-
# Plot true weights and estimated weights
63-
# ---------------------------------------
64-
import matplotlib.pyplot as plt
65-
59+
# #############################################################################
60+
# Plot true weights, estimated weights, histogram of the weights, and
61+
# predictions with standard deviations
6662
lw = 2
6763
plt.figure(figsize=(6, 5))
6864
plt.title("Weights of the model")
@@ -71,11 +67,7 @@
7167
plt.plot(ols.coef_, color="navy", linestyle="--", label="OLS estimate")
7268
plt.xlabel("Features")
7369
plt.ylabel("Values of the weights")
74-
_ = plt.legend(loc="best", prop=dict(size=12))
75-
76-
# %%
77-
# Plot histogram of the weights
78-
# -----------------------------
70+
plt.legend(loc="best", prop=dict(size=12))
7971

8072
plt.figure(figsize=(6, 5))
8173
plt.title("Histogram of the weights")
@@ -88,23 +80,16 @@
8880
)
8981
plt.ylabel("Features")
9082
plt.xlabel("Values of the weights")
91-
_ = plt.legend(loc="upper left")
92-
93-
# %%
94-
# Plot marginal log-likelihood
95-
# ----------------------------
83+
plt.legend(loc="upper left")
9684

9785
plt.figure(figsize=(6, 5))
9886
plt.title("Marginal log-likelihood")
9987
plt.plot(clf.scores_, color="navy", linewidth=lw)
10088
plt.ylabel("Score")
101-
_ = plt.xlabel("Iterations")
102-
103-
# %%
104-
# Plot some predictions for polynomial regression with standard deviations
105-
# ------------------------------------------------------------------------
89+
plt.xlabel("Iterations")
10690

10791

92+
# Plotting some predictions for polynomial regression
10893
def f(x, noise_amount):
10994
y = np.sqrt(x) * np.sin(x)
11095
noise = np.random.normal(0, 1, len(x))
@@ -132,4 +117,5 @@ def f(x, noise_amount):
132117
plt.plot(X_plot, y_plot, color="gold", linewidth=lw, label="Ground Truth")
133118
plt.ylabel("Output y")
134119
plt.xlabel("Feature X")
135-
_ = plt.legend(loc="lower left")
120+
plt.legend(loc="lower left")
121+
plt.show()

dev/_downloads/eac1d0922187b84e57eb8359500f5bfb/plot_bayesian_ridge.ipynb

Lines changed: 1 addition & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -18,103 +18,6 @@
1818
"\n# Bayesian Ridge Regression\n\nComputes a Bayesian Ridge Regression on a synthetic dataset.\n\nSee `bayesian_ridge_regression` for more information on the regressor.\n\nCompared to the OLS (ordinary least squares) estimator, the coefficient\nweights are slightly shifted toward zeros, which stabilises them.\n\nAs the prior on the weights is a Gaussian prior, the histogram of the\nestimated weights is Gaussian.\n\nThe estimation of the model is done by iteratively maximizing the\nmarginal log-likelihood of the observations.\n\nWe also plot predictions and uncertainties for Bayesian Ridge Regression\nfor one dimensional regression using polynomial feature expansion.\nNote the uncertainty starts going up on the right side of the plot.\nThis is because these test samples are outside of the range of the training\nsamples.\n"
1919
]
2020
},
21-
{
22-
"cell_type": "markdown",
23-
"metadata": {},
24-
"source": [
25-
"## Generate simulated data with Gaussian weights\n\n"
26-
]
27-
},
28-
{
29-
"cell_type": "code",
30-
"execution_count": null,
31-
"metadata": {
32-
"collapsed": false
33-
},
34-
"outputs": [],
35-
"source": [
36-
"import numpy as np\nfrom scipy import stats\n\nnp.random.seed(0)\nn_samples, n_features = 100, 100\nX = np.random.randn(n_samples, n_features) # Create Gaussian data\n# Create weights with a precision lambda_ of 4.\nlambda_ = 4.0\nw = np.zeros(n_features)\n# Only keep 10 weights of interest\nrelevant_features = np.random.randint(0, n_features, 10)\n\nfor i in relevant_features:\n w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))\n# Create noise with a precision alpha of 50.\nalpha_ = 50.0\nnoise = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(alpha_), size=n_samples)\n# Create the target\ny = np.dot(X, w) + noise"
37-
]
38-
},
39-
{
40-
"cell_type": "markdown",
41-
"metadata": {},
42-
"source": [
43-
"## Fit the Bayesian Ridge Regression and an OLS for comparison\n\n"
44-
]
45-
},
46-
{
47-
"cell_type": "code",
48-
"execution_count": null,
49-
"metadata": {
50-
"collapsed": false
51-
},
52-
"outputs": [],
53-
"source": [
54-
"from sklearn.linear_model import BayesianRidge, LinearRegression\n\nclf = BayesianRidge(compute_score=True)\nclf.fit(X, y)\n\nols = LinearRegression()\nols.fit(X, y)"
55-
]
56-
},
57-
{
58-
"cell_type": "markdown",
59-
"metadata": {},
60-
"source": [
61-
"## Plot true weights and estimated weights\n\n"
62-
]
63-
},
64-
{
65-
"cell_type": "code",
66-
"execution_count": null,
67-
"metadata": {
68-
"collapsed": false
69-
},
70-
"outputs": [],
71-
"source": [
72-
"import matplotlib.pyplot as plt\n\nlw = 2\nplt.figure(figsize=(6, 5))\nplt.title(\"Weights of the model\")\nplt.plot(clf.coef_, color=\"lightgreen\", linewidth=lw, label=\"Bayesian Ridge estimate\")\nplt.plot(w, color=\"gold\", linewidth=lw, label=\"Ground truth\")\nplt.plot(ols.coef_, color=\"navy\", linestyle=\"--\", label=\"OLS estimate\")\nplt.xlabel(\"Features\")\nplt.ylabel(\"Values of the weights\")\n_ = plt.legend(loc=\"best\", prop=dict(size=12))"
73-
]
74-
},
75-
{
76-
"cell_type": "markdown",
77-
"metadata": {},
78-
"source": [
79-
"## Plot histogram of the weights\n\n"
80-
]
81-
},
82-
{
83-
"cell_type": "code",
84-
"execution_count": null,
85-
"metadata": {
86-
"collapsed": false
87-
},
88-
"outputs": [],
89-
"source": [
90-
"plt.figure(figsize=(6, 5))\nplt.title(\"Histogram of the weights\")\nplt.hist(clf.coef_, bins=n_features, color=\"gold\", log=True, edgecolor=\"black\")\nplt.scatter(\n clf.coef_[relevant_features],\n np.full(len(relevant_features), 5.0),\n color=\"navy\",\n label=\"Relevant features\",\n)\nplt.ylabel(\"Features\")\nplt.xlabel(\"Values of the weights\")\n_ = plt.legend(loc=\"upper left\")"
91-
]
92-
},
93-
{
94-
"cell_type": "markdown",
95-
"metadata": {},
96-
"source": [
97-
"## Plot marginal log-likelihood\n\n"
98-
]
99-
},
100-
{
101-
"cell_type": "code",
102-
"execution_count": null,
103-
"metadata": {
104-
"collapsed": false
105-
},
106-
"outputs": [],
107-
"source": [
108-
"plt.figure(figsize=(6, 5))\nplt.title(\"Marginal log-likelihood\")\nplt.plot(clf.scores_, color=\"navy\", linewidth=lw)\nplt.ylabel(\"Score\")\n_ = plt.xlabel(\"Iterations\")"
109-
]
110-
},
111-
{
112-
"cell_type": "markdown",
113-
"metadata": {},
114-
"source": [
115-
"## Plot some predictions for polynomial regression with standard deviations\n\n"
116-
]
117-
},
11821
{
11922
"cell_type": "code",
12023
"execution_count": null,
@@ -123,7 +26,7 @@
12326
},
12427
"outputs": [],
12528
"source": [
126-
"def f(x, noise_amount):\n y = np.sqrt(x) * np.sin(x)\n noise = np.random.normal(0, 1, len(x))\n return y + noise_amount * noise\n\n\ndegree = 10\nX = np.linspace(0, 10, 100)\ny = f(X, noise_amount=0.1)\nclf_poly = BayesianRidge()\nclf_poly.fit(np.vander(X, degree), y)\n\nX_plot = np.linspace(0, 11, 25)\ny_plot = f(X_plot, noise_amount=0)\ny_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)\nplt.figure(figsize=(6, 5))\nplt.errorbar(\n X_plot,\n y_mean,\n y_std,\n color=\"navy\",\n label=\"Polynomial Bayesian Ridge Regression\",\n linewidth=lw,\n)\nplt.plot(X_plot, y_plot, color=\"gold\", linewidth=lw, label=\"Ground Truth\")\nplt.ylabel(\"Output y\")\nplt.xlabel(\"Feature X\")\n_ = plt.legend(loc=\"lower left\")"
29+
"import numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import stats\n\nfrom sklearn.linear_model import BayesianRidge, LinearRegression\n\n# #############################################################################\n# Generating simulated data with Gaussian weights\nnp.random.seed(0)\nn_samples, n_features = 100, 100\nX = np.random.randn(n_samples, n_features) # Create Gaussian data\n# Create weights with a precision lambda_ of 4.\nlambda_ = 4.0\nw = np.zeros(n_features)\n# Only keep 10 weights of interest\nrelevant_features = np.random.randint(0, n_features, 10)\nfor i in relevant_features:\n w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))\n# Create noise with a precision alpha of 50.\nalpha_ = 50.0\nnoise = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(alpha_), size=n_samples)\n# Create the target\ny = np.dot(X, w) + noise\n\n# #############################################################################\n# Fit the Bayesian Ridge Regression and an OLS for comparison\nclf = BayesianRidge(compute_score=True)\nclf.fit(X, y)\n\nols = LinearRegression()\nols.fit(X, y)\n\n# #############################################################################\n# Plot true weights, estimated weights, histogram of the weights, and\n# predictions with standard deviations\nlw = 2\nplt.figure(figsize=(6, 5))\nplt.title(\"Weights of the model\")\nplt.plot(clf.coef_, color=\"lightgreen\", linewidth=lw, label=\"Bayesian Ridge estimate\")\nplt.plot(w, color=\"gold\", linewidth=lw, label=\"Ground truth\")\nplt.plot(ols.coef_, color=\"navy\", linestyle=\"--\", label=\"OLS estimate\")\nplt.xlabel(\"Features\")\nplt.ylabel(\"Values of the weights\")\nplt.legend(loc=\"best\", prop=dict(size=12))\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Histogram of the weights\")\nplt.hist(clf.coef_, bins=n_features, color=\"gold\", log=True, edgecolor=\"black\")\nplt.scatter(\n clf.coef_[relevant_features],\n np.full(len(relevant_features), 5.0),\n color=\"navy\",\n label=\"Relevant features\",\n)\nplt.ylabel(\"Features\")\nplt.xlabel(\"Values of the weights\")\nplt.legend(loc=\"upper left\")\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Marginal log-likelihood\")\nplt.plot(clf.scores_, color=\"navy\", linewidth=lw)\nplt.ylabel(\"Score\")\nplt.xlabel(\"Iterations\")\n\n\n# Plotting some predictions for polynomial regression\ndef f(x, noise_amount):\n y = np.sqrt(x) * np.sin(x)\n noise = np.random.normal(0, 1, len(x))\n return y + noise_amount * noise\n\n\ndegree = 10\nX = np.linspace(0, 10, 100)\ny = f(X, noise_amount=0.1)\nclf_poly = BayesianRidge()\nclf_poly.fit(np.vander(X, degree), y)\n\nX_plot = np.linspace(0, 11, 25)\ny_plot = f(X_plot, noise_amount=0)\ny_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)\nplt.figure(figsize=(6, 5))\nplt.errorbar(\n X_plot,\n y_mean,\n y_std,\n color=\"navy\",\n label=\"Polynomial Bayesian Ridge Regression\",\n linewidth=lw,\n)\nplt.plot(X_plot, y_plot, color=\"gold\", linewidth=lw, label=\"Ground Truth\")\nplt.ylabel(\"Output y\")\nplt.xlabel(\"Feature X\")\nplt.legend(loc=\"lower left\")\nplt.show()"
12730
]
12831
}
12932
],

dev/_downloads/scikit-learn-docs.zip

-4.85 KB
Binary file not shown.
-212 Bytes
-189 Bytes
-120 Bytes
-146 Bytes
-62 Bytes

0 commit comments

Comments
 (0)