|
18 | 18 | "\n# Bayesian Ridge Regression\n\nComputes a Bayesian Ridge Regression on a synthetic dataset.\n\nSee `bayesian_ridge_regression` for more information on the regressor.\n\nCompared to the OLS (ordinary least squares) estimator, the coefficient\nweights are slightly shifted toward zero, which stabilizes them.\n\nAs the prior on the weights is a Gaussian prior, the histogram of the\nestimated weights is Gaussian.\n\nThe estimation of the model is done by iteratively maximizing the\nmarginal log-likelihood of the observations.\n\nWe also plot predictions and uncertainties for Bayesian Ridge Regression\nfor one-dimensional regression using polynomial feature expansion.\nNote that the uncertainty starts going up on the right side of the plot.\nThis is because these test samples are outside of the range of the training\nsamples.\n"
|
19 | 19 | ]
|
20 | 20 | },
|
21 |
| - { |
22 |
| - "cell_type": "markdown", |
23 |
| - "metadata": {}, |
24 |
| - "source": [ |
25 |
| - "## Generate simulated data with Gaussian weights\n\n" |
26 |
| - ] |
27 |
| - }, |
28 |
| - { |
29 |
| - "cell_type": "code", |
30 |
| - "execution_count": null, |
31 |
| - "metadata": { |
32 |
| - "collapsed": false |
33 |
| - }, |
34 |
| - "outputs": [], |
35 |
| - "source": [ |
36 |
| - "import numpy as np\nfrom scipy import stats\n\nnp.random.seed(0)\nn_samples, n_features = 100, 100\nX = np.random.randn(n_samples, n_features) # Create Gaussian data\n# Create weights with a precision lambda_ of 4.\nlambda_ = 4.0\nw = np.zeros(n_features)\n# Only keep 10 weights of interest\nrelevant_features = np.random.randint(0, n_features, 10)\n\nfor i in relevant_features:\n w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))\n# Create noise with a precision alpha of 50.\nalpha_ = 50.0\nnoise = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(alpha_), size=n_samples)\n# Create the target\ny = np.dot(X, w) + noise" |
37 |
| - ] |
38 |
| - }, |
39 |
| - { |
40 |
| - "cell_type": "markdown", |
41 |
| - "metadata": {}, |
42 |
| - "source": [ |
43 |
| - "## Fit the Bayesian Ridge Regression and an OLS for comparison\n\n" |
44 |
| - ] |
45 |
| - }, |
46 |
| - { |
47 |
| - "cell_type": "code", |
48 |
| - "execution_count": null, |
49 |
| - "metadata": { |
50 |
| - "collapsed": false |
51 |
| - }, |
52 |
| - "outputs": [], |
53 |
| - "source": [ |
54 |
| - "from sklearn.linear_model import BayesianRidge, LinearRegression\n\nclf = BayesianRidge(compute_score=True)\nclf.fit(X, y)\n\nols = LinearRegression()\nols.fit(X, y)" |
55 |
| - ] |
56 |
| - }, |
57 |
| - { |
58 |
| - "cell_type": "markdown", |
59 |
| - "metadata": {}, |
60 |
| - "source": [ |
61 |
| - "## Plot true weights and estimated weights\n\n" |
62 |
| - ] |
63 |
| - }, |
64 |
| - { |
65 |
| - "cell_type": "code", |
66 |
| - "execution_count": null, |
67 |
| - "metadata": { |
68 |
| - "collapsed": false |
69 |
| - }, |
70 |
| - "outputs": [], |
71 |
| - "source": [ |
72 |
| - "import matplotlib.pyplot as plt\n\nlw = 2\nplt.figure(figsize=(6, 5))\nplt.title(\"Weights of the model\")\nplt.plot(clf.coef_, color=\"lightgreen\", linewidth=lw, label=\"Bayesian Ridge estimate\")\nplt.plot(w, color=\"gold\", linewidth=lw, label=\"Ground truth\")\nplt.plot(ols.coef_, color=\"navy\", linestyle=\"--\", label=\"OLS estimate\")\nplt.xlabel(\"Features\")\nplt.ylabel(\"Values of the weights\")\n_ = plt.legend(loc=\"best\", prop=dict(size=12))" |
73 |
| - ] |
74 |
| - }, |
75 |
| - { |
76 |
| - "cell_type": "markdown", |
77 |
| - "metadata": {}, |
78 |
| - "source": [ |
79 |
| - "## Plot histogram of the weights\n\n" |
80 |
| - ] |
81 |
| - }, |
82 |
| - { |
83 |
| - "cell_type": "code", |
84 |
| - "execution_count": null, |
85 |
| - "metadata": { |
86 |
| - "collapsed": false |
87 |
| - }, |
88 |
| - "outputs": [], |
89 |
| - "source": [ |
90 |
| - "plt.figure(figsize=(6, 5))\nplt.title(\"Histogram of the weights\")\nplt.hist(clf.coef_, bins=n_features, color=\"gold\", log=True, edgecolor=\"black\")\nplt.scatter(\n clf.coef_[relevant_features],\n np.full(len(relevant_features), 5.0),\n color=\"navy\",\n label=\"Relevant features\",\n)\nplt.ylabel(\"Features\")\nplt.xlabel(\"Values of the weights\")\n_ = plt.legend(loc=\"upper left\")" |
91 |
| - ] |
92 |
| - }, |
93 |
| - { |
94 |
| - "cell_type": "markdown", |
95 |
| - "metadata": {}, |
96 |
| - "source": [ |
97 |
| - "## Plot marginal log-likelihood\n\n" |
98 |
| - ] |
99 |
| - }, |
100 |
| - { |
101 |
| - "cell_type": "code", |
102 |
| - "execution_count": null, |
103 |
| - "metadata": { |
104 |
| - "collapsed": false |
105 |
| - }, |
106 |
| - "outputs": [], |
107 |
| - "source": [ |
108 |
| - "plt.figure(figsize=(6, 5))\nplt.title(\"Marginal log-likelihood\")\nplt.plot(clf.scores_, color=\"navy\", linewidth=lw)\nplt.ylabel(\"Score\")\n_ = plt.xlabel(\"Iterations\")" |
109 |
| - ] |
110 |
| - }, |
111 |
| - { |
112 |
| - "cell_type": "markdown", |
113 |
| - "metadata": {}, |
114 |
| - "source": [ |
115 |
| - "## Plot some predictions for polynomial regression with standard deviations\n\n" |
116 |
| - ] |
117 |
| - }, |
118 | 21 | {
|
119 | 22 | "cell_type": "code",
|
120 | 23 | "execution_count": null,
|
|
123 | 26 | },
|
124 | 27 | "outputs": [],
|
125 | 28 | "source": [
|
126 |
| - "def f(x, noise_amount):\n y = np.sqrt(x) * np.sin(x)\n noise = np.random.normal(0, 1, len(x))\n return y + noise_amount * noise\n\n\ndegree = 10\nX = np.linspace(0, 10, 100)\ny = f(X, noise_amount=0.1)\nclf_poly = BayesianRidge()\nclf_poly.fit(np.vander(X, degree), y)\n\nX_plot = np.linspace(0, 11, 25)\ny_plot = f(X_plot, noise_amount=0)\ny_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)\nplt.figure(figsize=(6, 5))\nplt.errorbar(\n X_plot,\n y_mean,\n y_std,\n color=\"navy\",\n label=\"Polynomial Bayesian Ridge Regression\",\n linewidth=lw,\n)\nplt.plot(X_plot, y_plot, color=\"gold\", linewidth=lw, label=\"Ground Truth\")\nplt.ylabel(\"Output y\")\nplt.xlabel(\"Feature X\")\n_ = plt.legend(loc=\"lower left\")" |
| 29 | + "import numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import stats\n\nfrom sklearn.linear_model import BayesianRidge, LinearRegression\n\n# #############################################################################\n# Generating simulated data with Gaussian weights\nnp.random.seed(0)\nn_samples, n_features = 100, 100\nX = np.random.randn(n_samples, n_features) # Create Gaussian data\n# Create weights with a precision lambda_ of 4.\nlambda_ = 4.0\nw = np.zeros(n_features)\n# Only keep 10 weights of interest\nrelevant_features = np.random.randint(0, n_features, 10)\nfor i in relevant_features:\n w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))\n# Create noise with a precision alpha of 50.\nalpha_ = 50.0\nnoise = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(alpha_), size=n_samples)\n# Create the target\ny = np.dot(X, w) + noise\n\n# #############################################################################\n# Fit the Bayesian Ridge Regression and an OLS for comparison\nclf = BayesianRidge(compute_score=True)\nclf.fit(X, y)\n\nols = LinearRegression()\nols.fit(X, y)\n\n# #############################################################################\n# Plot true weights, estimated weights, histogram of the weights, and\n# predictions with standard deviations\nlw = 2\nplt.figure(figsize=(6, 5))\nplt.title(\"Weights of the model\")\nplt.plot(clf.coef_, color=\"lightgreen\", linewidth=lw, label=\"Bayesian Ridge estimate\")\nplt.plot(w, color=\"gold\", linewidth=lw, label=\"Ground truth\")\nplt.plot(ols.coef_, color=\"navy\", linestyle=\"--\", label=\"OLS estimate\")\nplt.xlabel(\"Features\")\nplt.ylabel(\"Values of the weights\")\nplt.legend(loc=\"best\", prop=dict(size=12))\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Histogram of the weights\")\nplt.hist(clf.coef_, bins=n_features, color=\"gold\", log=True, edgecolor=\"black\")\nplt.scatter(\n clf.coef_[relevant_features],\n np.full(len(relevant_features), 5.0),\n color=\"navy\",\n 
label=\"Relevant features\",\n)\nplt.ylabel(\"Features\")\nplt.xlabel(\"Values of the weights\")\nplt.legend(loc=\"upper left\")\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Marginal log-likelihood\")\nplt.plot(clf.scores_, color=\"navy\", linewidth=lw)\nplt.ylabel(\"Score\")\nplt.xlabel(\"Iterations\")\n\n\n# Plotting some predictions for polynomial regression\ndef f(x, noise_amount):\n y = np.sqrt(x) * np.sin(x)\n noise = np.random.normal(0, 1, len(x))\n return y + noise_amount * noise\n\n\ndegree = 10\nX = np.linspace(0, 10, 100)\ny = f(X, noise_amount=0.1)\nclf_poly = BayesianRidge()\nclf_poly.fit(np.vander(X, degree), y)\n\nX_plot = np.linspace(0, 11, 25)\ny_plot = f(X_plot, noise_amount=0)\ny_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)\nplt.figure(figsize=(6, 5))\nplt.errorbar(\n X_plot,\n y_mean,\n y_std,\n color=\"navy\",\n label=\"Polynomial Bayesian Ridge Regression\",\n linewidth=lw,\n)\nplt.plot(X_plot, y_plot, color=\"gold\", linewidth=lw, label=\"Ground Truth\")\nplt.ylabel(\"Output y\")\nplt.xlabel(\"Feature X\")\nplt.legend(loc=\"lower left\")\nplt.show()" |
127 | 30 | ]
|
128 | 31 | }
|
129 | 32 | ],
|
|
0 commit comments