scikit-learn
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
-105 Bytes b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
-105 Bytes
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
-1.29 KB b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
-1.29 KB
diff --git a/‎dev/_downloads/788b8c55a85f84a55e652c6048c4f623/plot_bayesian_ridge.py
Lines changed: 15 additions & 29 deletions b/‎dev/_downloads/788b8c55a85f84a55e652c6048c4f623/plot_bayesian_ridge.py
Lines changed: 15 additions & 29 deletions
diff --git a/‎dev/_downloads/eac1d0922187b84e57eb8359500f5bfb/plot_bayesian_ridge.ipynb
Lines changed: 1 addition & 98 deletions b/‎dev/_downloads/eac1d0922187b84e57eb8359500f5bfb/plot_bayesian_ridge.ipynb
Lines changed: 1 addition & 98 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.zip
-4.85 KB b/‎dev/_downloads/scikit-learn-docs.zip
-4.85 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-212 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-212 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-189 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-189 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-120 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-120 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
-146 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
-146 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
-62 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
-62 Bytes
@@ -24,12 +24,14 @@
 
 """
 
-# %%
-# Generate simulated data with Gaussian weights
-# ---------------------------------------------
 import numpy as np
+import matplotlib.pyplot as plt
 from scipy import stats
 
+from sklearn.linear_model import BayesianRidge, LinearRegression
+
+# #############################################################################
+# Generating simulated data with Gaussian weights
 np.random.seed(0)
 n_samples, n_features = 100, 100
 X = np.random.randn(n_samples, n_features)  # Create Gaussian data
@@ -38,7 +40,6 @@
 w = np.zeros(n_features)
 # Only keep 10 weights of interest
 relevant_features = np.random.randint(0, n_features, 10)
-
 for i in relevant_features:
     w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))
 # Create noise with a precision alpha of 50.
@@ -47,22 +48,17 @@
 # Create the target
 y = np.dot(X, w) + noise
 
-# %%
+# #############################################################################
 # Fit the Bayesian Ridge Regression and an OLS for comparison
-# -----------------------------------------------------------
-from sklearn.linear_model import BayesianRidge, LinearRegression
-
 clf = BayesianRidge(compute_score=True)
 clf.fit(X, y)
 
 ols = LinearRegression()
 ols.fit(X, y)
 
-# %%
-# Plot true weights and estimated weights
-# ---------------------------------------
-import matplotlib.pyplot as plt
-
+# #############################################################################
+# Plot true weights, estimated weights, histogram of the weights, and
+# predictions with standard deviations
 lw = 2
 plt.figure(figsize=(6, 5))
 plt.title("Weights of the model")
@@ -71,11 +67,7 @@
 plt.plot(ols.coef_, color="navy", linestyle="--", label="OLS estimate")
 plt.xlabel("Features")
 plt.ylabel("Values of the weights")
-_ = plt.legend(loc="best", prop=dict(size=12))
-
-# %%
-# Plot histogram of the weights
-# -----------------------------
+plt.legend(loc="best", prop=dict(size=12))
 
 plt.figure(figsize=(6, 5))
 plt.title("Histogram of the weights")
@@ -88,23 +80,16 @@
 )
 plt.ylabel("Features")
 plt.xlabel("Values of the weights")
-_ = plt.legend(loc="upper left")
-
-# %%
-# Plot marginal log-likelihood
-# ----------------------------
+plt.legend(loc="upper left")
 
 plt.figure(figsize=(6, 5))
 plt.title("Marginal log-likelihood")
 plt.plot(clf.scores_, color="navy", linewidth=lw)
 plt.ylabel("Score")
-_ = plt.xlabel("Iterations")
-
-# %%
-# Plot some predictions for polynomial regression with standard deviations
-# ------------------------------------------------------------------------
+plt.xlabel("Iterations")
 
 
+# Plotting some predictions for polynomial regression
 def f(x, noise_amount):
     y = np.sqrt(x) * np.sin(x)
     noise = np.random.normal(0, 1, len(x))
@@ -132,4 +117,5 @@ def f(x, noise_amount):
 plt.plot(X_plot, y_plot, color="gold", linewidth=lw, label="Ground Truth")
 plt.ylabel("Output y")
 plt.xlabel("Feature X")
-_ = plt.legend(loc="lower left")
+plt.legend(loc="lower left")
+plt.show()
@@ -18,103 +18,6 @@
         "\n# Bayesian Ridge Regression\n\nComputes a Bayesian Ridge Regression on a synthetic dataset.\n\nSee `bayesian_ridge_regression` for more information on the regressor.\n\nCompared to the OLS (ordinary least squares) estimator, the coefficient\nweights are slightly shifted toward zeros, which stabilises them.\n\nAs the prior on the weights is a Gaussian prior, the histogram of the\nestimated weights is Gaussian.\n\nThe estimation of the model is done by iteratively maximizing the\nmarginal log-likelihood of the observations.\n\nWe also plot predictions and uncertainties for Bayesian Ridge Regression\nfor one dimensional regression using polynomial feature expansion.\nNote the uncertainty starts going up on the right side of the plot.\nThis is because these test samples are outside of the range of the training\nsamples.\n"
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Generate simulated data with Gaussian weights\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "import numpy as np\nfrom scipy import stats\n\nnp.random.seed(0)\nn_samples, n_features = 100, 100\nX = np.random.randn(n_samples, n_features)  # Create Gaussian data\n# Create weights with a precision lambda_ of 4.\nlambda_ = 4.0\nw = np.zeros(n_features)\n# Only keep 10 weights of interest\nrelevant_features = np.random.randint(0, n_features, 10)\n\nfor i in relevant_features:\n    w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))\n# Create noise with a precision alpha of 50.\nalpha_ = 50.0\nnoise = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(alpha_), size=n_samples)\n# Create the target\ny = np.dot(X, w) + noise"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Fit the Bayesian Ridge Regression and an OLS for comparison\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "from sklearn.linear_model import BayesianRidge, LinearRegression\n\nclf = BayesianRidge(compute_score=True)\nclf.fit(X, y)\n\nols = LinearRegression()\nols.fit(X, y)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Plot true weights and estimated weights\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "import matplotlib.pyplot as plt\n\nlw = 2\nplt.figure(figsize=(6, 5))\nplt.title(\"Weights of the model\")\nplt.plot(clf.coef_, color=\"lightgreen\", linewidth=lw, label=\"Bayesian Ridge estimate\")\nplt.plot(w, color=\"gold\", linewidth=lw, label=\"Ground truth\")\nplt.plot(ols.coef_, color=\"navy\", linestyle=\"--\", label=\"OLS estimate\")\nplt.xlabel(\"Features\")\nplt.ylabel(\"Values of the weights\")\n_ = plt.legend(loc=\"best\", prop=dict(size=12))"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Plot histogram of the weights\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "plt.figure(figsize=(6, 5))\nplt.title(\"Histogram of the weights\")\nplt.hist(clf.coef_, bins=n_features, color=\"gold\", log=True, edgecolor=\"black\")\nplt.scatter(\n    clf.coef_[relevant_features],\n    np.full(len(relevant_features), 5.0),\n    color=\"navy\",\n    label=\"Relevant features\",\n)\nplt.ylabel(\"Features\")\nplt.xlabel(\"Values of the weights\")\n_ = plt.legend(loc=\"upper left\")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Plot marginal log-likelihood\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "plt.figure(figsize=(6, 5))\nplt.title(\"Marginal log-likelihood\")\nplt.plot(clf.scores_, color=\"navy\", linewidth=lw)\nplt.ylabel(\"Score\")\n_ = plt.xlabel(\"Iterations\")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Plot some predictions for polynomial regression with standard deviations\n\n"
-      ]
-    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -123,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "def f(x, noise_amount):\n    y = np.sqrt(x) * np.sin(x)\n    noise = np.random.normal(0, 1, len(x))\n    return y + noise_amount * noise\n\n\ndegree = 10\nX = np.linspace(0, 10, 100)\ny = f(X, noise_amount=0.1)\nclf_poly = BayesianRidge()\nclf_poly.fit(np.vander(X, degree), y)\n\nX_plot = np.linspace(0, 11, 25)\ny_plot = f(X_plot, noise_amount=0)\ny_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)\nplt.figure(figsize=(6, 5))\nplt.errorbar(\n    X_plot,\n    y_mean,\n    y_std,\n    color=\"navy\",\n    label=\"Polynomial Bayesian Ridge Regression\",\n    linewidth=lw,\n)\nplt.plot(X_plot, y_plot, color=\"gold\", linewidth=lw, label=\"Ground Truth\")\nplt.ylabel(\"Output y\")\nplt.xlabel(\"Feature X\")\n_ = plt.legend(loc=\"lower left\")"
+        "import numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import stats\n\nfrom sklearn.linear_model import BayesianRidge, LinearRegression\n\n# #############################################################################\n# Generating simulated data with Gaussian weights\nnp.random.seed(0)\nn_samples, n_features = 100, 100\nX = np.random.randn(n_samples, n_features)  # Create Gaussian data\n# Create weights with a precision lambda_ of 4.\nlambda_ = 4.0\nw = np.zeros(n_features)\n# Only keep 10 weights of interest\nrelevant_features = np.random.randint(0, n_features, 10)\nfor i in relevant_features:\n    w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))\n# Create noise with a precision alpha of 50.\nalpha_ = 50.0\nnoise = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(alpha_), size=n_samples)\n# Create the target\ny = np.dot(X, w) + noise\n\n# #############################################################################\n# Fit the Bayesian Ridge Regression and an OLS for comparison\nclf = BayesianRidge(compute_score=True)\nclf.fit(X, y)\n\nols = LinearRegression()\nols.fit(X, y)\n\n# #############################################################################\n# Plot true weights, estimated weights, histogram of the weights, and\n# predictions with standard deviations\nlw = 2\nplt.figure(figsize=(6, 5))\nplt.title(\"Weights of the model\")\nplt.plot(clf.coef_, color=\"lightgreen\", linewidth=lw, label=\"Bayesian Ridge estimate\")\nplt.plot(w, color=\"gold\", linewidth=lw, label=\"Ground truth\")\nplt.plot(ols.coef_, color=\"navy\", linestyle=\"--\", label=\"OLS estimate\")\nplt.xlabel(\"Features\")\nplt.ylabel(\"Values of the weights\")\nplt.legend(loc=\"best\", prop=dict(size=12))\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Histogram of the weights\")\nplt.hist(clf.coef_, bins=n_features, color=\"gold\", log=True, edgecolor=\"black\")\nplt.scatter(\n    clf.coef_[relevant_features],\n    np.full(len(relevant_features), 5.0),\n    color=\"navy\",\n    label=\"Relevant features\",\n)\nplt.ylabel(\"Features\")\nplt.xlabel(\"Values of the weights\")\nplt.legend(loc=\"upper left\")\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Marginal log-likelihood\")\nplt.plot(clf.scores_, color=\"navy\", linewidth=lw)\nplt.ylabel(\"Score\")\nplt.xlabel(\"Iterations\")\n\n\n# Plotting some predictions for polynomial regression\ndef f(x, noise_amount):\n    y = np.sqrt(x) * np.sin(x)\n    noise = np.random.normal(0, 1, len(x))\n    return y + noise_amount * noise\n\n\ndegree = 10\nX = np.linspace(0, 10, 100)\ny = f(X, noise_amount=0.1)\nclf_poly = BayesianRidge()\nclf_poly.fit(np.vander(X, degree), y)\n\nX_plot = np.linspace(0, 11, 25)\ny_plot = f(X_plot, noise_amount=0)\ny_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)\nplt.figure(figsize=(6, 5))\nplt.errorbar(\n    X_plot,\n    y_mean,\n    y_std,\n    color=\"navy\",\n    label=\"Polynomial Bayesian Ridge Regression\",\n    linewidth=lw,\n)\nplt.plot(X_plot, y_plot, color=\"gold\", linewidth=lw, label=\"Ground Truth\")\nplt.ylabel(\"Output y\")\nplt.xlabel(\"Feature X\")\nplt.legend(loc=\"lower left\")\nplt.show()"
       ]
     }
   ],