
Commit 7edd108

Pushing the docs to dev/ for branch: master, commit bab5926024e6ce43b6211ab71ffa3d2872be60ca
1 parent f949557 commit 7edd108

File tree

1,212 files changed: +6517 additions, -4037 deletions

Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
"""
=================================
Combine predictors using stacking
=================================

Stacking refers to a method of blending estimators. In this strategy, several
estimators are fitted individually on the training data, while a final
estimator is trained using the stacked predictions of these base estimators.

In this example, we illustrate the use case in which different regressors are
stacked together and a final penalized linear regressor is used to output the
prediction. We compare the performance of each individual regressor with the
stacking strategy. Stacking slightly improves the overall performance.

"""
print(__doc__)

# Authors: Guillaume Lemaitre <[email protected]>
# License: BSD 3 clause

###############################################################################
# The function ``plot_regression_results`` is used to plot the predicted and
# true targets.

import matplotlib.pyplot as plt


def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):
    """Scatter plot of the predicted vs true targets."""
    ax.plot([y_true.min(), y_true.max()],
            [y_true.min(), y_true.max()],
            '--r', linewidth=2)
    ax.scatter(y_true, y_pred, alpha=0.2)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))
    ax.set_xlim([y_true.min(), y_true.max()])
    ax.set_ylim([y_true.min(), y_true.max()])
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    extra = plt.Rectangle((0, 0), 0, 0, fc="w", fill=False,
                          edgecolor='none', linewidth=0)
    ax.legend([extra], [scores], loc='upper left')
    title = title + '\n Evaluation in {:.2f} seconds'.format(elapsed_time)
    ax.set_title(title)


###############################################################################
# Stack of predictors on a single data set
###############################################################################
# It is sometimes tedious to find the model which performs best on a given
# dataset. Stacking provides an alternative by combining the outputs of
# several learners, without the need to choose a model specifically. The
# performance of stacking is usually close to that of the best model, and it
# can sometimes outperform the prediction performance of each individual
# model.
#
# Here, we combine 3 learners (linear and non-linear) and use a ridge
# regressor to combine their outputs.

from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV

estimators = [
    ('Random Forest', RandomForestRegressor(random_state=42)),
    ('Lasso', LassoCV()),
    ('Gradient Boosting', HistGradientBoostingRegressor(random_state=0))
]
stacking_regressor = StackingRegressor(
    estimators=estimators, final_estimator=RidgeCV()
)
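

###############################################################################
# A minimal illustrative sketch (an addition for this write-up, not part of
# the original example) of roughly what ``StackingRegressor`` does
# internally: each base estimator is evaluated with out-of-fold predictions,
# and the final estimator is trained on the resulting prediction matrix. The
# helper below is hypothetical and only meant to illustrate the mechanism;
# ``X`` and ``y`` are loaded in the next section.

import numpy as np
from sklearn.model_selection import cross_val_predict


def manual_stacking_features(estimators, X, y, cv=5):
    """Stack out-of-fold predictions of each base estimator column-wise."""
    # One column per base estimator, holding its cross-validated predictions;
    # this is the matrix on which the final estimator would be trained.
    return np.column_stack([
        cross_val_predict(est, X, y, cv=cv) for _, est in estimators
    ])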


###############################################################################
# We use the Boston data set (prediction of house prices). We check the
# performance of each individual predictor as well as of the stack of the
# regressors.

import time
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_validate, cross_val_predict

X, y = load_boston(return_X_y=True)

fig, axs = plt.subplots(2, 2, figsize=(9, 7))
axs = np.ravel(axs)

for ax, (name, est) in zip(axs, estimators + [('Stacking Regressor',
                                               stacking_regressor)]):
    start_time = time.time()
    score = cross_validate(est, X, y,
                           scoring=['r2', 'neg_mean_absolute_error'],
                           n_jobs=-1, verbose=0)
    elapsed_time = time.time() - start_time

    y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)
    plot_regression_results(
        ax, y, y_pred,
        name,
        (r'$R^2={:.2f} \pm {:.2f}$' + '\n' + r'$MAE={:.2f} \pm {:.2f}$')
        .format(np.mean(score['test_r2']),
                np.std(score['test_r2']),
                -np.mean(score['test_neg_mean_absolute_error']),
                np.std(score['test_neg_mean_absolute_error'])),
        elapsed_time)

plt.suptitle('Single predictors versus stacked predictors')
plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.show()

###############################################################################
# The stacked regressor combines the strengths of the different regressors.
# However, we also see that training the stacked regressor is much more
# computationally expensive.
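
###############################################################################
# As an illustrative follow-up (a sketch added for this write-up, not part of
# the original example): after fitting the stack on the full data, the
# coefficients of the ridge final estimator indicate the weight given to each
# base learner's prediction. ``final_estimator_`` is the fitted final
# estimator exposed by ``StackingRegressor``.

stacking_regressor.fit(X, y)
for (name, _), coef in zip(estimators,
                           stacking_regressor.final_estimator_.coef_):
    print('{}: coefficient = {:.2f}'.format(name, coef))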
Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Combine predictors using stacking\n\nStacking refers to a method of blending estimators. In this strategy, several\nestimators are fitted individually on the training data, while a final\nestimator is trained using the stacked predictions of these base estimators.\n\nIn this example, we illustrate the use case in which different regressors are\nstacked together and a final penalized linear regressor is used to output the\nprediction. We compare the performance of each individual regressor with the\nstacking strategy. Stacking slightly improves the overall performance.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\n# Authors: Guillaume Lemaitre <[email protected]>\n# License: BSD 3 clause"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "The function ``plot_regression_results`` is used to plot the predicted and\ntrue targets.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\n\n\ndef plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):\n    \"\"\"Scatter plot of the predicted vs true targets.\"\"\"\n    ax.plot([y_true.min(), y_true.max()],\n            [y_true.min(), y_true.max()],\n            '--r', linewidth=2)\n    ax.scatter(y_true, y_pred, alpha=0.2)\n\n    ax.spines['top'].set_visible(False)\n    ax.spines['right'].set_visible(False)\n    ax.get_xaxis().tick_bottom()\n    ax.get_yaxis().tick_left()\n    ax.spines['left'].set_position(('outward', 10))\n    ax.spines['bottom'].set_position(('outward', 10))\n    ax.set_xlim([y_true.min(), y_true.max()])\n    ax.set_ylim([y_true.min(), y_true.max()])\n    ax.set_xlabel('Measured')\n    ax.set_ylabel('Predicted')\n    extra = plt.Rectangle((0, 0), 0, 0, fc=\"w\", fill=False,\n                          edgecolor='none', linewidth=0)\n    ax.legend([extra], [scores], loc='upper left')\n    title = title + '\\n Evaluation in {:.2f} seconds'.format(elapsed_time)\n    ax.set_title(title)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Stack of predictors on a single data set\n\nIt is sometimes tedious to find the model which performs best on a given\ndataset. Stacking provides an alternative by combining the outputs of several\nlearners, without the need to choose a model specifically. The performance of\nstacking is usually close to that of the best model, and it can sometimes\noutperform the prediction performance of each individual model.\n\nHere, we combine 3 learners (linear and non-linear) and use a ridge regressor\nto combine their outputs.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from sklearn.ensemble import StackingRegressor\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.experimental import enable_hist_gradient_boosting  # noqa\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nfrom sklearn.linear_model import LassoCV\nfrom sklearn.linear_model import RidgeCV\n\nestimators = [\n    ('Random Forest', RandomForestRegressor(random_state=42)),\n    ('Lasso', LassoCV()),\n    ('Gradient Boosting', HistGradientBoostingRegressor(random_state=0))\n]\nstacking_regressor = StackingRegressor(\n    estimators=estimators, final_estimator=RidgeCV()\n)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "We use the Boston data set (prediction of house prices). We check the\nperformance of each individual predictor as well as of the stack of the\nregressors.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import time\nimport numpy as np\nfrom sklearn.datasets import load_boston\nfrom sklearn.model_selection import cross_validate, cross_val_predict\n\nX, y = load_boston(return_X_y=True)\n\nfig, axs = plt.subplots(2, 2, figsize=(9, 7))\naxs = np.ravel(axs)\n\nfor ax, (name, est) in zip(axs, estimators + [('Stacking Regressor',\n                                               stacking_regressor)]):\n    start_time = time.time()\n    score = cross_validate(est, X, y,\n                           scoring=['r2', 'neg_mean_absolute_error'],\n                           n_jobs=-1, verbose=0)\n    elapsed_time = time.time() - start_time\n\n    y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)\n    plot_regression_results(\n        ax, y, y_pred,\n        name,\n        (r'$R^2={:.2f} \\pm {:.2f}$' + '\\n' + r'$MAE={:.2f} \\pm {:.2f}$')\n        .format(np.mean(score['test_r2']),\n                np.std(score['test_r2']),\n                -np.mean(score['test_neg_mean_absolute_error']),\n                np.std(score['test_neg_mean_absolute_error'])),\n        elapsed_time)\n\nplt.suptitle('Single predictors versus stacked predictors')\nplt.tight_layout()\nplt.subplots_adjust(top=0.9)\nplt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "The stacked regressor combines the strengths of the different regressors.\nHowever, we also see that training the stacked regressor is much more\ncomputationally expensive.\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}

dev/_downloads/scikit-learn-docs.pdf — 180 KB (binary file not shown)

dev/_images/iris.png (binary image changed)
