bsipocz
diff --git a/‎dev/_downloads/3409d9766d352cc9f9b169d4a799a87a/auto_examples_python.zip
1.11 KB b/‎dev/_downloads/3409d9766d352cc9f9b169d4a799a87a/auto_examples_python.zip
1.11 KB
diff --git a/‎dev/_downloads/a9a92784a7617f5a14aa93d32f95dff7/plot_voting_regressor.ipynb
Lines changed: 56 additions & 2 deletions b/‎dev/_downloads/a9a92784a7617f5a14aa93d32f95dff7/plot_voting_regressor.ipynb
Lines changed: 56 additions & 2 deletions
diff --git a/‎dev/_downloads/acb1430b51f399d6660add7428cadb67/plot_voting_regressor.py
Lines changed: 53 additions & 17 deletions b/‎dev/_downloads/acb1430b51f399d6660add7428cadb67/plot_voting_regressor.py
Lines changed: 53 additions & 17 deletions
diff --git a/‎dev/_downloads/d34667f097c619f8afda4bc936e7af21/auto_examples_jupyter.zip
1.71 KB b/‎dev/_downloads/d34667f097c619f8afda4bc936e7af21/auto_examples_jupyter.zip
1.71 KB
diff --git a/‎dev/_downloads/scikit-learn-docs.pdf
1.39 KB b/‎dev/_downloads/scikit-learn-docs.pdf
1.39 KB
diff --git a/‎dev/_images/iris.png
0 Bytes b/‎dev/_images/iris.png
0 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-263 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-263 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-263 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-263 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-318 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-318 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0031.png
-318 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0031.png
-318 Bytes
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\n# Plot individual and voting regression predictions\n\n\n.. currentmodule:: sklearn\n\nPlot individual and averaged regression predictions for Boston dataset.\n\nFirst, three exemplary regressors are initialized\n(:class:`~ensemble.GradientBoostingRegressor`,\n:class:`~ensemble.RandomForestRegressor`, and\n:class:`~linear_model.LinearRegression`) and used to initialize a\n:class:`~ensemble.VotingRegressor`.\n\nThe red starred dots are the averaged predictions.\n"
+        "\n# Plot individual and voting regression predictions\n\n\n.. currentmodule:: sklearn\n\nA voting regressor is an ensemble meta-estimator that fits base regressors each\non the whole dataset. It then averages the individual predictions to form a\nfinal prediction.\nWe will use three different regressors to predict the data:\n:class:`~ensemble.GradientBoostingRegressor`,\n:class:`~ensemble.RandomForestRegressor`, and\n:class:`~linear_model.LinearRegression`.\nThen, we will combine them into a\n:class:`~ensemble.VotingRegressor`.\n\nFinally, we will plot all of them for comparison.\n\nWe will work with the diabetes dataset, which consists of 10 features\ncollected from a cohort of diabetes patients. The target is the disease\nprogression after one year from the baseline.\n"
       ]
     },
     {
@@ -26,7 +26,61 @@
       },
       "outputs": [],
       "source": [
-        "print(__doc__)\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.ensemble import VotingRegressor\n\n# Loading some example data\nX, y = datasets.load_boston(return_X_y=True)\n\n# Training classifiers\nreg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)\nreg2 = RandomForestRegressor(random_state=1, n_estimators=10)\nreg3 = LinearRegression()\nereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])\nreg1.fit(X, y)\nreg2.fit(X, y)\nreg3.fit(X, y)\nereg.fit(X, y)\n\nxt = X[:20]\n\nplt.figure()\nplt.plot(reg1.predict(xt), 'gd', label='GradientBoostingRegressor')\nplt.plot(reg2.predict(xt), 'b^', label='RandomForestRegressor')\nplt.plot(reg3.predict(xt), 'ys', label='LinearRegression')\nplt.plot(ereg.predict(xt), 'r*', label='VotingRegressor')\nplt.tick_params(axis='x', which='both', bottom=False, top=False,\n                labelbottom=False)\nplt.ylabel('predicted')\nplt.xlabel('training samples')\nplt.legend(loc=\"best\")\nplt.title('Comparison of individual predictions with averaged')\nplt.show()"
+        "print(__doc__)\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.ensemble import VotingRegressor"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Training classifiers\n--------------------------------\n\nFirst, we are going to load the diabetes dataset and instantiate a gradient\nboosting regressor, a random forest regressor and a linear regression. Next,\nwe are going to use each of them to build the voting regressor:\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "X, y = datasets.load_diabetes(return_X_y=True)\n\n# Train regressors\nreg1 = GradientBoostingRegressor(random_state=1)\nreg2 = RandomForestRegressor(random_state=1)\nreg3 = LinearRegression()\n\nreg1.fit(X, y)\nreg2.fit(X, y)\nreg3.fit(X, y)\n\nereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])\nereg.fit(X, y)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Making predictions\n--------------------------------\n\nNow we will use each of the regressors to predict the first 20 samples of\nthe diabetes dataset.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "xt = X[:20]\n\npred1 = reg1.predict(xt)\npred2 = reg2.predict(xt)\npred3 = reg3.predict(xt)\npred4 = ereg.predict(xt)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Plot the results\n--------------------------------\n\nFinally, we will visualize the 20 predictions. The red stars show the average\nprediction made by the voting regressor.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "plt.figure()\nplt.plot(pred1, 'gd', label='GradientBoostingRegressor')\nplt.plot(pred2, 'b^', label='RandomForestRegressor')\nplt.plot(pred3, 'ys', label='LinearRegression')\nplt.plot(pred4, 'r*', ms=10, label='VotingRegressor')\n\nplt.tick_params(axis='x', which='both', bottom=False, top=False,\n                labelbottom=False)\nplt.ylabel('predicted')\nplt.xlabel('training samples')\nplt.legend(loc=\"best\")\nplt.title('Regressor predictions and their average')\n\nplt.show()"
       ]
     }
   ],
 
@@ -5,15 +5,21 @@
 
 .. currentmodule:: sklearn
 
-Plot individual and averaged regression predictions for Boston dataset.
-
-First, three exemplary regressors are initialized
-(:class:`~ensemble.GradientBoostingRegressor`,
+A voting regressor is an ensemble meta-estimator that fits base regressors each
+on the whole dataset. It then averages the individual predictions to form a
+final prediction.
+We will use three different regressors to predict the data:
+:class:`~ensemble.GradientBoostingRegressor`,
 :class:`~ensemble.RandomForestRegressor`, and
-:class:`~linear_model.LinearRegression`) and used to initialize a
+:class:`~linear_model.LinearRegression`.
+Then, we will combine them into a
 :class:`~ensemble.VotingRegressor`.
 
-The red starred dots are the averaged predictions.
+Finally, we will plot all of them for comparison.
+
+We will work with the diabetes dataset, which consists of 10 features
+collected from a cohort of diabetes patients. The target is the disease
+progression after one year from the baseline.
 
 """
 print(__doc__)
@@ -26,30 +32,60 @@
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import VotingRegressor
 
-# Loading some example data
-X, y = datasets.load_boston(return_X_y=True)
-
+##############################################################################
 # Training classifiers
-reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
-reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
+# --------------------------------
+#
+# First, we are going to load the diabetes dataset and instantiate a gradient
+# boosting regressor, a random forest regressor and a linear regression. Next,
+# we are going to use each of them to build the voting regressor:
+
+X, y = datasets.load_diabetes(return_X_y=True)
+
+# Train regressors
+reg1 = GradientBoostingRegressor(random_state=1)
+reg2 = RandomForestRegressor(random_state=1)
 reg3 = LinearRegression()
-ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
+
 reg1.fit(X, y)
 reg2.fit(X, y)
 reg3.fit(X, y)
+
+ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
 ereg.fit(X, y)
 
+##############################################################################
+# Making predictions
+# --------------------------------
+#
+# Now we will use each of the regressors to predict the first 20 samples of
+# the diabetes dataset.
+
 xt = X[:20]
 
+pred1 = reg1.predict(xt)
+pred2 = reg2.predict(xt)
+pred3 = reg3.predict(xt)
+pred4 = ereg.predict(xt)
+
+##############################################################################
+# Plot the results
+# --------------------------------
+#
+# Finally, we will visualize the 20 predictions. The red stars show the average
+# prediction made by the voting regressor.
+
 plt.figure()
-plt.plot(reg1.predict(xt), 'gd', label='GradientBoostingRegressor')
-plt.plot(reg2.predict(xt), 'b^', label='RandomForestRegressor')
-plt.plot(reg3.predict(xt), 'ys', label='LinearRegression')
-plt.plot(ereg.predict(xt), 'r*', label='VotingRegressor')
+plt.plot(pred1, 'gd', label='GradientBoostingRegressor')
+plt.plot(pred2, 'b^', label='RandomForestRegressor')
+plt.plot(pred3, 'ys', label='LinearRegression')
+plt.plot(pred4, 'r*', ms=10, label='VotingRegressor')
+
 plt.tick_params(axis='x', which='both', bottom=False, top=False,
                 labelbottom=False)
 plt.ylabel('predicted')
 plt.xlabel('training samples')
 plt.legend(loc="best")
-plt.title('Comparison of individual predictions with averaged')
+plt.title('Regressor predictions and their average')
+
 plt.show()