Commit 5614bc3

Pushing the docs to dev/ for branch: main, commit 0a075179c69eb2edfc8cea44feb01ce28dfbf95b
1 parent c9698e8 commit 5614bc3

1,229 files changed: 5,140 additions, 4,596 deletions


dev/_downloads/2c8a162a0e436f4ca9af35453585fc81/plot_adaboost_hastie_10_2.py

Lines changed: 71 additions & 23 deletions
@@ -3,7 +3,7 @@
 Discrete versus Real AdaBoost
 =============================
 
-This example is based on Figure 10.2 from Hastie et al 2009 [1]_ and
+This notebook is based on Figure 10.2 from Hastie et al 2009 [1]_ and
 illustrates the difference in performance between the discrete SAMME [2]_
 boosting algorithm and real SAMME.R boosting algorithm. Both algorithms are
 evaluated on a binary classification task where the target Y is a non-linear
@@ -15,32 +15,44 @@
 .. [1] T. Hastie, R. Tibshirani and J. Friedman, "Elements of Statistical
     Learning Ed. 2", Springer, 2009.
 
-.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009.
+.. [2] J Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost",
+    Statistics and Its Interface, 2009.
 
 """
 
-# Author: Peter Prettenhofer <[email protected]>,
-#         Noel Dawe <[email protected]>
+# %%
+# Preparing the data and baseline models
+# --------------------------------------
+# We start by generating the binary classification dataset
+# used in Hastie et al. 2009, Example 10.2.
+
+# Authors: Peter Prettenhofer <[email protected]>,
+#          Noel Dawe <[email protected]>
 #
 # License: BSD 3 clause
 
-import numpy as np
-import matplotlib.pyplot as plt
-
 from sklearn import datasets
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.metrics import zero_one_loss
-from sklearn.ensemble import AdaBoostClassifier
 
+X, y = datasets.make_hastie_10_2(n_samples=12_000, random_state=1)
+
+# %%
+# Now, we set the hyperparameters for our AdaBoost classifiers.
+# Be aware, a learning rate of 1.0 may not be optimal for both SAMME and SAMME.R
 
 n_estimators = 400
-# A learning rate of 1. may not be optimal for both SAMME and SAMME.R
 learning_rate = 1.0
 
-X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
+# %%
+# We split the data into a training and a test set.
+# Then, we train our baseline classifiers, a `DecisionTreeClassifier` with `depth=9`
+# and a "stump" `DecisionTreeClassifier` with `depth=1` and compute the test error.
 
-X_test, y_test = X[2000:], y[2000:]
-X_train, y_train = X[:2000], y[:2000]
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=2_000, shuffle=False
+)
 
 dt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
 dt_stump.fit(X_train, y_train)
@@ -50,6 +62,14 @@
 dt.fit(X_train, y_train)
 dt_err = 1.0 - dt.score(X_test, y_test)
 
+# %%
+# Adaboost with discrete SAMME and real SAMME.R
+# ---------------------------------------------
+# We now define the discrete and real AdaBoost classifiers
+# and fit them to the training set.
+
+from sklearn.ensemble import AdaBoostClassifier
+
 ada_discrete = AdaBoostClassifier(
     base_estimator=dt_stump,
     learning_rate=learning_rate,
@@ -58,6 +78,8 @@
 )
 ada_discrete.fit(X_train, y_train)
 
+# %%
+
 ada_real = AdaBoostClassifier(
     base_estimator=dt_stump,
     learning_rate=learning_rate,
@@ -66,11 +88,13 @@
 )
 ada_real.fit(X_train, y_train)
 
-fig = plt.figure()
-ax = fig.add_subplot(111)
+# %%
+# Now, let's compute the test error of the discrete and
+# real AdaBoost classifiers for each new stump in `n_estimators`
+# added to the ensemble.
 
-ax.plot([1, n_estimators], [dt_stump_err] * 2, "k-", label="Decision Stump Error")
-ax.plot([1, n_estimators], [dt_err] * 2, "k--", label="Decision Tree Error")
+import numpy as np
+from sklearn.metrics import zero_one_loss
 
 ada_discrete_err = np.zeros((n_estimators,))
 for i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):
@@ -88,36 +112,60 @@
 for i, y_pred in enumerate(ada_real.staged_predict(X_train)):
     ada_real_err_train[i] = zero_one_loss(y_pred, y_train)
 
+# %%
+# Plotting the results
+# --------------------
+# Finally, we plot the train and test errors of our baselines
+# and of the discrete and real AdaBoost classifiers
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+fig = plt.figure()
+ax = fig.add_subplot(111)
+
+ax.plot([1, n_estimators], [dt_stump_err] * 2, "k-", label="Decision Stump Error")
+ax.plot([1, n_estimators], [dt_err] * 2, "k--", label="Decision Tree Error")
+
+colors = sns.color_palette("colorblind")
+
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_discrete_err,
     label="Discrete AdaBoost Test Error",
-    color="red",
+    color=colors[0],
 )
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_discrete_err_train,
     label="Discrete AdaBoost Train Error",
-    color="blue",
+    color=colors[1],
 )
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_real_err,
     label="Real AdaBoost Test Error",
-    color="orange",
+    color=colors[2],
 )
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_real_err_train,
     label="Real AdaBoost Train Error",
-    color="green",
+    color=colors[4],
 )
 
 ax.set_ylim((0.0, 0.5))
-ax.set_xlabel("n_estimators")
+ax.set_xlabel("Number of weak learners")
 ax.set_ylabel("error rate")
 
 leg = ax.legend(loc="upper right", fancybox=True)
 leg.get_frame().set_alpha(0.7)
 
 plt.show()
+
+# %%
+#
+# Concluding remarks
+# ------------------
+#
+# We observe that the error rate for both train and test sets of real AdaBoost
+# is lower than that of discrete AdaBoost.
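
The new cells above collect the staged train and test errors by preallocating arrays and filling them from `staged_predict`. As a quick numerical check of the concluding remark, the same comparison can be written with list comprehensions. The snippet below is only a sketch against the updated script, not part of the commit; it assumes the fitted `ada_discrete` and `ada_real` estimators and the `X_test`/`y_test` split defined there.

from sklearn.metrics import zero_one_loss

# staged_predict yields the ensemble's predictions after each boosting
# iteration, so the last entry is the error of the full 400-stump ensemble.
discrete_test_errors = [
    zero_one_loss(y_test, y_pred) for y_pred in ada_discrete.staged_predict(X_test)
]
real_test_errors = [
    zero_one_loss(y_test, y_pred) for y_pred in ada_real.staged_predict(X_test)
]

print(f"Discrete SAMME final test error: {discrete_test_errors[-1]:.3f}")
print(f"Real SAMME.R final test error:   {real_test_errors[-1]:.3f}")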

dev/_downloads/97c9b8aba1989fb600a73f3afb354726/plot_adaboost_hastie_10_2.ipynb

Lines changed: 117 additions & 2 deletions
@@ -15,7 +15,79 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Discrete versus Real AdaBoost\n\nThis example is based on Figure 10.2 from Hastie et al 2009 [1]_ and\nillustrates the difference in performance between the discrete SAMME [2]_\nboosting algorithm and real SAMME.R boosting algorithm. Both algorithms are\nevaluated on a binary classification task where the target Y is a non-linear\nfunction of 10 input features.\n\nDiscrete SAMME AdaBoost adapts based on errors in predicted class labels\nwhereas real SAMME.R uses the predicted class probabilities.\n\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n    Learning Ed. 2\", Springer, 2009.\n\n.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n"
+"\n# Discrete versus Real AdaBoost\n\nThis notebook is based on Figure 10.2 from Hastie et al 2009 [1]_ and\nillustrates the difference in performance between the discrete SAMME [2]_\nboosting algorithm and real SAMME.R boosting algorithm. Both algorithms are\nevaluated on a binary classification task where the target Y is a non-linear\nfunction of 10 input features.\n\nDiscrete SAMME AdaBoost adapts based on errors in predicted class labels\nwhereas real SAMME.R uses the predicted class probabilities.\n\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n    Learning Ed. 2\", Springer, 2009.\n\n.. [2] J Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\",\n   Statistics and Its Interface, 2009.\n"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Preparing the data and baseline models\nWe start by generating the binary classification dataset\nused in Hastie et al. 2009, Example 10.2.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"# Authors: Peter Prettenhofer <[email protected]>,\n# Noel Dawe <[email protected]>\n#\n# License: BSD 3 clause\n\nfrom sklearn import datasets\n\nX, y = datasets.make_hastie_10_2(n_samples=12_000, random_state=1)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Now, we set the hyperparameters for our AdaBoost classifiers.\nBe aware, a learning rate of 1.0 may not be optimal for both SAMME and SAMME.R\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"n_estimators = 400\nlearning_rate = 1.0"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"We split the data into a training and a test set.\nThen, we train our baseline classifiers, a `DecisionTreeClassifier` with `depth=9`\nand a \"stump\" `DecisionTreeClassifier` with `depth=1` and compute the test error.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"from sklearn.model_selection import train_test_split\nfrom sklearn.tree import DecisionTreeClassifier\n\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, test_size=2_000, shuffle=False\n)\n\ndt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)\ndt_stump.fit(X_train, y_train)\ndt_stump_err = 1.0 - dt_stump.score(X_test, y_test)\n\ndt = DecisionTreeClassifier(max_depth=9, min_samples_leaf=1)\ndt.fit(X_train, y_train)\ndt_err = 1.0 - dt.score(X_test, y_test)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Adaboost with discrete SAMME and real SAMME.R\nWe now define the discrete and real AdaBoost classifiers\nand fit them to the training set.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"from sklearn.ensemble import AdaBoostClassifier\n\nada_discrete = AdaBoostClassifier(\n    base_estimator=dt_stump,\n    learning_rate=learning_rate,\n    n_estimators=n_estimators,\n    algorithm=\"SAMME\",\n)\nada_discrete.fit(X_train, y_train)"
 ]
 },
 {
@@ -26,7 +98,50 @@
 },
 "outputs": [],
 "source": [
-"# Author: Peter Prettenhofer <[email protected]>,\n# Noel Dawe <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.metrics import zero_one_loss\nfrom sklearn.ensemble import AdaBoostClassifier\n\n\nn_estimators = 400\n# A learning rate of 1. may not be optimal for both SAMME and SAMME.R\nlearning_rate = 1.0\n\nX, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)\n\nX_test, y_test = X[2000:], y[2000:]\nX_train, y_train = X[:2000], y[:2000]\n\ndt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)\ndt_stump.fit(X_train, y_train)\ndt_stump_err = 1.0 - dt_stump.score(X_test, y_test)\n\ndt = DecisionTreeClassifier(max_depth=9, min_samples_leaf=1)\ndt.fit(X_train, y_train)\ndt_err = 1.0 - dt.score(X_test, y_test)\n\nada_discrete = AdaBoostClassifier(\n    base_estimator=dt_stump,\n    learning_rate=learning_rate,\n    n_estimators=n_estimators,\n    algorithm=\"SAMME\",\n)\nada_discrete.fit(X_train, y_train)\n\nada_real = AdaBoostClassifier(\n    base_estimator=dt_stump,\n    learning_rate=learning_rate,\n    n_estimators=n_estimators,\n    algorithm=\"SAMME.R\",\n)\nada_real.fit(X_train, y_train)\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nax.plot([1, n_estimators], [dt_stump_err] * 2, \"k-\", label=\"Decision Stump Error\")\nax.plot([1, n_estimators], [dt_err] * 2, \"k--\", label=\"Decision Tree Error\")\n\nada_discrete_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):\n    ada_discrete_err[i] = zero_one_loss(y_pred, y_test)\n\nada_discrete_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_train)):\n    ada_discrete_err_train[i] = zero_one_loss(y_pred, y_train)\n\nada_real_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_test)):\n    ada_real_err[i] = zero_one_loss(y_pred, y_test)\n\nada_real_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_train)):\n    ada_real_err_train[i] = zero_one_loss(y_pred, y_train)\n\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_discrete_err,\n    label=\"Discrete AdaBoost Test Error\",\n    color=\"red\",\n)\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_discrete_err_train,\n    label=\"Discrete AdaBoost Train Error\",\n    color=\"blue\",\n)\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_real_err,\n    label=\"Real AdaBoost Test Error\",\n    color=\"orange\",\n)\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_real_err_train,\n    label=\"Real AdaBoost Train Error\",\n    color=\"green\",\n)\n\nax.set_ylim((0.0, 0.5))\nax.set_xlabel(\"n_estimators\")\nax.set_ylabel(\"error rate\")\n\nleg = ax.legend(loc=\"upper right\", fancybox=True)\nleg.get_frame().set_alpha(0.7)\n\nplt.show()"
+"ada_real = AdaBoostClassifier(\n    base_estimator=dt_stump,\n    learning_rate=learning_rate,\n    n_estimators=n_estimators,\n    algorithm=\"SAMME.R\",\n)\nada_real.fit(X_train, y_train)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Now, let's compute the test error of the discrete and\nreal AdaBoost classifiers for each new stump in `n_estimators`\nadded to the ensemble.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\nfrom sklearn.metrics import zero_one_loss\n\nada_discrete_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):\n    ada_discrete_err[i] = zero_one_loss(y_pred, y_test)\n\nada_discrete_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_train)):\n    ada_discrete_err_train[i] = zero_one_loss(y_pred, y_train)\n\nada_real_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_test)):\n    ada_real_err[i] = zero_one_loss(y_pred, y_test)\n\nada_real_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_train)):\n    ada_real_err_train[i] = zero_one_loss(y_pred, y_train)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Plotting the results\nFinally, we plot the train and test errors of our baselines\nand of the discrete and real AdaBoost classifiers\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import matplotlib.pyplot as plt\nimport seaborn as sns\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nax.plot([1, n_estimators], [dt_stump_err] * 2, \"k-\", label=\"Decision Stump Error\")\nax.plot([1, n_estimators], [dt_err] * 2, \"k--\", label=\"Decision Tree Error\")\n\ncolors = sns.color_palette(\"colorblind\")\n\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_discrete_err,\n    label=\"Discrete AdaBoost Test Error\",\n    color=colors[0],\n)\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_discrete_err_train,\n    label=\"Discrete AdaBoost Train Error\",\n    color=colors[1],\n)\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_real_err,\n    label=\"Real AdaBoost Test Error\",\n    color=colors[2],\n)\nax.plot(\n    np.arange(n_estimators) + 1,\n    ada_real_err_train,\n    label=\"Real AdaBoost Train Error\",\n    color=colors[4],\n)\n\nax.set_ylim((0.0, 0.5))\nax.set_xlabel(\"Number of weak learners\")\nax.set_ylabel(\"error rate\")\n\nleg = ax.legend(loc=\"upper right\", fancybox=True)\nleg.get_frame().set_alpha(0.7)\n\nplt.show()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Concluding remarks\n\nWe observe that the error rate for both train and test sets of real AdaBoost\nis lower than that of discrete AdaBoost.\n\n"
 ]
 }
 ],

dev/_downloads/scikit-learn-docs.zip

Binary file not shown.
