Commit 51e737b

Pushing the docs to dev/ for branch: master, commit 8122e77bee8414c787f4bcd730673d2c0e137d06
1 parent 4e57ead commit 51e737b

1,191 files changed: 3716 additions & 3728 deletions

dev/_downloads/679566501b743cb339497968edb9d62f/plot_gradient_boosting_regression.py

Lines changed: 22 additions & 26 deletions
@@ -11,7 +11,7 @@
 and 500 regression trees of depth 4.
 
 Note: For larger datasets (n_samples >= 10000), please refer to
-:class:`sklearn.ensemble.HistGradientBoostingRegressor`
+:class:`sklearn.ensemble.HistGradientBoostingRegressor`.
 """
 print(__doc__)
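
Editor's note: as a minimal sketch of the histogram-based estimator recommended in the note above (not part of this commit; the default hyperparameters and the reuse of the example's 0.9/0.1 split and random_state=13 are assumptions):

    # Sketch only: HistGradientBoostingRegressor on the same diabetes data.
    # On scikit-learn < 0.24 you also need:
    #   from sklearn.experimental import enable_hist_gradient_boosting  # noqa
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import HistGradientBoostingRegressor
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=13)

    hist_reg = HistGradientBoostingRegressor(random_state=13)  # defaults assumed
    hist_reg.fit(X_train, y_train)
    print(mean_squared_error(y_test, hist_reg.predict(X_test)))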

@@ -32,8 +32,7 @@
 # Load the data
 # -------------------------------------
 #
-# First we need to load the data. We set random state to be consistent with the
-# result.
+# First we need to load the data.
 
 diabetes = datasets.load_diabetes()
 X, y = diabetes.data, diabetes.target
@@ -43,26 +42,23 @@
 # -------------------------------------
 #
 # Next, we will split our dataset to use 90% for training and leave the rest
-# for testing. We will also prepare the parameters we want to use to fit our
-# regression model. You can play with those parameters to see how the
-# results change:
+# for testing. We will also set the regression model parameters. You can play
+# with these parameters to see how the results change.
 #
-# n_estimators : the number of boosting stages which will be performed.
-# Later, we will plot and see how the deviance changes with those boosting
-# operations.
+# n_estimators : the number of boosting stages that will be performed.
+# Later, we will plot deviance against boosting iterations.
 #
 # max_depth : limits the number of nodes in the tree.
 # The best value depends on the interaction of the input variables.
 #
 # min_samples_split : the minimum number of samples required to split an
 # internal node.
 #
-# learning_rate : how much the contribution of each tree will shrink
+# learning_rate : how much the contribution of each tree will shrink.
 #
-# loss : here, we decided to use least squeares as a loss function.
-# However there are many other options (check
-# :class:`~sklearn.ensemble.GradientBoostingRegressor` to see what are
-# other possibilities)
+# loss : loss function to optimize. The least squares function is used in this
+# case however, there are many other options (see
+# :class:`~sklearn.ensemble.GradientBoostingRegressor` ).
 
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.1, random_state=13)
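
Editor's note: the ``params`` dictionary itself is not shown in this hunk. A sketch consistent with the description above (500 trees of depth 4, least squares loss) could look like the following; the ``min_samples_split`` and ``learning_rate`` values are illustrative assumptions, not values taken from this diff:

    # Illustrative only; exact values for min_samples_split and learning_rate
    # are assumptions, not taken from the commit.
    params = {'n_estimators': 500,      # 500 boosting stages, as described
              'max_depth': 4,           # trees of depth 4, as described
              'min_samples_split': 5,   # assumed value
              'learning_rate': 0.01,    # assumed value
              'loss': 'ls'}             # least squares ('squared_error' in newer releases)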
@@ -80,27 +76,27 @@
 # Now we will initiate the gradient boosting regressors and fit it with our
 # training data. Let's also look and the mean squared error on the test data.
 
-clf = ensemble.GradientBoostingRegressor(**params)
-clf.fit(X_train, y_train)
+reg = ensemble.GradientBoostingRegressor(**params)
+reg.fit(X_train, y_train)
 
-mse = mean_squared_error(y_test, clf.predict(X_test))
+mse = mean_squared_error(y_test, reg.predict(X_test))
 print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
 
 ##############################################################################
 # Plot training deviance
 # -------------------------------------
 #
 # Finally, we will visualize the results. To do that we will first compute the
-# test set deviance and then plot it.
+# test set deviance and then plot it against boosting iterations.
 
 test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
-for i, y_pred in enumerate(clf.staged_predict(X_test)):
-    test_score[i] = clf.loss_(y_test, y_pred)
+for i, y_pred in enumerate(reg.staged_predict(X_test)):
+    test_score[i] = reg.loss_(y_test, y_pred)
 
 fig = plt.figure(figsize=(6, 6))
 plt.subplot(1, 1, 1)
 plt.title('Deviance')
-plt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',
+plt.plot(np.arange(params['n_estimators']) + 1, reg.train_score_, 'b-',
          label='Training Set Deviance')
 plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
          label='Test Set Deviance')
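
Editor's note: ``reg.loss_`` is a private attribute and is not available in later scikit-learn releases. With the least squares loss, an equivalent, more portable sketch scores each staged prediction directly (assuming ``reg``, ``params``, ``X_test`` and ``y_test`` are defined as in the example above):

    # Portable alternative to reg.loss_: for least squares loss, the
    # per-stage test deviance equals the test MSE of each staged prediction.
    import numpy as np
    from sklearn.metrics import mean_squared_error

    test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
    for i, y_pred in enumerate(reg.staged_predict(X_test)):
        test_score[i] = mean_squared_error(y_test, y_pred)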
@@ -116,16 +112,16 @@
 #
 # Careful, impurity-based feature importances can be misleading for
 # high cardinality features (many unique values). As an alternative,
-# the permutation importances of ``clf`` are computed on a
+# the permutation importances of ``reg`` can be computed on a
 # held out test set. See :ref:`permutation_importance` for more details.
 #
-# In this case, the two methods agree to identify the same top 2 features
-# as strongly predictive features but not in the same order. The third most
+# For this example, the impurity-based and permutation methods identify the
+# same 2 strongly predictive features but not in the same order. The third most
 # predictive feature, "bp", is also the same for the 2 methods. The remaining
 # features are less predictive and the error bars of the permutation plot
 # show that they overlap with 0.
 
-feature_importance = clf.feature_importances_
+feature_importance = reg.feature_importances_
 sorted_idx = np.argsort(feature_importance)
 pos = np.arange(sorted_idx.shape[0]) + .5
 fig = plt.figure(figsize=(12, 6))
@@ -134,7 +130,7 @@
 plt.yticks(pos, np.array(diabetes.feature_names)[sorted_idx])
 plt.title('Feature Importance (MDI)')
 
-result = permutation_importance(clf, X_test, y_test, n_repeats=10,
+result = permutation_importance(reg, X_test, y_test, n_repeats=10,
                                 random_state=42, n_jobs=2)
 sorted_idx = result.importances_mean.argsort()
 plt.subplot(1, 2, 2)

dev/_downloads/cdc6134a701824f26cc08df7bd1e479a/plot_gradient_boosting_regression.ipynb

Lines changed: 8 additions & 8 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Gradient Boosting regression\n\n\nThis example demonstrates Gradient Boosting to produce a predictive\nmodel from an ensemble of weak predictive models. Gradient boosting can be used\nfor regression and classification problems. Here, we will train a model to\ntackle a diabetes regression task. We will obtain the results from\n:class:`~sklearn.ensemble.GradientBoostingRegressor` with least squares loss\nand 500 regression trees of depth 4.\n\nNote: For larger datasets (n_samples >= 10000), please refer to\n:class:`sklearn.ensemble.HistGradientBoostingRegressor`\n"
+"\n# Gradient Boosting regression\n\n\nThis example demonstrates Gradient Boosting to produce a predictive\nmodel from an ensemble of weak predictive models. Gradient boosting can be used\nfor regression and classification problems. Here, we will train a model to\ntackle a diabetes regression task. We will obtain the results from\n:class:`~sklearn.ensemble.GradientBoostingRegressor` with least squares loss\nand 500 regression trees of depth 4.\n\nNote: For larger datasets (n_samples >= 10000), please refer to\n:class:`sklearn.ensemble.HistGradientBoostingRegressor`.\n"
 ]
 },
 {
@@ -33,7 +33,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Load the data\n-------------------------------------\n\nFirst we need to load the data. We set random state to be consistent with the\nresult.\n\n"
+"Load the data\n-------------------------------------\n\nFirst we need to load the data.\n\n"
 ]
 },
 {
@@ -51,7 +51,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Data preprocessing\n-------------------------------------\n\nNext, we will split our dataset to use 90% for training and leave the rest\nfor testing. We will also prepare the parameters we want to use to fit our\nregression model. You can play with those parameters to see how the\nresults change:\n\nn_estimators : the number of boosting stages which will be performed.\nLater, we will plot and see how the deviance changes with those boosting\noperations.\n\nmax_depth : limits the number of nodes in the tree.\nThe best value depends on the interaction of the input variables.\n\nmin_samples_split : the minimum number of samples required to split an\ninternal node.\n\nlearning_rate : how much the contribution of each tree will shrink\n\nloss : here, we decided to use least squeares as a loss function.\nHowever there are many other options (check\n:class:`~sklearn.ensemble.GradientBoostingRegressor` to see what are\nother possibilities)\n\n"
+"Data preprocessing\n-------------------------------------\n\nNext, we will split our dataset to use 90% for training and leave the rest\nfor testing. We will also set the regression model parameters. You can play\nwith these parameters to see how the results change.\n\nn_estimators : the number of boosting stages that will be performed.\nLater, we will plot deviance against boosting iterations.\n\nmax_depth : limits the number of nodes in the tree.\nThe best value depends on the interaction of the input variables.\n\nmin_samples_split : the minimum number of samples required to split an\ninternal node.\n\nlearning_rate : how much the contribution of each tree will shrink.\n\nloss : loss function to optimize. The least squares function is used in this\ncase however, there are many other options (see\n:class:`~sklearn.ensemble.GradientBoostingRegressor` ).\n\n"
 ]
 },
 {
@@ -80,14 +80,14 @@
 },
 "outputs": [],
 "source": [
-"clf = ensemble.GradientBoostingRegressor(**params)\nclf.fit(X_train, y_train)\n\nmse = mean_squared_error(y_test, clf.predict(X_test))\nprint(\"The mean squared error (MSE) on test set: {:.4f}\".format(mse))"
+"reg = ensemble.GradientBoostingRegressor(**params)\nreg.fit(X_train, y_train)\n\nmse = mean_squared_error(y_test, reg.predict(X_test))\nprint(\"The mean squared error (MSE) on test set: {:.4f}\".format(mse))"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Plot training deviance\n-------------------------------------\n\nFinally, we will visualize the results. To do that we will first compute the\ntest set deviance and then plot it.\n\n"
+"Plot training deviance\n-------------------------------------\n\nFinally, we will visualize the results. To do that we will first compute the\ntest set deviance and then plot it against boosting iterations.\n\n"
 ]
 },
 {
@@ -98,14 +98,14 @@
 },
 "outputs": [],
 "source": [
-"test_score = np.zeros((params['n_estimators'],), dtype=np.float64)\nfor i, y_pred in enumerate(clf.staged_predict(X_test)):\n    test_score[i] = clf.loss_(y_test, y_pred)\n\nfig = plt.figure(figsize=(6, 6))\nplt.subplot(1, 1, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',\n         label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n         label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\nfig.tight_layout()\nplt.show()"
+"test_score = np.zeros((params['n_estimators'],), dtype=np.float64)\nfor i, y_pred in enumerate(reg.staged_predict(X_test)):\n    test_score[i] = reg.loss_(y_test, y_pred)\n\nfig = plt.figure(figsize=(6, 6))\nplt.subplot(1, 1, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, reg.train_score_, 'b-',\n         label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n         label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\nfig.tight_layout()\nplt.show()"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Plot feature importance\n-------------------------------------\n\nCareful, impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). As an alternative,\nthe permutation importances of ``clf`` are computed on a\nheld out test set. See `permutation_importance` for more details.\n\nIn this case, the two methods agree to identify the same top 2 features\nas strongly predictive features but not in the same order. The third most\npredictive feature, \"bp\", is also the same for the 2 methods. The remaining\nfeatures are less predictive and the error bars of the permutation plot\nshow that they overlap with 0.\n\n"
+"Plot feature importance\n-------------------------------------\n\nCareful, impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). As an alternative,\nthe permutation importances of ``reg`` can be computed on a\nheld out test set. See `permutation_importance` for more details.\n\nFor this example, the impurity-based and permutation methods identify the\nsame 2 strongly predictive features but not in the same order. The third most\npredictive feature, \"bp\", is also the same for the 2 methods. The remaining\nfeatures are less predictive and the error bars of the permutation plot\nshow that they overlap with 0.\n\n"
 ]
 },
 {
@@ -116,7 +116,7 @@
 },
 "outputs": [],
 "source": [
-"feature_importance = clf.feature_importances_\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nfig = plt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, np.array(diabetes.feature_names)[sorted_idx])\nplt.title('Feature Importance (MDI)')\n\nresult = permutation_importance(clf, X_test, y_test, n_repeats=10,\n                                random_state=42, n_jobs=2)\nsorted_idx = result.importances_mean.argsort()\nplt.subplot(1, 2, 2)\nplt.boxplot(result.importances[sorted_idx].T,\n            vert=False, labels=np.array(diabetes.feature_names)[sorted_idx])\nplt.title(\"Permutation Importance (test set)\")\nfig.tight_layout()\nplt.show()"
+"feature_importance = reg.feature_importances_\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nfig = plt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, np.array(diabetes.feature_names)[sorted_idx])\nplt.title('Feature Importance (MDI)')\n\nresult = permutation_importance(reg, X_test, y_test, n_repeats=10,\n                                random_state=42, n_jobs=2)\nsorted_idx = result.importances_mean.argsort()\nplt.subplot(1, 2, 2)\nplt.boxplot(result.importances[sorted_idx].T,\n            vert=False, labels=np.array(diabetes.feature_names)[sorted_idx])\nplt.title(\"Permutation Importance (test set)\")\nfig.tight_layout()\nplt.show()"
 ]
 }
 ],

dev/_downloads/scikit-learn-docs.pdf: -39.9 KB (binary file not shown)

dev/_images/iris.png (binary image changed; diff not shown)
