Gradient Boosting regression
============================

-Demonstrate Gradient Boosting on the Boston housing dataset.
+This example demonstrates Gradient Boosting to produce a predictive
+model from an ensemble of weak predictive models. Gradient boosting can be used
+for regression and classification problems. Here, we will train a model to
+tackle a diabetes regression task.
+
+We will obtain the results from
+:class:`~sklearn.ensemble.GradientBoostingRegressor` with least squares loss
+and 500 regression trees of depth 4.

-This example fits a Gradient Boosting model with least squares loss and
-500 regression trees of depth 4.
"""
print(__doc__)

# Author: Peter Prettenhofer <[email protected]>
+# Maria Telenczuk <https://github.com/maikia>
#
# License: BSD 3 clause


from sklearn import ensemble
from sklearn import datasets
-from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+
+##############################################################################
+# Load the data
+# -------------------------------------
+#
+# First we need to load the data.
+
+diabetes = datasets.load_diabetes()
+X, y = diabetes.data, diabetes.target
+
+##############################################################################
+# Data preprocessing
+# -------------------------------------
+#
+# Next, we will split our dataset to use 90% for training and leave the rest
+# for testing. We fix the random state of the split so the results are
+# reproducible. We will also prepare the parameters we want to use to fit our
+# regression model. You can play with these parameters to see how the results
+# change:
+#
+# n_estimators : the number of boosting stages that will be performed.
+#     Later, we will plot how the deviance changes with the boosting
+#     iterations.
+# max_depth : limits the number of nodes in the tree. The best value depends
+#     on the interaction of the input variables.
+# min_samples_split : the minimum number of samples required to split an
+#     internal node.
+# learning_rate : how much the contribution of each tree will shrink.
+# loss : here, we decided to use least squares as the loss function; there
+#     are many other options (see
+#     :class:`~sklearn.ensemble.GradientBoostingRegressor` for the other
+#     possibilities). A sketch with an alternative loss follows the parameter
+#     dictionary below.
+
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                     test_size=0.1,
+                                                     random_state=13)

-# ############################################################################
-# Load data
-boston = datasets.load_boston()
-X, y = shuffle(boston.data, boston.target, random_state=13)
-X = X.astype(np.float32)
-offset = int(X.shape[0] * 0.9)
-X_train, y_train = X[:offset], y[:offset]
-X_test, y_test = X[offset:], y[offset:]
+params = {'n_estimators': 500,
+          'max_depth': 4,
+          'min_samples_split': 5,
+          'learning_rate': 0.01,
+          'loss': 'ls'}
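# Note (editor's sketch, not part of the patch above): 'ls' is only one of the
# loss strings accepted by :class:`~sklearn.ensemble.GradientBoostingRegressor`
# in this scikit-learn release; 'lad', 'huber' and 'quantile' are the other
# documented options. A hypothetical variant using the outlier-robust Huber
# loss could look like this; `params_huber` and `clf_huber` are illustrative
# names, and the snippet reuses the imports, split and `params` defined above.

params_huber = dict(params, loss='huber', alpha=0.9)  # alpha: Huber quantile
clf_huber = ensemble.GradientBoostingRegressor(**params_huber)
clf_huber.fit(X_train, y_train)
print("Huber-loss MSE on the test set: {:.4f}".format(
    mean_squared_error(y_test, clf_huber.predict(X_test))))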

-# ############################################################################
+##############################################################################
# Fit regression model
-params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
-          'learning_rate': 0.01, 'loss': 'ls'}
-clf = ensemble.GradientBoostingRegressor(**params)
+# -------------------------------------
+#
+# Now we will initialize the gradient boosting regressor and fit it with our
+# training data. Let's also look at the mean squared error on the test data.

+clf = ensemble.GradientBoostingRegressor(**params)
clf.fit(X_train, y_train)
+
mse = mean_squared_error(y_test, clf.predict(X_test))
-print("MSE: %.4f" % mse)
+print("The mean squared error (MSE) on the test set: {:.4f}".format(mse))

-# ############################################################################
+##############################################################################
# Plot training deviance
+# -------------------------------------
+#
+# Finally, we will visualize the results. To do that, we will first compute
+# the test set deviance and then plot it against boosting iterations.

-# compute test set deviance
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)

for i, y_pred in enumerate(clf.staged_predict(X_test)):
    test_score[i] = clf.loss_(y_test, y_pred)

-plt.figure(figsize=(12, 6))
+fig = plt.figure(figsize=(12, 8))
+
plt.subplot(1, 2, 1)
plt.title('Deviance')
plt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',

plt.xlabel('Boosting Iterations')
plt.ylabel('Deviance')

-# ############################################################################
+##############################################################################
# Plot impurity-based feature importance
+# -------------------------------------
#
-# Warning: impurity-based feature importances can be misleading for
+# Careful: impurity-based feature importances can be misleading for
# high cardinality features (many unique values). See
# :func:`sklearn.inspection.permutation_importance` as an alternative.

pos = np.arange(sorted_idx.shape[0]) + .5
plt.subplot(1, 2, 2)
plt.barh(pos, feature_importance[sorted_idx], align='center')
-plt.yticks(pos, boston.feature_names[sorted_idx])
+plt.yticks(pos, np.array(diabetes.feature_names)[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('Variable Importance')
+fig.tight_layout()
+
plt.show()
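# Note (editor's sketch, not part of the patch above): the comment block above
# points to :func:`sklearn.inspection.permutation_importance` as a more
# reliable alternative to impurity-based importances. Run after the example
# (it reuses `clf`, `X_test`, `y_test`, `np`, `plt` and `diabetes` from above),
# it could be computed on the held-out set roughly as follows; `result` and
# the plotting choices are illustrative, not prescribed by this PR.

from sklearn.inspection import permutation_importance

# Permutation importance on the test set: model-agnostic and not biased
# toward high-cardinality features.
result = permutation_importance(clf, X_test, y_test, n_repeats=10,
                                random_state=42, n_jobs=2)
sorted_idx = result.importances_mean.argsort()
plt.boxplot(result.importances[sorted_idx].T, vert=False,
            labels=np.array(diabetes.feature_names)[sorted_idx])
plt.title("Permutation Importance (test set)")
plt.show()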