11 | 11 | and 500 regression trees of depth 4.
12 | 12 |
13 | 13 | Note: For larger datasets (n_samples >= 10000), please refer to
14 |    | -:class:`sklearn.ensemble.HistGradientBoostingRegressor`
   | 14 | +:class:`sklearn.ensemble.HistGradientBoostingRegressor`.
15 | 15 | """
16 | 16 | print(__doc__)
17 | 17 |
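
The note above points readers with larger datasets to :class:`sklearn.ensemble.HistGradientBoostingRegressor`. As a minimal sketch (not part of this change), swapping it in for the same diabetes data could look like the following; the `max_iter` and `learning_rate` values are illustrative assumptions, not taken from the example:

    # Sketch only: HistGradientBoostingRegressor, as suggested for n_samples >= 10000.
    # On scikit-learn < 0.24 this may additionally require
    # "from sklearn.experimental import enable_hist_gradient_boosting".
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import HistGradientBoostingRegressor
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=13)

    # Parameter values below are illustrative only.
    hist_reg = HistGradientBoostingRegressor(max_iter=500, learning_rate=0.01)
    hist_reg.fit(X_train, y_train)
    print("R^2 on test set: {:.4f}".format(hist_reg.score(X_test, y_test)))
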
32 | 32 | # Load the data
33 | 33 | # -------------------------------------
34 | 34 | #
35 |    | -# First we need to load the data. We set random state to be consistent with the
36 |    | -# result.
   | 35 | +# First we need to load the data.
37 | 36 |
38 | 37 | diabetes = datasets.load_diabetes()
39 | 38 | X, y = diabetes.data, diabetes.target
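
A quick way to see what `load_diabetes()` returns (a sketch, not part of the diff):

    # Sketch: inspect the diabetes data loaded above.
    from sklearn import datasets

    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target
    print(X.shape, y.shape)        # (442, 10) and (442,): 442 samples, 10 features
    print(diabetes.feature_names)  # ['age', 'sex', 'bmi', 'bp', 's1', ..., 's6']
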
43 | 42 | # -------------------------------------
44 | 43 | #
45 | 44 | # Next, we will split our dataset to use 90% for training and leave the rest
46 |    | -# for testing. We will also prepare the parameters we want to use to fit our
47 |    | -# regression model. You can play with those parameters to see how the
48 |    | -# results change:
   | 45 | +# for testing. We will also set the regression model parameters. You can play
   | 46 | +# with these parameters to see how the results change.
49 | 47 | #
50 |    | -# n_estimators : the number of boosting stages which will be performed.
51 |    | -# Later, we will plot and see how the deviance changes with those boosting
52 |    | -# operations.
   | 48 | +# n_estimators : the number of boosting stages that will be performed.
   | 49 | +# Later, we will plot deviance against boosting iterations.
53 | 50 | #
54 | 51 | # max_depth : limits the number of nodes in the tree.
55 | 52 | # The best value depends on the interaction of the input variables.
56 | 53 | #
57 | 54 | # min_samples_split : the minimum number of samples required to split an
58 | 55 | # internal node.
59 | 56 | #
60 |    | -# learning_rate : how much the contribution of each tree will shrink
   | 57 | +# learning_rate : how much the contribution of each tree will shrink.
61 | 58 | #
62 |    | -# loss : here, we decided to use least squeares as a loss function.
63 |    | -# However there are many other options (check
64 |    | -# :class:`~sklearn.ensemble.GradientBoostingRegressor` to see what are
65 |    | -# other possibilities)
   | 59 | +# loss : loss function to optimize. The least squares function is used in this
   | 60 | +# case however, there are many other options (see
   | 61 | +# :class:`~sklearn.ensemble.GradientBoostingRegressor` ).
66 | 62 |
67 | 63 | X_train, X_test, y_train, y_test = train_test_split(
68 | 64 |     X, y, test_size=0.1, random_state=13)
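
The comments above describe the model parameters, but the diff does not show the `params` dict itself. A sketch consistent with the description (500 trees of depth 4, least squares loss) might look as follows; the `min_samples_split` and `learning_rate` values are assumptions chosen purely for illustration:

    # Sketch of a params dict matching the description above; only n_estimators=500
    # and max_depth=4 come from the docstring, the remaining values are assumed.
    params = {'n_estimators': 500,      # number of boosting stages
              'max_depth': 4,           # limits the number of nodes per tree
              'min_samples_split': 5,   # illustrative value
              'learning_rate': 0.01,    # shrinkage applied to each tree
              'loss': 'ls'}             # least squares ('squared_error' in newer releases)
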
80 | 76 | # Now we will initiate the gradient boosting regressors and fit it with our
81 | 77 | # training data. Let's also look and the mean squared error on the test data.
82 | 78 |
83 |    | -clf = ensemble.GradientBoostingRegressor(**params)
84 |    | -clf.fit(X_train, y_train)
   | 79 | +reg = ensemble.GradientBoostingRegressor(**params)
   | 80 | +reg.fit(X_train, y_train)
85 | 81 |
86 |    | -mse = mean_squared_error(y_test, clf.predict(X_test))
   | 82 | +mse = mean_squared_error(y_test, reg.predict(X_test))
87 | 83 | print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
88 | 84 |
89 | 85 | ##############################################################################
90 | 86 | # Plot training deviance
91 | 87 | # -------------------------------------
92 | 88 | #
93 | 89 | # Finally, we will visualize the results. To do that we will first compute the
94 |    | -# test set deviance and then plot it.
   | 90 | +# test set deviance and then plot it against boosting iterations.
95 | 91 |
96 | 92 | test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
97 |    | -for i, y_pred in enumerate(clf.staged_predict(X_test)):
98 |    | -    test_score[i] = clf.loss_(y_test, y_pred)
   | 93 | +for i, y_pred in enumerate(reg.staged_predict(X_test)):
   | 94 | +    test_score[i] = reg.loss_(y_test, y_pred)
99 | 95 |
100 | 96  | fig = plt.figure(figsize=(6, 6))
101 | 97  | plt.subplot(1, 1, 1)
102 | 98  | plt.title('Deviance')
103 |     | -plt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',
    | 99  | +plt.plot(np.arange(params['n_estimators']) + 1, reg.train_score_, 'b-',
104 | 100 |     label='Training Set Deviance')
105 | 101 | plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
106 | 102 |     label='Test Set Deviance')
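
The staged-deviance loop above relies on the estimator's `loss_` attribute. A sketch of an equivalent computation using only public API, under the assumption that the least squares loss is used (in which case the per-stage deviance is just the mean squared error):

    # Sketch: per-stage test deviance via staged_predict and mean_squared_error.
    # Assumes reg, params, X_test and y_test are defined as in the example above.
    import numpy as np
    from sklearn.metrics import mean_squared_error

    test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
    for i, y_pred in enumerate(reg.staged_predict(X_test)):
        test_score[i] = mean_squared_error(y_test, y_pred)
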
116 | 112 | #
117 | 113 | # Careful, impurity-based feature importances can be misleading for
118 | 114 | # high cardinality features (many unique values). As an alternative,
119 |     | -# the permutation importances of ``clf`` are computed on a
    | 115 | +# the permutation importances of ``reg`` can be computed on a
120 | 116 | # held out test set. See :ref:`permutation_importance` for more details.
121 | 117 | #
122 |     | -# In this case, the two methods agree to identify the same top 2 features
123 |     | -# as strongly predictive features but not in the same order. The third most
    | 118 | +# For this example, the impurity-based and permutation methods identify the
    | 119 | +# same 2 strongly predictive features but not in the same order. The third most
124 | 120 | # predictive feature, "bp", is also the same for the 2 methods. The remaining
125 | 121 | # features are less predictive and the error bars of the permutation plot
126 | 122 | # show that they overlap with 0.
127 | 123 |
128 |     | -feature_importance = clf.feature_importances_
    | 124 | +feature_importance = reg.feature_importances_
129 | 125 | sorted_idx = np.argsort(feature_importance)
130 | 126 | pos = np.arange(sorted_idx.shape[0]) + .5
131 | 127 | fig = plt.figure(figsize=(12, 6))

134 | 130 | plt.yticks(pos, np.array(diabetes.feature_names)[sorted_idx])
135 | 131 | plt.title('Feature Importance (MDI)')
136 | 132 |
137 |     | -result = permutation_importance(clf, X_test, y_test, n_repeats=10,
    | 133 | +result = permutation_importance(reg, X_test, y_test, n_repeats=10,
138 | 134 |     random_state=42, n_jobs=2)
139 | 135 | sorted_idx = result.importances_mean.argsort()
140 | 136 | plt.subplot(1, 2, 2)
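
The diff ends just as the permutation-importance panel is being set up. Independently of the plotting code that follows in the full example, the `result` object computed above can also be summarized textually; a small sketch:

    # Sketch: rank features by mean permutation importance, with their std.
    # Assumes result and diabetes are defined as in the example above.
    for idx in result.importances_mean.argsort()[::-1]:
        print("{:<4} {:.4f} +/- {:.4f}".format(
            diabetes.feature_names[idx],
            result.importances_mean[idx],
            result.importances_std[idx]))
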