Commit b06f68d

Pushing the docs to dev/ for branch: main, commit 75db1bc2dbd716016170241d8daae97624d01252
1 parent 0526b38 commit b06f68d

1,355 files changed (+4729, -4798 lines)


dev/_downloads/0a90f2b8e2dadb7d37ca67b3f7adb656/plot_gradient_boosting_regularization.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\n\nX, y = datasets.make_hastie_10_2(n_samples=4000, random_state=1)\n\n# map labels from {-1, 1} to {0, 1}\nlabels, y = np.unique(y, return_inverse=True)\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=0)\n\noriginal_params = {\n \"n_estimators\": 400,\n \"max_leaf_nodes\": 4,\n \"max_depth\": None,\n \"random_state\": 2,\n \"min_samples_split\": 5,\n}\n\nplt.figure()\n\nfor label, color, setting in [\n (\"No shrinkage\", \"orange\", {\"learning_rate\": 1.0, \"subsample\": 1.0}),\n (\"learning_rate=0.2\", \"turquoise\", {\"learning_rate\": 0.2, \"subsample\": 1.0}),\n (\"subsample=0.5\", \"blue\", {\"learning_rate\": 1.0, \"subsample\": 0.5}),\n (\n \"learning_rate=0.2, subsample=0.5\",\n \"gray\",\n {\"learning_rate\": 0.2, \"subsample\": 0.5},\n ),\n (\n \"learning_rate=0.2, max_features=2\",\n \"magenta\",\n {\"learning_rate\": 0.2, \"max_features\": 2},\n ),\n]:\n params = dict(original_params)\n params.update(setting)\n\n clf = ensemble.GradientBoostingClassifier(**params)\n clf.fit(X_train, y_train)\n\n # compute test set deviance\n test_deviance = np.zeros((params[\"n_estimators\"],), dtype=np.float64)\n\n for i, y_pred in enumerate(clf.staged_decision_function(X_test)):\n # clf.loss_ assumes that y_test[i] in {0, 1}\n test_deviance[i] = clf.loss_(y_test, y_pred)\n\n plt.plot(\n (np.arange(test_deviance.shape[0]) + 1)[::5],\n test_deviance[::5],\n \"-\",\n color=color,\n label=label,\n )\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Boosting Iterations\")\nplt.ylabel(\"Test Set Deviance\")\n\nplt.show()"
+"# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\nfrom sklearn.metrics import log_loss\nfrom sklearn.model_selection import train_test_split\n\nX, y = datasets.make_hastie_10_2(n_samples=4000, random_state=1)\n\n# map labels from {-1, 1} to {0, 1}\nlabels, y = np.unique(y, return_inverse=True)\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=0)\n\noriginal_params = {\n \"n_estimators\": 400,\n \"max_leaf_nodes\": 4,\n \"max_depth\": None,\n \"random_state\": 2,\n \"min_samples_split\": 5,\n}\n\nplt.figure()\n\nfor label, color, setting in [\n (\"No shrinkage\", \"orange\", {\"learning_rate\": 1.0, \"subsample\": 1.0}),\n (\"learning_rate=0.2\", \"turquoise\", {\"learning_rate\": 0.2, \"subsample\": 1.0}),\n (\"subsample=0.5\", \"blue\", {\"learning_rate\": 1.0, \"subsample\": 0.5}),\n (\n \"learning_rate=0.2, subsample=0.5\",\n \"gray\",\n {\"learning_rate\": 0.2, \"subsample\": 0.5},\n ),\n (\n \"learning_rate=0.2, max_features=2\",\n \"magenta\",\n {\"learning_rate\": 0.2, \"max_features\": 2},\n ),\n]:\n params = dict(original_params)\n params.update(setting)\n\n clf = ensemble.GradientBoostingClassifier(**params)\n clf.fit(X_train, y_train)\n\n # compute test set deviance\n test_deviance = np.zeros((params[\"n_estimators\"],), dtype=np.float64)\n\n for i, y_proba in enumerate(clf.staged_predict_proba(X_test)):\n test_deviance[i] = 2 * log_loss(y_test, y_proba[:, 1])\n\n plt.plot(\n (np.arange(test_deviance.shape[0]) + 1)[::5],\n test_deviance[::5],\n \"-\",\n color=color,\n label=label,\n )\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Boosting Iterations\")\nplt.ylabel(\"Test Set Deviance\")\n\nplt.show()"
 ]
 }
],
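
The example's per-stage test deviance no longer goes through the estimator's private loss_ attribute (deprecated in scikit-learn and slated for removal); it is recomputed from staged_predict_proba with sklearn.metrics.log_loss instead. A minimal sketch of that pattern, on a small synthetic dataset chosen here purely for illustration:

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import log_loss

# Illustrative data and model; not the values the example itself uses.
X, y = make_classification(n_samples=200, random_state=0)
clf = GradientBoostingClassifier(n_estimators=10, random_state=0).fit(X, y)

# staged_predict_proba yields class probabilities after each boosting
# iteration, so per-stage metrics need no private attributes.
per_stage = [2 * log_loss(y, proba[:, 1]) for proba in clf.staged_predict_proba(X)]
assert len(per_stage) == clf.n_estimators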

dev/_downloads/e641093af989b69bc2b89b130bcf320f/plot_gradient_boosting_regularization.py

Lines changed: 3 additions & 4 deletions
@@ -30,7 +30,7 @@

 from sklearn import ensemble
 from sklearn import datasets
-
+from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split

 X, y = datasets.make_hastie_10_2(n_samples=4000, random_state=1)
@@ -74,9 +74,8 @@
     # compute test set deviance
     test_deviance = np.zeros((params["n_estimators"],), dtype=np.float64)

-    for i, y_pred in enumerate(clf.staged_decision_function(X_test)):
-        # clf.loss_ assumes that y_test[i] in {0, 1}
-        test_deviance[i] = clf.loss_(y_test, y_pred)
+    for i, y_proba in enumerate(clf.staged_predict_proba(X_test)):
+        test_deviance[i] = 2 * log_loss(y_test, y_proba[:, 1])

     plt.plot(
         (np.arange(test_deviance.shape[0]) + 1)[::5],
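
The factor of 2 in the replacement line is what keeps the plotted curve identical to the old one: for labels in {0, 1} and raw scores f, the binomial deviance that loss_ computed, -2 * mean(y * f - log(1 + exp(f))), is exactly twice the log loss of the sigmoid-mapped probabilities. A quick numerical check on random data (purely illustrative):

import numpy as np
from sklearn.metrics import log_loss

rng = np.random.RandomState(0)
y = rng.randint(0, 2, size=100)  # labels in {0, 1}
f = rng.randn(100)  # stand-in for raw decision_function scores
p = 1.0 / (1.0 + np.exp(-f))  # sigmoid link: P(y = 1 | x)

# Binomial deviance in the form scikit-learn's loss code used:
# -2 * mean(y * f - log(1 + exp(f)))
deviance = -2.0 * np.mean(y * f - np.logaddexp(0.0, f))

assert np.isclose(deviance, 2.0 * log_loss(y, p))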

dev/_downloads/scikit-learn-docs.zip

Binary file not shown.
