Skip to content

Commit 721c936

Browse files
committed
Pushing the docs to dev/ for branch: main, commit f19bf4c406e9b0155038d995cc6e1c792d38bc4e
1 parent a11abdd commit 721c936

File tree

1,237 files changed

+4535
-4532
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,237 files changed

+4535
-4532
lines changed
Binary file not shown.

dev/_downloads/0a90f2b8e2dadb7d37ca67b3f7adb656/plot_gradient_boosting_regularization.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\n\n\nX, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)\nX = X.astype(np.float32)\n\n# map labels from {-1, 1} to {0, 1}\nlabels, y = np.unique(y, return_inverse=True)\n\nX_train, X_test = X[:2000], X[2000:]\ny_train, y_test = y[:2000], y[2000:]\n\noriginal_params = {\n \"n_estimators\": 1000,\n \"max_leaf_nodes\": 4,\n \"max_depth\": None,\n \"random_state\": 2,\n \"min_samples_split\": 5,\n}\n\nplt.figure()\n\nfor label, color, setting in [\n (\"No shrinkage\", \"orange\", {\"learning_rate\": 1.0, \"subsample\": 1.0}),\n (\"learning_rate=0.1\", \"turquoise\", {\"learning_rate\": 0.1, \"subsample\": 1.0}),\n (\"subsample=0.5\", \"blue\", {\"learning_rate\": 1.0, \"subsample\": 0.5}),\n (\n \"learning_rate=0.1, subsample=0.5\",\n \"gray\",\n {\"learning_rate\": 0.1, \"subsample\": 0.5},\n ),\n (\n \"learning_rate=0.1, max_features=2\",\n \"magenta\",\n {\"learning_rate\": 0.1, \"max_features\": 2},\n ),\n]:\n params = dict(original_params)\n params.update(setting)\n\n clf = ensemble.GradientBoostingClassifier(**params)\n clf.fit(X_train, y_train)\n\n # compute test set deviance\n test_deviance = np.zeros((params[\"n_estimators\"],), dtype=np.float64)\n\n for i, y_pred in enumerate(clf.staged_decision_function(X_test)):\n # clf.loss_ assumes that y_test[i] in {0, 1}\n test_deviance[i] = clf.loss_(y_test, y_pred)\n\n plt.plot(\n (np.arange(test_deviance.shape[0]) + 1)[::5],\n test_deviance[::5],\n \"-\",\n color=color,\n label=label,\n )\n\nplt.legend(loc=\"upper left\")\nplt.xlabel(\"Boosting Iterations\")\nplt.ylabel(\"Test Set Deviance\")\n\nplt.show()"
29+
"# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\n\nX, y = datasets.make_hastie_10_2(n_samples=4000, random_state=1)\n\n# map labels from {-1, 1} to {0, 1}\nlabels, y = np.unique(y, return_inverse=True)\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=0)\n\noriginal_params = {\n \"n_estimators\": 400,\n \"max_leaf_nodes\": 4,\n \"max_depth\": None,\n \"random_state\": 2,\n \"min_samples_split\": 5,\n}\n\nplt.figure()\n\nfor label, color, setting in [\n (\"No shrinkage\", \"orange\", {\"learning_rate\": 1.0, \"subsample\": 1.0}),\n (\"learning_rate=0.2\", \"turquoise\", {\"learning_rate\": 0.2, \"subsample\": 1.0}),\n (\"subsample=0.5\", \"blue\", {\"learning_rate\": 1.0, \"subsample\": 0.5}),\n (\n \"learning_rate=0.2, subsample=0.5\",\n \"gray\",\n {\"learning_rate\": 0.2, \"subsample\": 0.5},\n ),\n (\n \"learning_rate=0.2, max_features=2\",\n \"magenta\",\n {\"learning_rate\": 0.2, \"max_features\": 2},\n ),\n]:\n params = dict(original_params)\n params.update(setting)\n\n clf = ensemble.GradientBoostingClassifier(**params)\n clf.fit(X_train, y_train)\n\n # compute test set deviance\n test_deviance = np.zeros((params[\"n_estimators\"],), dtype=np.float64)\n\n for i, y_pred in enumerate(clf.staged_decision_function(X_test)):\n # clf.loss_ assumes that y_test[i] in {0, 1}\n test_deviance[i] = clf.loss_(y_test, y_pred)\n\n plt.plot(\n (np.arange(test_deviance.shape[0]) + 1)[::5],\n test_deviance[::5],\n \"-\",\n color=color,\n label=label,\n )\n\nplt.legend(loc=\"upper left\")\nplt.xlabel(\"Boosting Iterations\")\nplt.ylabel(\"Test Set Deviance\")\n\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/23fb33f64b3c23edf25165a3a4f04237/plot_successive_halving_iterations.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
},
4545
"outputs": [],
4646
"source": [
47-
"rng = np.random.RandomState(0)\n\nX, y = datasets.make_classification(n_samples=700, random_state=rng)\n\nclf = RandomForestClassifier(n_estimators=20, random_state=rng)\n\nparam_dist = {\n \"max_depth\": [3, None],\n \"max_features\": randint(1, 11),\n \"min_samples_split\": randint(2, 11),\n \"bootstrap\": [True, False],\n \"criterion\": [\"gini\", \"entropy\"],\n}\n\nrsh = HalvingRandomSearchCV(\n estimator=clf, param_distributions=param_dist, factor=2, random_state=rng\n)\nrsh.fit(X, y)"
47+
"rng = np.random.RandomState(0)\n\nX, y = datasets.make_classification(n_samples=400, n_features=12, random_state=rng)\n\nclf = RandomForestClassifier(n_estimators=20, random_state=rng)\n\nparam_dist = {\n \"max_depth\": [3, None],\n \"max_features\": randint(1, 6),\n \"min_samples_split\": randint(2, 11),\n \"bootstrap\": [True, False],\n \"criterion\": [\"gini\", \"entropy\"],\n}\n\nrsh = HalvingRandomSearchCV(\n estimator=clf, param_distributions=param_dist, factor=2, random_state=rng\n)\nrsh.fit(X, y)"
4848
]
4949
},
5050
{

dev/_downloads/49fae0b4f6ab58738dcbf62236756548/plot_successive_halving_iterations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@
2727

2828
rng = np.random.RandomState(0)
2929

30-
X, y = datasets.make_classification(n_samples=700, random_state=rng)
30+
X, y = datasets.make_classification(n_samples=400, n_features=12, random_state=rng)
3131

3232
clf = RandomForestClassifier(n_estimators=20, random_state=rng)
3333

3434
param_dist = {
3535
"max_depth": [3, None],
36-
"max_features": randint(1, 11),
36+
"max_features": randint(1, 6),
3737
"min_samples_split": randint(2, 11),
3838
"bootstrap": [True, False],
3939
"criterion": ["gini", "entropy"],

0 commit comments

Comments
 (0)