Skip to content

Commit 0895dc8

Browse files
committed
Pushing the docs to dev/ for branch: main, commit f8f77b4acca401904f6e7332bea55067f9d1e797
1 parent 9151a4e commit 0895dc8

File tree

1,232 files changed

+4407
-4415
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,232 files changed

+4407
-4415
lines changed
Binary file not shown.

dev/_downloads/6c50dbd9c6dc52f3da913f8d8f82274d/plot_ensemble_oob.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nimport matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(\n n_samples=500,\n n_features=25,\n n_clusters_per_class=1,\n n_informative=15,\n random_state=RANDOM_STATE,\n)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\n \"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(\n warm_start=True,\n oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(\n warm_start=True,\n max_features=\"log2\",\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(\n warm_start=True,\n max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
29+
"# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nimport matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(\n n_samples=500,\n n_features=25,\n n_clusters_per_class=1,\n n_informative=15,\n random_state=RANDOM_STATE,\n)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\n \"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(\n warm_start=True,\n oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(\n warm_start=True,\n max_features=\"log2\",\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(\n warm_start=True,\n max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 150\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1, 5):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
3030
]
3131
}
3232
],
Binary file not shown.

dev/_downloads/75191b2eb3b4aa13066927321dd3fdcf/plot_ensemble_oob.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,10 @@
8181

8282
# Range of `n_estimators` values to explore.
8383
min_estimators = 15
84-
max_estimators = 175
84+
max_estimators = 150
8585

8686
for label, clf in ensemble_clfs:
87-
for i in range(min_estimators, max_estimators + 1):
87+
for i in range(min_estimators, max_estimators + 1, 5):
8888
clf.set_params(n_estimators=i)
8989
clf.fit(X, y)
9090

dev/_downloads/scikit-learn-docs.zip

-20.5 KB
Binary file not shown.
75 Bytes
138 Bytes
-542 Bytes
132 Bytes
62 Bytes

0 commit comments

Comments
 (0)