scikit-learn
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
3 Bytes b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
3 Bytes
diff --git a/‎dev/_downloads/6c50dbd9c6dc52f3da913f8d8f82274d/plot_ensemble_oob.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/6c50dbd9c6dc52f3da913f8d8f82274d/plot_ensemble_oob.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
3 Bytes b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
3 Bytes
diff --git a/‎dev/_downloads/75191b2eb3b4aa13066927321dd3fdcf/plot_ensemble_oob.py
Lines changed: 2 additions & 2 deletions b/‎dev/_downloads/75191b2eb3b4aa13066927321dd3fdcf/plot_ensemble_oob.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.zip
-20.5 KB b/‎dev/_downloads/scikit-learn-docs.zip
-20.5 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
75 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
75 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
138 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
138 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-542 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-542 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
132 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
132 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
62 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
62 Bytes
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "# Author: Kian Ho <[email protected]>\n#         Gilles Louppe <[email protected]>\n#         Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nimport matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(\n    n_samples=500,\n    n_features=25,\n    n_clusters_per_class=1,\n    n_informative=15,\n    random_state=RANDOM_STATE,\n)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n    (\n        \"RandomForestClassifier, max_features='sqrt'\",\n        RandomForestClassifier(\n            warm_start=True,\n            oob_score=True,\n            max_features=\"sqrt\",\n            random_state=RANDOM_STATE,\n        ),\n    ),\n    (\n        \"RandomForestClassifier, max_features='log2'\",\n        RandomForestClassifier(\n            warm_start=True,\n            max_features=\"log2\",\n            oob_score=True,\n            random_state=RANDOM_STATE,\n        ),\n    ),\n    (\n        \"RandomForestClassifier, max_features=None\",\n        RandomForestClassifier(\n            warm_start=True,\n            max_features=None,\n            oob_score=True,\n            random_state=RANDOM_STATE,\n        ),\n    ),\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n    for i in range(min_estimators, max_estimators + 1):\n        clf.set_params(n_estimators=i)\n        clf.fit(X, y)\n\n        # Record the OOB error for each `n_estimators=i` setting.\n        oob_error = 1 - clf.oob_score_\n        error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n    xs, ys = zip(*clf_err)\n    plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
+        "# Author: Kian Ho <[email protected]>\n#         Gilles Louppe <[email protected]>\n#         Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nimport matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(\n    n_samples=500,\n    n_features=25,\n    n_clusters_per_class=1,\n    n_informative=15,\n    random_state=RANDOM_STATE,\n)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n    (\n        \"RandomForestClassifier, max_features='sqrt'\",\n        RandomForestClassifier(\n            warm_start=True,\n            oob_score=True,\n            max_features=\"sqrt\",\n            random_state=RANDOM_STATE,\n        ),\n    ),\n    (\n        \"RandomForestClassifier, max_features='log2'\",\n        RandomForestClassifier(\n            warm_start=True,\n            max_features=\"log2\",\n            oob_score=True,\n            random_state=RANDOM_STATE,\n        ),\n    ),\n    (\n        \"RandomForestClassifier, max_features=None\",\n        RandomForestClassifier(\n            warm_start=True,\n            max_features=None,\n            oob_score=True,\n            random_state=RANDOM_STATE,\n        ),\n    ),\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 150\n\nfor label, clf in ensemble_clfs:\n    for i in range(min_estimators, max_estimators + 1, 5):\n        clf.set_params(n_estimators=i)\n        clf.fit(X, y)\n\n        # Record the OOB error for each `n_estimators=i` setting.\n        oob_error = 1 - clf.oob_score_\n        error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n    xs, ys = zip(*clf_err)\n    plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
       ]
     }
   ],
 
@@ -81,10 +81,10 @@
 
 # Range of `n_estimators` values to explore.
 min_estimators = 15
-max_estimators = 175
+max_estimators = 150
 
 for label, clf in ensemble_clfs:
-    for i in range(min_estimators, max_estimators + 1):
+    for i in range(min_estimators, max_estimators + 1, 5):
         clf.set_params(n_estimators=i)
         clf.fit(X, y)
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nimport matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(\n n_samples=500,\n n_features=25,\n n_clusters_per_class=1,\n n_informative=15,\n random_state=RANDOM_STATE,\n)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\n \"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(\n warm_start=True,\n oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(\n warm_start=True,\n max_features=\"log2\",\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(\n warm_start=True,\n max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
	`29`	+ "# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nimport matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(\n n_samples=500,\n n_features=25,\n n_clusters_per_class=1,\n n_informative=15,\n random_state=RANDOM_STATE,\n)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\n \"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(\n warm_start=True,\n oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(\n warm_start=True,\n max_features=\"log2\",\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n (\n \"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(\n warm_start=True,\n max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE,\n ),\n ),\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 150\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1, 5):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`