Skip to content

Commit af359ff

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 6d604d1d9342397b90e5e59e1ec1932da3374e7a
1 parent 5b2f5c3 commit af359ff

File tree

952 files changed

+2838
-2838
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

952 files changed

+2838
-2838
lines changed
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

dev/_downloads/randomized_search.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
"execution_count": null,
2525
"cell_type": "code",
2626
"source": [
27-
"print(__doc__)\n\nimport numpy as np\n\nfrom time import time\nfrom scipy.stats import randint as sp_randint\n\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import RandomizedSearchCV\nfrom sklearn.datasets import load_digits\nfrom sklearn.ensemble import RandomForestClassifier\n\n# get some data\ndigits = load_digits()\nX, y = digits.data, digits.target\n\n# build a classifier\nclf = RandomForestClassifier(n_estimators=20)\n\n\n# Utility function to report best scores\ndef report(results, n_top=3):\n for i in range(1, n_top + 1):\n candidates = np.flatnonzero(results['rank_test_score'] == i)\n for candidate in candidates:\n print(\"Model with rank: {0}\".format(i))\n print(\"Mean validation score: {0:.3f} (std: {1:.3f})\".format(\n results['mean_test_score'][candidate],\n results['std_test_score'][candidate]))\n print(\"Parameters: {0}\".format(results['params'][candidate]))\n print(\"\")\n\n\n# specify parameters and distributions to sample from\nparam_dist = {\"max_depth\": [3, None],\n \"max_features\": sp_randint(1, 11),\n \"min_samples_split\": sp_randint(1, 11),\n \"min_samples_leaf\": sp_randint(1, 11),\n \"bootstrap\": [True, False],\n \"criterion\": [\"gini\", \"entropy\"]}\n\n# run randomized search\nn_iter_search = 20\nrandom_search = RandomizedSearchCV(clf, param_distributions=param_dist,\n n_iter=n_iter_search)\n\nstart = time()\nrandom_search.fit(X, y)\nprint(\"RandomizedSearchCV took %.2f seconds for %d candidates\"\n \" parameter settings.\" % ((time() - start), n_iter_search))\nreport(random_search.cv_results_)\n\n# use a full grid over all parameters\nparam_grid = {\"max_depth\": [3, None],\n \"max_features\": [1, 3, 10],\n \"min_samples_split\": [1, 3, 10],\n \"min_samples_leaf\": [1, 3, 10],\n \"bootstrap\": [True, False],\n \"criterion\": [\"gini\", \"entropy\"]}\n\n# run grid search\ngrid_search = GridSearchCV(clf, param_grid=param_grid)\nstart = time()\ngrid_search.fit(X, y)\n\nprint(\"GridSearchCV took %.2f 
seconds for %d candidate parameter settings.\"\n % (time() - start, len(grid_search.cv_results_['params'])))\nreport(grid_search.cv_results_)"
27+
"print(__doc__)\n\nimport numpy as np\n\nfrom time import time\nfrom scipy.stats import randint as sp_randint\n\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import RandomizedSearchCV\nfrom sklearn.datasets import load_digits\nfrom sklearn.ensemble import RandomForestClassifier\n\n# get some data\ndigits = load_digits()\nX, y = digits.data, digits.target\n\n# build a classifier\nclf = RandomForestClassifier(n_estimators=20)\n\n\n# Utility function to report best scores\ndef report(results, n_top=3):\n for i in range(1, n_top + 1):\n candidates = np.flatnonzero(results['rank_test_score'] == i)\n for candidate in candidates:\n print(\"Model with rank: {0}\".format(i))\n print(\"Mean validation score: {0:.3f} (std: {1:.3f})\".format(\n results['mean_test_score'][candidate],\n results['std_test_score'][candidate]))\n print(\"Parameters: {0}\".format(results['params'][candidate]))\n print(\"\")\n\n\n# specify parameters and distributions to sample from\nparam_dist = {\"max_depth\": [3, None],\n \"max_features\": sp_randint(1, 11),\n \"min_samples_split\": sp_randint(2, 11),\n \"min_samples_leaf\": sp_randint(1, 11),\n \"bootstrap\": [True, False],\n \"criterion\": [\"gini\", \"entropy\"]}\n\n# run randomized search\nn_iter_search = 20\nrandom_search = RandomizedSearchCV(clf, param_distributions=param_dist,\n n_iter=n_iter_search)\n\nstart = time()\nrandom_search.fit(X, y)\nprint(\"RandomizedSearchCV took %.2f seconds for %d candidates\"\n \" parameter settings.\" % ((time() - start), n_iter_search))\nreport(random_search.cv_results_)\n\n# use a full grid over all parameters\nparam_grid = {\"max_depth\": [3, None],\n \"max_features\": [1, 3, 10],\n \"min_samples_split\": [2, 3, 10],\n \"min_samples_leaf\": [1, 3, 10],\n \"bootstrap\": [True, False],\n \"criterion\": [\"gini\", \"entropy\"]}\n\n# run grid search\ngrid_search = GridSearchCV(clf, param_grid=param_grid)\nstart = time()\ngrid_search.fit(X, y)\n\nprint(\"GridSearchCV took %.2f 
seconds for %d candidate parameter settings.\"\n % (time() - start, len(grid_search.cv_results_['params'])))\nreport(grid_search.cv_results_)"
2828
],
2929
"outputs": [],
3030
"metadata": {

dev/_downloads/randomized_search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def report(results, n_top=3):
5454
# specify parameters and distributions to sample from
5555
param_dist = {"max_depth": [3, None],
5656
"max_features": sp_randint(1, 11),
57-
"min_samples_split": sp_randint(1, 11),
57+
"min_samples_split": sp_randint(2, 11),
5858
"min_samples_leaf": sp_randint(1, 11),
5959
"bootstrap": [True, False],
6060
"criterion": ["gini", "entropy"]}
@@ -73,7 +73,7 @@ def report(results, n_top=3):
7373
# use a full grid over all parameters
7474
param_grid = {"max_depth": [3, None],
7575
"max_features": [1, 3, 10],
76-
"min_samples_split": [1, 3, 10],
76+
"min_samples_split": [2, 3, 10],
7777
"min_samples_leaf": [1, 3, 10],
7878
"bootstrap": [True, False],
7979
"criterion": ["gini", "entropy"]}

dev/_downloads/scikit-learn-docs.pdf

4.75 KB
Binary file not shown.

0 commit comments

Comments
 (0)