Commit cdf815e

Pushing the docs to dev/ for branch: master, commit 8a8e21b2a395aea950f878c572771541d097ac16
1 parent 72fdc01 commit cdf815e

File tree

1,106 files changed (+3352, -3369 lines)

(Two binary files changed, -344 bytes and -339 bytes; binary content not shown.)

dev/_downloads/plot_compare_calibration.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
-     "print(__doc__)\n\n# Author: Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nnp.random.seed(0)\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import LinearSVC\nfrom sklearn.calibration import calibration_curve\n\nX, y = datasets.make_classification(n_samples=100000, n_features=20,\n                                    n_informative=2, n_redundant=2)\n\ntrain_samples = 100  # Samples used for training the models\n\nX_train = X[:train_samples]\nX_test = X[train_samples:]\ny_train = y[:train_samples]\ny_test = y[train_samples:]\n\n# Create classifiers\nlr = LogisticRegression()\ngnb = GaussianNB()\nsvc = LinearSVC(C=1.0)\nrfc = RandomForestClassifier(n_estimators=100)\n\n\n# #############################################################################\n# Plot calibration plots\n\nplt.figure(figsize=(10, 10))\nax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)\nax2 = plt.subplot2grid((3, 1), (2, 0))\n\nax1.plot([0, 1], [0, 1], \"k:\", label=\"Perfectly calibrated\")\nfor clf, name in [(lr, 'Logistic'),\n                  (gnb, 'Naive Bayes'),\n                  (svc, 'Support Vector Classification'),\n                  (rfc, 'Random Forest')]:\n    clf.fit(X_train, y_train)\n    if hasattr(clf, \"predict_proba\"):\n        prob_pos = clf.predict_proba(X_test)[:, 1]\n    else:  # use decision function\n        prob_pos = clf.decision_function(X_test)\n        prob_pos = \\\n            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())\n    fraction_of_positives, mean_predicted_value = \\\n        calibration_curve(y_test, prob_pos, n_bins=10)\n\n    ax1.plot(mean_predicted_value, fraction_of_positives, \"s-\",\n             label=\"%s\" % (name, ))\n\n    ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,\n             histtype=\"step\", lw=2)\n\nax1.set_ylabel(\"Fraction of positives\")\nax1.set_ylim([-0.05, 1.05])\nax1.legend(loc=\"lower right\")\nax1.set_title('Calibration plots  (reliability curve)')\n\nax2.set_xlabel(\"Mean predicted value\")\nax2.set_ylabel(\"Count\")\nax2.legend(loc=\"upper center\", ncol=2)\n\nplt.tight_layout()\nplt.show()"
+     "print(__doc__)\n\n# Author: Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nnp.random.seed(0)\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import LinearSVC\nfrom sklearn.calibration import calibration_curve\n\nX, y = datasets.make_classification(n_samples=100000, n_features=20,\n                                    n_informative=2, n_redundant=2)\n\ntrain_samples = 100  # Samples used for training the models\n\nX_train = X[:train_samples]\nX_test = X[train_samples:]\ny_train = y[:train_samples]\ny_test = y[train_samples:]\n\n# Create classifiers\nlr = LogisticRegression()\ngnb = GaussianNB()\nsvc = LinearSVC(C=1.0)\nrfc = RandomForestClassifier()\n\n\n# #############################################################################\n# Plot calibration plots\n\nplt.figure(figsize=(10, 10))\nax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)\nax2 = plt.subplot2grid((3, 1), (2, 0))\n\nax1.plot([0, 1], [0, 1], \"k:\", label=\"Perfectly calibrated\")\nfor clf, name in [(lr, 'Logistic'),\n                  (gnb, 'Naive Bayes'),\n                  (svc, 'Support Vector Classification'),\n                  (rfc, 'Random Forest')]:\n    clf.fit(X_train, y_train)\n    if hasattr(clf, \"predict_proba\"):\n        prob_pos = clf.predict_proba(X_test)[:, 1]\n    else:  # use decision function\n        prob_pos = clf.decision_function(X_test)\n        prob_pos = \\\n            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())\n    fraction_of_positives, mean_predicted_value = \\\n        calibration_curve(y_test, prob_pos, n_bins=10)\n\n    ax1.plot(mean_predicted_value, fraction_of_positives, \"s-\",\n             label=\"%s\" % (name, ))\n\n    ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,\n             histtype=\"step\", lw=2)\n\nax1.set_ylabel(\"Fraction of positives\")\nax1.set_ylim([-0.05, 1.05])\nax1.legend(loc=\"lower right\")\nax1.set_title('Calibration plots  (reliability curve)')\n\nax2.set_xlabel(\"Mean predicted value\")\nax2.set_ylabel(\"Count\")\nax2.legend(loc=\"upper center\", ncol=2)\n\nplt.tight_layout()\nplt.show()"
    ]
   }
  ],
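
The only change in this notebook (and in the matching script below) is dropping the explicit n_estimators=100 from RandomForestClassifier. A minimal sketch to check that this is behavior-preserving, assuming a scikit-learn version (0.22 dev or later) in which the default n_estimators is already 100:

import sklearn
from sklearn.ensemble import RandomForestClassifier

# Assumption: on the scikit-learn dev branch these docs track,
# the default n_estimators equals the value the examples used to pass.
explicit = RandomForestClassifier(n_estimators=100)
implicit = RandomForestClassifier()
assert explicit.get_params()["n_estimators"] == \
    implicit.get_params()["n_estimators"]
print(sklearn.__version__, implicit.get_params()["n_estimators"])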

dev/_downloads/plot_compare_calibration.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@
 lr = LogisticRegression()
 gnb = GaussianNB()
 svc = LinearSVC(C=1.0)
-rfc = RandomForestClassifier(n_estimators=100)
+rfc = RandomForestClassifier()
 
 
 # #############################################################################
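
For context, the surrounding example feeds each model's scores to calibration_curve to build the reliability curves. A standalone sketch of that call with hypothetical toy inputs (the example itself derives prob_pos from predict_proba, or from a min-max scaled decision_function):

import numpy as np
from sklearn.calibration import calibration_curve

# Toy labels and predicted positive-class probabilities (hypothetical).
y_test = np.array([0, 0, 1, 1, 1, 0, 1, 0])
prob_pos = np.array([0.1, 0.3, 0.8, 0.9, 0.6, 0.2, 0.7, 0.4])

# Bin the predictions, then compare the mean prediction in each bin with
# the observed fraction of positives; perfect calibration lies on y = x.
fraction_of_positives, mean_predicted_value = calibration_curve(
    y_test, prob_pos, n_bins=2)
print(fraction_of_positives, mean_predicted_value)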

dev/_downloads/plot_document_classification_20newsgroups.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dev/_downloads/plot_document_classification_20newsgroups.py

Lines changed: 1 addition & 1 deletion
@@ -251,7 +251,7 @@ def benchmark(clf):
         (PassiveAggressiveClassifier(max_iter=50, tol=1e-3),
          "Passive-Aggressive"),
         (KNeighborsClassifier(n_neighbors=10), "kNN"),
-        (RandomForestClassifier(n_estimators=100), "Random forest")):
+        (RandomForestClassifier(), "Random forest")):
     print('=' * 80)
     print(name)
     results.append(benchmark(clf))
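
The hunk above sits inside a loop that hands each classifier to a benchmark helper defined earlier in the file, outside this diff. Its exact body is not shown here; a plausible minimal version of such a helper, timing fit and predict and reporting accuracy on synthetic stand-in data, might look like this:

from time import time

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Stand-in data; the real example uses the 20 newsgroups text corpus.
X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

def benchmark(clf):
    # Time training and prediction, then score the predictions.
    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0

    t0 = time()
    pred = clf.predict(X_test)
    test_time = time() - t0

    score = accuracy_score(y_test, pred)
    return clf.__class__.__name__, score, train_time, test_time

print(benchmark(LogisticRegression()))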

dev/_downloads/plot_ensemble_oob.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
-     "import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Author: Kian Ho <[email protected]>\n#         Gilles Louppe <[email protected]>\n#         Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n                           n_clusters_per_class=1, n_informative=15,\n                           random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n    (\"RandomForestClassifier, max_features='sqrt'\",\n        RandomForestClassifier(n_estimators=100,\n                               warm_start=True, oob_score=True,\n                               max_features=\"sqrt\",\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features='log2'\",\n        RandomForestClassifier(n_estimators=100,\n                               warm_start=True, max_features='log2',\n                               oob_score=True,\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features=None\",\n        RandomForestClassifier(n_estimators=100,\n                               warm_start=True, max_features=None,\n                               oob_score=True,\n                               random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n    for i in range(min_estimators, max_estimators + 1):\n        clf.set_params(n_estimators=i)\n        clf.fit(X, y)\n\n        # Record the OOB error for each `n_estimators=i` setting.\n        oob_error = 1 - clf.oob_score_\n        error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n    xs, ys = zip(*clf_err)\n    plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
+     "import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Author: Kian Ho <[email protected]>\n#         Gilles Louppe <[email protected]>\n#         Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n                           n_clusters_per_class=1, n_informative=15,\n                           random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n    (\"RandomForestClassifier, max_features='sqrt'\",\n        RandomForestClassifier(warm_start=True, oob_score=True,\n                               max_features=\"sqrt\",\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features='log2'\",\n        RandomForestClassifier(warm_start=True, max_features='log2',\n                               oob_score=True,\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features=None\",\n        RandomForestClassifier(warm_start=True, max_features=None,\n                               oob_score=True,\n                               random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n    for i in range(min_estimators, max_estimators + 1):\n        clf.set_params(n_estimators=i)\n        clf.fit(X, y)\n\n        # Record the OOB error for each `n_estimators=i` setting.\n        oob_error = 1 - clf.oob_score_\n        error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n    xs, ys = zip(*clf_err)\n    plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_ensemble_oob.py

Lines changed: 3 additions & 6 deletions
@@ -45,18 +45,15 @@
 # error trajectory during training.
 ensemble_clfs = [
     ("RandomForestClassifier, max_features='sqrt'",
-        RandomForestClassifier(n_estimators=100,
-                               warm_start=True, oob_score=True,
+        RandomForestClassifier(warm_start=True, oob_score=True,
                                max_features="sqrt",
                                random_state=RANDOM_STATE)),
     ("RandomForestClassifier, max_features='log2'",
-        RandomForestClassifier(n_estimators=100,
-                               warm_start=True, max_features='log2',
+        RandomForestClassifier(warm_start=True, max_features='log2',
                                oob_score=True,
                                random_state=RANDOM_STATE)),
     ("RandomForestClassifier, max_features=None",
-        RandomForestClassifier(n_estimators=100,
-                               warm_start=True, max_features=None,
+        RandomForestClassifier(warm_start=True, max_features=None,
                                oob_score=True,
                                random_state=RANDOM_STATE))
 ]
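
The warm_start pattern this example relies on, visible in the notebook source above: raising n_estimators and refitting adds trees to the existing forest rather than retraining from scratch, which is what makes tracing OOB error against ensemble size cheap. A minimal sketch, with arbitrary checkpoints standing in for the example's full 15 to 175 sweep:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, n_features=25,
                           n_clusters_per_class=1, n_informative=15,
                           random_state=123)

# warm_start=True keeps already-fitted trees, so each fit() call only
# grows the forest up to the new n_estimators value.
clf = RandomForestClassifier(warm_start=True, oob_score=True,
                             random_state=123)
for n_estimators in (25, 75, 150):
    clf.set_params(n_estimators=n_estimators)
    clf.fit(X, y)
    print(n_estimators, 1 - clf.oob_score_)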
