Commit 1f87e69

Pushing the docs to dev/ for branch: master, commit a6753f3ed38d25cec0af8ed95697f48eaacaed24
1 parent 66a6408 commit 1f87e69

1,009 files changed: +4718, -3205 lines
Two binary files changed (5.13 KB and 3.68 KB), not shown.
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Demonstration of multi-metric evaluation on cross_val_score and GridSearchCV\n\nMultiple metric parameter search can be done by setting the ``scoring``\nparameter to a list of metric scorer names or a dict mapping the scorer names\nto the scorer callables.\n\nThe scores of all the scorers are available in the ``cv_results_`` dict at keys\nending in ``'_<scorer_name>'`` (``'mean_test_precision'``,\n``'rank_test_precision'``, etc...)\n\nThe ``best_estimator_``, ``best_index_``, ``best_score_`` and ``best_params_``\ncorrespond to the scorer (key) that is set to the ``refit`` attribute.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author: Raghav RV <[email protected]>\n# License: BSD\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nfrom sklearn.datasets import make_hastie_10_2\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import make_scorer\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.tree import DecisionTreeClassifier\n\nprint(__doc__)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Running ``GridSearchCV`` using multiple evaluation metrics\n----------------------------------------------------------\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "X, y = make_hastie_10_2(n_samples=8000, random_state=42)\n\n# The scorers can either be one of the predefined metric strings or a scorer\n# callable, like the one returned by make_scorer\nscoring = {'AUC': 'roc_auc', 'Accuracy': make_scorer(accuracy_score)}\n\n# Setting refit='AUC' refits an estimator on the whole dataset with the\n# parameter setting that has the best cross-validated AUC score.\n# That estimator is made available at ``gs.best_estimator_`` along with\n# parameters like ``gs.best_score_``, ``gs.best_params_`` and\n# ``gs.best_index_``\ngs = GridSearchCV(DecisionTreeClassifier(random_state=42),\n                  param_grid={'min_samples_split': range(2, 403, 10)},\n                  scoring=scoring, cv=5, refit='AUC')\ngs.fit(X, y)\nresults = gs.cv_results_"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plotting the result\n-------------------\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "plt.figure(figsize=(13, 13))\nplt.title(\"GridSearchCV evaluating using multiple scorers simultaneously\",\n          fontsize=16)\n\nplt.xlabel(\"min_samples_split\")\nplt.ylabel(\"Score\")\nplt.grid()\n\nax = plt.axes()\nax.set_xlim(0, 402)\nax.set_ylim(0.73, 1)\n\n# Get the regular numpy array from the MaskedArray\nX_axis = np.array(results['param_min_samples_split'].data, dtype=float)\n\nfor scorer, color in zip(sorted(scoring), ['g', 'k']):\n    for sample, style in (('train', '--'), ('test', '-')):\n        sample_score_mean = results['mean_%s_%s' % (sample, scorer)]\n        sample_score_std = results['std_%s_%s' % (sample, scorer)]\n        ax.fill_between(X_axis, sample_score_mean - sample_score_std,\n                        sample_score_mean + sample_score_std,\n                        alpha=0.1 if sample == 'test' else 0, color=color)\n        ax.plot(X_axis, sample_score_mean, style, color=color,\n                alpha=1 if sample == 'test' else 0.7,\n                label=\"%s (%s)\" % (scorer, sample))\n\n    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]\n    best_score = results['mean_test_%s' % scorer][best_index]\n\n    # Plot a dotted vertical line at the best score for that scorer marked by x\n    ax.plot([X_axis[best_index], ] * 2, [0, best_score],\n            linestyle='-.', color=color, marker='x', markeredgewidth=3, ms=8)\n\n    # Annotate the best score for that scorer\n    ax.annotate(\"%0.2f\" % best_score,\n                (X_axis[best_index], best_score + 0.005))\n\nplt.legend(loc=\"best\")\nplt.grid(False)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.1"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
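
The markdown cell above describes the ``cv_results_`` key-naming scheme only in prose. As a minimal illustration (editorial, not part of this commit; it assumes the ``gs``, ``scoring`` and ``results`` objects from the code cells above are in scope after fitting), the per-scorer keys can be inspected directly:

# Each name used in ``scoring`` ('AUC', 'Accuracy') gets its own family of
# keys in cv_results_, e.g. 'mean_test_AUC', 'std_test_AUC', 'rank_test_AUC'
# (and 'mean_train_AUC' etc. when train scores are recorded).
for name in sorted(scoring):
    print(name, sorted(k for k in results if k.endswith('_%s' % name)))

# With refit='AUC', best_index_ points at the candidate ranked first on AUC,
# and best_score_ is that candidate's mean cross-validated AUC score.
print(gs.best_index_ == results['rank_test_AUC'].argmin())          # expected: True
print(gs.best_score_ == results['mean_test_AUC'][gs.best_index_])   # expected: True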
Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
"""Demonstration of multi-metric evaluation on cross_val_score and GridSearchCV

Multiple metric parameter search can be done by setting the ``scoring``
parameter to a list of metric scorer names or a dict mapping the scorer names
to the scorer callables.

The scores of all the scorers are available in the ``cv_results_`` dict at keys
ending in ``'_<scorer_name>'`` (``'mean_test_precision'``,
``'rank_test_precision'``, etc...)

The ``best_estimator_``, ``best_index_``, ``best_score_`` and ``best_params_``
correspond to the scorer (key) that is set to the ``refit`` attribute.
"""

# Author: Raghav RV <[email protected]>
# License: BSD

import numpy as np
from matplotlib import pyplot as plt

from sklearn.datasets import make_hastie_10_2
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

print(__doc__)

###############################################################################
# Running ``GridSearchCV`` using multiple evaluation metrics
# ----------------------------------------------------------
#

X, y = make_hastie_10_2(n_samples=8000, random_state=42)

# The scorers can either be one of the predefined metric strings or a scorer
# callable, like the one returned by make_scorer
scoring = {'AUC': 'roc_auc', 'Accuracy': make_scorer(accuracy_score)}

# Setting refit='AUC' refits an estimator on the whole dataset with the
# parameter setting that has the best cross-validated AUC score.
# That estimator is made available at ``gs.best_estimator_`` along with
# parameters like ``gs.best_score_``, ``gs.best_params_`` and
# ``gs.best_index_``
gs = GridSearchCV(DecisionTreeClassifier(random_state=42),
                  param_grid={'min_samples_split': range(2, 403, 10)},
                  scoring=scoring, cv=5, refit='AUC')
gs.fit(X, y)
results = gs.cv_results_

###############################################################################
# Plotting the result
# -------------------

plt.figure(figsize=(13, 13))
plt.title("GridSearchCV evaluating using multiple scorers simultaneously",
          fontsize=16)

plt.xlabel("min_samples_split")
plt.ylabel("Score")
plt.grid()

ax = plt.axes()
ax.set_xlim(0, 402)
ax.set_ylim(0.73, 1)

# Get the regular numpy array from the MaskedArray
X_axis = np.array(results['param_min_samples_split'].data, dtype=float)

for scorer, color in zip(sorted(scoring), ['g', 'k']):
    for sample, style in (('train', '--'), ('test', '-')):
        sample_score_mean = results['mean_%s_%s' % (sample, scorer)]
        sample_score_std = results['std_%s_%s' % (sample, scorer)]
        ax.fill_between(X_axis, sample_score_mean - sample_score_std,
                        sample_score_mean + sample_score_std,
                        alpha=0.1 if sample == 'test' else 0, color=color)
        ax.plot(X_axis, sample_score_mean, style, color=color,
                alpha=1 if sample == 'test' else 0.7,
                label="%s (%s)" % (scorer, sample))

    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]
    best_score = results['mean_test_%s' % scorer][best_index]

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax.plot([X_axis[best_index], ] * 2, [0, best_score],
            linestyle='-.', color=color, marker='x', markeredgewidth=3, ms=8)

    # Annotate the best score for that scorer
    ax.annotate("%0.2f" % best_score,
                (X_axis[best_index], best_score + 0.005))

plt.legend(loc="best")
plt.grid(False)
plt.show()
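
The docstring notes that ``scoring`` can also be a plain list of predefined metric names rather than a dict of named scorers, a form the committed example does not exercise. A minimal, self-contained sketch of the list form (editorial, not part of this commit), here using ``cross_validate`` from ``sklearn.model_selection``, could look as follows; the variable names are illustrative only:

# Editorial sketch: multi-metric evaluation with ``scoring`` given as a
# list of predefined metric strings instead of a dict of scorers.
from sklearn.datasets import make_hastie_10_2
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier

X, y = make_hastie_10_2(n_samples=8000, random_state=42)

cv_scores = cross_validate(DecisionTreeClassifier(random_state=42), X, y,
                           scoring=['accuracy', 'roc_auc'], cv=5)

# The returned dict follows the same '<prefix>_<scorer_name>' convention:
# 'test_accuracy' and 'test_roc_auc' hold one score per CV fold, alongside
# 'fit_time' and 'score_time' (and 'train_*' keys if train scores are kept).
for key in sorted(cv_scores):
    print(key, cv_scores[key].mean())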

dev/_downloads/scikit-learn-docs.pdf: 136 KB (binary file not shown)
