
Commit 646aadd

Pushing the docs to dev/ for branch: master, commit dc9955b5d4fde7c9d28c9ae9a7372a25caf12794
1 parent 5ebee14 commit 646aadd

1,084 files changed (+3392 / -3071 lines)

Two binary files changed (1.56 KB and 1.33 KB); contents not shown.

dev/_downloads/plot_roc.ipynb

Lines changed: 22 additions & 4 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n=======================================\nReceiver Operating Characteristic (ROC)\n=======================================\n\nExample of Receiver Operating Characteristic (ROC) metric to evaluate\nclassifier output quality.\n\nROC curves typically feature true positive rate on the Y axis, and false\npositive rate on the X axis. This means that the top left corner of the plot is\nthe \"ideal\" point - a false positive rate of zero, and a true positive rate of\none. This is not very realistic, but it does mean that a larger area under the\ncurve (AUC) is usually better.\n\nThe \"steepness\" of ROC curves is also important, since it is ideal to maximize\nthe true positive rate while minimizing the false positive rate.\n\nMulticlass settings\n-------------------\n\nROC curves are typically used in binary classification to study the output of\na classifier. In order to extend ROC curve and ROC area to multi-class\nor multi-label classification, it is necessary to binarize the output. One ROC\ncurve can be drawn per label, but one can also draw a ROC curve by considering\neach element of the label indicator matrix as a binary prediction\n(micro-averaging).\n\nAnother evaluation measure for multi-class classification is\nmacro-averaging, which gives equal weight to the classification of each\nlabel.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>See also :func:`sklearn.metrics.roc_auc_score`,\n `sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`.</p></div>\n\n\n"
+"\n=======================================\nReceiver Operating Characteristic (ROC)\n=======================================\n\nExample of Receiver Operating Characteristic (ROC) metric to evaluate\nclassifier output quality.\n\nROC curves typically feature true positive rate on the Y axis, and false\npositive rate on the X axis. This means that the top left corner of the plot is\nthe \"ideal\" point - a false positive rate of zero, and a true positive rate of\none. This is not very realistic, but it does mean that a larger area under the\ncurve (AUC) is usually better.\n\nThe \"steepness\" of ROC curves is also important, since it is ideal to maximize\nthe true positive rate while minimizing the false positive rate.\n\nROC curves are typically used in binary classification to study the output of\na classifier. In order to extend ROC curve and ROC area to multi-label\nclassification, it is necessary to binarize the output. One ROC\ncurve can be drawn per label, but one can also draw a ROC curve by considering\neach element of the label indicator matrix as a binary prediction\n(micro-averaging).\n\nAnother evaluation measure for multi-label classification is\nmacro-averaging, which gives equal weight to the classification of each\nlabel.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>See also :func:`sklearn.metrics.roc_auc_score`,\n `sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`</p></div>\n\n\n"
 ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom scipy import interp\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,\n random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,\n random_state=random_state))\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n    roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
+"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom scipy import interp\nfrom sklearn.metrics import roc_auc_score\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,\n random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,\n random_state=random_state))\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n    roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
 ]
 },
 {
@@ -51,7 +51,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Plot ROC curves for the multiclass problem\n\n"
+"Plot ROC curves for the multilabel problem\n..........................................\nCompute macro-average ROC curve and ROC area\n\n"
 ]
 },
 {
@@ -62,7 +62,25 @@
 },
 "outputs": [],
 "source": [
-"# Compute macro-average ROC curve and ROC area\n\n# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at this points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n    mean_tpr += interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(fpr[\"micro\"], tpr[\"micro\"],\n label='micro-average ROC curve (area = {0:0.2f})'\n ''.format(roc_auc[\"micro\"]),\n color='deeppink', linestyle=':', linewidth=4)\n\nplt.plot(fpr[\"macro\"], tpr[\"macro\"],\n label='macro-average ROC curve (area = {0:0.2f})'\n ''.format(roc_auc[\"macro\"]),\n color='navy', linestyle=':', linewidth=4)\n\ncolors = cycle(['aqua', 'darkorange', 'cornflowerblue'])\nfor i, color in zip(range(n_classes), colors):\n    plt.plot(fpr[i], tpr[i], color=color, lw=lw,\n label='ROC curve of class {0} (area = {1:0.2f})'\n ''.format(i, roc_auc[i]))\n\nplt.plot([0, 1], [0, 1], 'k--', lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel('False Positive Rate')\nplt.ylabel('True Positive Rate')\nplt.title('Some extension of Receiver operating characteristic to multi-class')\nplt.legend(loc=\"lower right\")\nplt.show()"
+"# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at these points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n    mean_tpr += interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(fpr[\"micro\"], tpr[\"micro\"],\n label='micro-average ROC curve (area = {0:0.2f})'\n ''.format(roc_auc[\"micro\"]),\n color='deeppink', linestyle=':', linewidth=4)\n\nplt.plot(fpr[\"macro\"], tpr[\"macro\"],\n label='macro-average ROC curve (area = {0:0.2f})'\n ''.format(roc_auc[\"macro\"]),\n color='navy', linestyle=':', linewidth=4)\n\ncolors = cycle(['aqua', 'darkorange', 'cornflowerblue'])\nfor i, color in zip(range(n_classes), colors):\n    plt.plot(fpr[i], tpr[i], color=color, lw=lw,\n label='ROC curve of class {0} (area = {1:0.2f})'\n ''.format(i, roc_auc[i]))\n\nplt.plot([0, 1], [0, 1], 'k--', lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel('False Positive Rate')\nplt.ylabel('True Positive Rate')\nplt.title('Some extension of Receiver operating characteristic to multi-class')\nplt.legend(loc=\"lower right\")\nplt.show()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Area under ROC for the multiclass problem\n.........................................\nThe :func:`sklearn.metrics.roc_auc_score` function can be used for\nmulti-class classification. The multiclass One-vs-One scheme compares every\nunique pairwise combination of classes. In this section, we calculate the AUC\nusing the OvR and OvO schemes. We report a macro average and a\nprevalence-weighted average.\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"y_prob = classifier.predict_proba(X_test)\n\nmacro_roc_auc_ovo = roc_auc_score(y_test, y_prob, multi_class=\"ovo\",\n average=\"macro\")\nweighted_roc_auc_ovo = roc_auc_score(y_test, y_prob, multi_class=\"ovo\",\n average=\"weighted\")\nmacro_roc_auc_ovr = roc_auc_score(y_test, y_prob, multi_class=\"ovr\",\n average=\"macro\")\nweighted_roc_auc_ovr = roc_auc_score(y_test, y_prob, multi_class=\"ovr\",\n average=\"weighted\")\nprint(\"One-vs-One ROC AUC scores:\\n{:.6f} (macro),\\n{:.6f} \"\n \"(weighted by prevalence)\"\n .format(macro_roc_auc_ovo, weighted_roc_auc_ovo))\nprint(\"One-vs-Rest ROC AUC scores:\\n{:.6f} (macro),\\n{:.6f} \"\n \"(weighted by prevalence)\"\n .format(macro_roc_auc_ovr, weighted_roc_auc_ovr))"
 ]
 }
 ],
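
The micro-averaging described in the updated docstring treats every element of the binarized label indicator matrix as one binary prediction, which is exactly what the example's roc_curve(y_test.ravel(), y_score.ravel()) call computes. A minimal, self-contained sketch of that flattening step; the toy labels and random scores here are illustrative stand-ins for a real classifier's output, not taken from the example:

import numpy as np
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc

# Binarize a 3-class target into an (n_samples, 3) indicator matrix,
# as the example does before fitting its classifier.
y = np.array([0, 1, 2, 2, 1, 0])
Y = label_binarize(y, classes=[0, 1, 2])

# Random scores stand in for decision_function output (one column per class).
rng = np.random.RandomState(0)
scores = rng.rand(*Y.shape)

# Micro-averaging: flatten both matrices so every (sample, class) cell
# becomes one binary prediction, then compute a single ROC curve.
fpr, tpr, _ = roc_curve(Y.ravel(), scores.ravel())
print("micro-average AUC: {:.3f}".format(auc(fpr, tpr)))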

dev/_downloads/plot_roc.py

Lines changed: 33 additions & 9 deletions
@@ -15,24 +15,21 @@
 The "steepness" of ROC curves is also important, since it is ideal to maximize
 the true positive rate while minimizing the false positive rate.

-Multiclass settings
--------------------
-
 ROC curves are typically used in binary classification to study the output of
-a classifier. In order to extend ROC curve and ROC area to multi-class
-or multi-label classification, it is necessary to binarize the output. One ROC
+a classifier. In order to extend ROC curve and ROC area to multi-label
+classification, it is necessary to binarize the output. One ROC
 curve can be drawn per label, but one can also draw a ROC curve by considering
 each element of the label indicator matrix as a binary prediction
 (micro-averaging).

-Another evaluation measure for multi-class classification is
+Another evaluation measure for multi-label classification is
 macro-averaging, which gives equal weight to the classification of each
 label.

 .. note::

     See also :func:`sklearn.metrics.roc_auc_score`,
-    :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`.
+    :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`

 """
 print(__doc__)
@@ -47,6 +44,7 @@
 from sklearn.preprocessing import label_binarize
 from sklearn.multiclass import OneVsRestClassifier
 from scipy import interp
+from sklearn.metrics import roc_auc_score

 # Import some data to play with
 iris = datasets.load_iris()
@@ -101,8 +99,8 @@


 ##############################################################################
-# Plot ROC curves for the multiclass problem
-
+# Plot ROC curves for the multilabel problem
+# ..........................................
 # Compute macro-average ROC curve and ROC area

 # First aggregate all false positive rates
@@ -146,3 +144,29 @@
 plt.title('Some extension of Receiver operating characteristic to multi-class')
 plt.legend(loc="lower right")
 plt.show()
+
+
+##############################################################################
+# Area under ROC for the multiclass problem
+# .........................................
+# The :func:`sklearn.metrics.roc_auc_score` function can be used for
+# multi-class classification. The multiclass One-vs-One scheme compares every
+# unique pairwise combination of classes. In this section, we calculate the AUC
+# using the OvR and OvO schemes. We report a macro average and a
+# prevalence-weighted average.
+y_prob = classifier.predict_proba(X_test)
+
+macro_roc_auc_ovo = roc_auc_score(y_test, y_prob, multi_class="ovo",
+                                  average="macro")
+weighted_roc_auc_ovo = roc_auc_score(y_test, y_prob, multi_class="ovo",
+                                     average="weighted")
+macro_roc_auc_ovr = roc_auc_score(y_test, y_prob, multi_class="ovr",
+                                  average="macro")
+weighted_roc_auc_ovr = roc_auc_score(y_test, y_prob, multi_class="ovr",
+                                     average="weighted")
+print("One-vs-One ROC AUC scores:\n{:.6f} (macro),\n{:.6f} "
+      "(weighted by prevalence)"
+      .format(macro_roc_auc_ovo, weighted_roc_auc_ovo))
+print("One-vs-Rest ROC AUC scores:\n{:.6f} (macro),\n{:.6f} "
+      "(weighted by prevalence)"
+      .format(macro_roc_auc_ovr, weighted_roc_auc_ovr))
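
The section added above can be exercised on its own. Below is a condensed, runnable sketch of the same roc_auc_score call pattern; it assumes scikit-learn 0.22 or later (where roc_auc_score gained the multi_class parameter) and swaps the example's OneVsRestClassifier with an SVC for a plain LogisticRegression to keep it short. Note that the multiclass path of roc_auc_score takes the unbinarized class labels plus a probability matrix whose rows sum to one:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                    random_state=0)

# Any estimator with predict_proba works here; LogisticRegression keeps
# the sketch short (the full example uses OneVsRestClassifier + SVC).
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_prob = clf.predict_proba(X_test)

# "ovo" averages the AUC over every unique pair of classes; "ovr"
# averages the per-class one-vs-rest AUCs. "macro" weights classes
# equally, "weighted" weights them by prevalence.
for scheme in ("ovo", "ovr"):
    for avg in ("macro", "weighted"):
        score = roc_auc_score(y_test, y_prob, multi_class=scheme, average=avg)
        print("{} / {} ROC AUC: {:.6f}".format(scheme, avg, score))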

dev/_downloads/scikit-learn-docs.pdf
16.3 KB; binary file not shown.

dev/_images/iris.png
Binary image files changed (size deltas between -3 and +423 bytes); not shown.
