Commit effcc0d

Pushing the docs to dev/ for branch: master, commit ea169b596ca5913ccd02cacfc09a6ec0d3492702
1 parent 99eaeb8 commit effcc0d

File tree: 1,210 files changed (+6957, -5024 lines)
dev/_downloads/…/plot_roc_curve_visualization_api.ipynb

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# ROC Curve with Visualization API\n\nScikit-learn defines a simple API for creating visualizations for machine\nlearning. The key feature of this API is to allow for quick plotting and\nvisual adjustments without recalculation. In this example, we will demonstrate\nhow to use the visualization API by comparing ROC curves.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Load Data and Train an SVC\n--------------------------\nFirst, we load the wine dataset and convert it to a binary classification\nproblem. Then, we train a support vector classifier on a training dataset.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\nfrom sklearn.svm import SVC\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import plot_roc_curve\nfrom sklearn.datasets import load_wine\nfrom sklearn.model_selection import train_test_split\n\nX, y = load_wine(return_X_y=True)\ny = y == 2\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\nsvc = SVC(random_state=42)\nsvc.fit(X_train, y_train)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plotting the ROC Curve\n----------------------\nNext, we plot the ROC curve with a single call to\n:func:`sklearn.metrics.plot_roc_curve`. The returned `svc_disp` object allows\nus to continue using the already computed ROC curve for the SVC in future\nplots.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "svc_disp = plot_roc_curve(svc, X_test, y_test)\nplt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Training a Random Forest and Plotting the ROC Curve\n----------------------------------------------------\nWe train a random forest classifier and create a plot comparing it to the SVC\nROC curve. Notice how `svc_disp` uses\n:func:`~sklearn.metrics.RocCurveDisplay.plot` to plot the SVC ROC curve\nwithout recomputing the values of the ROC curve itself. Furthermore, we\npass `alpha=0.8` to the plot functions to adjust the alpha values of the\ncurves.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "rfc = RandomForestClassifier(n_estimators=10, random_state=42)\nrfc.fit(X_train, y_train)\nax = plt.gca()\nrfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=ax, alpha=0.8)\nsvc_disp.plot(ax=ax, alpha=0.8)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
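A note on the pattern this notebook demonstrates: the object returned by plot_roc_curve is a RocCurveDisplay, which stores the computed curve in its fpr, tpr, and roc_auc attributes, so the curve can be inspected or redrawn without re-scoring the estimator. A minimal sketch, assuming scikit-learn >= 0.22 (where plot_roc_curve and RocCurveDisplay first appear):

import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = load_wine(return_X_y=True)
y = y == 2  # same binarization as the example: class 2 vs. the rest
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

svc = SVC(random_state=42).fit(X_train, y_train)
svc_disp = plot_roc_curve(svc, X_test, y_test)  # the curve is computed here, once

# The display caches everything needed to redraw or inspect the curve.
print("AUC:", svc_disp.roc_auc)
print("points on the curve:", len(svc_disp.fpr))

# Redraw the cached curve on a fresh axes, with a different style.
fig, ax = plt.subplots()
svc_disp.plot(ax=ax, alpha=0.5)
plt.show()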

dev/_downloads/d33f7865941f1e2c2c62fcc641599cc5/plot_roc_crossval.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport numpy as np\nfrom scipy import interp\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import StratifiedKFold\n\n# #############################################################################\n# Data IO and generation\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nX, y = X[y != 2], y[y != 2]\nn_samples, n_features = X.shape\n\n# Add noisy features\nrandom_state = np.random.RandomState(0)\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# #############################################################################\n# Classification and ROC analysis\n\n# Run classifier with cross-validation and plot ROC curves\ncv = StratifiedKFold(n_splits=6)\nclassifier = svm.SVC(kernel='linear', probability=True,\n                     random_state=random_state)\n\ntprs = []\naucs = []\nmean_fpr = np.linspace(0, 1, 100)\n\ni = 0\nfor train, test in cv.split(X, y):\n    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])\n    # Compute ROC curve and area the curve\n    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])\n    tprs.append(interp(mean_fpr, fpr, tpr))\n    tprs[-1][0] = 0.0\n    roc_auc = auc(fpr, tpr)\n    aucs.append(roc_auc)\n    plt.plot(fpr, tpr, lw=1, alpha=0.3,\n             label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))\n\n    i += 1\nplt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',\n         label='Chance', alpha=.8)\n\nmean_tpr = np.mean(tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(aucs)\nplt.plot(mean_fpr, mean_tpr, color='b',\n         label=r'Mean ROC (AUC = %0.2f $\\pm$ %0.2f)' % (mean_auc, std_auc),\n         lw=2, alpha=.8)\n\nstd_tpr = np.std(tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nplt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,\n                 label=r'$\\pm$ 1 std. dev.')\n\nplt.xlim([-0.05, 1.05])\nplt.ylim([-0.05, 1.05])\nplt.xlabel('False Positive Rate')\nplt.ylabel('True Positive Rate')\nplt.title('Receiver operating characteristic example')\nplt.legend(loc=\"lower right\")\nplt.show()"
+"print(__doc__)\n\nimport numpy as np\nfrom scipy import interp\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import auc\nfrom sklearn.metrics import plot_roc_curve\nfrom sklearn.model_selection import StratifiedKFold\n\n# #############################################################################\n# Data IO and generation\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nX, y = X[y != 2], y[y != 2]\nn_samples, n_features = X.shape\n\n# Add noisy features\nrandom_state = np.random.RandomState(0)\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# #############################################################################\n# Classification and ROC analysis\n\n# Run classifier with cross-validation and plot ROC curves\ncv = StratifiedKFold(n_splits=6)\nclassifier = svm.SVC(kernel='linear', probability=True,\n                     random_state=random_state)\n\ntprs = []\naucs = []\nmean_fpr = np.linspace(0, 1, 100)\n\nfig, ax = plt.subplots()\nfor i, (train, test) in enumerate(cv.split(X, y)):\n    classifier.fit(X[train], y[train])\n    viz = plot_roc_curve(classifier, X[test], y[test],\n                         name='ROC fold {}'.format(i),\n                         alpha=0.3, lw=1, ax=ax)\n    interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr)\n    interp_tpr[0] = 0.0\n    tprs.append(interp_tpr)\n    aucs.append(viz.roc_auc)\n\nax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',\n        label='Chance', alpha=.8)\n\nmean_tpr = np.mean(tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(aucs)\nax.plot(mean_fpr, mean_tpr, color='b',\n        label=r'Mean ROC (AUC = %0.2f $\\pm$ %0.2f)' % (mean_auc, std_auc),\n        lw=2, alpha=.8)\n\nstd_tpr = np.std(tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,\n                label=r'$\\pm$ 1 std. dev.')\n\nax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],\n       title=\"Receiver operating characteristic example\")\nax.legend(loc=\"lower right\")\nplt.show()"
 ]
 }
],
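One detail worth flagging in both versions of plot_roc_crossval: the interpolation step still uses `from scipy import interp`. As far as I know, scipy.interp is simply a re-export of numpy.interp (and recent SciPy releases deprecate it), so the per-fold resampling onto a common FPR grid can be written with NumPy alone. A small self-contained sketch with made-up fold values:

import numpy as np

# A coarse ROC curve, as one CV fold might produce (illustrative values only).
fpr = np.array([0.0, 0.2, 0.5, 1.0])
tpr = np.array([0.0, 0.6, 0.9, 1.0])

# Resample this fold's TPR onto the shared 100-point FPR grid used above.
mean_fpr = np.linspace(0, 1, 100)
interp_tpr = np.interp(mean_fpr, fpr, tpr)  # equivalent to scipy's interp
interp_tpr[0] = 0.0  # pin the curve to the origin, as the example does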
dev/_downloads/…/plot_roc_curve_visualization_api.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
"""
================================
ROC Curve with Visualization API
================================
Scikit-learn defines a simple API for creating visualizations for machine
learning. The key feature of this API is to allow for quick plotting and
visual adjustments without recalculation. In this example, we will demonstrate
how to use the visualization API by comparing ROC curves.
"""
print(__doc__)

##############################################################################
# Load Data and Train an SVC
# --------------------------
# First, we load the wine dataset and convert it to a binary classification
# problem. Then, we train a support vector classifier on a training dataset.
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import plot_roc_curve
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

X, y = load_wine(return_X_y=True)
y = y == 2

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
svc = SVC(random_state=42)
svc.fit(X_train, y_train)

##############################################################################
# Plotting the ROC Curve
# ----------------------
# Next, we plot the ROC curve with a single call to
# :func:`sklearn.metrics.plot_roc_curve`. The returned `svc_disp` object allows
# us to continue using the already computed ROC curve for the SVC in future
# plots.
svc_disp = plot_roc_curve(svc, X_test, y_test)
plt.show()

##############################################################################
# Training a Random Forest and Plotting the ROC Curve
# ---------------------------------------------------
# We train a random forest classifier and create a plot comparing it to the
# SVC ROC curve. Notice how `svc_disp` uses
# :func:`~sklearn.metrics.RocCurveDisplay.plot` to plot the SVC ROC curve
# without recomputing the values of the ROC curve itself. Furthermore, we
# pass `alpha=0.8` to the plot functions to adjust the alpha values of the
# curves.
rfc = RandomForestClassifier(n_estimators=10, random_state=42)
rfc.fit(X_train, y_train)
ax = plt.gca()
rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=ax, alpha=0.8)
svc_disp.plot(ax=ax, alpha=0.8)
plt.show()
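Because the display objects carry their own data, the figure layout can also be changed after the fact without retraining or re-scoring either model. A sketch of that idea, self-contained here for convenience; passing styling keyword arguments such as linestyle through RocCurveDisplay.plot to matplotlib is an assumption that holds in 0.22:

import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = load_wine(return_X_y=True)
y = y == 2
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

svc = SVC(random_state=42).fit(X_train, y_train)
rfc = RandomForestClassifier(n_estimators=10,
                             random_state=42).fit(X_train, y_train)

# Each ROC curve is computed exactly once.
svc_disp = plot_roc_curve(svc, X_test, y_test)
rfc_disp = plot_roc_curve(rfc, X_test, y_test)

# Reuse the cached curves: overlaid on one axes, restyled alone on another.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
svc_disp.plot(ax=ax1, alpha=0.8)
rfc_disp.plot(ax=ax1, alpha=0.8)
svc_disp.plot(ax=ax2, linestyle='--')
plt.show()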

dev/_downloads/f314befaab67f267a90ac9b4fd21b13e/plot_roc_crossval.py

Lines changed: 24 additions & 28 deletions
@@ -36,7 +36,8 @@
 import matplotlib.pyplot as plt
 
 from sklearn import svm, datasets
-from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import auc
+from sklearn.metrics import plot_roc_curve
 from sklearn.model_selection import StratifiedKFold
 
 # #############################################################################
@@ -65,40 +66,35 @@
 aucs = []
 mean_fpr = np.linspace(0, 1, 100)
 
-i = 0
-for train, test in cv.split(X, y):
-    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
-    # Compute ROC curve and area the curve
-    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
-    tprs.append(interp(mean_fpr, fpr, tpr))
-    tprs[-1][0] = 0.0
-    roc_auc = auc(fpr, tpr)
-    aucs.append(roc_auc)
-    plt.plot(fpr, tpr, lw=1, alpha=0.3,
-             label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))
-
-    i += 1
-plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
-         label='Chance', alpha=.8)
+fig, ax = plt.subplots()
+for i, (train, test) in enumerate(cv.split(X, y)):
+    classifier.fit(X[train], y[train])
+    viz = plot_roc_curve(classifier, X[test], y[test],
+                         name='ROC fold {}'.format(i),
+                         alpha=0.3, lw=1, ax=ax)
+    interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr)
+    interp_tpr[0] = 0.0
+    tprs.append(interp_tpr)
+    aucs.append(viz.roc_auc)
+
+ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
+        label='Chance', alpha=.8)
 
 mean_tpr = np.mean(tprs, axis=0)
 mean_tpr[-1] = 1.0
 mean_auc = auc(mean_fpr, mean_tpr)
 std_auc = np.std(aucs)
-plt.plot(mean_fpr, mean_tpr, color='b',
-         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
-         lw=2, alpha=.8)
+ax.plot(mean_fpr, mean_tpr, color='b',
+        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
+        lw=2, alpha=.8)
 
 std_tpr = np.std(tprs, axis=0)
 tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
 tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
-plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
-                 label=r'$\pm$ 1 std. dev.')
-
-plt.xlim([-0.05, 1.05])
-plt.ylim([-0.05, 1.05])
-plt.xlabel('False Positive Rate')
-plt.ylabel('True Positive Rate')
-plt.title('Receiver operating characteristic example')
-plt.legend(loc="lower right")
+ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
+                label=r'$\pm$ 1 std. dev.')
+
+ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
+       title="Receiver operating characteristic example")
+ax.legend(loc="lower right")
 plt.show()
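For orientation, the one-call plot_roc_curve(classifier, X[test], y[test]) in the new loop computes roughly what the removed lines spelled out by hand, then stores the results on the returned display as viz.fpr, viz.tpr, and viz.roc_auc. A rough sketch of that correspondence; the real implementation also handles estimators that expose decision_function and picks sensible legend names:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = SVC(kernel='linear', probability=True,
          random_state=0).fit(X_train, y_train)

# Hand-rolled equivalent of what the display computes and stores:
probas = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, probas[:, 1])  # becomes viz.fpr, viz.tpr
roc_auc = auc(fpr, tpr)                        # becomes viz.roc_auc
print('AUC = %0.2f' % roc_auc)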

dev/_downloads/scikit-learn-docs.pdf (49.6 KB): Binary file not shown.

dev/_images/iris.png (0 Bytes)
