Commit 1f87e69

Pushing the docs to dev/ for branch: master, commit a6753f3ed38d25cec0af8ed95697f48eaacaed24
1 parent 66a6408 commit 1f87e69

1,009 files changed: +4718, -3205 lines
Two binary files changed (5.13 KB and 3.68 KB), not shown.
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Demonstration of multi-metric evaluation on cross_val_score and GridSearchCV\n\nMultiple metric parameter search can be done by setting the ``scoring``\nparameter to a list of metric scorer names or a dict mapping the scorer names\nto the scorer callables.\n\nThe scores of all the scorers are available in the ``cv_results_`` dict at keys\nending in ``'_<scorer_name>'`` (``'mean_test_precision'``,\n``'rank_test_precision'``, etc...)\n\nThe ``best_estimator_``, ``best_index_``, ``best_score_`` and ``best_params_``\ncorrespond to the scorer (key) that is set to the ``refit`` attribute.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author: Raghav RV <[email protected]>\n# License: BSD\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nfrom sklearn.datasets import make_hastie_10_2\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import make_scorer\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.tree import DecisionTreeClassifier\n\nprint(__doc__)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Running ``GridSearchCV`` using multiple evaluation metrics\n----------------------------------------------------------\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "X, y = make_hastie_10_2(n_samples=8000, random_state=42)\n\n# The scorers can either be one of the predefined metric strings or a scorer\n# callable, like the one returned by make_scorer\nscoring = {'AUC': 'roc_auc', 'Accuracy': make_scorer(accuracy_score)}\n\n# Setting refit='AUC' refits an estimator on the whole dataset with the\n# parameter setting that has the best cross-validated AUC score.\n# That estimator is made available at ``gs.best_estimator_`` along with\n# parameters like ``gs.best_score_``, ``gs.best_params_`` and\n# ``gs.best_index_``\ngs = GridSearchCV(DecisionTreeClassifier(random_state=42),\n                  param_grid={'min_samples_split': range(2, 403, 10)},\n                  scoring=scoring, cv=5, refit='AUC')\ngs.fit(X, y)\nresults = gs.cv_results_"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plotting the result\n-------------------\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "plt.figure(figsize=(13, 13))\nplt.title(\"GridSearchCV evaluating using multiple scorers simultaneously\",\n          fontsize=16)\n\nplt.xlabel(\"min_samples_split\")\nplt.ylabel(\"Score\")\nplt.grid()\n\nax = plt.axes()\nax.set_xlim(0, 402)\nax.set_ylim(0.73, 1)\n\n# Get the regular numpy array from the MaskedArray\nX_axis = np.array(results['param_min_samples_split'].data, dtype=float)\n\nfor scorer, color in zip(sorted(scoring), ['g', 'k']):\n    for sample, style in (('train', '--'), ('test', '-')):\n        sample_score_mean = results['mean_%s_%s' % (sample, scorer)]\n        sample_score_std = results['std_%s_%s' % (sample, scorer)]\n        ax.fill_between(X_axis, sample_score_mean - sample_score_std,\n                        sample_score_mean + sample_score_std,\n                        alpha=0.1 if sample == 'test' else 0, color=color)\n        ax.plot(X_axis, sample_score_mean, style, color=color,\n                alpha=1 if sample == 'test' else 0.7,\n                label=\"%s (%s)\" % (scorer, sample))\n\n    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]\n    best_score = results['mean_test_%s' % scorer][best_index]\n\n    # Plot a dotted vertical line at the best score for that scorer marked by x\n    ax.plot([X_axis[best_index], ] * 2, [0, best_score],\n            linestyle='-.', color=color, marker='x', markeredgewidth=3, ms=8)\n\n    # Annotate the best score for that scorer\n    ax.annotate(\"%0.2f\" % best_score,\n                (X_axis[best_index], best_score + 0.005))\n\nplt.legend(loc=\"best\")\nplt.grid(False)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.1"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
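
The markdown cell above describes the ``cv_results_`` key-naming scheme only in prose. As a minimal illustration (editorial, not part of this commit; it assumes the ``gs``, ``scoring`` and ``results`` objects from the code cells above are in scope after fitting), the per-scorer keys can be inspected directly:

# Each name used in ``scoring`` ('AUC', 'Accuracy') gets its own family of
# keys in cv_results_, e.g. 'mean_test_AUC', 'std_test_AUC', 'rank_test_AUC'
# (and 'mean_train_AUC' etc. when train scores are recorded).
for name in sorted(scoring):
    print(name, sorted(k for k in results if k.endswith('_%s' % name)))

# With refit='AUC', best_index_ points at the candidate ranked first on AUC,
# and best_score_ is that candidate's mean cross-validated AUC score.
print(gs.best_index_ == results['rank_test_AUC'].argmin())          # expected: True
print(gs.best_score_ == results['mean_test_AUC'][gs.best_index_])   # expected: True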
Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
"""Demonstration of multi-metric evaluation on cross_val_score and GridSearchCV

Multiple metric parameter search can be done by setting the ``scoring``
parameter to a list of metric scorer names or a dict mapping the scorer names
to the scorer callables.

The scores of all the scorers are available in the ``cv_results_`` dict at keys
ending in ``'_<scorer_name>'`` (``'mean_test_precision'``,
``'rank_test_precision'``, etc...)

The ``best_estimator_``, ``best_index_``, ``best_score_`` and ``best_params_``
correspond to the scorer (key) that is set to the ``refit`` attribute.
"""

# Author: Raghav RV <[email protected]>
# License: BSD

import numpy as np
from matplotlib import pyplot as plt

from sklearn.datasets import make_hastie_10_2
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

print(__doc__)

###############################################################################
# Running ``GridSearchCV`` using multiple evaluation metrics
# ----------------------------------------------------------
#

X, y = make_hastie_10_2(n_samples=8000, random_state=42)

# The scorers can either be one of the predefined metric strings or a scorer
# callable, like the one returned by make_scorer
scoring = {'AUC': 'roc_auc', 'Accuracy': make_scorer(accuracy_score)}

# Setting refit='AUC' refits an estimator on the whole dataset with the
# parameter setting that has the best cross-validated AUC score.
# That estimator is made available at ``gs.best_estimator_`` along with
# parameters like ``gs.best_score_``, ``gs.best_params_`` and
# ``gs.best_index_``
gs = GridSearchCV(DecisionTreeClassifier(random_state=42),
                  param_grid={'min_samples_split': range(2, 403, 10)},
                  scoring=scoring, cv=5, refit='AUC')
gs.fit(X, y)
results = gs.cv_results_

###############################################################################
# Plotting the result
# -------------------

plt.figure(figsize=(13, 13))
plt.title("GridSearchCV evaluating using multiple scorers simultaneously",
          fontsize=16)

plt.xlabel("min_samples_split")
plt.ylabel("Score")
plt.grid()

ax = plt.axes()
ax.set_xlim(0, 402)
ax.set_ylim(0.73, 1)

# Get the regular numpy array from the MaskedArray
X_axis = np.array(results['param_min_samples_split'].data, dtype=float)

for scorer, color in zip(sorted(scoring), ['g', 'k']):
    for sample, style in (('train', '--'), ('test', '-')):
        sample_score_mean = results['mean_%s_%s' % (sample, scorer)]
        sample_score_std = results['std_%s_%s' % (sample, scorer)]
        ax.fill_between(X_axis, sample_score_mean - sample_score_std,
                        sample_score_mean + sample_score_std,
                        alpha=0.1 if sample == 'test' else 0, color=color)
        ax.plot(X_axis, sample_score_mean, style, color=color,
                alpha=1 if sample == 'test' else 0.7,
                label="%s (%s)" % (scorer, sample))

    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]
    best_score = results['mean_test_%s' % scorer][best_index]

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax.plot([X_axis[best_index], ] * 2, [0, best_score],
            linestyle='-.', color=color, marker='x', markeredgewidth=3, ms=8)

    # Annotate the best score for that scorer
    ax.annotate("%0.2f" % best_score,
                (X_axis[best_index], best_score + 0.005))

plt.legend(loc="best")
plt.grid(False)
plt.show()
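
The docstring notes that ``scoring`` can also be a plain list of predefined metric names rather than a dict of named scorers, a form the committed example does not exercise. A minimal, self-contained sketch of the list form (editorial, not part of this commit), here using ``cross_validate`` from ``sklearn.model_selection``, could look as follows; the variable names are illustrative only:

# Editorial sketch: multi-metric evaluation with ``scoring`` given as a
# list of predefined metric strings instead of a dict of scorers.
from sklearn.datasets import make_hastie_10_2
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier

X, y = make_hastie_10_2(n_samples=8000, random_state=42)

cv_scores = cross_validate(DecisionTreeClassifier(random_state=42), X, y,
                           scoring=['accuracy', 'roc_auc'], cv=5)

# The returned dict follows the same '<prefix>_<scorer_name>' convention:
# 'test_accuracy' and 'test_roc_auc' hold one score per CV fold, alongside
# 'fit_time' and 'score_time' (and 'train_*' keys if train scores are kept).
for key in sorted(cv_scores):
    print(key, cv_scores[key].mean())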

dev/_downloads/scikit-learn-docs.pdf: 136 KB (binary file not shown)
