|
3 | 3 | Test with permutations the significance of a classification score
|
4 | 4 | =================================================================
|
5 | 5 |
|
6 |
| -In order to test if a classification score is significative a technique |
7 |
| -in repeating the classification procedure after randomizing, permuting, |
8 |
| -the labels. The p-value is then given by the percentage of runs for |
9 |
| -which the score obtained is greater than the classification score |
10 |
| -obtained in the first place. |
11 |
| -
|
| 6 | +This example demonstrates the use of |
| 7 | +:func:`~sklearn.model_selection.permutation_test_score` to evaluate the |
| 8 | +# significance of a cross-validated score using permutations. |
12 | 9 | """
|
13 | 10 |
|
14 |
| -# Author: Alexandre Gramfort <[email protected]> |
| 11 | +# Authors: Alexandre Gramfort <[email protected]> |
| 12 | +# Lucy Liu |
15 | 13 | # License: BSD 3 clause
|
| 14 | +# |
| 15 | +# Dataset |
| 16 | +# ------- |
| 17 | +# |
| 18 | +# We will use the :ref:`iris_dataset`, which consists of measurements taken |
| 19 | +# from 3 types of irises. |
| 20 | + |
| 21 | +from sklearn.datasets import load_iris |
| 22 | + |
| 23 | +iris = load_iris() |
| 24 | +X = iris.data |
| 25 | +y = iris.target |
16 | 26 |
|
17 |
| -print(__doc__) |
| 27 | +# %% |
| 28 | +# We will also generate some random feature data (i.e., 2200 features), |
| 29 | +# uncorrelated with the class labels in the iris dataset. |
18 | 30 |
|
19 | 31 | import numpy as np
|
20 |
| -import matplotlib.pyplot as plt |
| 32 | + |
| 33 | +n_uncorrelated_features = 2200 |
| 34 | +rng = np.random.RandomState(seed=0) |
| 35 | +# Use same number of samples as in iris and 2200 features |
| 36 | +X_rand = rng.normal(size=(X.shape[0], n_uncorrelated_features)) |
| 37 | + |
| 38 | +# %% |
| 39 | +# Permutation test score |
| 40 | +# ---------------------- |
| 41 | +# |
| 42 | +# Next, we calculate the |
| 43 | +# :func:`~sklearn.model_selection.permutation_test_score` using the original |
| 44 | +# iris dataset, which strongly predicts the labels and |
| 45 | +# the randomly generated features and iris labels, which should have |
| 46 | +# no dependency between features and labels. We use the |
| 47 | +# :class:`~sklearn.svm.SVC` classifier and :ref:`accuracy_score` to evaluate |
| 48 | +# the model at each round. |
| 49 | +# |
| 50 | +# :func:`~sklearn.model_selection.permutation_test_score` generates a null |
| 51 | +# distribution by calculating the accuracy of the classifier |
| 52 | +# on 1000 different permutations of the dataset, where features |
| 53 | +# remain the same but labels undergo different permutations. This is the |
| 54 | +# distribution for the null hypothesis which states there is no dependency |
| 55 | +# between the features and labels. An empirical p-value is then calculated as |
| 56 | +# the percentage of permutations for which the score obtained is greater |
| 57 | +# than the score obtained using the original data. |
21 | 58 |
|
22 | 59 | from sklearn.svm import SVC
|
23 | 60 | from sklearn.model_selection import StratifiedKFold
|
24 | 61 | from sklearn.model_selection import permutation_test_score
|
25 |
| -from sklearn import datasets |
26 | 62 |
|
| 63 | +clf = SVC(kernel='linear', random_state=7) |
| 64 | +cv = StratifiedKFold(2, shuffle=True, random_state=0) |
27 | 65 |
|
28 |
| -# ############################################################################# |
29 |
| -# Loading a dataset |
30 |
| -iris = datasets.load_iris() |
31 |
| -X = iris.data |
32 |
| -y = iris.target |
33 |
| -n_classes = np.unique(y).size |
34 |
| - |
35 |
| -# Some noisy data not correlated |
36 |
| -random = np.random.RandomState(seed=0) |
37 |
| -E = random.normal(size=(len(X), 2200)) |
38 |
| - |
39 |
| -# Add noisy data to the informative features for make the task harder |
40 |
| -X = np.c_[X, E] |
41 |
| - |
42 |
| -svm = SVC(kernel='linear') |
43 |
| -cv = StratifiedKFold(2) |
44 |
| - |
45 |
| -score, permutation_scores, pvalue = permutation_test_score( |
46 |
| - svm, X, y, scoring="accuracy", cv=cv, n_permutations=100, n_jobs=1) |
47 |
| - |
48 |
| -print("Classification score %s (pvalue : %s)" % (score, pvalue)) |
49 |
| - |
50 |
| -# ############################################################################# |
51 |
| -# View histogram of permutation scores |
52 |
| -plt.hist(permutation_scores, 20, label='Permutation scores', |
53 |
| - edgecolor='black') |
54 |
| -ylim = plt.ylim() |
55 |
| -# BUG: vlines(..., linestyle='--') fails on older versions of matplotlib |
56 |
| -# plt.vlines(score, ylim[0], ylim[1], linestyle='--', |
57 |
| -# color='g', linewidth=3, label='Classification Score' |
58 |
| -# ' (pvalue %s)' % pvalue) |
59 |
| -# plt.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--', |
60 |
| -# color='k', linewidth=3, label='Luck') |
61 |
| -plt.plot(2 * [score], ylim, '--g', linewidth=3, |
62 |
| - label='Classification Score' |
63 |
| - ' (pvalue %s)' % pvalue) |
64 |
| -plt.plot(2 * [1. / n_classes], ylim, '--k', linewidth=3, label='Luck') |
65 |
| - |
66 |
| -plt.ylim(ylim) |
67 |
| -plt.legend() |
68 |
| -plt.xlabel('Score') |
| 66 | +score_iris, perm_scores_iris, pvalue_iris = permutation_test_score( |
| 67 | + clf, X, y, scoring="accuracy", cv=cv, n_permutations=1000) |
| 68 | + |
| 69 | +score_rand, perm_scores_rand, pvalue_rand = permutation_test_score( |
| 70 | + clf, X_rand, y, scoring="accuracy", cv=cv, n_permutations=1000) |
| 71 | + |
| 72 | +# %% |
| 73 | +# Original data |
| 74 | +# ^^^^^^^^^^^^^ |
| 75 | +# |
| 76 | +# Below we plot a histogram of the permutation scores (the null |
| 77 | +# distribution). The red line indicates the score obtained by the classifier |
| 78 | +# on the original data. The score is much better than those obtained by |
| 79 | +# using permuted data and the p-value is thus very low. This indicates that |
| 80 | +# there is a low likelihood that this good score would be obtained by chance |
| 81 | +# alone. It provides evidence that the iris dataset contains real dependency |
| 82 | +# between features and labels and the classifier was able to utilize this |
| 83 | +# to obtain good results. |
| 84 | + |
| 85 | +import matplotlib.pyplot as plt |
| 86 | + |
| 87 | +fig, ax = plt.subplots() |
| 88 | + |
| 89 | +ax.hist(perm_scores_iris, bins=20, density=True) |
| 90 | +ax.axvline(score_iris, ls='--', color='r') |
| 91 | +score_label = (f"Score on original\ndata: {score_iris:.2f}\n" |
| 92 | + f"(p-value: {pvalue_iris:.3f})") |
| 93 | +ax.text(0.7, 260, score_label, fontsize=12) |
| 94 | +ax.set_xlabel("Accuracy score") |
| 95 | +_ = ax.set_ylabel("Probability") |
| 96 | + |
| 97 | +# %% |
| 98 | +# Random data |
| 99 | +# ^^^^^^^^^^^ |
| 100 | +# |
| 101 | +# Below we plot the null distribution for the randomized data. The permutation |
| 102 | +# scores are similar to those obtained using the original iris dataset |
| 103 | +# because the permutation always destroys any feature label dependency present. |
| 104 | +# The score obtained on the original randomized data in this case, though, is |
| 105 | +# very poor. This results in a large p-value, confirming that there was no |
| 106 | +# feature label dependency in the original data. |
| 107 | + |
| 108 | +fig, ax = plt.subplots() |
| 109 | + |
| 110 | +ax.hist(perm_scores_rand, bins=20, density=True) |
| 111 | +ax.set_xlim(0.13) |
| 112 | +ax.axvline(score_rand, ls='--', color='r') |
| 113 | +score_label = (f"Score on original\ndata: {score_rand:.2f}\n" |
| 114 | + f"(p-value: {pvalue_rand:.3f})") |
| 115 | +ax.text(0.14, 125, score_label, fontsize=12) |
| 116 | +ax.set_xlabel("Accuracy score") |
| 117 | +ax.set_ylabel("Probability") |
69 | 118 | plt.show()
|
| 119 | + |
| 120 | +# %% |
| 121 | +# Another possible reason for obtaining a high p-value is that the classifier |
| 122 | +# was not able to use the structure in the data. In this case, the p-value |
| 123 | +# would only be low for classifiers that are able to utilize the dependency |
| 124 | +# present. In our case above, where the data is random, all classifiers would |
| 125 | +# have a high p-value as there is no structure present in the data. |
| 126 | +# |
| 127 | +# Finally, note that this test has been shown to produce low p-values even |
| 128 | +# if there is only weak structure in the data [1]_. |
| 129 | +# |
| 130 | +# .. topic:: References: |
| 131 | +# |
| 132 | +# .. [1] Ojala and Garriga. `Permutation Tests for Studying Classifier |
| 133 | +# Performance |
| 134 | +# <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_. The |
| 135 | +# Journal of Machine Learning Research (2010) vol. 11 |
| 136 | +# |
0 commit comments