
Commit 42393e9

Pushing the docs to dev/ for branch: main, commit 38ff5be25d0164bf9598bcfdde3b791ad6e261b0
1 parent 49af9c3 commit 42393e9

1,219 files changed: +4630 additions, -4454 deletions

dev/_downloads/0b39f715b5e32f01df3d212b6d822b82/plot_calibration.py

Lines changed: 25 additions & 16 deletions
@@ -22,24 +22,21 @@
 Brier score.
 
 """
-
-# Author: Mathieu Blondel <[email protected]>
-# Alexandre Gramfort <[email protected]>
-# Balazs Kegl <[email protected]>
-# Jan Hendrik Metzen <[email protected]>
+# Authors:
+# Mathieu Blondel <[email protected]>
+# Alexandre Gramfort <[email protected]>
+# Balazs Kegl <[email protected]>
+# Jan Hendrik Metzen <[email protected]>
 # License: BSD Style.
 
+# %%
+# Generate synthetic dataset
+# --------------------------
 import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib import cm
 
 from sklearn.datasets import make_blobs
-from sklearn.naive_bayes import GaussianNB
-from sklearn.metrics import brier_score_loss
-from sklearn.calibration import CalibratedClassifierCV
 from sklearn.model_selection import train_test_split
 
-
 n_samples = 50000
 n_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here
 
@@ -58,17 +55,24 @@
     X, y, sample_weight, test_size=0.9, random_state=42
 )
 
-# Gaussian Naive-Bayes with no calibration
+# %%
+# Gaussian Naive-Bayes
+# --------------------
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.metrics import brier_score_loss
+from sklearn.naive_bayes import GaussianNB
+
+# With no calibration
 clf = GaussianNB()
 clf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights
 prob_pos_clf = clf.predict_proba(X_test)[:, 1]
 
-# Gaussian Naive-Bayes with isotonic calibration
+# With isotonic calibration
 clf_isotonic = CalibratedClassifierCV(clf, cv=2, method="isotonic")
 clf_isotonic.fit(X_train, y_train, sample_weight=sw_train)
 prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]
 
-# Gaussian Naive-Bayes with sigmoid calibration
+# With sigmoid calibration
 clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method="sigmoid")
 clf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)
 prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]
@@ -84,8 +88,12 @@
 clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test)
 print("With sigmoid calibration: %1.3f" % clf_sigmoid_score)
 
-# #############################################################################
-# Plot the data and the predicted probabilities
+# %%
+# Plot data and the predicted probabilities
+# -----------------------------------------
+from matplotlib import cm
+import matplotlib.pyplot as plt
+
 plt.figure()
 y_unique = np.unique(y)
 colors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
@@ -105,6 +113,7 @@
 plt.title("Data")
 
 plt.figure()
+
 order = np.lexsort((prob_pos_clf,))
 plt.plot(prob_pos_clf[order], "r", label="No calibration (%1.3f)" % clf_score)
 plt.plot(

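Note on the restructuring above: the new `# %%` markers are the cell separators that sphinx-gallery (which builds these example pages) uses to split a flat script into alternating text and code blocks, and each section's imports now sit at the top of the section that first uses them. As a rough illustration of the mechanism, here is a minimal sketch of how such markers partition a script; `split_cells` and the local file path are hypothetical, and sphinx-gallery's real parser does more (it also renders the comment text as reST):

from pathlib import Path


def split_cells(script_text):
    # Collect chunks of the script delimited by lines starting with "# %%".
    cells, current = [], []
    for line in script_text.splitlines():
        if line.lstrip().startswith("# %%"):
            cells.append("\n".join(current))
            current = []
        else:
            current.append(line)
    cells.append("\n".join(current))
    return [c for c in cells if c.strip()]


# Assumes a local copy of the example script (hypothetical path).
chunks = split_cells(Path("plot_calibration.py").read_text())
print(len(chunks))  # 4: docstring/header plus the three "# %%" sections
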
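The printed scores are weighted Brier scores: the sample-weighted mean squared difference between the predicted positive-class probability and the 0/1 outcome, so smaller is better. A minimal sketch of the quantity being reported, assuming binary 0/1 labels (`weighted_brier` is a hypothetical helper, not scikit-learn's implementation):

import numpy as np


def weighted_brier(y_true, prob_pos, weights):
    # Weighted mean of (p_i - y_i)^2; 0 is a perfect probabilistic prediction.
    y_true = np.asarray(y_true, dtype=float)
    prob_pos = np.asarray(prob_pos, dtype=float)
    return np.average((prob_pos - y_true) ** 2, weights=weights)


# A confident wrong prediction is penalized much more than a hedged one.
print(weighted_brier([1, 0], [0.9, 0.8], [1.0, 1.0]))  # 0.325
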
dev/_downloads/0c15970ac17183d2bf864a9563081aeb/plot_calibration.ipynb

Lines changed: 55 additions & 1 deletion
@@ -26,7 +26,61 @@
    },
    "outputs": [],
    "source": [
-    "# Author: Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.model_selection import train_test_split\n\n\nn_samples = 50000\nn_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False, random_state=42)\n\ny[: n_samples // 2] = 0\ny[n_samples // 2 :] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(\n    X, y, sample_weight, test_size=0.9, random_state=42\n)\n\n# Gaussian Naive-Bayes with no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method=\"isotonic\")\nclf_isotonic.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method=\"sigmoid\")\nclf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier score losses: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, sample_weight=sw_test)\nprint(\"No calibration: %1.3f\" % clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sample_weight=sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)\n\n# #############################################################################\n# Plot the data and the predicted probabilities\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n    this_X = X_train[y_train == this_y]\n    this_sw = sw_train[y_train == this_y]\n    plt.scatter(\n        this_X[:, 0],\n        this_X[:, 1],\n        s=this_sw * 50,\n        c=color[np.newaxis, :],\n        alpha=0.5,\n        edgecolor=\"k\",\n        label=\"Class %s\" % this_y,\n    )\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\norder = np.lexsort((prob_pos_clf,))\nplt.plot(prob_pos_clf[order], \"r\", label=\"No calibration (%1.3f)\" % clf_score)\nplt.plot(\n    prob_pos_isotonic[order],\n    \"g\",\n    linewidth=3,\n    label=\"Isotonic calibration (%1.3f)\" % clf_isotonic_score,\n)\nplt.plot(\n    prob_pos_sigmoid[order],\n    \"b\",\n    linewidth=3,\n    label=\"Sigmoid calibration (%1.3f)\" % clf_sigmoid_score,\n)\nplt.plot(\n    np.linspace(0, y_test.size, 51)[1::2],\n    y_test[order].reshape(25, -1).mean(1),\n    \"k\",\n    linewidth=3,\n    label=r\"Empirical\",\n)\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability (uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
+    "# Authors:\n# Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate synthetic dataset\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.model_selection import train_test_split\n\nn_samples = 50000\nn_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False, random_state=42)\n\ny[: n_samples // 2] = 0\ny[n_samples // 2 :] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(\n    X, y, sample_weight, test_size=0.9, random_state=42\n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Gaussian Naive-Bayes\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.naive_bayes import GaussianNB\n\n# With no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# With isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method=\"isotonic\")\nclf_isotonic.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# With sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method=\"sigmoid\")\nclf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier score losses: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, sample_weight=sw_test)\nprint(\"No calibration: %1.3f\" % clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sample_weight=sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plot data and the predicted probabilities\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import cm\nimport matplotlib.pyplot as plt\n\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n    this_X = X_train[y_train == this_y]\n    this_sw = sw_train[y_train == this_y]\n    plt.scatter(\n        this_X[:, 0],\n        this_X[:, 1],\n        s=this_sw * 50,\n        c=color[np.newaxis, :],\n        alpha=0.5,\n        edgecolor=\"k\",\n        label=\"Class %s\" % this_y,\n    )\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\n\norder = np.lexsort((prob_pos_clf,))\nplt.plot(prob_pos_clf[order], \"r\", label=\"No calibration (%1.3f)\" % clf_score)\nplt.plot(\n    prob_pos_isotonic[order],\n    \"g\",\n    linewidth=3,\n    label=\"Isotonic calibration (%1.3f)\" % clf_isotonic_score,\n)\nplt.plot(\n    prob_pos_sigmoid[order],\n    \"b\",\n    linewidth=3,\n    label=\"Sigmoid calibration (%1.3f)\" % clf_sigmoid_score,\n)\nplt.plot(\n    np.linspace(0, y_test.size, 51)[1::2],\n    y_test[order].reshape(25, -1).mean(1),\n    \"k\",\n    linewidth=3,\n    label=r\"Empirical\",\n)\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability (uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
   ]
  }
 ],
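
After this change the notebook alternates markdown headers with code cells, mirroring the `# %%` sections of the script. A stdlib-only sanity check of the generated file, assuming a local checkout where the path from this commit's file tree exists:

import json
from pathlib import Path

nb_path = Path("dev/_downloads/0c15970ac17183d2bf864a9563081aeb/plot_calibration.ipynb")
nb = json.loads(nb_path.read_text())

# Print each cell's type and first source line to verify the structure.
for i, cell in enumerate(nb["cells"]):
    source = "".join(cell["source"])
    head = source.splitlines()[0] if source else "<empty>"
    print(f"{i}: {cell['cell_type']:<8} {head}")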

dev/_downloads/scikit-learn-docs.zip

Binary file not shown (4.41 KB).

0 commit comments