
Commit 54955f0

Pushing the docs to dev/ for branch: master, commit 266bfe85c0c1dc4fbadcea08e20a84952d8139f1
1 parent 3775998 commit 54955f0

File tree

1,189 files changed: +3629 -3632 lines


dev/_downloads/38082c4eb06099bc72a5e5fbfff35118/plot_calibration.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.model_selection import train_test_split\n\n\nn_samples = 50000\nn_bins = 3 # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, n_features=2, cluster_std=1.0,\n centers=centers, shuffle=False, random_state=42)\n\ny[:n_samples // 2] = 0\ny[n_samples // 2:] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = \\\n train_test_split(X, y, sample_weight, test_size=0.9, random_state=42)\n\n# Gaussian Naive-Bayes with no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train) # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')\nclf_isotonic.fit(X_train, y_train, sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')\nclf_sigmoid.fit(X_train, y_train, sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier scores: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, sw_test)\nprint(\"No calibration: %1.3f\" % clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)\n\n# #############################################################################\n# Plot the data and the predicted probabilities\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n this_X = X_train[y_train == this_y]\n this_sw = sw_train[y_train == this_y]\n plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50,\n c=color[np.newaxis, :],\n alpha=0.5, edgecolor='k',\n label=\"Class %s\" % this_y)\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\norder = np.lexsort((prob_pos_clf, ))\nplt.plot(prob_pos_clf[order], 'r', label='No calibration (%1.3f)' % clf_score)\nplt.plot(prob_pos_isotonic[order], 'g', linewidth=3,\n label='Isotonic calibration (%1.3f)' % clf_isotonic_score)\nplt.plot(prob_pos_sigmoid[order], 'b', linewidth=3,\n label='Sigmoid calibration (%1.3f)' % clf_sigmoid_score)\nplt.plot(np.linspace(0, y_test.size, 51)[1::2],\n y_test[order].reshape(25, -1).mean(1),\n 'k', linewidth=3, label=r'Empirical')\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability \"\n \"(uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
+"print(__doc__)\n\n# Author: Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.model_selection import train_test_split\n\n\nn_samples = 50000\nn_bins = 3 # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False,\n random_state=42)\n\ny[:n_samples // 2] = 0\ny[n_samples // 2:] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = \\\n train_test_split(X, y, sample_weight, test_size=0.9, random_state=42)\n\n# Gaussian Naive-Bayes with no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train) # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')\nclf_isotonic.fit(X_train, y_train, sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')\nclf_sigmoid.fit(X_train, y_train, sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier scores: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, sw_test)\nprint(\"No calibration: %1.3f\" % clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)\n\n# #############################################################################\n# Plot the data and the predicted probabilities\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n this_X = X_train[y_train == this_y]\n this_sw = sw_train[y_train == this_y]\n plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50,\n c=color[np.newaxis, :],\n alpha=0.5, edgecolor='k',\n label=\"Class %s\" % this_y)\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\norder = np.lexsort((prob_pos_clf, ))\nplt.plot(prob_pos_clf[order], 'r', label='No calibration (%1.3f)' % clf_score)\nplt.plot(prob_pos_isotonic[order], 'g', linewidth=3,\n label='Isotonic calibration (%1.3f)' % clf_isotonic_score)\nplt.plot(prob_pos_sigmoid[order], 'b', linewidth=3,\n label='Sigmoid calibration (%1.3f)' % clf_sigmoid_score)\nplt.plot(np.linspace(0, y_test.size, 51)[1::2],\n y_test[order].reshape(25, -1).mean(1),\n 'k', linewidth=3, label=r'Empirical')\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability \"\n \"(uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
 ]
 }
 ],
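Both sides of this hunk generate identical data: n_features=2 and cluster_std=1.0 are the documented defaults of sklearn.datasets.make_blobs, so the shorter call is a pure cleanup. A minimal sanity check, not part of the commit (the X_old/X_new names are ours):

# Sanity check: n_features=2 and cluster_std=1.0 are make_blobs
# defaults, so dropping them leaves the generated data unchanged.
import numpy as np
from sklearn.datasets import make_blobs

centers = [(-5, -5), (0, 0), (5, 5)]
X_old, y_old = make_blobs(n_samples=1000, n_features=2, cluster_std=1.0,
                          centers=centers, shuffle=False, random_state=42)
X_new, y_new = make_blobs(n_samples=1000, centers=centers, shuffle=False,
                          random_state=42)
assert np.array_equal(X_old, X_new) and np.array_equal(y_old, y_new)

The same simplification appears in the .py counterparts of these examples further down in the commit.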

dev/_downloads/a12ceb5312546e499cfd619117b5ba2f/plot_calibration_multiclass.py

Lines changed: 1 addition & 2 deletions
@@ -39,8 +39,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3).
 np.random.seed(0)
 
 # Generate data
-X, y = make_blobs(n_samples=1000, n_features=2, random_state=42,
-                  cluster_std=5.0)
+X, y = make_blobs(n_samples=1000, random_state=42, cluster_std=5.0)
 X_train, y_train = X[:600], y[:600]
 X_valid, y_valid = X[600:800], y[600:800]
 X_train_valid, y_train_valid = X[:800], y[:800]
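Here only n_features=2 is removed; cluster_std=5.0 stays because it overrides the default of 1.0, so dropping it would change the generated data. The same kind of check as above (again not part of the commit) confirms the two calls are equivalent:

# Only the default n_features=2 is dropped; the non-default
# cluster_std=5.0 must be kept for the data to stay the same.
import numpy as np
from sklearn.datasets import make_blobs

X_old, y_old = make_blobs(n_samples=1000, n_features=2, random_state=42,
                          cluster_std=5.0)
X_new, y_new = make_blobs(n_samples=1000, random_state=42, cluster_std=5.0)
assert np.array_equal(X_old, X_new) and np.array_equal(y_old, y_new)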

dev/_downloads/e853206a366f394126c8c0c0b59bd3ac/plot_calibration_multiclass.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\n\nimport matplotlib.pyplot as plt\n\nimport numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.metrics import log_loss\n\nnp.random.seed(0)\n\n# Generate data\nX, y = make_blobs(n_samples=1000, n_features=2, random_state=42,\n cluster_std=5.0)\nX_train, y_train = X[:600], y[:600]\nX_valid, y_valid = X[600:800], y[600:800]\nX_train_valid, y_train_valid = X[:800], y[:800]\nX_test, y_test = X[800:], y[800:]\n\n# Train uncalibrated random forest classifier on whole train and validation\n# data and evaluate on test data\nclf = RandomForestClassifier(n_estimators=25)\nclf.fit(X_train_valid, y_train_valid)\nclf_probs = clf.predict_proba(X_test)\nscore = log_loss(y_test, clf_probs)\n\n# Train random forest classifier, calibrate on validation data and evaluate\n# on test data\nclf = RandomForestClassifier(n_estimators=25)\nclf.fit(X_train, y_train)\nclf_probs = clf.predict_proba(X_test)\nsig_clf = CalibratedClassifierCV(clf, method=\"sigmoid\", cv=\"prefit\")\nsig_clf.fit(X_valid, y_valid)\nsig_clf_probs = sig_clf.predict_proba(X_test)\nsig_score = log_loss(y_test, sig_clf_probs)\n\n# Plot changes in predicted probabilities via arrows\nplt.figure()\ncolors = [\"r\", \"g\", \"b\"]\nfor i in range(clf_probs.shape[0]):\n plt.arrow(clf_probs[i, 0], clf_probs[i, 1],\n sig_clf_probs[i, 0] - clf_probs[i, 0],\n sig_clf_probs[i, 1] - clf_probs[i, 1],\n color=colors[y_test[i]], head_width=1e-2)\n\n# Plot perfect predictions\nplt.plot([1.0], [0.0], 'ro', ms=20, label=\"Class 1\")\nplt.plot([0.0], [1.0], 'go', ms=20, label=\"Class 2\")\nplt.plot([0.0], [0.0], 'bo', ms=20, label=\"Class 3\")\n\n# Plot boundaries of unit simplex\nplt.plot([0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], 'k', label=\"Simplex\")\n\n# Annotate points on the simplex\nplt.annotate(r'($\\frac{1}{3}$, $\\frac{1}{3}$, $\\frac{1}{3}$)',\n xy=(1.0/3, 1.0/3), xytext=(1.0/3, .23), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.plot([1.0/3], [1.0/3], 'ko', ms=5)\nplt.annotate(r'($\\frac{1}{2}$, $0$, $\\frac{1}{2}$)',\n xy=(.5, .0), xytext=(.5, .1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($0$, $\\frac{1}{2}$, $\\frac{1}{2}$)',\n xy=(.0, .5), xytext=(.1, .5), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($\\frac{1}{2}$, $\\frac{1}{2}$, $0$)',\n xy=(.5, .5), xytext=(.6, .6), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($0$, $0$, $1$)',\n xy=(0, 0), xytext=(.1, .1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($1$, $0$, $0$)',\n xy=(1, 0), xytext=(1, .1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($0$, $1$, $0$)',\n xy=(0, 1), xytext=(.1, 1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\n# Add grid\nplt.grid(False)\nfor x in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:\n plt.plot([0, x], [x, 0], 'k', alpha=0.2)\n plt.plot([0, 0 + (1-x)/2], [x, x + (1-x)/2], 'k', alpha=0.2)\n plt.plot([x, x + (1-x)/2], [0, 0 + (1-x)/2], 'k', alpha=0.2)\n\nplt.title(\"Change of predicted probabilities after sigmoid calibration\")\nplt.xlabel(\"Probability class 1\")\nplt.ylabel(\"Probability class 2\")\nplt.xlim(-0.05, 1.05)\nplt.ylim(-0.05, 1.05)\nplt.legend(loc=\"best\")\n\nprint(\"Log-loss of\")\nprint(\" * uncalibrated classifier trained on 800 datapoints: %.3f \"\n % score)\nprint(\" * classifier trained on 600 datapoints and calibrated on \"\n \"200 datapoint: %.3f\" % sig_score)\n\n# Illustrate calibrator\nplt.figure()\n# generate grid over 2-simplex\np1d = np.linspace(0, 1, 20)\np0, p1 = np.meshgrid(p1d, p1d)\np2 = 1 - p0 - p1\np = np.c_[p0.ravel(), p1.ravel(), p2.ravel()]\np = p[p[:, 2] >= 0]\n\ncalibrated_classifier = sig_clf.calibrated_classifiers_[0]\nprediction = np.vstack([calibrator.predict(this_p)\n for calibrator, this_p in\n zip(calibrated_classifier.calibrators_, p.T)]).T\nprediction /= prediction.sum(axis=1)[:, None]\n\n# Plot modifications of calibrator\nfor i in range(prediction.shape[0]):\n plt.arrow(p[i, 0], p[i, 1],\n prediction[i, 0] - p[i, 0], prediction[i, 1] - p[i, 1],\n head_width=1e-2, color=colors[np.argmax(p[i])])\n# Plot boundaries of unit simplex\nplt.plot([0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], 'k', label=\"Simplex\")\n\nplt.grid(False)\nfor x in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:\n plt.plot([0, x], [x, 0], 'k', alpha=0.2)\n plt.plot([0, 0 + (1-x)/2], [x, x + (1-x)/2], 'k', alpha=0.2)\n plt.plot([x, x + (1-x)/2], [0, 0 + (1-x)/2], 'k', alpha=0.2)\n\nplt.title(\"Illustration of sigmoid calibrator\")\nplt.xlabel(\"Probability class 1\")\nplt.ylabel(\"Probability class 2\")\nplt.xlim(-0.05, 1.05)\nplt.ylim(-0.05, 1.05)\n\nplt.show()"
+"print(__doc__)\n\n# Author: Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\n\nimport matplotlib.pyplot as plt\n\nimport numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.metrics import log_loss\n\nnp.random.seed(0)\n\n# Generate data\nX, y = make_blobs(n_samples=1000, random_state=42, cluster_std=5.0)\nX_train, y_train = X[:600], y[:600]\nX_valid, y_valid = X[600:800], y[600:800]\nX_train_valid, y_train_valid = X[:800], y[:800]\nX_test, y_test = X[800:], y[800:]\n\n# Train uncalibrated random forest classifier on whole train and validation\n# data and evaluate on test data\nclf = RandomForestClassifier(n_estimators=25)\nclf.fit(X_train_valid, y_train_valid)\nclf_probs = clf.predict_proba(X_test)\nscore = log_loss(y_test, clf_probs)\n\n# Train random forest classifier, calibrate on validation data and evaluate\n# on test data\nclf = RandomForestClassifier(n_estimators=25)\nclf.fit(X_train, y_train)\nclf_probs = clf.predict_proba(X_test)\nsig_clf = CalibratedClassifierCV(clf, method=\"sigmoid\", cv=\"prefit\")\nsig_clf.fit(X_valid, y_valid)\nsig_clf_probs = sig_clf.predict_proba(X_test)\nsig_score = log_loss(y_test, sig_clf_probs)\n\n# Plot changes in predicted probabilities via arrows\nplt.figure()\ncolors = [\"r\", \"g\", \"b\"]\nfor i in range(clf_probs.shape[0]):\n plt.arrow(clf_probs[i, 0], clf_probs[i, 1],\n sig_clf_probs[i, 0] - clf_probs[i, 0],\n sig_clf_probs[i, 1] - clf_probs[i, 1],\n color=colors[y_test[i]], head_width=1e-2)\n\n# Plot perfect predictions\nplt.plot([1.0], [0.0], 'ro', ms=20, label=\"Class 1\")\nplt.plot([0.0], [1.0], 'go', ms=20, label=\"Class 2\")\nplt.plot([0.0], [0.0], 'bo', ms=20, label=\"Class 3\")\n\n# Plot boundaries of unit simplex\nplt.plot([0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], 'k', label=\"Simplex\")\n\n# Annotate points on the simplex\nplt.annotate(r'($\\frac{1}{3}$, $\\frac{1}{3}$, $\\frac{1}{3}$)',\n xy=(1.0/3, 1.0/3), xytext=(1.0/3, .23), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.plot([1.0/3], [1.0/3], 'ko', ms=5)\nplt.annotate(r'($\\frac{1}{2}$, $0$, $\\frac{1}{2}$)',\n xy=(.5, .0), xytext=(.5, .1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($0$, $\\frac{1}{2}$, $\\frac{1}{2}$)',\n xy=(.0, .5), xytext=(.1, .5), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($\\frac{1}{2}$, $\\frac{1}{2}$, $0$)',\n xy=(.5, .5), xytext=(.6, .6), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($0$, $0$, $1$)',\n xy=(0, 0), xytext=(.1, .1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($1$, $0$, $0$)',\n xy=(1, 0), xytext=(1, .1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\nplt.annotate(r'($0$, $1$, $0$)',\n xy=(0, 1), xytext=(.1, 1), xycoords='data',\n arrowprops=dict(facecolor='black', shrink=0.05),\n horizontalalignment='center', verticalalignment='center')\n# Add grid\nplt.grid(False)\nfor x in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:\n plt.plot([0, x], [x, 0], 'k', alpha=0.2)\n plt.plot([0, 0 + (1-x)/2], [x, x + (1-x)/2], 'k', alpha=0.2)\n plt.plot([x, x + (1-x)/2], [0, 0 + (1-x)/2], 'k', alpha=0.2)\n\nplt.title(\"Change of predicted probabilities after sigmoid calibration\")\nplt.xlabel(\"Probability class 1\")\nplt.ylabel(\"Probability class 2\")\nplt.xlim(-0.05, 1.05)\nplt.ylim(-0.05, 1.05)\nplt.legend(loc=\"best\")\n\nprint(\"Log-loss of\")\nprint(\" * uncalibrated classifier trained on 800 datapoints: %.3f \"\n % score)\nprint(\" * classifier trained on 600 datapoints and calibrated on \"\n \"200 datapoint: %.3f\" % sig_score)\n\n# Illustrate calibrator\nplt.figure()\n# generate grid over 2-simplex\np1d = np.linspace(0, 1, 20)\np0, p1 = np.meshgrid(p1d, p1d)\np2 = 1 - p0 - p1\np = np.c_[p0.ravel(), p1.ravel(), p2.ravel()]\np = p[p[:, 2] >= 0]\n\ncalibrated_classifier = sig_clf.calibrated_classifiers_[0]\nprediction = np.vstack([calibrator.predict(this_p)\n for calibrator, this_p in\n zip(calibrated_classifier.calibrators_, p.T)]).T\nprediction /= prediction.sum(axis=1)[:, None]\n\n# Plot modifications of calibrator\nfor i in range(prediction.shape[0]):\n plt.arrow(p[i, 0], p[i, 1],\n prediction[i, 0] - p[i, 0], prediction[i, 1] - p[i, 1],\n head_width=1e-2, color=colors[np.argmax(p[i])])\n# Plot boundaries of unit simplex\nplt.plot([0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], 'k', label=\"Simplex\")\n\nplt.grid(False)\nfor x in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:\n plt.plot([0, x], [x, 0], 'k', alpha=0.2)\n plt.plot([0, 0 + (1-x)/2], [x, x + (1-x)/2], 'k', alpha=0.2)\n plt.plot([x, x + (1-x)/2], [0, 0 + (1-x)/2], 'k', alpha=0.2)\n\nplt.title(\"Illustration of sigmoid calibrator\")\nplt.xlabel(\"Probability class 1\")\nplt.ylabel(\"Probability class 2\")\nplt.xlim(-0.05, 1.05)\nplt.ylim(-0.05, 1.05)\n\nplt.show()"
 ]
 }
 ],
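Aside from the make_blobs cleanup, the code in this diff follows the prefit calibration pattern: the classifier is trained on one split and then calibrated with cv="prefit" on a disjoint validation split. Stripped of the plotting, a minimal sketch of that pattern, using the same calls and split sizes as the diff above:

# Train on the first 600 samples, sigmoid-calibrate the already-fitted
# model on a held-out validation split, evaluate log-loss on the rest.
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import log_loss

X, y = make_blobs(n_samples=1000, random_state=42, cluster_std=5.0)
clf = RandomForestClassifier(n_estimators=25).fit(X[:600], y[:600])
sig_clf = CalibratedClassifierCV(clf, method="sigmoid", cv="prefit")
sig_clf.fit(X[600:800], y[600:800])  # calibration data, never seen in training
print(log_loss(y[800:], sig_clf.predict_proba(X[800:])))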

dev/_downloads/fe8fa87637ff54598ea6bde113ad553b/plot_calibration.py

Lines changed: 2 additions & 2 deletions
@@ -47,8 +47,8 @@
 # half positive samples and half negative samples. Probability in this
 # blob is therefore 0.5.
 centers = [(-5, -5), (0, 0), (5, 5)]
-X, y = make_blobs(n_samples=n_samples, n_features=2, cluster_std=1.0,
-                  centers=centers, shuffle=False, random_state=42)
+X, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False,
+                  random_state=42)
 
 y[:n_samples // 2] = 0
 y[n_samples // 2:] = 1

dev/_downloads/scikit-learn-docs.pdf

Binary file (-2.11 KB); not shown.

dev/_images/iris.png

Binary image changed: 0 Bytes → 274 Bytes; not shown.
