
Commit 42393e9

Pushing the docs to dev/ for branch: main, commit 38ff5be25d0164bf9598bcfdde3b791ad6e261b0
1 parent 49af9c3 commit 42393e9

1,219 files changed: +4630 additions, -4454 deletions

dev/_downloads/0b39f715b5e32f01df3d212b6d822b82/plot_calibration.py

Lines changed: 25 additions & 16 deletions
@@ -22,24 +22,21 @@
 Brier score.
 
 """
-
-# Author: Mathieu Blondel <[email protected]>
-# Alexandre Gramfort <[email protected]>
-# Balazs Kegl <[email protected]>
-# Jan Hendrik Metzen <[email protected]>
+# Authors:
+# Mathieu Blondel <[email protected]>
+# Alexandre Gramfort <[email protected]>
+# Balazs Kegl <[email protected]>
+# Jan Hendrik Metzen <[email protected]>
 # License: BSD Style.
 
+# %%
+# Generate synthetic dataset
+# --------------------------
 import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib import cm
 
 from sklearn.datasets import make_blobs
-from sklearn.naive_bayes import GaussianNB
-from sklearn.metrics import brier_score_loss
-from sklearn.calibration import CalibratedClassifierCV
 from sklearn.model_selection import train_test_split
 
-
 n_samples = 50000
 n_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here
 
@@ -58,17 +55,24 @@
     X, y, sample_weight, test_size=0.9, random_state=42
 )
 
-# Gaussian Naive-Bayes with no calibration
+# %%
+# Gaussian Naive-Bayes
+# --------------------
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.metrics import brier_score_loss
+from sklearn.naive_bayes import GaussianNB
+
+# With no calibration
 clf = GaussianNB()
 clf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights
 prob_pos_clf = clf.predict_proba(X_test)[:, 1]
 
-# Gaussian Naive-Bayes with isotonic calibration
+# With isotonic calibration
 clf_isotonic = CalibratedClassifierCV(clf, cv=2, method="isotonic")
 clf_isotonic.fit(X_train, y_train, sample_weight=sw_train)
 prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]
 
-# Gaussian Naive-Bayes with sigmoid calibration
+# With sigmoid calibration
 clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method="sigmoid")
 clf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)
 prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]
@@ -84,8 +88,12 @@
 clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test)
 print("With sigmoid calibration: %1.3f" % clf_sigmoid_score)
 
-# #############################################################################
-# Plot the data and the predicted probabilities
+# %%
+# Plot data and the predicted probabilities
+# -----------------------------------------
+from matplotlib import cm
+import matplotlib.pyplot as plt
+
 plt.figure()
 y_unique = np.unique(y)
 colors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
@@ -105,6 +113,7 @@
 plt.title("Data")
 
 plt.figure()
+
 order = np.lexsort((prob_pos_clf,))
 plt.plot(prob_pos_clf[order], "r", label="No calibration (%1.3f)" % clf_score)
 plt.plot(

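Note on the restructuring above: the new `# %%` markers are the cell separators that sphinx-gallery (which builds these example pages) uses to split a flat script into alternating text and code blocks, and each section's imports now sit at the top of the section that first uses them. As a rough illustration of the mechanism, here is a minimal sketch of how such markers partition a script; `split_cells` and the local file path are hypothetical, and sphinx-gallery's real parser does more (it also renders the comment text as reST):

from pathlib import Path


def split_cells(script_text):
    # Collect chunks of the script delimited by lines starting with "# %%".
    cells, current = [], []
    for line in script_text.splitlines():
        if line.lstrip().startswith("# %%"):
            cells.append("\n".join(current))
            current = []
        else:
            current.append(line)
    cells.append("\n".join(current))
    return [c for c in cells if c.strip()]


# Assumes a local copy of the example script (hypothetical path).
chunks = split_cells(Path("plot_calibration.py").read_text())
print(len(chunks))  # 4: docstring/header plus the three "# %%" sections
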
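The printed scores are weighted Brier scores: the sample-weighted mean squared difference between the predicted positive-class probability and the 0/1 outcome, so smaller is better. A minimal sketch of the quantity being reported, assuming binary 0/1 labels (`weighted_brier` is a hypothetical helper, not scikit-learn's implementation):

import numpy as np


def weighted_brier(y_true, prob_pos, weights):
    # Weighted mean of (p_i - y_i)^2; 0 is a perfect probabilistic prediction.
    y_true = np.asarray(y_true, dtype=float)
    prob_pos = np.asarray(prob_pos, dtype=float)
    return np.average((prob_pos - y_true) ** 2, weights=weights)


# A confident wrong prediction is penalized much more than a hedged one.
print(weighted_brier([1, 0], [0.9, 0.8], [1.0, 1.0]))  # 0.325
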
dev/_downloads/0c15970ac17183d2bf864a9563081aeb/plot_calibration.ipynb

Lines changed: 55 additions & 1 deletion
@@ -26,7 +26,61 @@
    },
    "outputs": [],
    "source": [
-    "# Author: Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.model_selection import train_test_split\n\n\nn_samples = 50000\nn_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False, random_state=42)\n\ny[: n_samples // 2] = 0\ny[n_samples // 2 :] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(\n    X, y, sample_weight, test_size=0.9, random_state=42\n)\n\n# Gaussian Naive-Bayes with no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method=\"isotonic\")\nclf_isotonic.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method=\"sigmoid\")\nclf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier score losses: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, sample_weight=sw_test)\nprint(\"No calibration: %1.3f\" % clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sample_weight=sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)\n\n# #############################################################################\n# Plot the data and the predicted probabilities\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n    this_X = X_train[y_train == this_y]\n    this_sw = sw_train[y_train == this_y]\n    plt.scatter(\n        this_X[:, 0],\n        this_X[:, 1],\n        s=this_sw * 50,\n        c=color[np.newaxis, :],\n        alpha=0.5,\n        edgecolor=\"k\",\n        label=\"Class %s\" % this_y,\n    )\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\norder = np.lexsort((prob_pos_clf,))\nplt.plot(prob_pos_clf[order], \"r\", label=\"No calibration (%1.3f)\" % clf_score)\nplt.plot(\n    prob_pos_isotonic[order],\n    \"g\",\n    linewidth=3,\n    label=\"Isotonic calibration (%1.3f)\" % clf_isotonic_score,\n)\nplt.plot(\n    prob_pos_sigmoid[order],\n    \"b\",\n    linewidth=3,\n    label=\"Sigmoid calibration (%1.3f)\" % clf_sigmoid_score,\n)\nplt.plot(\n    np.linspace(0, y_test.size, 51)[1::2],\n    y_test[order].reshape(25, -1).mean(1),\n    \"k\",\n    linewidth=3,\n    label=r\"Empirical\",\n)\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability (uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
+    "# Authors:\n# Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate synthetic dataset\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.model_selection import train_test_split\n\nn_samples = 50000\nn_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False, random_state=42)\n\ny[: n_samples // 2] = 0\ny[n_samples // 2 :] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(\n    X, y, sample_weight, test_size=0.9, random_state=42\n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Gaussian Naive-Bayes\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.naive_bayes import GaussianNB\n\n# With no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# With isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method=\"isotonic\")\nclf_isotonic.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# With sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method=\"sigmoid\")\nclf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier score losses: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, sample_weight=sw_test)\nprint(\"No calibration: %1.3f\" % clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sample_weight=sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plot data and the predicted probabilities\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import cm\nimport matplotlib.pyplot as plt\n\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n    this_X = X_train[y_train == this_y]\n    this_sw = sw_train[y_train == this_y]\n    plt.scatter(\n        this_X[:, 0],\n        this_X[:, 1],\n        s=this_sw * 50,\n        c=color[np.newaxis, :],\n        alpha=0.5,\n        edgecolor=\"k\",\n        label=\"Class %s\" % this_y,\n    )\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\n\norder = np.lexsort((prob_pos_clf,))\nplt.plot(prob_pos_clf[order], \"r\", label=\"No calibration (%1.3f)\" % clf_score)\nplt.plot(\n    prob_pos_isotonic[order],\n    \"g\",\n    linewidth=3,\n    label=\"Isotonic calibration (%1.3f)\" % clf_isotonic_score,\n)\nplt.plot(\n    prob_pos_sigmoid[order],\n    \"b\",\n    linewidth=3,\n    label=\"Sigmoid calibration (%1.3f)\" % clf_sigmoid_score,\n)\nplt.plot(\n    np.linspace(0, y_test.size, 51)[1::2],\n    y_test[order].reshape(25, -1).mean(1),\n    \"k\",\n    linewidth=3,\n    label=r\"Empirical\",\n)\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability (uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
   ]
  }
 ],
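
After this change the notebook alternates markdown headers with code cells, mirroring the `# %%` sections of the script. A stdlib-only sanity check of the generated file, assuming a local checkout where the path from this commit's file tree exists:

import json
from pathlib import Path

nb_path = Path("dev/_downloads/0c15970ac17183d2bf864a9563081aeb/plot_calibration.ipynb")
nb = json.loads(nb_path.read_text())

# Print each cell's type and first source line to verify the structure.
for i, cell in enumerate(nb["cells"]):
    source = "".join(cell["source"])
    head = source.splitlines()[0] if source else "<empty>"
    print(f"{i}: {cell['cell_type']:<8} {head}")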

dev/_downloads/scikit-learn-docs.zip

Binary file not shown (4.41 KB).

0 commit comments