
Commit 5484896

Pushing the docs to dev/ for branch: main, commit ca0862a9dbc5dadab2ccd30828de6de0c6f1f69d
1 parent 342aeb0 commit 5484896


1,269 files changed: +5,452 / -4,926 lines changed


dev/_downloads/61adcadacdb5cfe445011b0b0d065d44/plot_gmm_selection.py

Lines changed: 142 additions & 79 deletions
@@ -3,108 +3,171 @@
 Gaussian Mixture Model Selection
 ================================

-This example shows that model selection can be performed with
-Gaussian Mixture Models using :ref:`information-theoretic criteria (BIC) <aic_bic>`.
-Model selection concerns both the covariance type
-and the number of components in the model.
-In that case, AIC also provides the right result (not shown to save time),
-but BIC is better suited if the problem is to identify the right model.
-Unlike Bayesian procedures, such inferences are prior-free.
+This example shows that model selection can be performed with Gaussian Mixture
+Models (GMM) using :ref:`information-theory criteria <aic_bic>`. Model selection
+concerns both the covariance type and the number of components in the model.

-In that case, the model with 2 components and full covariance
-(which corresponds to the true generative model) is selected.
+In this case, both the Akaike Information Criterion (AIC) and the Bayes
+Information Criterion (BIC) provide the right result, but we only demo the
+latter as BIC is better suited to identify the true model among a set of
+candidates. Unlike Bayesian procedures, such inferences are prior-free.

 """

+# %%
+# Data generation
+# ---------------
+#
+# We generate two components (each one containing `n_samples`) by randomly
+# sampling the standard normal distribution as returned by `numpy.random.randn`.
+# One component is kept spherical yet shifted and re-scaled. The other one is
+# deformed to have a more general covariance matrix.
+
 import numpy as np
-import itertools

-from scipy import linalg
+n_samples = 500
+np.random.seed(0)
+C = np.array([[0.0, -0.1], [1.7, 0.4]])
+component_1 = np.dot(np.random.randn(n_samples, 2), C)  # general
+component_2 = 0.7 * np.random.randn(n_samples, 2) + np.array([-4, 1])  # spherical
+
+X = np.concatenate([component_1, component_2])
+
+# %%
+# We can visualize the different components:
+
 import matplotlib.pyplot as plt
-import matplotlib as mpl

-from sklearn import mixture
+plt.scatter(component_1[:, 0], component_1[:, 1], s=0.8)
+plt.scatter(component_2[:, 0], component_2[:, 1], s=0.8)
+plt.title("Gaussian Mixture components")
+plt.axis("equal")
+plt.show()

-# Number of samples per component
-n_samples = 500
+# %%
+# Model training and selection
+# ----------------------------
+#
+# We vary the number of components from 1 to 6 and the type of covariance
+# parameters to use:
+#
+# - `"full"`: each component has its own general covariance matrix.
+# - `"tied"`: all components share the same general covariance matrix.
+# - `"diag"`: each component has its own diagonal covariance matrix.
+# - `"spherical"`: each component has its own single variance.
+#
+# We score the different models and keep the best model (the lowest BIC). This
+# is done by using :class:`~sklearn.model_selection.GridSearchCV` and a
+# user-defined score function which returns the negative BIC score, as
+# :class:`~sklearn.model_selection.GridSearchCV` is designed to **maximize** a
+# score (maximizing the negative BIC is equivalent to minimizing the BIC).
+#
+# The best set of parameters and estimator are stored in `best_parameters_` and
+# `best_estimator_`, respectively.
+
+from sklearn.mixture import GaussianMixture
+from sklearn.model_selection import GridSearchCV
+
+
+def gmm_bic_score(estimator, X):
+    """Callable to pass to GridSearchCV that will use the BIC score."""
+    # Make it negative since GridSearchCV expects a score to maximize
+    return -estimator.bic(X)

-# Generate random sample, two components
-np.random.seed(0)
-C = np.array([[0.0, -0.1], [1.7, 0.4]])
-X = np.r_[
-    np.dot(np.random.randn(n_samples, 2), C),
-    0.7 * np.random.randn(n_samples, 2) + np.array([-6, 3]),
-]

-lowest_bic = np.infty
-bic = []
-n_components_range = range(1, 7)
-cv_types = ["spherical", "tied", "diag", "full"]
-for cv_type in cv_types:
-    for n_components in n_components_range:
-        # Fit a Gaussian mixture with EM
-        gmm = mixture.GaussianMixture(
-            n_components=n_components, covariance_type=cv_type
-        )
-        gmm.fit(X)
-        bic.append(gmm.bic(X))
-        if bic[-1] < lowest_bic:
-            lowest_bic = bic[-1]
-            best_gmm = gmm
-
-bic = np.array(bic)
-color_iter = itertools.cycle(["navy", "turquoise", "cornflowerblue", "darkorange"])
-clf = best_gmm
-bars = []
+param_grid = {
+    "n_components": range(1, 7),
+    "covariance_type": ["spherical", "tied", "diag", "full"],
+}
+grid_search = GridSearchCV(
+    GaussianMixture(), param_grid=param_grid, scoring=gmm_bic_score
+)
+grid_search.fit(X)

+# %%
 # Plot the BIC scores
-plt.figure(figsize=(8, 6))
-spl = plt.subplot(2, 1, 1)
-for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)):
-    xpos = np.array(n_components_range) + 0.2 * (i - 2)
-    bars.append(
-        plt.bar(
-            xpos,
-            bic[i * len(n_components_range) : (i + 1) * len(n_components_range)],
-            width=0.2,
-            color=color,
-        )
-    )
-plt.xticks(n_components_range)
-plt.ylim([bic.min() * 1.01 - 0.01 * bic.max(), bic.max()])
-plt.title("BIC score per model")
-xpos = (
-    np.mod(bic.argmin(), len(n_components_range))
-    + 0.65
-    + 0.2 * np.floor(bic.argmin() / len(n_components_range))
+# -------------------
+#
+# To ease the plotting we can create a `pandas.DataFrame` from the results of
+# the cross-validation done by the grid search. We re-inverse the sign of the
+# BIC score to show the effect of minimizing it.
+
+import pandas as pd
+
+df = pd.DataFrame(grid_search.cv_results_)[
+    ["param_n_components", "param_covariance_type", "mean_test_score"]
+]
+df["mean_test_score"] = -df["mean_test_score"]
+df = df.rename(
+    columns={
+        "param_n_components": "Number of components",
+        "param_covariance_type": "Type of covariance",
+        "mean_test_score": "BIC score",
+    }
+)
+df.sort_values(by="BIC score").head()
+
+# %%
+import seaborn as sns
+
+sns.catplot(
+    data=df,
+    kind="bar",
+    x="Number of components",
+    y="BIC score",
+    hue="Type of covariance",
 )
-plt.text(xpos, bic.min() * 0.97 + 0.03 * bic.max(), "*", fontsize=14)
-spl.set_xlabel("Number of components")
-spl.legend([b[0] for b in bars], cv_types)
-
-# Plot the winner
-splot = plt.subplot(2, 1, 2)
-Y_ = clf.predict(X)
-for i, (mean, cov, color) in enumerate(zip(clf.means_, clf.covariances_, color_iter)):
+plt.show()
+
+# %%
+# In the present case, the model with 2 components and full covariance (which
+# corresponds to the true generative model) has the lowest BIC score and is
+# therefore selected by the grid search.
+#
+# Plot the best model
+# -------------------
+#
+# We plot an ellipse to show each Gaussian component of the selected model. For
+# such purpose, one needs to find the eigenvalues of the covariance matrices as
+# returned by the `covariances_` attribute. The shape of such matrices depends
+# on the `covariance_type`:
+#
+# - `"full"`: (`n_components`, `n_features`, `n_features`)
+# - `"tied"`: (`n_features`, `n_features`)
+# - `"diag"`: (`n_components`, `n_features`)
+# - `"spherical"`: (`n_components`,)
+
+from matplotlib.patches import Ellipse
+from scipy import linalg
+
+color_iter = sns.color_palette("tab10", 2)[::-1]
+Y_ = grid_search.predict(X)
+
+fig, ax = plt.subplots()
+
+for i, (mean, cov, color) in enumerate(
+    zip(
+        grid_search.best_estimator_.means_,
+        grid_search.best_estimator_.covariances_,
+        color_iter,
+    )
+):
     v, w = linalg.eigh(cov)
     if not np.any(Y_ == i):
         continue
     plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], 0.8, color=color)

-    # Plot an ellipse to show the Gaussian component
     angle = np.arctan2(w[0][1], w[0][0])
     angle = 180.0 * angle / np.pi  # convert to degrees
     v = 2.0 * np.sqrt(2.0) * np.sqrt(v)
-    ell = mpl.patches.Ellipse(mean, v[0], v[1], 180.0 + angle, color=color)
-    ell.set_clip_box(splot.bbox)
-    ell.set_alpha(0.5)
-    splot.add_artist(ell)
+    ellipse = Ellipse(mean, v[0], v[1], angle=180.0 + angle, color=color)
+    ellipse.set_clip_box(fig.bbox)
+    ellipse.set_alpha(0.5)
+    ax.add_artist(ellipse)

-plt.xticks(())
-plt.yticks(())
 plt.title(
-    f"Selected GMM: {best_gmm.covariance_type} model, "
-    f"{best_gmm.n_components} components"
+    f"Selected GMM: {grid_search.best_params_['covariance_type']} model, "
+    f"{grid_search.best_params_['n_components']} components"
 )
-plt.subplots_adjust(hspace=0.35, bottom=0.02)
+plt.axis("equal")
 plt.show()
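The core pattern introduced by this rewrite is scoring `GaussianMixture` candidates with a negative-BIC scorer inside `GridSearchCV`. For readers skimming the diff, here is a minimal, self-contained sketch of that pattern under stated assumptions: scikit-learn with `GridSearchCV` and `GaussianMixture` as used above, and `X_demo` as placeholder random data rather than the example's dataset.

import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X_demo = rng.randn(300, 2)  # placeholder data for illustration only, not the example's dataset


def gmm_bic_score(estimator, X):
    """Return the negative BIC so that GridSearchCV's maximization minimizes the BIC."""
    return -estimator.bic(X)


param_grid = {
    "n_components": range(1, 7),
    "covariance_type": ["spherical", "tied", "diag", "full"],
}
search = GridSearchCV(GaussianMixture(), param_grid=param_grid, scoring=gmm_bic_score)
search.fit(X_demo)
print(search.best_params_)  # lowest-BIC combination of covariance type and n_components

After `fit`, `search.best_estimator_` holds the refitted lowest-BIC model, which is what the rewritten example goes on to plot.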

dev/_downloads/6d4f620ec6653356eb970c2a6ed62081/plot_calibration_curve.ipynb

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Uncalibrated :class:`~sklearn.naive_bayes.GaussianNB` is poorly calibrated\nbecause of\nthe redundant features which violate the assumption of feature-independence\nand result in an overly confident classifier, which is indicated by the\ntypical transposed-sigmoid curve. Calibration of the probabilities of\n:class:`~sklearn.naive_bayes.GaussianNB` with `isotonic` can fix\nthis issue as can be seen from the nearly diagonal calibration curve.\n:ref:sigmoid regression `<sigmoid_regressor>` also improves calibration\nslightly,\nalbeit not as strongly as the non-parametric isotonic regression. This can be\nattributed to the fact that we have plenty of calibration data such that the\ngreater flexibility of the non-parametric model can be exploited.\n\nBelow we will make a quantitative analysis considering several classification\nmetrics: `brier_score_loss`, `log_loss`,\n`precision, recall, F1 score <precision_recall_f_measure_metrics>` and\n`ROC AUC <roc_metrics>`.\n\n"
+"Uncalibrated :class:`~sklearn.naive_bayes.GaussianNB` is poorly calibrated\nbecause of\nthe redundant features which violate the assumption of feature-independence\nand result in an overly confident classifier, which is indicated by the\ntypical transposed-sigmoid curve. Calibration of the probabilities of\n:class:`~sklearn.naive_bayes.GaussianNB` with `isotonic` can fix\nthis issue as can be seen from the nearly diagonal calibration curve.\n`Sigmoid regression <sigmoid_regressor>` also improves calibration\nslightly,\nalbeit not as strongly as the non-parametric isotonic regression. This can be\nattributed to the fact that we have plenty of calibration data such that the\ngreater flexibility of the non-parametric model can be exploited.\n\nBelow we will make a quantitative analysis considering several classification\nmetrics: `brier_score_loss`, `log_loss`,\n`precision, recall, F1 score <precision_recall_f_measure_metrics>` and\n`ROC AUC <roc_metrics>`.\n\n"
 ]
 },
 {

dev/_downloads/85db957603c93bd3e0a4265ea6565b13/plot_calibration_curve.py

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@
 # typical transposed-sigmoid curve. Calibration of the probabilities of
 # :class:`~sklearn.naive_bayes.GaussianNB` with :ref:`isotonic` can fix
 # this issue as can be seen from the nearly diagonal calibration curve.
-# :ref:sigmoid regression `<sigmoid_regressor>` also improves calibration
+# :ref:`Sigmoid regression <sigmoid_regressor>` also improves calibration
 # slightly,
 # albeit not as strongly as the non-parametric isotonic regression. This can be
 # attributed to the fact that we have plenty of calibration data such that the

dev/_downloads/9b5ca5a413df494778642d75caeb33d7/plot_omp.py

Lines changed: 4 additions & 4 deletions
@@ -41,7 +41,7 @@
 plt.subplot(4, 1, 1)
 plt.xlim(0, 512)
 plt.title("Sparse signal")
-plt.stem(idx, w[idx], use_line_collection=True)
+plt.stem(idx, w[idx])

 # plot the noise-free reconstruction
 omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
@@ -51,7 +51,7 @@
 plt.subplot(4, 1, 2)
 plt.xlim(0, 512)
 plt.title("Recovered signal from noise-free measurements")
-plt.stem(idx_r, coef[idx_r], use_line_collection=True)
+plt.stem(idx_r, coef[idx_r])

 # plot the noisy reconstruction
 omp.fit(X, y_noisy)
@@ -60,7 +60,7 @@
 plt.subplot(4, 1, 3)
 plt.xlim(0, 512)
 plt.title("Recovered signal from noisy measurements")
-plt.stem(idx_r, coef[idx_r], use_line_collection=True)
+plt.stem(idx_r, coef[idx_r])

 # plot the noisy reconstruction with number of non-zeros set by CV
 omp_cv = OrthogonalMatchingPursuitCV()
@@ -70,7 +70,7 @@
 plt.subplot(4, 1, 4)
 plt.xlim(0, 512)
 plt.title("Recovered signal from noisy measurements with CV")
-plt.stem(idx_r, coef[idx_r], use_line_collection=True)
+plt.stem(idx_r, coef[idx_r])

 plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
 plt.suptitle("Sparse signal recovery with Orthogonal Matching Pursuit", fontsize=16)
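The only change to this example is dropping `use_line_collection=True` from the `plt.stem` calls; recent Matplotlib releases draw stems as a line collection by default and deprecate the keyword, so the plain call is equivalent. A minimal sketch of the updated call follows, using placeholder data rather than the example's signal:

import numpy as np
import matplotlib.pyplot as plt

idx = np.arange(5)
w = np.array([1.0, -0.5, 2.0, 0.3, -1.2])  # placeholder coefficients, illustration only
plt.stem(idx, w)  # no use_line_collection keyword needed
plt.show()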

dev/_downloads/a79c521bd835e8739e06d7dafbfc4eb4/plot_omp.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"import matplotlib.pyplot as plt\nimport numpy as np\nfrom sklearn.linear_model import OrthogonalMatchingPursuit\nfrom sklearn.linear_model import OrthogonalMatchingPursuitCV\nfrom sklearn.datasets import make_sparse_coded_signal\n\nn_components, n_features = 512, 100\nn_nonzero_coefs = 17\n\n# generate the data\n\n# y = Xw\n# |x|_0 = n_nonzero_coefs\n\ny, X, w = make_sparse_coded_signal(\n    n_samples=1,\n    n_components=n_components,\n    n_features=n_features,\n    n_nonzero_coefs=n_nonzero_coefs,\n    random_state=0,\n    data_transposed=True,\n)\n\n(idx,) = w.nonzero()\n\n# distort the clean signal\ny_noisy = y + 0.05 * np.random.randn(len(y))\n\n# plot the sparse signal\nplt.figure(figsize=(7, 7))\nplt.subplot(4, 1, 1)\nplt.xlim(0, 512)\nplt.title(\"Sparse signal\")\nplt.stem(idx, w[idx], use_line_collection=True)\n\n# plot the noise-free reconstruction\nomp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)\nomp.fit(X, y)\ncoef = omp.coef_\n(idx_r,) = coef.nonzero()\nplt.subplot(4, 1, 2)\nplt.xlim(0, 512)\nplt.title(\"Recovered signal from noise-free measurements\")\nplt.stem(idx_r, coef[idx_r], use_line_collection=True)\n\n# plot the noisy reconstruction\nomp.fit(X, y_noisy)\ncoef = omp.coef_\n(idx_r,) = coef.nonzero()\nplt.subplot(4, 1, 3)\nplt.xlim(0, 512)\nplt.title(\"Recovered signal from noisy measurements\")\nplt.stem(idx_r, coef[idx_r], use_line_collection=True)\n\n# plot the noisy reconstruction with number of non-zeros set by CV\nomp_cv = OrthogonalMatchingPursuitCV()\nomp_cv.fit(X, y_noisy)\ncoef = omp_cv.coef_\n(idx_r,) = coef.nonzero()\nplt.subplot(4, 1, 4)\nplt.xlim(0, 512)\nplt.title(\"Recovered signal from noisy measurements with CV\")\nplt.stem(idx_r, coef[idx_r], use_line_collection=True)\n\nplt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)\nplt.suptitle(\"Sparse signal recovery with Orthogonal Matching Pursuit\", fontsize=16)\nplt.show()"
+"import matplotlib.pyplot as plt\nimport numpy as np\nfrom sklearn.linear_model import OrthogonalMatchingPursuit\nfrom sklearn.linear_model import OrthogonalMatchingPursuitCV\nfrom sklearn.datasets import make_sparse_coded_signal\n\nn_components, n_features = 512, 100\nn_nonzero_coefs = 17\n\n# generate the data\n\n# y = Xw\n# |x|_0 = n_nonzero_coefs\n\ny, X, w = make_sparse_coded_signal(\n    n_samples=1,\n    n_components=n_components,\n    n_features=n_features,\n    n_nonzero_coefs=n_nonzero_coefs,\n    random_state=0,\n    data_transposed=True,\n)\n\n(idx,) = w.nonzero()\n\n# distort the clean signal\ny_noisy = y + 0.05 * np.random.randn(len(y))\n\n# plot the sparse signal\nplt.figure(figsize=(7, 7))\nplt.subplot(4, 1, 1)\nplt.xlim(0, 512)\nplt.title(\"Sparse signal\")\nplt.stem(idx, w[idx])\n\n# plot the noise-free reconstruction\nomp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)\nomp.fit(X, y)\ncoef = omp.coef_\n(idx_r,) = coef.nonzero()\nplt.subplot(4, 1, 2)\nplt.xlim(0, 512)\nplt.title(\"Recovered signal from noise-free measurements\")\nplt.stem(idx_r, coef[idx_r])\n\n# plot the noisy reconstruction\nomp.fit(X, y_noisy)\ncoef = omp.coef_\n(idx_r,) = coef.nonzero()\nplt.subplot(4, 1, 3)\nplt.xlim(0, 512)\nplt.title(\"Recovered signal from noisy measurements\")\nplt.stem(idx_r, coef[idx_r])\n\n# plot the noisy reconstruction with number of non-zeros set by CV\nomp_cv = OrthogonalMatchingPursuitCV()\nomp_cv.fit(X, y_noisy)\ncoef = omp_cv.coef_\n(idx_r,) = coef.nonzero()\nplt.subplot(4, 1, 4)\nplt.xlim(0, 512)\nplt.title(\"Recovered signal from noisy measurements with CV\")\nplt.stem(idx_r, coef[idx_r])\n\nplt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)\nplt.suptitle(\"Sparse signal recovery with Orthogonal Matching Pursuit\", fontsize=16)\nplt.show()"
 ]
 }
 ],
