
Commit 116a255

Pushing the docs for revision for branch: master, commit a4f632f02f0e66bf3533e5df64e936d87f6e57ee
1 parent 0b06973 commit 116a255


915 files changed: +4350, -3567 lines changed
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
{
  "nbformat_minor": 0,
  "nbformat": 4,
  "cells": [
    {
      "execution_count": null,
      "cell_type": "code",
      "source": [
        "%matplotlib inline"
      ],
      "outputs": [],
      "metadata": {
        "collapsed": false
      }
    },
    {
      "source": [
        "\n# Bayesian Gaussian Mixture Concentration Prior Analysis\n\n\nPlot the resulting ellipsoids of a mixture of three Gaussians with\nvariational Bayesian Gaussian Mixture for three different values of the\nprior on the Dirichlet concentration.\n\nFor all models, the variational Bayesian Gaussian Mixture adapts its number of\nmixture components automatically. The parameter `dirichlet_concentration_prior`\nhas a direct link with the resulting number of components. Specifying a high\nvalue of `dirichlet_concentration_prior` leads more often to uniformly-sized\nmixture components, while specifying small (under 0.1) values will lead to some\nmixture components getting almost all the points while most mixture components\nwill be centered on just a few of the remaining points.\n"
      ],
      "cell_type": "markdown",
      "metadata": {}
    },
    {
      "execution_count": null,
      "cell_type": "code",
      "source": [
        "# Author: Thierry Guillemot <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib as mpl\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\n\nfrom sklearn.mixture import BayesianGaussianMixture\n\nprint(__doc__)\n\n\ndef plot_ellipses(ax, weights, means, covars):\n    for n in range(means.shape[0]):\n        v, w = np.linalg.eigh(covars[n][:2, :2])\n        u = w[0] / np.linalg.norm(w[0])\n        angle = np.arctan2(u[1], u[0])\n        angle = 180 * angle / np.pi  # convert to degrees\n        v = 2 * np.sqrt(2) * np.sqrt(v)\n        ell = mpl.patches.Ellipse(means[n, :2], v[0], v[1], 180 + angle)\n        ell.set_clip_box(ax.bbox)\n        ell.set_alpha(weights[n])\n        ax.add_artist(ell)\n\n\ndef plot_results(ax1, ax2, estimator, dirichlet_concentration_prior, X, y,\n                 plot_title=False):\n    estimator.dirichlet_concentration_prior = dirichlet_concentration_prior\n    estimator.fit(X)\n    ax1.set_title(\"Bayesian Gaussian Mixture for \"\n                  r\"$dc_0=%.1e$\" % dirichlet_concentration_prior)\n    # ax1.axis('equal')\n    ax1.scatter(X[:, 0], X[:, 1], s=5, marker='o', color=colors[y], alpha=0.8)\n    ax1.set_xlim(-2., 2.)\n    ax1.set_ylim(-3., 3.)\n    ax1.set_xticks(())\n    ax1.set_yticks(())\n    plot_ellipses(ax1, estimator.weights_, estimator.means_,\n                  estimator.covariances_)\n\n    ax2.get_xaxis().set_tick_params(direction='out')\n    ax2.yaxis.grid(True, alpha=0.7)\n    for k, w in enumerate(estimator.weights_):\n        ax2.bar(k - .45, w, width=0.9, color='royalblue', zorder=3)\n        ax2.text(k, w + 0.007, \"%.1f%%\" % (w * 100.),\n                 horizontalalignment='center')\n    ax2.set_xlim(-.6, 2 * n_components - .4)\n    ax2.set_ylim(0., 1.1)\n    ax2.tick_params(axis='y', which='both', left='off',\n                    right='off', labelleft='off')\n    ax2.tick_params(axis='x', which='both', top='off')\n\n    if plot_title:\n        ax1.set_ylabel('Estimated Mixtures')\n        ax2.set_ylabel('Weight of each component')\n\n# Parameters\nrandom_state = 2\nn_components, n_features = 3, 2\ncolors = np.array(['mediumseagreen', 'royalblue', 'r', 'gold',\n                   'orchid', 'indigo', 'darkcyan', 'tomato'])\ndirichlet_concentration_prior = np.logspace(-3, 3, 3)\ncovars = np.array([[[.7, .0], [.0, .1]],\n                   [[.5, .0], [.0, .1]],\n                   [[.5, .0], [.0, .1]]])\nsamples = np.array([200, 500, 200])\nmeans = np.array([[.0, -.70],\n                  [.0, .0],\n                  [.0, .70]])\n\n\n# Here we put mean_precision_prior to 0.8 to minimize the influence of the\n# prior for this dataset\nestimator = BayesianGaussianMixture(n_components=2 * n_components,\n                                    init_params='random', max_iter=1500,\n                                    mean_precision_prior=.8, tol=1e-9,\n                                    random_state=random_state)\n\n# Generate data\nrng = np.random.RandomState(random_state)\nX = np.vstack([\n    rng.multivariate_normal(means[j], covars[j], samples[j])\n    for j in range(n_components)])\ny = np.concatenate([j * np.ones(samples[j], dtype=int)\n                    for j in range(n_components)])\n\n# Plot Results\nplt.figure(figsize=(4.7 * 3, 8))\nplt.subplots_adjust(bottom=.04, top=0.95, hspace=.05, wspace=.05,\n                    left=.03, right=.97)\n\ngs = gridspec.GridSpec(3, len(dirichlet_concentration_prior))\nfor k, dc in enumerate(dirichlet_concentration_prior):\n    plot_results(plt.subplot(gs[0:2, k]), plt.subplot(gs[2, k]),\n                 estimator, dc, X, y, plot_title=k == 0)\n\nplt.show()"
      ],
      "outputs": [],
      "metadata": {
        "collapsed": false
      }
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 2",
      "name": "python2",
      "language": "python"
    },
    "language_info": {
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "name": "python",
      "file_extension": ".py",
      "version": "2.7.12",
      "pygments_lexer": "ipython2",
      "codemirror_mode": {
        "version": 2,
        "name": "ipython"
      }
    }
  }
}
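
The notebook's markdown cell above ties the value of the Dirichlet concentration prior to the number of components the model actually uses. Below is a minimal sketch of that effect, written against the released scikit-learn API, where this parameter was later renamed weight_concentration_prior; the blob data and the 1e-2 weight threshold are illustrative assumptions, not part of this commit.

import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.RandomState(2)
# Three well-separated blobs, loosely mimicking the example's dataset.
X = np.vstack([rng.randn(200, 2) / 3. + center
               for center in ([0., -2.], [0., 0.], [0., 2.])])

for prior in (1e-3, 1e0, 1e3):
    bgmm = BayesianGaussianMixture(n_components=6,
                                   weight_concentration_prior=prior,
                                   max_iter=500,
                                   random_state=2).fit(X)
    # A small prior leaves few components with non-negligible posterior
    # weight; a large prior spreads weight more uniformly across all six.
    active = np.sum(bgmm.weights_ > 1e-2)
    print("prior=%g: %d active components" % (prior, active))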
Lines changed: 114 additions & 0 deletions
@@ -0,0 +1,114 @@
"""
======================================================
Bayesian Gaussian Mixture Concentration Prior Analysis
======================================================

Plot the resulting ellipsoids of a mixture of three Gaussians with
variational Bayesian Gaussian Mixture for three different values of the
prior on the Dirichlet concentration.

For all models, the variational Bayesian Gaussian Mixture adapts its number
of mixture components automatically. The parameter
`dirichlet_concentration_prior` has a direct link with the resulting number of
components. Specifying a high value of `dirichlet_concentration_prior` leads
more often to uniformly-sized mixture components, while specifying small
(under 0.1) values will lead to some mixture components getting almost all the
points while most mixture components will be centered on just a few of the
remaining points.
"""
# Author: Thierry Guillemot <[email protected]>
# License: BSD 3 clause

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.mixture import BayesianGaussianMixture

print(__doc__)


def plot_ellipses(ax, weights, means, covars):
    for n in range(means.shape[0]):
        v, w = np.linalg.eigh(covars[n][:2, :2])
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan2(u[1], u[0])
        angle = 180 * angle / np.pi  # convert to degrees
        v = 2 * np.sqrt(2) * np.sqrt(v)
        ell = mpl.patches.Ellipse(means[n, :2], v[0], v[1], 180 + angle)
        ell.set_clip_box(ax.bbox)
        ell.set_alpha(weights[n])
        ax.add_artist(ell)


def plot_results(ax1, ax2, estimator, dirichlet_concentration_prior, X, y,
                 plot_title=False):
    estimator.dirichlet_concentration_prior = dirichlet_concentration_prior
    estimator.fit(X)
    ax1.set_title("Bayesian Gaussian Mixture for "
                  r"$dc_0=%.1e$" % dirichlet_concentration_prior)
    # ax1.axis('equal')
    ax1.scatter(X[:, 0], X[:, 1], s=5, marker='o', color=colors[y], alpha=0.8)
    ax1.set_xlim(-2., 2.)
    ax1.set_ylim(-3., 3.)
    ax1.set_xticks(())
    ax1.set_yticks(())
    plot_ellipses(ax1, estimator.weights_, estimator.means_,
                  estimator.covariances_)

    ax2.get_xaxis().set_tick_params(direction='out')
    ax2.yaxis.grid(True, alpha=0.7)
    for k, w in enumerate(estimator.weights_):
        ax2.bar(k - .45, w, width=0.9, color='royalblue', zorder=3)
        ax2.text(k, w + 0.007, "%.1f%%" % (w * 100.),
                 horizontalalignment='center')
    ax2.set_xlim(-.6, 2 * n_components - .4)
    ax2.set_ylim(0., 1.1)
    ax2.tick_params(axis='y', which='both', left='off',
                    right='off', labelleft='off')
    ax2.tick_params(axis='x', which='both', top='off')

    if plot_title:
        ax1.set_ylabel('Estimated Mixtures')
        ax2.set_ylabel('Weight of each component')

# Parameters
random_state = 2
n_components, n_features = 3, 2
colors = np.array(['mediumseagreen', 'royalblue', 'r', 'gold',
                   'orchid', 'indigo', 'darkcyan', 'tomato'])
dirichlet_concentration_prior = np.logspace(-3, 3, 3)
covars = np.array([[[.7, .0], [.0, .1]],
                   [[.5, .0], [.0, .1]],
                   [[.5, .0], [.0, .1]]])
samples = np.array([200, 500, 200])
means = np.array([[.0, -.70],
                  [.0, .0],
                  [.0, .70]])


# Here we put mean_precision_prior to 0.8 to minimize the influence of the
# prior for this dataset
estimator = BayesianGaussianMixture(n_components=2 * n_components,
                                    init_params='random', max_iter=1500,
                                    mean_precision_prior=.8, tol=1e-9,
                                    random_state=random_state)

# Generate data
rng = np.random.RandomState(random_state)
X = np.vstack([
    rng.multivariate_normal(means[j], covars[j], samples[j])
    for j in range(n_components)])
y = np.concatenate([j * np.ones(samples[j], dtype=int)
                    for j in range(n_components)])

# Plot Results
plt.figure(figsize=(4.7 * 3, 8))
plt.subplots_adjust(bottom=.04, top=0.95, hspace=.05, wspace=.05,
                    left=.03, right=.97)

gs = gridspec.GridSpec(3, len(dirichlet_concentration_prior))
for k, dc in enumerate(dirichlet_concentration_prior):
    plot_results(plt.subplot(gs[0:2, k]), plt.subplot(gs[2, k]),
                 estimator, dc, X, y, plot_title=k == 0)

plt.show()
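
A note on the 2 * np.sqrt(2) factor in plot_ellipses: since matplotlib's Ellipse takes full axis lengths, each drawn ellipse has semi-axes of sqrt(2) standard deviations along the covariance eigenvectors, i.e. it is the contour at squared Mahalanobis distance 2. For a 2-D Gaussian that contour encloses 1 - exp(-1), about 63.2% of the probability mass. A quick Monte Carlo check of that claim (a standalone sketch; the sample size is arbitrary):

import numpy as np

rng = np.random.RandomState(0)
cov = np.array([[.7, .0], [.0, .1]])  # first covariance from the example
Z = rng.multivariate_normal([0., 0.], cov, 100000)

# Squared Mahalanobis distance of every sample; the plotted ellipse is d2 <= 2.
d2 = np.einsum('ij,jk,ik->i', Z, np.linalg.inv(cov), Z)
print("fraction inside ellipse: %.3f" % np.mean(d2 <= 2.))  # ~0.632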
