sebvidal06
diff --git a/‎dev/_downloads/plot_gmm_classifier.py renamed to ‎dev/_downloads/plot_gmm_covariances.py
Lines changed: 27 additions & 21 deletions b/‎dev/_downloads/plot_gmm_classifier.py renamed to ‎dev/_downloads/plot_gmm_covariances.py
Lines changed: 27 additions & 21 deletions
diff --git a/‎dev/_images/plot_agglomerative_clustering.png
97 Bytes b/‎dev/_images/plot_agglomerative_clustering.png
97 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering1.png
97 Bytes b/‎dev/_images/plot_agglomerative_clustering1.png
97 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_001.png
188 Bytes b/‎dev/_images/plot_agglomerative_clustering_001.png
188 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_0011.png
188 Bytes b/‎dev/_images/plot_agglomerative_clustering_0011.png
188 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_002.png
410 Bytes b/‎dev/_images/plot_agglomerative_clustering_002.png
410 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_0021.png
410 Bytes b/‎dev/_images/plot_agglomerative_clustering_0021.png
410 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_003.png
267 Bytes b/‎dev/_images/plot_agglomerative_clustering_003.png
267 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_0031.png
267 Bytes b/‎dev/_images/plot_agglomerative_clustering_0031.png
267 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_004.png
-63 Bytes b/‎dev/_images/plot_agglomerative_clustering_004.png
-63 Bytes
@@ -1,17 +1,21 @@
 """
-==================
-GMM classification
-==================
+===============
+GMM covariances
+===============
 
-Demonstration of Gaussian mixture models for classification.
+Demonstration of several covariance types for Gaussian mixture models.
 
 See :ref:`gmm` for more information on the estimator.
 
-Plots predicted labels on both training and held out test data using a
-variety of GMM classifiers on the iris dataset.
+Although GMMs are often used for clustering, we can compare the obtained
+clusters with the actual classes from the dataset. We initialize the means
+of the Gaussians with the means of the classes from the training set to make
+this comparison valid.
 
-Compares GMMs with spherical, diagonal, full, and tied covariance
-matrices in increasing order of performance.  Although one would
+We plot predicted labels on both training and held out test data using a
+variety of GMM covariance types on the iris dataset.
+We compare GMMs with spherical, diagonal, full, and tied covariance
+matrices in increasing order of performance. Although one would
 expect full covariance to perform best in general, it is prone to
 overfitting on small datasets and does not generalize well to held out
 test data.
@@ -39,6 +43,8 @@
 
 
 colors = ['navy', 'turquoise', 'darkorange']
+
+
 def make_ellipses(gmm, ax):
     for n, color in enumerate(colors):
         v, w = np.linalg.eigh(gmm._get_covars()[n][:2, :2])
@@ -69,28 +75,29 @@ def make_ellipses(gmm, ax):
 n_classes = len(np.unique(y_train))
 
 # Try GMMs using different types of covariances.
-classifiers = dict((covar_type, GMM(n_components=n_classes,
-                    covariance_type=covar_type, init_params='wc', n_iter=20))
-                   for covar_type in ['spherical', 'diag', 'tied', 'full'])
+estimators = dict((covar_type,
+                   GMM(n_components=n_classes, covariance_type=covar_type,
+                       init_params='wc', n_iter=20))
+                  for covar_type in ['spherical', 'diag', 'tied', 'full'])
 
-n_classifiers = len(classifiers)
+n_estimators = len(estimators)
 
-plt.figure(figsize=(3 * n_classifiers / 2, 6))
+plt.figure(figsize=(3 * n_estimators / 2, 6))
 plt.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05,
                     left=.01, right=.99)
 
 
-for index, (name, classifier) in enumerate(classifiers.items()):
+for index, (name, estimator) in enumerate(estimators.items()):
     # Since we have class labels for the training data, we can
     # initialize the GMM parameters in a supervised manner.
-    classifier.means_ = np.array([X_train[y_train == i].mean(axis=0)
+    estimator.means_ = np.array([X_train[y_train == i].mean(axis=0)
                                   for i in xrange(n_classes)])
 
     # Train the other parameters using the EM algorithm.
-    classifier.fit(X_train)
+    estimator.fit(X_train)
 
-    h = plt.subplot(2, n_classifiers / 2, index + 1)
-    make_ellipses(classifier, h)
+    h = plt.subplot(2, n_estimators / 2, index + 1)
+    make_ellipses(estimator, h)
 
     for n, color in enumerate(colors):
         data = iris.data[iris.target == n]
@@ -99,15 +106,14 @@ def make_ellipses(gmm, ax):
     # Plot the test data with crosses
     for n, color in enumerate(colors):
         data = X_test[y_test == n]
-        print(color)
         plt.scatter(data[:, 0], data[:, 1], marker='x', color=color)
 
-    y_train_pred = classifier.predict(X_train)
+    y_train_pred = estimator.predict(X_train)
     train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
     plt.text(0.05, 0.9, 'Train accuracy: %.1f' % train_accuracy,
              transform=h.transAxes)
 
-    y_test_pred = classifier.predict(X_test)
+    y_test_pred = estimator.predict(X_test)
     test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
     plt.text(0.05, 0.8, 'Test accuracy: %.1f' % test_accuracy,
              transform=h.transAxes)