 30 |  30 | print(__doc__)
 31 |  31 |
 32 |  32 | import numpy as np
    |  33 | +from scipy import stats
 33 |  34 | import matplotlib.pyplot as plt
 34 |  35 | import matplotlib.font_manager
 35 |     | -from scipy import stats
 36 |  36 |
 37 |  37 | from sklearn import svm
 38 |  38 | from sklearn.covariance import EllipticEnvelope

 49 |  49 | classifiers = {
 50 |  50 |     "One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
 51 |  51 |                                      kernel="rbf", gamma=0.1),
 52 |     | -    "robust covariance estimator": EllipticEnvelope(contamination=.1),
 53 |     | -    "Isolation Forest": IsolationForest(max_samples=n_samples, random_state=rng)}
    |  52 | +    "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
    |  53 | +    "Isolation Forest": IsolationForest(max_samples=n_samples,
    |  54 | +                                        contamination=outliers_fraction,
    |  55 | +                                        random_state=rng)}
 54 |  56 |
 55 |  57 | # Compare given classifiers under given settings
 56 |  58 | xx, yy = np.meshgrid(np.linspace(-7, 7, 500), np.linspace(-7, 7, 500))
 57 |  59 | n_inliers = int((1. - outliers_fraction) * n_samples)
 58 |  60 | n_outliers = int(outliers_fraction * n_samples)
 59 |  61 | ground_truth = np.ones(n_samples, dtype=int)
 60 |     | -ground_truth[-n_outliers:] = 0
    |  62 | +ground_truth[-n_outliers:] = -1
 61 |  63 |
 62 |  64 | # Fit the problem with varying cluster separation
 63 |  65 | for i, offset in enumerate(clusters_separation):
 64 |  66 |     np.random.seed(42)
 65 |  67 |     # Data generation
 66 |     | -    X1 = 0.3 * np.random.randn(0.5 * n_inliers, 2) - offset
 67 |     | -    X2 = 0.3 * np.random.randn(0.5 * n_inliers, 2) + offset
    |  68 | +    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
    |  69 | +    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
 68 |  70 |     X = np.r_[X1, X2]
 69 |  71 |     # Add outliers
 70 |  72 |     X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]
 71 |  73 |
 72 |  74 |     # Fit the model
 73 |     | -    plt.figure(figsize=(10, 5))
    |  75 | +    plt.figure(figsize=(10.8, 3.6))
 74 |  76 |     for i, (clf_name, clf) in enumerate(classifiers.items()):
 75 |  77 |         # fit the data and tag outliers
 76 |  78 |         clf.fit(X)
 77 |     | -        y_pred = clf.decision_function(X).ravel()
 78 |     | -        threshold = stats.scoreatpercentile(y_pred,
    |  79 | +        scores_pred = clf.decision_function(X)
    |  80 | +        threshold = stats.scoreatpercentile(scores_pred,
 79 |  81 |                                             100 * outliers_fraction)
 80 |     | -        y_pred = y_pred > threshold
    |  82 | +        y_pred = clf.predict(X)
 81 |  83 |         n_errors = (y_pred != ground_truth).sum()
 82 |  84 |         # plot the levels lines and the points
 83 |  85 |         Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
 84 |  86 |         Z = Z.reshape(xx.shape)
 85 |  87 |         subplot = plt.subplot(1, 3, i + 1)
 86 |     | -        subplot.set_title("Outlier detection")
 87 |  88 |         subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
 88 |  89 |                          cmap=plt.cm.Blues_r)
 89 |  90 |         a = subplot.contour(xx, yy, Z, levels=[threshold],
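The hunk above moves the example onto scikit-learn's labelling convention: with `contamination` set on the estimator, `predict` returns +1 for inliers and -1 for outliers, which is why `ground_truth` now uses -1 and the manual `y_pred > threshold` step is replaced by `clf.predict(X)`. A minimal sketch of that convention; data sizes and parameter values here are illustrative assumptions, not taken from this commit:

```python
import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(42)
n_inliers, n_outliers = 90, 10  # illustrative sizes, not the example's
X = np.r_[0.3 * rng.randn(n_inliers, 2),
          rng.uniform(low=-6, high=6, size=(n_outliers, 2))]

clf = IsolationForest(max_samples=100, contamination=0.1, random_state=rng)
clf.fit(X)
y_pred = clf.predict(X)  # +1 for inliers, -1 for outliers

ground_truth = np.ones(n_inliers + n_outliers, dtype=int)
ground_truth[-n_outliers:] = -1  # outliers labelled -1, as in the hunk above
print("errors:", (y_pred != ground_truth).sum())
```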
 96 |  97 |         subplot.legend(
 97 |  98 |             [a.collections[0], b, c],
 98 |  99 |             ['learned decision function', 'true inliers', 'true outliers'],
 99 |     | -            prop=matplotlib.font_manager.FontProperties(size=11))
100 |     | -        subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
    | 100 | +            prop=matplotlib.font_manager.FontProperties(size=11),
    | 101 | +            loc='lower right')
    | 102 | +        subplot.set_title("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
101 | 103 |         subplot.set_xlim((-7, 7))
102 | 104 |         subplot.set_ylim((-7, 7))
103 |     | -    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    | 105 | +    plt.subplots_adjust(0.04, 0.1, 0.96, 0.92, 0.1, 0.26)
104 | 106 |
105 | 107 | plt.show()
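Note that the percentile threshold is kept after this change, but only for drawing: `stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)` picks the score level at the contamination quantile, and the filled contours shade everything below it. A short sketch of that boundary computation, again with assumed illustrative data rather than values from this commit:

```python
import numpy as np
from scipy import stats
from sklearn.covariance import EllipticEnvelope

rng = np.random.RandomState(42)
outliers_fraction = 0.1  # assumed value for illustration
X = np.r_[0.3 * rng.randn(90, 2),
          rng.uniform(low=-6, high=6, size=(10, 2))]

clf = EllipticEnvelope(contamination=outliers_fraction).fit(X)
scores_pred = clf.decision_function(X)

# Score level at the contamination quantile; the plot shades the region
# from Z.min() up to this level as the predicted outlier area.
threshold = stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)
print("decision boundary drawn at score %.3f" % threshold)
```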