 30 |  30 | print(__doc__)
 31 |  31 |
 32 |  32 | import numpy as np
    |  33 | +from scipy import stats
 33 |  34 | import matplotlib.pyplot as plt
 34 |  35 | import matplotlib.font_manager
 35 |     | -from scipy import stats
 36 |  36 |
 37 |  37 | from sklearn import svm
 38 |  38 | from sklearn.covariance import EllipticEnvelope

 49 |  49 | classifiers = {
 50 |  50 |     "One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
 51 |  51 |                                      kernel="rbf", gamma=0.1),
 52 |     | -    "robust covariance estimator": EllipticEnvelope(contamination=.1),
 53 |     | -    "Isolation Forest": IsolationForest(max_samples=n_samples, random_state=rng)}
    |  52 | +    "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
    |  53 | +    "Isolation Forest": IsolationForest(max_samples=n_samples,
    |  54 | +                                        contamination=outliers_fraction,
    |  55 | +                                        random_state=rng)}
 54 |  56 |
 55 |  57 | # Compare given classifiers under given settings
 56 |  58 | xx, yy = np.meshgrid(np.linspace(-7, 7, 500), np.linspace(-7, 7, 500))
 57 |  59 | n_inliers = int((1. - outliers_fraction) * n_samples)
 58 |  60 | n_outliers = int(outliers_fraction * n_samples)
 59 |  61 | ground_truth = np.ones(n_samples, dtype=int)
 60 |     | -ground_truth[-n_outliers:] = 0
    |  62 | +ground_truth[-n_outliers:] = -1
 61 |  63 |
 62 |  64 | # Fit the problem with varying cluster separation
 63 |  65 | for i, offset in enumerate(clusters_separation):
 64 |  66 |     np.random.seed(42)
 65 |  67 |     # Data generation
 66 |     | -    X1 = 0.3 * np.random.randn(0.5 * n_inliers, 2) - offset
 67 |     | -    X2 = 0.3 * np.random.randn(0.5 * n_inliers, 2) + offset
    |  68 | +    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
    |  69 | +    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
 68 |  70 |     X = np.r_[X1, X2]
 69 |  71 |     # Add outliers
 70 |  72 |     X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]
 71 |  73 |
 72 |  74 |     # Fit the model
 73 |     | -    plt.figure(figsize=(10, 5))
    |  75 | +    plt.figure(figsize=(10.8, 3.6))
 74 |  76 |     for i, (clf_name, clf) in enumerate(classifiers.items()):
 75 |  77 |         # fit the data and tag outliers
 76 |  78 |         clf.fit(X)
 77 |     | -        y_pred = clf.decision_function(X).ravel()
 78 |     | -        threshold = stats.scoreatpercentile(y_pred,
    |  79 | +        scores_pred = clf.decision_function(X)
    |  80 | +        threshold = stats.scoreatpercentile(scores_pred,
 79 |  81 |                                             100 * outliers_fraction)
 80 |     | -        y_pred = y_pred > threshold
    |  82 | +        y_pred = clf.predict(X)
 81 |  83 |         n_errors = (y_pred != ground_truth).sum()
 82 |  84 |         # plot the levels lines and the points
 83 |  85 |         Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
 84 |  86 |         Z = Z.reshape(xx.shape)
 85 |  87 |         subplot = plt.subplot(1, 3, i + 1)
 86 |     | -        subplot.set_title("Outlier detection")
 87 |  88 |         subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
 88 |  89 |                          cmap=plt.cm.Blues_r)
 89 |  90 |         a = subplot.contour(xx, yy, Z, levels=[threshold],
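The hunk above moves the example onto scikit-learn's labelling convention: with `contamination` set on the estimator, `predict` returns +1 for inliers and -1 for outliers, which is why `ground_truth` now uses -1 and the manual `y_pred > threshold` step is replaced by `clf.predict(X)`. A minimal sketch of that convention; data sizes and parameter values here are illustrative assumptions, not taken from this commit:

```python
import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(42)
n_inliers, n_outliers = 90, 10  # illustrative sizes, not the example's
X = np.r_[0.3 * rng.randn(n_inliers, 2),
          rng.uniform(low=-6, high=6, size=(n_outliers, 2))]

clf = IsolationForest(max_samples=100, contamination=0.1, random_state=rng)
clf.fit(X)
y_pred = clf.predict(X)  # +1 for inliers, -1 for outliers

ground_truth = np.ones(n_inliers + n_outliers, dtype=int)
ground_truth[-n_outliers:] = -1  # outliers labelled -1, as in the hunk above
print("errors:", (y_pred != ground_truth).sum())
```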
 96 |  97 |         subplot.legend(
 97 |  98 |             [a.collections[0], b, c],
 98 |  99 |             ['learned decision function', 'true inliers', 'true outliers'],
 99 |     | -            prop=matplotlib.font_manager.FontProperties(size=11))
100 |     | -        subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
    | 100 | +            prop=matplotlib.font_manager.FontProperties(size=11),
    | 101 | +            loc='lower right')
    | 102 | +        subplot.set_title("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
101 | 103 |         subplot.set_xlim((-7, 7))
102 | 104 |         subplot.set_ylim((-7, 7))
103 |     | -    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    | 105 | +    plt.subplots_adjust(0.04, 0.1, 0.96, 0.92, 0.1, 0.26)
104 | 106 |
105 | 107 | plt.show()
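Note that the percentile threshold is kept after this change, but only for drawing: `stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)` picks the score level at the contamination quantile, and the filled contours shade everything below it. A short sketch of that boundary computation, again with assumed illustrative data rather than values from this commit:

```python
import numpy as np
from scipy import stats
from sklearn.covariance import EllipticEnvelope

rng = np.random.RandomState(42)
outliers_fraction = 0.1  # assumed value for illustration
X = np.r_[0.3 * rng.randn(90, 2),
          rng.uniform(low=-6, high=6, size=(10, 2))]

clf = EllipticEnvelope(contamination=outliers_fraction).fit(X)
scores_pred = clf.decision_function(X)

# Score level at the contamination quantile; the plot shades the region
# from Z.min() up to this level as the predicted outlier area.
threshold = stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)
print("decision boundary drawn at score %.3f" % threshold)
```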