Skip to content

Commit 4e1546b

Browse files
committed
Pushing the docs for revision for branch: master, commit ea947b8adfd21fbdf2d0de5db40b72364c50bf40
1 parent 4ceb51d commit 4e1546b

File tree

808 files changed

+2774
-2763
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

808 files changed

+2774
-2763
lines changed

dev/_downloads/plot_outlier_detection.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
print(__doc__)
3131

3232
import numpy as np
33+
from scipy import stats
3334
import matplotlib.pyplot as plt
3435
import matplotlib.font_manager
35-
from scipy import stats
3636

3737
from sklearn import svm
3838
from sklearn.covariance import EllipticEnvelope
@@ -49,41 +49,42 @@
4949
classifiers = {
5050
"One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
5151
kernel="rbf", gamma=0.1),
52-
"robust covariance estimator": EllipticEnvelope(contamination=.1),
53-
"Isolation Forest": IsolationForest(max_samples=n_samples, random_state=rng)}
52+
"Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
53+
"Isolation Forest": IsolationForest(max_samples=n_samples,
54+
contamination=outliers_fraction,
55+
random_state=rng)}
5456

5557
# Compare given classifiers under given settings
5658
xx, yy = np.meshgrid(np.linspace(-7, 7, 500), np.linspace(-7, 7, 500))
5759
n_inliers = int((1. - outliers_fraction) * n_samples)
5860
n_outliers = int(outliers_fraction * n_samples)
5961
ground_truth = np.ones(n_samples, dtype=int)
60-
ground_truth[-n_outliers:] = 0
62+
ground_truth[-n_outliers:] = -1
6163

6264
# Fit the problem with varying cluster separation
6365
for i, offset in enumerate(clusters_separation):
6466
np.random.seed(42)
6567
# Data generation
66-
X1 = 0.3 * np.random.randn(0.5 * n_inliers, 2) - offset
67-
X2 = 0.3 * np.random.randn(0.5 * n_inliers, 2) + offset
68+
X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
69+
X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
6870
X = np.r_[X1, X2]
6971
# Add outliers
7072
X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]
7173

7274
# Fit the model
73-
plt.figure(figsize=(10, 5))
75+
plt.figure(figsize=(10.8, 3.6))
7476
for i, (clf_name, clf) in enumerate(classifiers.items()):
7577
# fit the data and tag outliers
7678
clf.fit(X)
77-
y_pred = clf.decision_function(X).ravel()
78-
threshold = stats.scoreatpercentile(y_pred,
79+
scores_pred = clf.decision_function(X)
80+
threshold = stats.scoreatpercentile(scores_pred,
7981
100 * outliers_fraction)
80-
y_pred = y_pred > threshold
82+
y_pred = clf.predict(X)
8183
n_errors = (y_pred != ground_truth).sum()
8284
# plot the levels lines and the points
8385
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
8486
Z = Z.reshape(xx.shape)
8587
subplot = plt.subplot(1, 3, i + 1)
86-
subplot.set_title("Outlier detection")
8788
subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
8889
cmap=plt.cm.Blues_r)
8990
a = subplot.contour(xx, yy, Z, levels=[threshold],
@@ -96,10 +97,11 @@
9697
subplot.legend(
9798
[a.collections[0], b, c],
9899
['learned decision function', 'true inliers', 'true outliers'],
99-
prop=matplotlib.font_manager.FontProperties(size=11))
100-
subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
100+
prop=matplotlib.font_manager.FontProperties(size=11),
101+
loc='lower right')
102+
subplot.set_title("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
101103
subplot.set_xlim((-7, 7))
102104
subplot.set_ylim((-7, 7))
103-
plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
105+
plt.subplots_adjust(0.04, 0.1, 0.96, 0.92, 0.1, 0.26)
104106

105107
plt.show()
20 Bytes
20 Bytes
35 Bytes
-196 Bytes
-196 Bytes
332 Bytes
332 Bytes
236 Bytes

0 commit comments

Comments (0)