Skip to content

Commit 3831ebd

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 0788cd0c6a91c0d1cae17340cdf5d2af3c59ec57
1 parent 0da9dad commit 3831ebd

File tree

1,065 files changed

+3468
-3270
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,065 files changed

+3468
-3270
lines changed
-207 Bytes
Binary file not shown.
-204 Bytes
Binary file not shown.

dev/_downloads/plot_outlier_detection.ipynb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nSEED = 42\nGRID_PRECISION = 100\n\nrng = np.random.RandomState(SEED)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = (0, 1, 2)\n\n# define two outlier detection tools to be compared\nclassifiers = {\n \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n kernel=\"rbf\", gamma=0.1),\n \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n contamination=outliers_fraction,\n random_state=rng),\n \"Local Outlier Factor\": LocalOutlierFactor(\n n_neighbors=35,\n contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),\n np.linspace(-7, 7, GRID_PRECISION))\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor _, offset in enumerate(clusters_separation):\n np.random.seed(SEED)\n # Data generation\n X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n X = np.concatenate([X1, X2], axis=0)\n # Add outliers\n X = np.concatenate([X, np.random.uniform(low=-6, high=6,\n size=(n_outliers, 2))], axis=0)\n\n # Fit the model\n plt.figure(figsize=(9, 7))\n for i, (clf_name, clf) in enumerate(classifiers.items()):\n # fit the data and tag outliers\n if clf_name == \"Local Outlier Factor\":\n y_pred = clf.fit_predict(X)\n scores_pred = clf.negative_outlier_factor_\n else:\n clf.fit(X)\n scores_pred = clf.decision_function(X)\n y_pred = 
clf.predict(X)\n threshold = stats.scoreatpercentile(scores_pred,\n 100 * outliers_fraction)\n n_errors = (y_pred != ground_truth).sum()\n # plot the levels lines and the points\n if clf_name == \"Local Outlier Factor\":\n # decision_function is private for LOF\n Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n subplot = plt.subplot(2, 2, i + 1)\n subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),\n cmap=plt.cm.Blues_r)\n a = subplot.contour(xx, yy, Z, levels=[threshold],\n linewidths=2, colors='red')\n subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],\n colors='orange')\n b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n s=20, edgecolor='k')\n c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n s=20, edgecolor='k')\n subplot.axis('tight')\n subplot.legend(\n [a.collections[0], b, c],\n ['learned decision function', 'true inliers', 'true outliers'],\n prop=matplotlib.font_manager.FontProperties(size=10),\n loc='lower right')\n subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n subplot.set_xlim((-7, 7))\n subplot.set_ylim((-7, 7))\n plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n plt.suptitle(\"Outlier detection\")\n\nplt.show()"
29+
"import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nSEED = 42\nGRID_PRECISION = 100\n\nrng = np.random.RandomState(SEED)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = (0, 1, 2)\n\n# define two outlier detection tools to be compared\nclassifiers = {\n \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n kernel=\"rbf\", gamma=0.1),\n \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n contamination=outliers_fraction,\n random_state=rng),\n \"Local Outlier Factor\": LocalOutlierFactor(\n n_neighbors=35,\n contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),\n np.linspace(-7, 7, GRID_PRECISION))\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor _, offset in enumerate(clusters_separation):\n np.random.seed(SEED)\n # Data generation\n X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n X = np.concatenate([X1, X2], axis=0)\n # Add outliers\n X = np.concatenate([X, np.random.uniform(low=-6, high=6,\n size=(n_outliers, 2))], axis=0)\n\n # Fit the model\n plt.figure(figsize=(9, 7))\n for i, (clf_name, clf) in enumerate(classifiers.items()):\n # fit the data and tag outliers\n if clf_name == \"Local Outlier Factor\":\n y_pred = clf.fit_predict(X)\n scores_pred = clf.negative_outlier_factor_\n else:\n clf.fit(X)\n scores_pred = clf.decision_function(X)\n y_pred = 
clf.predict(X)\n n_errors = (y_pred != ground_truth).sum()\n # plot the levels lines and the points\n if clf_name == \"Local Outlier Factor\":\n # decision_function is private for LOF\n Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n subplot = plt.subplot(2, 2, i + 1)\n subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7),\n cmap=plt.cm.Blues_r)\n a = subplot.contour(xx, yy, Z, levels=[0],\n linewidths=2, colors='red')\n subplot.contourf(xx, yy, Z, levels=[0, Z.max()],\n colors='orange')\n b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n s=20, edgecolor='k')\n c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n s=20, edgecolor='k')\n subplot.axis('tight')\n subplot.legend(\n [a.collections[0], b, c],\n ['learned decision function', 'true inliers', 'true outliers'],\n prop=matplotlib.font_manager.FontProperties(size=10),\n loc='lower right')\n subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n subplot.set_xlim((-7, 7))\n subplot.set_ylim((-7, 7))\n plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n plt.suptitle(\"Outlier detection\")\n\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_outlier_detection.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -95,8 +95,6 @@
9595
clf.fit(X)
9696
scores_pred = clf.decision_function(X)
9797
y_pred = clf.predict(X)
98-
threshold = stats.scoreatpercentile(scores_pred,
99-
100 * outliers_fraction)
10098
n_errors = (y_pred != ground_truth).sum()
10199
# plot the levels lines and the points
102100
if clf_name == "Local Outlier Factor":
@@ -106,11 +104,11 @@
106104
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
107105
Z = Z.reshape(xx.shape)
108106
subplot = plt.subplot(2, 2, i + 1)
109-
subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
107+
subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7),
110108
cmap=plt.cm.Blues_r)
111-
a = subplot.contour(xx, yy, Z, levels=[threshold],
109+
a = subplot.contour(xx, yy, Z, levels=[0],
112110
linewidths=2, colors='red')
113-
subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
111+
subplot.contourf(xx, yy, Z, levels=[0, Z.max()],
114112
colors='orange')
115113
b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
116114
s=20, edgecolor='k')

0 commit comments

Comments
 (0)