
Commit 1dcd45e

Pushing the docs to dev/ for branch: master, commit 8e599c68fb06967313d0e76a053062158cd90312
1 parent 3bd450b commit 1dcd45e

File tree

1,017 files changed

+4078
-3299
lines changed

Binary file (5.56 KB) not shown.
Binary file (4.62 KB) not shown.
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Comparing anomaly detection algorithms for outlier detection on toy datasets\n\n\nThis example shows characteristics of different anomaly detection algorithms\non 2D datasets. Datasets contain one or two modes (regions of high density)\nto illustrate the ability of algorithms to cope with multimodal data.\n\nFor each dataset, 15% of samples are generated as random uniform noise. This\nproportion is the value given to the nu parameter of the OneClassSVM and the\ncontamination parameter of the other outlier detection algorithms.\nDecision boundaries between inliers and outliers are displayed in black.\n\nLocal Outlier Factor (LOF) does not show a decision boundary in black as it\nhas no predict method that can be applied to new data.\n\nWhile these examples give some intuition about the algorithms, this\nintuition might not apply to very high-dimensional data.\n\nFinally, note that the parameters of the models have been hand-picked here,\nbut in practice they need to be adjusted. In the absence of labelled data,\nthe problem is completely unsupervised, so model selection can be a challenge.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author: Alexandre Gramfort <[email protected]>\n#         Albert Thomas <[email protected]>\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nimport matplotlib\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.datasets import make_moons, make_blobs\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nmatplotlib.rcParams['contour.negative_linestyle'] = 'solid'\n\n# Example settings\nn_samples = 300\noutliers_fraction = 0.15\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\n\n# define outlier/anomaly detection methods to be compared\nanomaly_algorithms = [\n    (\"Robust covariance\", EllipticEnvelope(contamination=outliers_fraction)),\n    (\"One-Class SVM\", svm.OneClassSVM(nu=outliers_fraction, kernel=\"rbf\",\n                                      gamma=0.1)),\n    (\"Isolation Forest\", IsolationForest(contamination=outliers_fraction,\n                                         random_state=42)),\n    (\"Local Outlier Factor\", LocalOutlierFactor(\n        n_neighbors=35, contamination=outliers_fraction))]\n\n# Define datasets\nblobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)\ndatasets = [\n    make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5,\n               **blobs_params)[0],\n    make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, .3],\n               **blobs_params)[0],\n    4. * (make_moons(n_samples=n_samples, noise=.05, random_state=0)[0] -\n          np.array([0.5, 0.25])),\n    14. * (np.random.RandomState(42).rand(n_samples, 2) - 0.5)]\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, 150),\n                     np.linspace(-7, 7, 150))\n\nplt.figure(figsize=(len(anomaly_algorithms) * 2 + 3, 12.5))\nplt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,\n                    hspace=.01)\n\nplot_num = 1\nrng = np.random.RandomState(42)\n\nfor i_dataset, X in enumerate(datasets):\n    # Add outliers\n    X = np.concatenate([X, rng.uniform(low=-6, high=6,\n                       size=(n_outliers, 2))], axis=0)\n\n    for name, algorithm in anomaly_algorithms:\n        t0 = time.time()\n        algorithm.fit(X)\n        t1 = time.time()\n        plt.subplot(len(datasets), len(anomaly_algorithms), plot_num)\n        if i_dataset == 0:\n            plt.title(name, size=18)\n\n        # fit the data and tag outliers\n        if name == \"Local Outlier Factor\":\n            y_pred = algorithm.fit_predict(X)\n        else:\n            y_pred = algorithm.fit(X).predict(X)\n\n        # plot the level lines and the points\n        if name != \"Local Outlier Factor\":  # LOF does not implement predict\n            Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])\n            Z = Z.reshape(xx.shape)\n            plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')\n\n        colors = np.array(['#377eb8', '#ff7f00'])\n        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])\n\n        plt.xlim(-7, 7)\n        plt.ylim(-7, 7)\n        plt.xticks(())\n        plt.yticks(())\n        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),\n                 transform=plt.gca().transAxes, size=15,\n                 horizontalalignment='right')\n        plot_num += 1\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
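
The markdown cell above notes that the same 15% noise proportion is passed as nu to the OneClassSVM and as contamination to the other detectors. A minimal sketch of that correspondence (the blob-plus-uniform data, sample sizes, and gamma value below are illustrative assumptions, not part of this commit):

    import numpy as np
    from sklearn.svm import OneClassSVM
    from sklearn.ensemble import IsolationForest

    rng = np.random.RandomState(0)
    X = np.concatenate([rng.randn(255, 2),                  # inlier cloud
                        rng.uniform(-6, 6, size=(45, 2))])  # ~15% uniform noise

    for est in (OneClassSVM(nu=0.15, gamma=0.1),
                IsolationForest(contamination=0.15, random_state=42)):
        y_pred = est.fit(X).predict(X)  # +1 = inlier, -1 = outlier
        print(type(est).__name__, (y_pred == -1).mean())

Both printed fractions should land near 0.15, with the caveat that nu is only an upper bound on the fraction of training errors for the one-class SVM, whereas contamination sets the decision threshold directly.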
Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
"""
============================================================================
Comparing anomaly detection algorithms for outlier detection on toy datasets
============================================================================

This example shows characteristics of different anomaly detection algorithms
on 2D datasets. Datasets contain one or two modes (regions of high density)
to illustrate the ability of algorithms to cope with multimodal data.

For each dataset, 15% of samples are generated as random uniform noise. This
proportion is the value given to the nu parameter of the OneClassSVM and the
contamination parameter of the other outlier detection algorithms.
Decision boundaries between inliers and outliers are displayed in black.

Local Outlier Factor (LOF) does not show a decision boundary in black as it
has no predict method that can be applied to new data.

While these examples give some intuition about the algorithms, this
intuition might not apply to very high-dimensional data.

Finally, note that the parameters of the models have been hand-picked here,
but in practice they need to be adjusted. In the absence of labelled data,
the problem is completely unsupervised, so model selection can be a challenge.
"""

# Author: Alexandre Gramfort <[email protected]>
#         Albert Thomas <[email protected]>
# License: BSD 3 clause

import time

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.datasets import make_moons, make_blobs
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

print(__doc__)

matplotlib.rcParams['contour.negative_linestyle'] = 'solid'

# Example settings
n_samples = 300
outliers_fraction = 0.15
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers

# define outlier/anomaly detection methods to be compared
anomaly_algorithms = [
    ("Robust covariance", EllipticEnvelope(contamination=outliers_fraction)),
    ("One-Class SVM", svm.OneClassSVM(nu=outliers_fraction, kernel="rbf",
                                      gamma=0.1)),
    ("Isolation Forest", IsolationForest(contamination=outliers_fraction,
                                         random_state=42)),
    ("Local Outlier Factor", LocalOutlierFactor(
        n_neighbors=35, contamination=outliers_fraction))]

# Define datasets
blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
datasets = [
    make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5,
               **blobs_params)[0],
    make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, .3],
               **blobs_params)[0],
    4. * (make_moons(n_samples=n_samples, noise=.05, random_state=0)[0] -
          np.array([0.5, 0.25])),
    14. * (np.random.RandomState(42).rand(n_samples, 2) - 0.5)]

# Compare given classifiers under given settings
xx, yy = np.meshgrid(np.linspace(-7, 7, 150),
                     np.linspace(-7, 7, 150))

plt.figure(figsize=(len(anomaly_algorithms) * 2 + 3, 12.5))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)

plot_num = 1
rng = np.random.RandomState(42)

for i_dataset, X in enumerate(datasets):
    # Add outliers
    X = np.concatenate([X, rng.uniform(low=-6, high=6,
                       size=(n_outliers, 2))], axis=0)

    for name, algorithm in anomaly_algorithms:
        t0 = time.time()
        algorithm.fit(X)
        t1 = time.time()
        plt.subplot(len(datasets), len(anomaly_algorithms), plot_num)
        if i_dataset == 0:
            plt.title(name, size=18)

        # fit the data and tag outliers
        if name == "Local Outlier Factor":
            y_pred = algorithm.fit_predict(X)
        else:
            y_pred = algorithm.fit(X).predict(X)

        # plot the level lines and the points
        if name != "Local Outlier Factor":  # LOF does not implement predict
            Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])
            Z = Z.reshape(xx.shape)
            plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')

        colors = np.array(['#377eb8', '#ff7f00'])
        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])

        plt.xlim(-7, 7)
        plt.ylim(-7, 7)
        plt.xticks(())
        plt.yticks(())
        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                 transform=plt.gca().transAxes, size=15,
                 horizontalalignment='right')
        plot_num += 1

plt.show()
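
As the docstring explains, LOF is special-cased above because its default mode has no predict method for new data. For scoring genuinely unseen points, a minimal sketch assuming a scikit-learn version (0.20 or later) where LocalOutlierFactor exposes the novelty parameter; the training cloud and test points are illustrative assumptions:

    import numpy as np
    from sklearn.neighbors import LocalOutlierFactor

    rng = np.random.RandomState(42)
    X_train = rng.randn(200, 2)             # illustrative inlier cloud
    X_new = np.array([[0., 0.], [5., 5.]])  # a central point and a far-out one

    # With novelty=True, LOF is fit on training data and exposes predict()
    # and decision_function() for unseen samples; fit_predict() is instead
    # reserved for the default (novelty=False) outlier-detection mode.
    lof = LocalOutlierFactor(n_neighbors=35, novelty=True, contamination=0.15)
    lof.fit(X_train)
    print(lof.predict(X_new))               # +1 = inlier, -1 = outlier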

dev/_downloads/scikit-learn-docs.pdf

Binary file (590 KB) not shown.

Other binary files changed (not shown): -496 Bytes, -220 Bytes, 671 Bytes

0 commit comments