scikit-learn
diff --git a/‎dev/_downloads/auto_examples_jupyter.zip
-207 Bytes b/‎dev/_downloads/auto_examples_jupyter.zip
-207 Bytes
diff --git a/‎dev/_downloads/auto_examples_python.zip
-204 Bytes b/‎dev/_downloads/auto_examples_python.zip
-204 Bytes
diff --git a/‎dev/_downloads/plot_outlier_detection.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/plot_outlier_detection.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/plot_outlier_detection.py
Lines changed: 3 additions & 5 deletions b/‎dev/_downloads/plot_outlier_detection.py
Lines changed: 3 additions & 5 deletions
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nSEED = 42\nGRID_PRECISION = 100\n\nrng = np.random.RandomState(SEED)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = (0, 1, 2)\n\n# define two outlier detection tools to be compared\nclassifiers = {\n    \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n                                     kernel=\"rbf\", gamma=0.1),\n    \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n    \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n                                        contamination=outliers_fraction,\n                                        random_state=rng),\n    \"Local Outlier Factor\": LocalOutlierFactor(\n        n_neighbors=35,\n        contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),\n                     np.linspace(-7, 7, GRID_PRECISION))\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor _, offset in enumerate(clusters_separation):\n    np.random.seed(SEED)\n    # Data generation\n    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n    X = np.concatenate([X1, X2], axis=0)\n    # Add outliers\n    X = np.concatenate([X, np.random.uniform(low=-6, high=6,\n                       size=(n_outliers, 2))], axis=0)\n\n    # Fit the model\n    plt.figure(figsize=(9, 7))\n    for i, (clf_name, clf) in enumerate(classifiers.items()):\n        # fit the data and tag outliers\n        if clf_name == \"Local Outlier Factor\":\n            y_pred = clf.fit_predict(X)\n            scores_pred = clf.negative_outlier_factor_\n        else:\n            clf.fit(X)\n            scores_pred = clf.decision_function(X)\n            y_pred = clf.predict(X)\n        threshold = stats.scoreatpercentile(scores_pred,\n                                            100 * outliers_fraction)\n        n_errors = (y_pred != ground_truth).sum()\n        # plot the levels lines and the points\n        if clf_name == \"Local Outlier Factor\":\n            # decision_function is private for LOF\n            Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n        else:\n            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n        Z = Z.reshape(xx.shape)\n        subplot = plt.subplot(2, 2, i + 1)\n        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),\n                         cmap=plt.cm.Blues_r)\n        a = subplot.contour(xx, yy, Z, levels=[threshold],\n                            linewidths=2, colors='red')\n        subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],\n                         colors='orange')\n        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n                            s=20, edgecolor='k')\n        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n                            s=20, edgecolor='k')\n        subplot.axis('tight')\n        subplot.legend(\n            [a.collections[0], b, c],\n            ['learned decision function', 'true inliers', 'true outliers'],\n            prop=matplotlib.font_manager.FontProperties(size=10),\n            loc='lower right')\n        subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n        subplot.set_xlim((-7, 7))\n        subplot.set_ylim((-7, 7))\n    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n    plt.suptitle(\"Outlier detection\")\n\nplt.show()"
+        "import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nSEED = 42\nGRID_PRECISION = 100\n\nrng = np.random.RandomState(SEED)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = (0, 1, 2)\n\n# define two outlier detection tools to be compared\nclassifiers = {\n    \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n                                     kernel=\"rbf\", gamma=0.1),\n    \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n    \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n                                        contamination=outliers_fraction,\n                                        random_state=rng),\n    \"Local Outlier Factor\": LocalOutlierFactor(\n        n_neighbors=35,\n        contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),\n                     np.linspace(-7, 7, GRID_PRECISION))\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor _, offset in enumerate(clusters_separation):\n    np.random.seed(SEED)\n    # Data generation\n    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n    X = np.concatenate([X1, X2], axis=0)\n    # Add outliers\n    X = np.concatenate([X, np.random.uniform(low=-6, high=6,\n                       size=(n_outliers, 2))], axis=0)\n\n    # Fit the model\n    plt.figure(figsize=(9, 7))\n    for i, (clf_name, clf) in enumerate(classifiers.items()):\n        # fit the data and tag outliers\n        if clf_name == \"Local Outlier Factor\":\n            y_pred = clf.fit_predict(X)\n            scores_pred = clf.negative_outlier_factor_\n        else:\n            clf.fit(X)\n            scores_pred = clf.decision_function(X)\n            y_pred = clf.predict(X)\n        n_errors = (y_pred != ground_truth).sum()\n        # plot the levels lines and the points\n        if clf_name == \"Local Outlier Factor\":\n            # decision_function is private for LOF\n            Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n        else:\n            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n        Z = Z.reshape(xx.shape)\n        subplot = plt.subplot(2, 2, i + 1)\n        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7),\n                         cmap=plt.cm.Blues_r)\n        a = subplot.contour(xx, yy, Z, levels=[0],\n                            linewidths=2, colors='red')\n        subplot.contourf(xx, yy, Z, levels=[0, Z.max()],\n                         colors='orange')\n        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n                            s=20, edgecolor='k')\n        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n                            s=20, edgecolor='k')\n        subplot.axis('tight')\n        subplot.legend(\n            [a.collections[0], b, c],\n            ['learned decision function', 'true inliers', 'true outliers'],\n            prop=matplotlib.font_manager.FontProperties(size=10),\n            loc='lower right')\n        subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n        subplot.set_xlim((-7, 7))\n        subplot.set_ylim((-7, 7))\n    plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n    plt.suptitle(\"Outlier detection\")\n\nplt.show()"
       ]
     }
   ],
 
@@ -95,8 +95,6 @@
             clf.fit(X)
             scores_pred = clf.decision_function(X)
             y_pred = clf.predict(X)
-        threshold = stats.scoreatpercentile(scores_pred,
-                                            100 * outliers_fraction)
         n_errors = (y_pred != ground_truth).sum()
         # plot the levels lines and the points
         if clf_name == "Local Outlier Factor":
@@ -106,11 +104,11 @@
             Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
         Z = Z.reshape(xx.shape)
         subplot = plt.subplot(2, 2, i + 1)
-        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
+        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7),
                          cmap=plt.cm.Blues_r)
-        a = subplot.contour(xx, yy, Z, levels=[threshold],
+        a = subplot.contour(xx, yy, Z, levels=[0],
                             linewidths=2, colors='red')
-        subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
+        subplot.contourf(xx, yy, Z, levels=[0, Z.max()],
                          colors='orange')
         b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
                             s=20, edgecolor='k')
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nSEED = 42\nGRID_PRECISION = 100\n\nrng = np.random.RandomState(SEED)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = (0, 1, 2)\n\n# define two outlier detection tools to be compared\nclassifiers = {\n \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n kernel=\"rbf\", gamma=0.1),\n \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n contamination=outliers_fraction,\n random_state=rng),\n \"Local Outlier Factor\": LocalOutlierFactor(\n n_neighbors=35,\n contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),\n np.linspace(-7, 7, GRID_PRECISION))\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor _, offset in enumerate(clusters_separation):\n np.random.seed(SEED)\n # Data generation\n X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n X = np.concatenate([X1, X2], axis=0)\n # Add outliers\n X = np.concatenate([X, np.random.uniform(low=-6, high=6,\n size=(n_outliers, 2))], axis=0)\n\n # Fit the model\n plt.figure(figsize=(9, 7))\n for i, (clf_name, clf) in enumerate(classifiers.items()):\n # fit the data and tag outliers\n if clf_name == \"Local Outlier Factor\":\n y_pred = clf.fit_predict(X)\n scores_pred = clf.negative_outlier_factor_\n else:\n clf.fit(X)\n scores_pred = clf.decision_function(X)\n y_pred = clf.predict(X)\n threshold = stats.scoreatpercentile(scores_pred,\n 100 * outliers_fraction)\n n_errors = (y_pred != ground_truth).sum()\n # plot the levels lines and the points\n if clf_name == \"Local Outlier Factor\":\n # decision_function is private for LOF\n Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n subplot = plt.subplot(2, 2, i + 1)\n subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),\n cmap=plt.cm.Blues_r)\n a = subplot.contour(xx, yy, Z, levels=[threshold],\n linewidths=2, colors='red')\n subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],\n colors='orange')\n b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n s=20, edgecolor='k')\n c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n s=20, edgecolor='k')\n subplot.axis('tight')\n subplot.legend(\n [a.collections[0], b, c],\n ['learned decision function', 'true inliers', 'true outliers'],\n prop=matplotlib.font_manager.FontProperties(size=10),\n loc='lower right')\n subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n subplot.set_xlim((-7, 7))\n subplot.set_ylim((-7, 7))\n plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n plt.suptitle(\"Outlier detection\")\n\nplt.show()"
	`29`	+ "import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nSEED = 42\nGRID_PRECISION = 100\n\nrng = np.random.RandomState(SEED)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = (0, 1, 2)\n\n# define two outlier detection tools to be compared\nclassifiers = {\n \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n kernel=\"rbf\", gamma=0.1),\n \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n contamination=outliers_fraction,\n random_state=rng),\n \"Local Outlier Factor\": LocalOutlierFactor(\n n_neighbors=35,\n contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),\n np.linspace(-7, 7, GRID_PRECISION))\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor _, offset in enumerate(clusters_separation):\n np.random.seed(SEED)\n # Data generation\n X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n X = np.concatenate([X1, X2], axis=0)\n # Add outliers\n X = np.concatenate([X, np.random.uniform(low=-6, high=6,\n size=(n_outliers, 2))], axis=0)\n\n # Fit the model\n plt.figure(figsize=(9, 7))\n for i, (clf_name, clf) in enumerate(classifiers.items()):\n # fit the data and tag outliers\n if clf_name == \"Local Outlier Factor\":\n y_pred = clf.fit_predict(X)\n scores_pred = clf.negative_outlier_factor_\n else:\n clf.fit(X)\n scores_pred = clf.decision_function(X)\n y_pred = clf.predict(X)\n n_errors = (y_pred != ground_truth).sum()\n # plot the levels lines and the points\n if clf_name == \"Local Outlier Factor\":\n # decision_function is private for LOF\n Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n subplot = plt.subplot(2, 2, i + 1)\n subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7),\n cmap=plt.cm.Blues_r)\n a = subplot.contour(xx, yy, Z, levels=[0],\n linewidths=2, colors='red')\n subplot.contourf(xx, yy, Z, levels=[0, Z.max()],\n colors='orange')\n b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n s=20, edgecolor='k')\n c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n s=20, edgecolor='k')\n subplot.axis('tight')\n subplot.legend(\n [a.collections[0], b, c],\n ['learned decision function', 'true inliers', 'true outliers'],\n prop=matplotlib.font_manager.FontProperties(size=10),\n loc='lower right')\n subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n subplot.set_xlim((-7, 7))\n subplot.set_ylim((-7, 7))\n plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n plt.suptitle(\"Outlier detection\")\n\nplt.show()"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`