Skip to content

Commit f9414ec

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 778b11904e8ec0286e977582d37e7ca495947ee5
1 parent e27ab92 commit f9414ec

File tree

1,088 files changed

+3290
-3300
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,088 files changed

+3290
-3300
lines changed
-121 Bytes
Binary file not shown.
-119 Bytes
Binary file not shown.

dev/_downloads/plot_anomaly_comparison.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Author: Alexandre Gramfort <[email protected]>\n# Albert Thomas <[email protected]>\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nimport matplotlib\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.datasets import make_moons, make_blobs\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nmatplotlib.rcParams['contour.negative_linestyle'] = 'solid'\n\n# Example settings\nn_samples = 300\noutliers_fraction = 0.15\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\n\n# define outlier/anomaly detection methods to be compared\nanomaly_algorithms = [\n (\"Robust covariance\", EllipticEnvelope(contamination=outliers_fraction)),\n (\"One-Class SVM\", svm.OneClassSVM(nu=outliers_fraction, kernel=\"rbf\",\n gamma=0.1)),\n (\"Isolation Forest\", IsolationForest(behaviour='new',\n contamination=outliers_fraction,\n random_state=42)),\n (\"Local Outlier Factor\", LocalOutlierFactor(\n n_neighbors=35, contamination=outliers_fraction))]\n\n# Define datasets\nblobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)\ndatasets = [\n make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5,\n **blobs_params)[0],\n make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5],\n **blobs_params)[0],\n make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, .3],\n **blobs_params)[0],\n 4. * (make_moons(n_samples=n_samples, noise=.05, random_state=0)[0] -\n np.array([0.5, 0.25])),\n 14. 
* (np.random.RandomState(42).rand(n_samples, 2) - 0.5)]\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, 150),\n np.linspace(-7, 7, 150))\n\nplt.figure(figsize=(len(anomaly_algorithms) * 2 + 3, 12.5))\nplt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,\n hspace=.01)\n\nplot_num = 1\nrng = np.random.RandomState(42)\n\nfor i_dataset, X in enumerate(datasets):\n # Add outliers\n X = np.concatenate([X, rng.uniform(low=-6, high=6,\n size=(n_outliers, 2))], axis=0)\n\n for name, algorithm in anomaly_algorithms:\n t0 = time.time()\n algorithm.fit(X)\n t1 = time.time()\n plt.subplot(len(datasets), len(anomaly_algorithms), plot_num)\n if i_dataset == 0:\n plt.title(name, size=18)\n\n # fit the data and tag outliers\n if name == \"Local Outlier Factor\":\n y_pred = algorithm.fit_predict(X)\n else:\n y_pred = algorithm.fit(X).predict(X)\n\n # plot the levels lines and the points\n if name != \"Local Outlier Factor\": # LOF does not implement predict\n Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')\n\n colors = np.array(['#377eb8', '#ff7f00'])\n plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])\n\n plt.xlim(-7, 7)\n plt.ylim(-7, 7)\n plt.xticks(())\n plt.yticks(())\n plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),\n transform=plt.gca().transAxes, size=15,\n horizontalalignment='right')\n plot_num += 1\n\nplt.show()"
29+
"# Author: Alexandre Gramfort <[email protected]>\n# Albert Thomas <[email protected]>\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nimport matplotlib\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.datasets import make_moons, make_blobs\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nmatplotlib.rcParams['contour.negative_linestyle'] = 'solid'\n\n# Example settings\nn_samples = 300\noutliers_fraction = 0.15\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\n\n# define outlier/anomaly detection methods to be compared\nanomaly_algorithms = [\n (\"Robust covariance\", EllipticEnvelope(contamination=outliers_fraction)),\n (\"One-Class SVM\", svm.OneClassSVM(nu=outliers_fraction, kernel=\"rbf\",\n gamma=0.1)),\n (\"Isolation Forest\", IsolationForest(contamination=outliers_fraction,\n random_state=42)),\n (\"Local Outlier Factor\", LocalOutlierFactor(\n n_neighbors=35, contamination=outliers_fraction))]\n\n# Define datasets\nblobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)\ndatasets = [\n make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5,\n **blobs_params)[0],\n make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5],\n **blobs_params)[0],\n make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, .3],\n **blobs_params)[0],\n 4. * (make_moons(n_samples=n_samples, noise=.05, random_state=0)[0] -\n np.array([0.5, 0.25])),\n 14. 
* (np.random.RandomState(42).rand(n_samples, 2) - 0.5)]\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, 150),\n np.linspace(-7, 7, 150))\n\nplt.figure(figsize=(len(anomaly_algorithms) * 2 + 3, 12.5))\nplt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,\n hspace=.01)\n\nplot_num = 1\nrng = np.random.RandomState(42)\n\nfor i_dataset, X in enumerate(datasets):\n # Add outliers\n X = np.concatenate([X, rng.uniform(low=-6, high=6,\n size=(n_outliers, 2))], axis=0)\n\n for name, algorithm in anomaly_algorithms:\n t0 = time.time()\n algorithm.fit(X)\n t1 = time.time()\n plt.subplot(len(datasets), len(anomaly_algorithms), plot_num)\n if i_dataset == 0:\n plt.title(name, size=18)\n\n # fit the data and tag outliers\n if name == \"Local Outlier Factor\":\n y_pred = algorithm.fit_predict(X)\n else:\n y_pred = algorithm.fit(X).predict(X)\n\n # plot the levels lines and the points\n if name != \"Local Outlier Factor\": # LOF does not implement predict\n Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')\n\n colors = np.array(['#377eb8', '#ff7f00'])\n plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])\n\n plt.xlim(-7, 7)\n plt.ylim(-7, 7)\n plt.xticks(())\n plt.yticks(())\n plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),\n transform=plt.gca().transAxes, size=15,\n horizontalalignment='right')\n plot_num += 1\n\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_anomaly_comparison.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,7 @@
8282
("Robust covariance", EllipticEnvelope(contamination=outliers_fraction)),
8383
("One-Class SVM", svm.OneClassSVM(nu=outliers_fraction, kernel="rbf",
8484
gamma=0.1)),
85-
("Isolation Forest", IsolationForest(behaviour='new',
86-
contamination=outliers_fraction,
85+
("Isolation Forest", IsolationForest(contamination=outliers_fraction,
8786
random_state=42)),
8887
("Local Outlier Factor", LocalOutlierFactor(
8988
n_neighbors=35, contamination=outliers_fraction))]

dev/_downloads/plot_isolation_forest.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.ensemble import IsolationForest\n\nrng = np.random.RandomState(42)\n\n# Generate train data\nX = 0.3 * rng.randn(100, 2)\nX_train = np.r_[X + 2, X - 2]\n# Generate some regular novel observations\nX = 0.3 * rng.randn(20, 2)\nX_test = np.r_[X + 2, X - 2]\n# Generate some abnormal novel observations\nX_outliers = rng.uniform(low=-4, high=4, size=(20, 2))\n\n# fit the model\nclf = IsolationForest(behaviour='new', max_samples=100,\n random_state=rng, contamination='auto')\nclf.fit(X_train)\ny_pred_train = clf.predict(X_train)\ny_pred_test = clf.predict(X_test)\ny_pred_outliers = clf.predict(X_outliers)\n\n# plot the line, the samples, and the nearest vectors to the plane\nxx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))\nZ = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\nZ = Z.reshape(xx.shape)\n\nplt.title(\"IsolationForest\")\nplt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)\n\nb1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white',\n s=20, edgecolor='k')\nb2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green',\n s=20, edgecolor='k')\nc = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red',\n s=20, edgecolor='k')\nplt.axis('tight')\nplt.xlim((-5, 5))\nplt.ylim((-5, 5))\nplt.legend([b1, b2, c],\n [\"training observations\",\n \"new regular observations\", \"new abnormal observations\"],\n loc=\"upper left\")\nplt.show()"
29+
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.ensemble import IsolationForest\n\nrng = np.random.RandomState(42)\n\n# Generate train data\nX = 0.3 * rng.randn(100, 2)\nX_train = np.r_[X + 2, X - 2]\n# Generate some regular novel observations\nX = 0.3 * rng.randn(20, 2)\nX_test = np.r_[X + 2, X - 2]\n# Generate some abnormal novel observations\nX_outliers = rng.uniform(low=-4, high=4, size=(20, 2))\n\n# fit the model\nclf = IsolationForest(max_samples=100, random_state=rng)\nclf.fit(X_train)\ny_pred_train = clf.predict(X_train)\ny_pred_test = clf.predict(X_test)\ny_pred_outliers = clf.predict(X_outliers)\n\n# plot the line, the samples, and the nearest vectors to the plane\nxx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))\nZ = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\nZ = Z.reshape(xx.shape)\n\nplt.title(\"IsolationForest\")\nplt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)\n\nb1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white',\n s=20, edgecolor='k')\nb2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green',\n s=20, edgecolor='k')\nc = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red',\n s=20, edgecolor='k')\nplt.axis('tight')\nplt.xlim((-5, 5))\nplt.ylim((-5, 5))\nplt.legend([b1, b2, c],\n [\"training observations\",\n \"new regular observations\", \"new abnormal observations\"],\n loc=\"upper left\")\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_isolation_forest.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@
4040
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
4141

4242
# fit the model
43-
clf = IsolationForest(behaviour='new', max_samples=100,
44-
random_state=rng, contamination='auto')
43+
clf = IsolationForest(max_samples=100, random_state=rng)
4544
clf.fit(X_train)
4645
y_pred_train = clf.predict(X_train)
4746
y_pred_test = clf.predict(X_test)

dev/_downloads/scikit-learn-docs.pdf

7.68 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes

0 commit comments

Comments
 (0)