Commit 3e5a43c

Pushing the docs to dev/ for branch: master, commit cdd693bf955acd2a97cce48011d168c6b1ef316d
1 parent 660fd17 · commit 3e5a43c

File tree: 912 files changed (+2771 / −2753 lines)
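Every text diff in this commit makes the same mechanical change: adding edgecolors='k' to plt.scatter calls in the SVM example gallery, so that each marker gets a black outline. A minimal side-by-side sketch of the effect (the data and figure layout here are illustrative, not from the commit):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
X = rng.randn(50, 2)
y = rng.randint(0, 2, size=50)

fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(8, 4))
# Without an explicit edge color, light markers can blend into a light
# background such as a pcolormesh decision surface.
ax_before.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
ax_before.set_title("no edgecolors")
# edgecolors='k' outlines each marker in black, the change applied below.
ax_after.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k')
ax_after.set_title("edgecolors='k'")
plt.show()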
Two binary files changed (275 Bytes and 269 Bytes); contents not shown.

dev/_downloads/plot_custom_kernel.ipynb

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
   "execution_count": null,
   "cell_type": "code",
   "source": [
-    "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import svm, datasets\n\n# import some data to play with\niris = datasets.load_iris()\nX = iris.data[:, :2] # we only take the first two features. We could\n # avoid this ugly slicing by using a two-dim dataset\nY = iris.target\n\n\ndef my_kernel(X, Y):\n \"\"\"\n We create a custom kernel:\n\n (2 0)\n k(X, Y) = X ( ) Y.T\n (0 1)\n \"\"\"\n M = np.array([[2, 0], [0, 1.0]])\n return np.dot(np.dot(X, M), Y.T)\n\n\nh = .02 # step size in the mesh\n\n# we create an instance of SVM and fit out data.\nclf = svm.SVC(kernel=my_kernel)\nclf.fit(X, Y)\n\n# Plot the decision boundary. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nx_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\ny_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\nZ = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n\n# Put the result into a color plot\nZ = Z.reshape(xx.shape)\nplt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)\n\n# Plot also the training points\nplt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)\nplt.title('3-Class classification using Support Vector Machine with custom'\n ' kernel')\nplt.axis('tight')\nplt.show()"
+    "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import svm, datasets\n\n# import some data to play with\niris = datasets.load_iris()\nX = iris.data[:, :2] # we only take the first two features. We could\n # avoid this ugly slicing by using a two-dim dataset\nY = iris.target\n\n\ndef my_kernel(X, Y):\n \"\"\"\n We create a custom kernel:\n\n (2 0)\n k(X, Y) = X ( ) Y.T\n (0 1)\n \"\"\"\n M = np.array([[2, 0], [0, 1.0]])\n return np.dot(np.dot(X, M), Y.T)\n\n\nh = .02 # step size in the mesh\n\n# we create an instance of SVM and fit out data.\nclf = svm.SVC(kernel=my_kernel)\nclf.fit(X, Y)\n\n# Plot the decision boundary. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nx_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\ny_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\nZ = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n\n# Put the result into a color plot\nZ = Z.reshape(xx.shape)\nplt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)\n\n# Plot also the training points\nplt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolors='k')\nplt.title('3-Class classification using Support Vector Machine with custom'\n ' kernel')\nplt.axis('tight')\nplt.show()"
   ],
   "outputs": [],
   "metadata": {

dev/_downloads/plot_custom_kernel.py

Lines changed: 1 addition & 1 deletion

@@ -50,7 +50,7 @@ def my_kernel(X, Y):
 plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

 # Plot also the training points
-plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
+plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolors='k')
 plt.title('3-Class classification using Support Vector Machine with custom'
           ' kernel')
 plt.axis('tight')
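As context for the hunk above: svm.SVC accepts a Python callable as its kernel, and the callable must return the Gram matrix between its two sample arrays. A minimal runnable sketch of that pattern, reassembled from this example's own code (only the fit call at the end is condensed):

import numpy as np
from sklearn import svm, datasets

def my_kernel(X, Y):
    # k(X, Y) = X M Y.T with M = diag(2, 1); the returned Gram matrix has
    # shape (n_samples_X, n_samples_Y), as SVC requires of a callable kernel.
    M = np.array([[2, 0], [0, 1.0]])
    return np.dot(np.dot(X, M), Y.T)

iris = datasets.load_iris()
X, Y = iris.data[:, :2], iris.target  # first two features, as in the example
clf = svm.SVC(kernel=my_kernel).fit(X, Y)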

dev/_downloads/plot_oneclass.ipynb

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
   "execution_count": null,
   "cell_type": "code",
   "source": [
-    "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\nfrom sklearn import svm\n\nxx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))\n# Generate train data\nX = 0.3 * np.random.randn(100, 2)\nX_train = np.r_[X + 2, X - 2]\n# Generate some regular novel observations\nX = 0.3 * np.random.randn(20, 2)\nX_test = np.r_[X + 2, X - 2]\n# Generate some abnormal novel observations\nX_outliers = np.random.uniform(low=-4, high=4, size=(20, 2))\n\n# fit the model\nclf = svm.OneClassSVM(nu=0.1, kernel=\"rbf\", gamma=0.1)\nclf.fit(X_train)\ny_pred_train = clf.predict(X_train)\ny_pred_test = clf.predict(X_test)\ny_pred_outliers = clf.predict(X_outliers)\nn_error_train = y_pred_train[y_pred_train == -1].size\nn_error_test = y_pred_test[y_pred_test == -1].size\nn_error_outliers = y_pred_outliers[y_pred_outliers == 1].size\n\n# plot the line, the points, and the nearest vectors to the plane\nZ = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\nZ = Z.reshape(xx.shape)\n\nplt.title(\"Novelty Detection\")\nplt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)\na = plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred')\nplt.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred')\n\ns = 40\nb1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s)\nb2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s)\nc = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s)\nplt.axis('tight')\nplt.xlim((-5, 5))\nplt.ylim((-5, 5))\nplt.legend([a.collections[0], b1, b2, c],\n [\"learned frontier\", \"training observations\",\n \"new regular observations\", \"new abnormal observations\"],\n loc=\"upper left\",\n prop=matplotlib.font_manager.FontProperties(size=11))\nplt.xlabel(\n \"error train: %d/200 ; errors novel regular: %d/40 ; \"\n \"errors novel abnormal: %d/40\"\n % (n_error_train, n_error_test, n_error_outliers))\nplt.show()"
+    "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\nfrom sklearn import svm\n\nxx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))\n# Generate train data\nX = 0.3 * np.random.randn(100, 2)\nX_train = np.r_[X + 2, X - 2]\n# Generate some regular novel observations\nX = 0.3 * np.random.randn(20, 2)\nX_test = np.r_[X + 2, X - 2]\n# Generate some abnormal novel observations\nX_outliers = np.random.uniform(low=-4, high=4, size=(20, 2))\n\n# fit the model\nclf = svm.OneClassSVM(nu=0.1, kernel=\"rbf\", gamma=0.1)\nclf.fit(X_train)\ny_pred_train = clf.predict(X_train)\ny_pred_test = clf.predict(X_test)\ny_pred_outliers = clf.predict(X_outliers)\nn_error_train = y_pred_train[y_pred_train == -1].size\nn_error_test = y_pred_test[y_pred_test == -1].size\nn_error_outliers = y_pred_outliers[y_pred_outliers == 1].size\n\n# plot the line, the points, and the nearest vectors to the plane\nZ = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\nZ = Z.reshape(xx.shape)\n\nplt.title(\"Novelty Detection\")\nplt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)\na = plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred')\nplt.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred')\n\ns = 40\nb1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s, edgecolors='k')\nb2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s,\n edgecolors='k')\nc = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s,\n edgecolors='k')\nplt.axis('tight')\nplt.xlim((-5, 5))\nplt.ylim((-5, 5))\nplt.legend([a.collections[0], b1, b2, c],\n [\"learned frontier\", \"training observations\",\n \"new regular observations\", \"new abnormal observations\"],\n loc=\"upper left\",\n prop=matplotlib.font_manager.FontProperties(size=11))\nplt.xlabel(\n \"error train: %d/200 ; errors novel regular: %d/40 ; \"\n \"errors novel abnormal: %d/40\"\n % (n_error_train, n_error_test, n_error_outliers))\nplt.show()"
   ],
   "outputs": [],
   "metadata": {

dev/_downloads/plot_oneclass.py

Lines changed: 5 additions & 3 deletions

@@ -46,9 +46,11 @@
 plt.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred')

 s = 40
-b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s)
-b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s)
-c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s)
+b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s, edgecolors='k')
+b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s,
+                 edgecolors='k')
+c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s,
+                edgecolors='k')
 plt.axis('tight')
 plt.xlim((-5, 5))
 plt.ylim((-5, 5))
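A note on the error counts visible in this example's source (in the notebook diff above): OneClassSVM.predict labels inliers +1 and outliers -1, so errors on the training and regular test sets are the -1 predictions, and errors on the abnormal set are the +1 predictions. A minimal sketch, with a seeded RandomState added for reproducibility (the example uses np.random directly):

import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
X = 0.3 * rng.randn(100, 2)
X_train = np.r_[X + 2, X - 2]  # two clusters, as in the example

clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
clf.fit(X_train)

y_pred_train = clf.predict(X_train)         # array of +1 (inlier) / -1 (outlier)
n_error_train = (y_pred_train == -1).sum()  # same count as the example's
                                            # y_pred_train[y_pred_train == -1].size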

dev/_downloads/plot_rbf_parameters.ipynb

Lines changed: 1 addition & 1 deletion

@@ -78,7 +78,7 @@
   "execution_count": null,
   "cell_type": "code",
   "source": [
-    "plt.figure(figsize=(8, 6))\nxx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))\nfor (k, (C, gamma, clf)) in enumerate(classifiers):\n    # evaluate decision function in a grid\n    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n    Z = Z.reshape(xx.shape)\n\n    # visualize decision function for these parameters\n    plt.subplot(len(C_2d_range), len(gamma_2d_range), k + 1)\n    plt.title(\"gamma=10^%d, C=10^%d\" % (np.log10(gamma), np.log10(C)),\n              size='medium')\n\n    # visualize parameter's effect on decision function\n    plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)\n    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r)\n    plt.xticks(())\n    plt.yticks(())\n    plt.axis('tight')\n\nscores = grid.cv_results_['mean_test_score'].reshape(len(C_range),\n                                                     len(gamma_range))\n\n# Draw heatmap of the validation accuracy as a function of gamma and C\n#\n# The score are encoded as colors with the hot colormap which varies from dark\n# red to bright yellow. As the most interesting scores are all located in the\n# 0.92 to 0.97 range we use a custom normalizer to set the mid-point to 0.92 so\n# as to make it easier to visualize the small variations of score values in the\n# interesting range while not brutally collapsing all the low score values to\n# the same color.\n\nplt.figure(figsize=(8, 6))\nplt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)\nplt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot,\n           norm=MidpointNormalize(vmin=0.2, midpoint=0.92))\nplt.xlabel('gamma')\nplt.ylabel('C')\nplt.colorbar()\nplt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)\nplt.yticks(np.arange(len(C_range)), C_range)\nplt.title('Validation accuracy')\nplt.show()"
+    "plt.figure(figsize=(8, 6))\nxx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))\nfor (k, (C, gamma, clf)) in enumerate(classifiers):\n    # evaluate decision function in a grid\n    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n    Z = Z.reshape(xx.shape)\n\n    # visualize decision function for these parameters\n    plt.subplot(len(C_2d_range), len(gamma_2d_range), k + 1)\n    plt.title(\"gamma=10^%d, C=10^%d\" % (np.log10(gamma), np.log10(C)),\n              size='medium')\n\n    # visualize parameter's effect on decision function\n    plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)\n    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r,\n                edgecolors='k')\n    plt.xticks(())\n    plt.yticks(())\n    plt.axis('tight')\n\nscores = grid.cv_results_['mean_test_score'].reshape(len(C_range),\n                                                     len(gamma_range))\n\n# Draw heatmap of the validation accuracy as a function of gamma and C\n#\n# The score are encoded as colors with the hot colormap which varies from dark\n# red to bright yellow. As the most interesting scores are all located in the\n# 0.92 to 0.97 range we use a custom normalizer to set the mid-point to 0.92 so\n# as to make it easier to visualize the small variations of score values in the\n# interesting range while not brutally collapsing all the low score values to\n# the same color.\n\nplt.figure(figsize=(8, 6))\nplt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)\nplt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot,\n           norm=MidpointNormalize(vmin=0.2, midpoint=0.92))\nplt.xlabel('gamma')\nplt.ylabel('C')\nplt.colorbar()\nplt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)\nplt.yticks(np.arange(len(C_range)), C_range)\nplt.title('Validation accuracy')\nplt.show()"
   ],
   "outputs": [],
   "metadata": {

dev/_downloads/plot_rbf_parameters.py

Lines changed: 2 additions & 1 deletion

@@ -166,7 +166,8 @@ def __call__(self, value, clip=None):

     # visualize parameter's effect on decision function
     plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)
-    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r)
+    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r,
+                edgecolors='k')
     plt.xticks(())
     plt.yticks(())
     plt.axis('tight')
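The line scores = grid.cv_results_['mean_test_score'].reshape(len(C_range), len(gamma_range)) in this example's notebook source (above) is what feeds the validation-accuracy heatmap. A minimal sketch of that pipeline, assuming illustrative parameter ranges and the iris data rather than the example's exact setup:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Illustrative ranges; the example's actual C_range and gamma_range are
# defined earlier in the file and do not appear in this diff.
C_range = np.logspace(-2, 6, 5)
gamma_range = np.logspace(-5, 1, 4)

X, y = load_iris(return_X_y=True)
grid = GridSearchCV(SVC(), param_grid={'C': C_range, 'gamma': gamma_range})
grid.fit(X, y)

# mean_test_score is flat, one entry per (C, gamma) combination, ordered so
# that it reshapes into a (len(C_range), len(gamma_range)) matrix for imshow.
scores = grid.cv_results_['mean_test_score'].reshape(len(C_range),
                                                     len(gamma_range))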

dev/_downloads/plot_separating_hyperplane_unbalanced.ipynb

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
   "execution_count": null,
   "cell_type": "code",
   "source": [
-    "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import svm\n#from sklearn.linear_model import SGDClassifier\n\n# we create 40 separable points\nrng = np.random.RandomState(0)\nn_samples_1 = 1000\nn_samples_2 = 100\nX = np.r_[1.5 * rng.randn(n_samples_1, 2),\n 0.5 * rng.randn(n_samples_2, 2) + [2, 2]]\ny = [0] * (n_samples_1) + [1] * (n_samples_2)\n\n# fit the model and get the separating hyperplane\nclf = svm.SVC(kernel='linear', C=1.0)\nclf.fit(X, y)\n\nw = clf.coef_[0]\na = -w[0] / w[1]\nxx = np.linspace(-5, 5)\nyy = a * xx - clf.intercept_[0] / w[1]\n\n\n# get the separating hyperplane using weighted classes\nwclf = svm.SVC(kernel='linear', class_weight={1: 10})\nwclf.fit(X, y)\n\nww = wclf.coef_[0]\nwa = -ww[0] / ww[1]\nwyy = wa * xx - wclf.intercept_[0] / ww[1]\n\n# plot separating hyperplanes and samples\nh0 = plt.plot(xx, yy, 'k-', label='no weights')\nh1 = plt.plot(xx, wyy, 'k--', label='with weights')\nplt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)\nplt.legend()\n\nplt.axis('tight')\nplt.show()"
+    "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import svm\n#from sklearn.linear_model import SGDClassifier\n\n# we create 40 separable points\nrng = np.random.RandomState(0)\nn_samples_1 = 1000\nn_samples_2 = 100\nX = np.r_[1.5 * rng.randn(n_samples_1, 2),\n 0.5 * rng.randn(n_samples_2, 2) + [2, 2]]\ny = [0] * (n_samples_1) + [1] * (n_samples_2)\n\n# fit the model and get the separating hyperplane\nclf = svm.SVC(kernel='linear', C=1.0)\nclf.fit(X, y)\n\nw = clf.coef_[0]\na = -w[0] / w[1]\nxx = np.linspace(-5, 5)\nyy = a * xx - clf.intercept_[0] / w[1]\n\n\n# get the separating hyperplane using weighted classes\nwclf = svm.SVC(kernel='linear', class_weight={1: 10})\nwclf.fit(X, y)\n\nww = wclf.coef_[0]\nwa = -ww[0] / ww[1]\nwyy = wa * xx - wclf.intercept_[0] / ww[1]\n\n# plot separating hyperplanes and samples\nh0 = plt.plot(xx, yy, 'k-', label='no weights')\nh1 = plt.plot(xx, wyy, 'k--', label='with weights')\nplt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k')\nplt.legend()\n\nplt.axis('tight')\nplt.show()"
   ],
   "outputs": [],
   "metadata": {

dev/_downloads/plot_separating_hyperplane_unbalanced.py

Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@
 # plot separating hyperplanes and samples
 h0 = plt.plot(xx, yy, 'k-', label='no weights')
 h1 = plt.plot(xx, wyy, 'k--', label='with weights')
-plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
+plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k')
 plt.legend()

 plt.axis('tight')
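The slope-and-intercept arithmetic in this example's source (a = -w[0] / w[1] and yy = a * xx - clf.intercept_[0] / w[1], visible in the notebook diff above) follows from solving the linear decision boundary w[0]*x + w[1]*y + b = 0 for y. A minimal sketch with a smaller dataset than the example's:

import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
X = np.r_[1.5 * rng.randn(50, 2),           # majority class around the origin
          0.5 * rng.randn(10, 2) + [2, 2]]  # minority class offset to (2, 2)
y = [0] * 50 + [1] * 10

clf = svm.SVC(kernel='linear', C=1.0).fit(X, y)
w, b = clf.coef_[0], clf.intercept_[0]

# Boundary w[0]*x + w[1]*y + b = 0  =>  y = -(w[0] / w[1]) * x - b / w[1]
xx = np.linspace(-5, 5)
yy = -(w[0] / w[1]) * xx - b / w[1]         # points on the separating line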
