Commit 36dc1f8

Pushing the docs to dev/ for branch: master, commit bb911a57071813c17057fd227d4489e73f4178ba
1 parent d5270db commit 36dc1f8

1,962 files changed (+9399 lines added, -7827 lines removed)

dev/_downloads/002ebccb35a2de5ac6d32e3f54d8fa4f/plot_iris_exercise.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 np.random.seed(0)
 order = np.random.permutation(n_sample)
 X = X[order]
-y = y[order].astype(np.float)
+y = y[order].astype(float)
 
 X_train = X[:int(.9 * n_sample)]
 y_train = y[:int(.9 * n_sample)]
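
A side note on the cast change above (a hedged aside, not part of the commit): np.float is only an alias for Python's built-in float, and NumPy 1.20+ deprecates that alias, so the built-in gives the same result without the deprecation warning.

import numpy as np

y = np.array([2, 0, 1])
# np.float is a deprecated alias for the built-in float; casting with the
# built-in yields the same float64 array.
assert y.astype(float).dtype == np.float64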

dev/_downloads/006fc185672e58b056a5c134db26935c/plot_coin_segmentation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom distutils.version import LooseVersion\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\n\n# these were introduced in skimage-0.14\nif LooseVersion(skimage.__version__) >= '0.14':\n rescale_params = {'anti_aliasing': False, 'multichannel': False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\",\n **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
+
"print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version('0.14'):\n rescale_params = {'anti_aliasing': False, 'multichannel': False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\",\n **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
 ]
 },
 {
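
For context on the version-check change above, a small illustrative sketch (assuming only that sklearn.utils.fixes.parse_version is importable, as the new code shows): distutils' LooseVersion is deprecated, and plain string comparison misorders multi-digit version components, whereas parse_version compares them numerically.

from sklearn.utils.fixes import parse_version

# lexicographic string comparison gets multi-digit components wrong
assert '0.14.0' < '0.9'                                  # misleading ordering
assert parse_version('0.14.0') > parse_version('0.9')    # correct ordering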

dev/_downloads/036b9372e2e7802453cbb994da7a6786/plot_linearsvc_support_vectors.py

Lines changed: 4 additions & 1 deletion
@@ -24,7 +24,10 @@
 decision_function = clf.decision_function(X)
 # we can also calculate the decision function manually
 # decision_function = np.dot(X, clf.coef_[0]) + clf.intercept_[0]
-support_vector_indices = np.where((2 * y - 1) * decision_function <= 1)[0]
+# The support vectors are the samples that lie within the margin
+# boundaries, whose size is conventionally constrained to 1
+support_vector_indices = np.where(
+    np.abs(decision_function) <= 1 + 1e-15)[0]
 support_vectors = X[support_vector_indices]
 
 plt.subplot(1, 2, i + 1)
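
For reference, a self-contained sketch of the updated selection rule on toy data (it restates the example's own code, nothing beyond it): after fitting LinearSVC with the hinge loss, the support vectors are taken to be the samples whose decision value lies within the margin band |f(x)| <= 1, with a tiny tolerance for floating-point round-off.

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.svm import LinearSVC

X, y = make_blobs(n_samples=40, centers=2, random_state=0)
clf = LinearSVC(C=1, loss="hinge", random_state=42).fit(X, y)

decision_function = clf.decision_function(X)
# samples on or inside the margin boundaries, allowing for round-off
support_vector_indices = np.where(np.abs(decision_function) <= 1 + 1e-15)[0]
support_vectors = X[support_vector_indices]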

dev/_downloads/067cd5d39b097d2c49dd98f563dac13a/plot_iterative_imputer_variants_comparison.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-
"\n# Imputing missing values with variants of IterativeImputer\n\n\nThe :class:`sklearn.impute.IterativeImputer` class is very flexible - it can be\nused with a variety of estimators to do round-robin regression, treating every\nvariable as an output in turn.\n\nIn this example we compare some estimators for the purpose of missing feature\nimputation with :class:`sklearn.impute.IterativeImputer`:\n\n* :class:`~sklearn.linear_model.BayesianRidge`: regularized linear regression\n* :class:`~sklearn.tree.DecisionTreeRegressor`: non-linear regression\n* :class:`~sklearn.ensemble.ExtraTreesRegressor`: similar to missForest in R\n* :class:`~sklearn.neighbors.KNeighborsRegressor`: comparable to other KNN\n imputation approaches\n\nOf particular interest is the ability of\n:class:`sklearn.impute.IterativeImputer` to mimic the behavior of missForest, a\npopular imputation package for R. In this example, we have chosen to use\n:class:`sklearn.ensemble.ExtraTreesRegressor` instead of\n:class:`sklearn.ensemble.RandomForestRegressor` (as in missForest) due to its\nincreased speed.\n\nNote that :class:`sklearn.neighbors.KNeighborsRegressor` is different from KNN\nimputation, which learns from samples with missing values by using a distance\nmetric that accounts for missing values, rather than imputing them.\n\nThe goal is to compare different estimators to see which one is best for the\n:class:`sklearn.impute.IterativeImputer` when using a\n:class:`sklearn.linear_model.BayesianRidge` estimator on the California housing\ndataset with a single value randomly removed from each row.\n\nFor this particular pattern of missing values we see that\n:class:`sklearn.ensemble.ExtraTreesRegressor` and\n:class:`sklearn.linear_model.BayesianRidge` give the best results.\n"
+
"\n# Imputing missing values with variants of IterativeImputer\n\n\n.. currentmodule:: sklearn\n\nThe :class:`~impute.IterativeImputer` class is very flexible - it can be\nused with a variety of estimators to do round-robin regression, treating every\nvariable as an output in turn.\n\nIn this example we compare some estimators for the purpose of missing feature\nimputation with :class:`~impute.IterativeImputer`:\n\n* :class:`~linear_model.BayesianRidge`: regularized linear regression\n* :class:`~tree.DecisionTreeRegressor`: non-linear regression\n* :class:`~ensemble.ExtraTreesRegressor`: similar to missForest in R\n* :class:`~neighbors.KNeighborsRegressor`: comparable to other KNN\n imputation approaches\n\nOf particular interest is the ability of\n:class:`~impute.IterativeImputer` to mimic the behavior of missForest, a\npopular imputation package for R. In this example, we have chosen to use\n:class:`~ensemble.ExtraTreesRegressor` instead of\n:class:`~ensemble.RandomForestRegressor` (as in missForest) due to its\nincreased speed.\n\nNote that :class:`~neighbors.KNeighborsRegressor` is different from KNN\nimputation, which learns from samples with missing values by using a distance\nmetric that accounts for missing values, rather than imputing them.\n\nThe goal is to compare different estimators to see which one is best for the\n:class:`~impute.IterativeImputer` when using a\n:class:`~linear_model.BayesianRidge` estimator on the California housing\ndataset with a single value randomly removed from each row.\n\nFor this particular pattern of missing values we see that\n:class:`~ensemble.ExtraTreesRegressor` and\n:class:`~linear_model.BayesianRidge` give the best results.\n"
 ]
 },
 {
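
As a usage sketch of the workflow the rewritten description refers to (the toy array and parameter values are illustrative assumptions, not taken from the commit), IterativeImputer accepts an arbitrary regressor through its estimator parameter, e.g. ExtraTreesRegressor for a missForest-like setup:

import numpy as np
# IterativeImputer is experimental and must be enabled explicitly
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor

X = np.array([[1.0, 2.0, 3.0],
              [4.0, np.nan, 6.0],
              [np.nan, 8.0, 9.0],
              [10.0, 11.0, np.nan]])
imputer = IterativeImputer(estimator=ExtraTreesRegressor(n_estimators=10,
                                                         random_state=0),
                           random_state=0)
# missing entries are filled in by round-robin regression on the other columns
X_imputed = imputer.fit_transform(X)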

dev/_downloads/083d8568c199bebbc1a847fc6c917e9e/plot_kernel_approximation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-
"\n# Explicit feature map approximation for RBF kernels\n\n\nAn example illustrating the approximation of the feature map\nof an RBF kernel.\n\n.. currentmodule:: sklearn.kernel_approximation\n\nIt shows how to use :class:`RBFSampler` and :class:`Nystroem` to\napproximate the feature map of an RBF kernel for classification with an SVM on\nthe digits dataset. Results using a linear SVM in the original space, a linear\nSVM using the approximate mappings and using a kernelized SVM are compared.\nTimings and accuracy for varying amounts of Monte Carlo samplings (in the case\nof :class:`RBFSampler`, which uses random Fourier features) and different sized\nsubsets of the training set (for :class:`Nystroem`) for the approximate mapping\nare shown.\n\nPlease note that the dataset here is not large enough to show the benefits\nof kernel approximation, as the exact SVM is still reasonably fast.\n\nSampling more dimensions clearly leads to better classification results, but\ncomes at a greater cost. This means there is a tradeoff between runtime and\naccuracy, given by the parameter n_components. Note that solving the Linear\nSVM and also the approximate kernel SVM could be greatly accelerated by using\nstochastic gradient descent via :class:`sklearn.linear_model.SGDClassifier`.\nThis is not easily possible for the case of the kernelized SVM.\n"
+
"\n# Explicit feature map approximation for RBF kernels\n\n\nAn example illustrating the approximation of the feature map\nof an RBF kernel.\n\n.. currentmodule:: sklearn.kernel_approximation\n\nIt shows how to use :class:`RBFSampler` and :class:`Nystroem` to\napproximate the feature map of an RBF kernel for classification with an SVM on\nthe digits dataset. Results using a linear SVM in the original space, a linear\nSVM using the approximate mappings and using a kernelized SVM are compared.\nTimings and accuracy for varying amounts of Monte Carlo samplings (in the case\nof :class:`RBFSampler`, which uses random Fourier features) and different sized\nsubsets of the training set (for :class:`Nystroem`) for the approximate mapping\nare shown.\n\nPlease note that the dataset here is not large enough to show the benefits\nof kernel approximation, as the exact SVM is still reasonably fast.\n\nSampling more dimensions clearly leads to better classification results, but\ncomes at a greater cost. This means there is a tradeoff between runtime and\naccuracy, given by the parameter n_components. Note that solving the Linear\nSVM and also the approximate kernel SVM could be greatly accelerated by using\nstochastic gradient descent via :class:`~sklearn.linear_model.SGDClassifier`.\nThis is not easily possible for the case of the kernelized SVM.\n"
 ]
 },
 {
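
To make the closing remark about SGDClassifier concrete, a minimal sketch (parameter values are illustrative assumptions, not from the commit): the approximate feature map can feed a linear classifier trained with stochastic gradient descent instead of an exact kernelized SVM.

from sklearn.datasets import load_digits
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline

X, y = load_digits(return_X_y=True)
X = X / 16.0  # scale pixel values to [0, 1]
# approximate RBF feature map followed by a linear model fit with SGD
clf = make_pipeline(Nystroem(gamma=0.2, n_components=300, random_state=0),
                    SGDClassifier(max_iter=1000, tol=1e-3, random_state=0))
clf.fit(X, y)
print(clf.score(X, y))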

dev/_downloads/1168f82083b3e70f31672e7c33738f8d/plot_pca_iris.py

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@
 horizontalalignment='center',
 bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
 # Reorder the labels to have colors matching the cluster results
-y = np.choose(y, [1, 2, 0]).astype(np.float)
+y = np.choose(y, [1, 2, 0]).astype(float)
 ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral,
 edgecolor='k')
 

dev/_downloads/12a392e818ac5fa47dd91461855f3f77/plot_linearsvc_support_vectors.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"import numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import make_blobs\nfrom sklearn.svm import LinearSVC\n\nX, y = make_blobs(n_samples=40, centers=2, random_state=0)\n\nplt.figure(figsize=(10, 5))\nfor i, C in enumerate([1, 100]):\n # \"hinge\" is the standard SVM loss\n clf = LinearSVC(C=C, loss=\"hinge\", random_state=42).fit(X, y)\n # obtain the support vectors through the decision function\n decision_function = clf.decision_function(X)\n # we can also calculate the decision function manually\n # decision_function = np.dot(X, clf.coef_[0]) + clf.intercept_[0]\n support_vector_indices = np.where((2 * y - 1) * decision_function <= 1)[0]\n support_vectors = X[support_vector_indices]\n\n plt.subplot(1, 2, i + 1)\n plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)\n ax = plt.gca()\n xlim = ax.get_xlim()\n ylim = ax.get_ylim()\n xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 50),\n np.linspace(ylim[0], ylim[1], 50))\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n plt.contour(xx, yy, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,\n linestyles=['--', '-', '--'])\n plt.scatter(support_vectors[:, 0], support_vectors[:, 1], s=100,\n linewidth=1, facecolors='none', edgecolors='k')\n plt.title(\"C=\" + str(C))\nplt.tight_layout()\nplt.show()"
+
"import numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import make_blobs\nfrom sklearn.svm import LinearSVC\n\nX, y = make_blobs(n_samples=40, centers=2, random_state=0)\n\nplt.figure(figsize=(10, 5))\nfor i, C in enumerate([1, 100]):\n # \"hinge\" is the standard SVM loss\n clf = LinearSVC(C=C, loss=\"hinge\", random_state=42).fit(X, y)\n # obtain the support vectors through the decision function\n decision_function = clf.decision_function(X)\n # we can also calculate the decision function manually\n # decision_function = np.dot(X, clf.coef_[0]) + clf.intercept_[0]\n # The support vectors are the samples that lie within the margin\n # boundaries, whose size is conventionally constrained to 1\n support_vector_indices = np.where(\n np.abs(decision_function) <= 1 + 1e-15)[0]\n support_vectors = X[support_vector_indices]\n\n plt.subplot(1, 2, i + 1)\n plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)\n ax = plt.gca()\n xlim = ax.get_xlim()\n ylim = ax.get_ylim()\n xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 50),\n np.linspace(ylim[0], ylim[1], 50))\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n plt.contour(xx, yy, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,\n linestyles=['--', '-', '--'])\n plt.scatter(support_vectors[:, 0], support_vectors[:, 1], s=100,\n linewidth=1, facecolors='none', edgecolors='k')\n plt.title(\"C=\" + str(C))\nplt.tight_layout()\nplt.show()"
 ]
 }
 ],

dev/_downloads/18eb95af29bd5554020a8428b3ceac54/plot_cluster_iris.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n# Though the following import is not directly being used, it is required\n# for 3D projection to work\nfrom mpl_toolkits.mplot3d import Axes3D\n\nfrom sklearn.cluster import KMeans\nfrom sklearn import datasets\n\nnp.random.seed(5)\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nestimators = [('k_means_iris_8', KMeans(n_clusters=8)),\n ('k_means_iris_3', KMeans(n_clusters=3)),\n ('k_means_iris_bad_init', KMeans(n_clusters=3, n_init=1,\n init='random'))]\n\nfignum = 1\ntitles = ['8 clusters', '3 clusters', '3 clusters, bad initialization']\nfor name, est in estimators:\n fig = plt.figure(fignum, figsize=(4, 3))\n ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)\n est.fit(X)\n labels = est.labels_\n\n ax.scatter(X[:, 3], X[:, 0], X[:, 2],\n c=labels.astype(np.float), edgecolor='k')\n\n ax.w_xaxis.set_ticklabels([])\n ax.w_yaxis.set_ticklabels([])\n ax.w_zaxis.set_ticklabels([])\n ax.set_xlabel('Petal width')\n ax.set_ylabel('Sepal length')\n ax.set_zlabel('Petal length')\n ax.set_title(titles[fignum - 1])\n ax.dist = 12\n fignum = fignum + 1\n\n# Plot the ground truth\nfig = plt.figure(fignum, figsize=(4, 3))\nax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)\n\nfor name, label in [('Setosa', 0),\n ('Versicolour', 1),\n ('Virginica', 2)]:\n ax.text3D(X[y == label, 3].mean(),\n X[y == label, 0].mean(),\n X[y == label, 2].mean() + 2, name,\n horizontalalignment='center',\n bbox=dict(alpha=.2, edgecolor='w', facecolor='w'))\n# Reorder the labels to have colors matching the cluster results\ny = np.choose(y, [1, 2, 0]).astype(np.float)\nax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')\n\nax.w_xaxis.set_ticklabels([])\nax.w_yaxis.set_ticklabels([])\nax.w_zaxis.set_ticklabels([])\nax.set_xlabel('Petal width')\nax.set_ylabel('Sepal length')\nax.set_zlabel('Petal length')\nax.set_title('Ground Truth')\nax.dist = 12\n\nfig.show()"
+
"print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n# Though the following import is not directly being used, it is required\n# for 3D projection to work\nfrom mpl_toolkits.mplot3d import Axes3D\n\nfrom sklearn.cluster import KMeans\nfrom sklearn import datasets\n\nnp.random.seed(5)\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nestimators = [('k_means_iris_8', KMeans(n_clusters=8)),\n ('k_means_iris_3', KMeans(n_clusters=3)),\n ('k_means_iris_bad_init', KMeans(n_clusters=3, n_init=1,\n init='random'))]\n\nfignum = 1\ntitles = ['8 clusters', '3 clusters', '3 clusters, bad initialization']\nfor name, est in estimators:\n fig = plt.figure(fignum, figsize=(4, 3))\n ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)\n est.fit(X)\n labels = est.labels_\n\n ax.scatter(X[:, 3], X[:, 0], X[:, 2],\n c=labels.astype(float), edgecolor='k')\n\n ax.w_xaxis.set_ticklabels([])\n ax.w_yaxis.set_ticklabels([])\n ax.w_zaxis.set_ticklabels([])\n ax.set_xlabel('Petal width')\n ax.set_ylabel('Sepal length')\n ax.set_zlabel('Petal length')\n ax.set_title(titles[fignum - 1])\n ax.dist = 12\n fignum = fignum + 1\n\n# Plot the ground truth\nfig = plt.figure(fignum, figsize=(4, 3))\nax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)\n\nfor name, label in [('Setosa', 0),\n ('Versicolour', 1),\n ('Virginica', 2)]:\n ax.text3D(X[y == label, 3].mean(),\n X[y == label, 0].mean(),\n X[y == label, 2].mean() + 2, name,\n horizontalalignment='center',\n bbox=dict(alpha=.2, edgecolor='w', facecolor='w'))\n# Reorder the labels to have colors matching the cluster results\ny = np.choose(y, [1, 2, 0]).astype(float)\nax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')\n\nax.w_xaxis.set_ticklabels([])\nax.w_yaxis.set_ticklabels([])\nax.w_zaxis.set_ticklabels([])\nax.set_xlabel('Petal width')\nax.set_ylabel('Sepal length')\nax.set_zlabel('Petal length')\nax.set_title('Ground Truth')\nax.dist = 12\n\nfig.show()"
 ]
 }
 ],
