
Commit 6c69ff5

Pushing the docs to 0.24/ for branch: 0.24.X, commit 255718b4ad9a3490bc99c992d467f85737bd1291
1 parent caf8613 commit 6c69ff5

File tree

3,785 files changed: +713,533 additions, -0 deletions


0.24/.buildinfo

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 133687f2af5eb2f75adf185bdaa6b0c9
tags: 645f666f9bcd5a90fca523b33c5a78b7
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
"""
================================
SVM Exercise
================================

A tutorial exercise for using different SVM kernels.

This exercise is used in the :ref:`using_kernels_tut` part of the
:ref:`supervised_learning_tut` section of the :ref:`stat_learn_tut_index`.
"""
print(__doc__)


import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm

iris = datasets.load_iris()
X = iris.data
y = iris.target

X = X[y != 0, :2]
y = y[y != 0]

n_sample = len(X)

np.random.seed(0)
order = np.random.permutation(n_sample)
X = X[order]
y = y[order].astype(float)

X_train = X[:int(.9 * n_sample)]
y_train = y[:int(.9 * n_sample)]
X_test = X[int(.9 * n_sample):]
y_test = y[int(.9 * n_sample):]

# fit the model
for kernel in ('linear', 'rbf', 'poly'):
    clf = svm.SVC(kernel=kernel, gamma=10)
    clf.fit(X_train, y_train)

    plt.figure()
    plt.clf()
    plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,
                edgecolor='k', s=20)

    # Circle out the test data
    plt.scatter(X_test[:, 0], X_test[:, 1], s=80, facecolors='none',
                zorder=10, edgecolor='k')

    plt.axis('tight')
    x_min = X[:, 0].min()
    x_max = X[:, 0].max()
    y_min = X[:, 1].min()
    y_max = X[:, 1].max()

    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(XX.shape)
    plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
                linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

    plt.title(kernel)
plt.show()
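
The committed exercise plots the three decision surfaces but never scores the held-out 10% split it sets aside. A minimal follow-up sketch, not part of the committed file, assuming the X_train/X_test variables defined above:

# Hypothetical addition (not in the committed example): score each kernel
# on the held-out split that the exercise already creates.
for kernel in ('linear', 'rbf', 'poly'):
    clf = svm.SVC(kernel=kernel, gamma=10).fit(X_train, y_train)
    print('%-6s test accuracy: %.2f' % (kernel, clf.score(X_test, y_test)))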
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Segmenting the picture of greek coins in regions\n\nThis example uses `spectral_clustering` on a graph created from\nvoxel-to-voxel difference on an image to break this image into multiple\npartly-homogeneous regions.\n\nThis procedure (spectral clustering on an image) is an efficient\napproximate solution for finding normalized graph cuts.\n\nThere are two options to assign labels:\n\n* with 'kmeans' spectral clustering will cluster samples in the embedding space\n  using a kmeans algorithm\n* whereas 'discrete' will iteratively search for the closest partition\n  space to the embedding space.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version('0.14'):\n    rescale_params = {'anti_aliasing': False, 'multichannel': False}\nelse:\n    rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\",\n                         **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Visualize the resulting regions\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "for assign_labels in ('kmeans', 'discretize'):\n    t0 = time.time()\n    labels = spectral_clustering(graph, n_clusters=N_REGIONS,\n                                 assign_labels=assign_labels, random_state=42)\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n    for l in range(N_REGIONS):\n        plt.contour(labels == l,\n                    colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n    plt.xticks(())\n    plt.yticks(())\n    title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
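
The notebook's markdown cell describes turning voxel-to-voxel differences into graph affinities before clustering. A tiny standalone sketch, not part of the committed notebook, of what img_to_graph plus the exponential re-weighting do on a 2x2 toy image (same beta/eps values as the notebook):

# Hypothetical toy example: image gradients on graph edges -> affinities.
import numpy as np
from sklearn.feature_extraction import image

tiny = np.array([[0.0, 0.1],
                 [0.9, 1.0]])                  # a 2x2 "image"
graph = image.img_to_graph(tiny)               # sparse graph, gradient values on edges
beta, eps = 10, 1e-6
graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps
print(graph.toarray())                         # small gradients map to affinities near 1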
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
"""
================================================================
Plot the decision surface of a decision tree on the iris dataset
================================================================

Plot the decision surface of a decision tree trained on pairs
of features of the iris dataset.

See :ref:`decision tree <tree>` for more information on the estimator.

For each pair of iris features, the decision tree learns decision
boundaries made of combinations of simple thresholding rules inferred from
the training samples.

We also show the tree structure of a model built on all of the features.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Parameters
n_classes = 3
plot_colors = "ryb"
plot_step = 0.02

# Load data
iris = load_iris()

for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
                                [1, 2], [1, 3], [2, 3]]):
    # We only take the two corresponding features
    X = iris.data[:, pair]
    y = iris.target

    # Train
    clf = DecisionTreeClassifier().fit(X, y)

    # Plot the decision boundary
    plt.subplot(2, 3, pairidx + 1)

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cs = plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)

    plt.xlabel(iris.feature_names[pair[0]])
    plt.ylabel(iris.feature_names[pair[1]])

    # Plot the training points
    for i, color in zip(range(n_classes), plot_colors):
        idx = np.where(y == i)
        plt.scatter(X[idx, 0], X[idx, 1], c=color, label=iris.target_names[i],
                    cmap=plt.cm.RdYlBu, edgecolor='black', s=15)

plt.suptitle("Decision surface of a decision tree using paired features")
plt.legend(loc='lower right', borderpad=0, handletextpad=0)
plt.axis("tight")

plt.figure()
clf = DecisionTreeClassifier().fit(iris.data, iris.target)
plot_tree(clf, filled=True)
plt.show()
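
The committed example renders the full tree graphically with plot_tree. A minimal sketch, not part of the committed file, of printing the same fitted model as text rules via export_text (also available in this release):

# Hypothetical addition: print the fitted tree as indented text rules.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
clf = DecisionTreeClassifier().fit(iris.data, iris.target)
print(export_text(clf, feature_names=iris.feature_names))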
Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
"""
=============================================================
Receiver Operating Characteristic (ROC) with cross validation
=============================================================

Example of Receiver Operating Characteristic (ROC) metric to evaluate
classifier output quality using cross-validation.

ROC curves typically feature true positive rate on the Y axis, and false
positive rate on the X axis. This means that the top left corner of the plot is
the "ideal" point - a false positive rate of zero, and a true positive rate of
one. This is not very realistic, but it does mean that a larger area under the
curve (AUC) is usually better.

The "steepness" of ROC curves is also important, since it is ideal to maximize
the true positive rate while minimizing the false positive rate.

This example shows the ROC response of different datasets, created from K-fold
cross-validation. Taking all of these curves, it is possible to calculate the
mean area under curve, and see the variance of the curve when the
training set is split into different subsets. This roughly shows how the
classifier output is affected by changes in the training data, and how
different the splits generated by K-fold cross-validation are from one another.

.. note::

    See also :func:`sklearn.metrics.roc_auc_score`,
    :func:`sklearn.model_selection.cross_val_score`,
    :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py`,

"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.metrics import auc
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import StratifiedKFold

# #############################################################################
# Data IO and generation

# Import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
X, y = X[y != 2], y[y != 2]
n_samples, n_features = X.shape

# Add noisy features
random_state = np.random.RandomState(0)
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# #############################################################################
# Classification and ROC analysis

# Run classifier with cross-validation and plot ROC curves
cv = StratifiedKFold(n_splits=6)
classifier = svm.SVC(kernel='linear', probability=True,
                     random_state=random_state)

tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)

fig, ax = plt.subplots()
for i, (train, test) in enumerate(cv.split(X, y)):
    classifier.fit(X[train], y[train])
    viz = plot_roc_curve(classifier, X[test], y[test],
                         name='ROC fold {}'.format(i),
                         alpha=0.3, lw=1, ax=ax)
    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    aucs.append(viz.roc_auc)

ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
        label='Chance', alpha=.8)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
ax.plot(mean_fpr, mean_tpr, color='b',
        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
        lw=2, alpha=.8)

std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                label=r'$\pm$ 1 std. dev.')

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
       title="Receiver operating characteristic example")
ax.legend(loc="lower right")
plt.show()
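
The note in the docstring points at cross_val_score as an alternative route. A minimal sketch, not part of the committed file, of getting per-fold ROC AUC without the plotting machinery, assuming the X, y, classifier and cv objects defined above:

# Hypothetical addition: per-fold ROC AUC via cross_val_score.
from sklearn.model_selection import cross_val_score

fold_aucs = cross_val_score(classifier, X, y, cv=cv, scoring='roc_auc')
print('Per-fold AUC:', np.round(fold_aucs, 3))
print('Mean ROC AUC: %0.2f +/- %0.2f' % (fold_aucs.mean(), fold_aucs.std()))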
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
"""
=================================================
Concatenating multiple feature extraction methods
=================================================

In many real-world examples, there are many ways to extract features from a
dataset. Often it is beneficial to combine several methods to obtain good
performance. This example shows how to use ``FeatureUnion`` to combine
features obtained by PCA and univariate selection.

Combining features using this transformer has the benefit that it allows
cross validation and grid searches over the whole process.

The combination used in this example is not particularly helpful on this
dataset and is only used to illustrate the usage of FeatureUnion.
"""

# Author: Andreas Mueller <[email protected]>
#
# License: BSD 3 clause

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

iris = load_iris()

X, y = iris.data, iris.target

# This dataset is way too high-dimensional. Better do PCA:
pca = PCA(n_components=2)

# Maybe some original features were good, too?
selection = SelectKBest(k=1)

# Build estimator from PCA and Univariate selection:

combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

# Use combined features to transform dataset:
X_features = combined_features.fit(X, y).transform(X)
print("Combined space has", X_features.shape[1], "features")

svm = SVC(kernel="linear")

# Do grid search over k, n_components and C:

pipeline = Pipeline([("features", combined_features), ("svm", svm)])

param_grid = dict(features__pca__n_components=[1, 2, 3],
                  features__univ_select__k=[1, 2],
                  svm__C=[0.1, 1, 10])

grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)
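
With verbose=10 the search prints every individual fit. A short sketch, not part of the committed file, of what one would typically inspect afterwards on the fitted grid_search object:

# Hypothetical addition: report the winning parameters and CV score.
print(grid_search.best_params_)
print('Best CV accuracy: %0.3f' % grid_search.best_score_)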
