Skip to content

Commit 79013d0

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 1deb95a13e774050a0fb7ff1d896465e2bf937c1
1 parent deefc21 commit 79013d0

File tree

1,050 files changed

+3340
-3348
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,050 files changed

+3340
-3348
lines changed
124 Bytes
Binary file not shown.
128 Bytes
Binary file not shown.

dev/_downloads/plot_svm_anova.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"cell_type": "markdown",
1616
"metadata": {},
1717
"source": [
18-
"\n=================================================\nSVM-Anova: SVM with univariate feature selection\n=================================================\n\nThis example shows how to perform univariate feature selection before running a\nSVC (support vector classifier) to improve the classification scores.\n\n"
18+
"\n=================================================\nSVM-Anova: SVM with univariate feature selection\n=================================================\n\nThis example shows how to perform univariate feature selection before running a\nSVC (support vector classifier) to improve the classification scores. We use\nthe iris dataset (4 features) and add 36 non-informative features. We can find\nthat our model achieves best performance when we select around 10% of features.\n\n"
1919
]
2020
},
2121
{
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.feature_selection import SelectPercentile, chi2\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import SVC\n\n\n# #############################################################################\n# Import some data to play with\nX, y = load_digits(return_X_y=True)\n# Throw away data, to be in the curse of dimension settings\nX = X[:200]\ny = y[:200]\nn_samples = len(y)\nX = X.reshape((n_samples, -1))\n# add 200 non-informative features\nX = np.hstack((X, 2 * np.random.random((n_samples, 200))))\n\n# #############################################################################\n# Create a feature-selection transform and an instance of SVM that we\n# combine together to have an full-blown estimator\n\ntransform = SelectPercentile(chi2)\n\nclf = Pipeline([('anova', transform), ('svc', SVC(gamma=\"auto\"))])\n\n# #############################################################################\n# Plot the cross-validation score as a function of percentile of features\nscore_means = list()\nscore_stds = list()\npercentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)\n\nfor percentile in percentiles:\n clf.set_params(anova__percentile=percentile)\n # Compute cross-validation score using 1 CPU\n this_scores = cross_val_score(clf, X, y, cv=5, n_jobs=1)\n score_means.append(this_scores.mean())\n score_stds.append(this_scores.std())\n\nplt.errorbar(percentiles, score_means, np.array(score_stds))\n\nplt.title(\n 'Performance of the SVM-Anova varying the percentile of features selected')\nplt.xlabel('Percentile')\nplt.ylabel('Prediction rate')\n\nplt.axis('tight')\nplt.show()"
29+
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_iris\nfrom sklearn.feature_selection import SelectPercentile, chi2\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import SVC\n\n\n# #############################################################################\n# Import some data to play with\nX, y = load_iris(return_X_y=True)\n# Add non-informative features\nnp.random.seed(0)\nX = np.hstack((X, 2 * np.random.random((X.shape[0], 36))))\n\n# #############################################################################\n# Create a feature-selection transform, a scaler and an instance of SVM that we\n# combine together to have an full-blown estimator\nclf = Pipeline([('anova', SelectPercentile(chi2)),\n ('scaler', StandardScaler()),\n ('svc', SVC(gamma=\"auto\"))])\n\n# #############################################################################\n# Plot the cross-validation score as a function of percentile of features\nscore_means = list()\nscore_stds = list()\npercentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)\n\nfor percentile in percentiles:\n clf.set_params(anova__percentile=percentile)\n this_scores = cross_val_score(clf, X, y, cv=5)\n score_means.append(this_scores.mean())\n score_stds.append(this_scores.std())\n\nplt.errorbar(percentiles, score_means, np.array(score_stds))\nplt.title(\n 'Performance of the SVM-Anova varying the percentile of features selected')\nplt.xticks(np.linspace(0, 100, 11, endpoint=True))\nplt.xlabel('Percentile')\nplt.ylabel('Accuracy Score')\nplt.axis('tight')\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_svm_anova.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,35 @@
44
=================================================
55
66
This example shows how to perform univariate feature selection before running a
7-
SVC (support vector classifier) to improve the classification scores.
7+
SVC (support vector classifier) to improve the classification scores. We use
8+
the iris dataset (4 features) and add 36 non-informative features. We can find
9+
that our model achieves its best performance when we select around 10% of the features.
810
"""
911
print(__doc__)
1012

1113
import numpy as np
1214
import matplotlib.pyplot as plt
13-
from sklearn.datasets import load_digits
15+
from sklearn.datasets import load_iris
1416
from sklearn.feature_selection import SelectPercentile, chi2
1517
from sklearn.model_selection import cross_val_score
1618
from sklearn.pipeline import Pipeline
19+
from sklearn.preprocessing import StandardScaler
1720
from sklearn.svm import SVC
1821

1922

2023
# #############################################################################
2124
# Import some data to play with
22-
X, y = load_digits(return_X_y=True)
23-
# Throw away data, to be in the curse of dimension settings
24-
X = X[:200]
25-
y = y[:200]
26-
n_samples = len(y)
27-
X = X.reshape((n_samples, -1))
28-
# add 200 non-informative features
29-
X = np.hstack((X, 2 * np.random.random((n_samples, 200))))
25+
X, y = load_iris(return_X_y=True)
26+
# Add non-informative features
27+
np.random.seed(0)
28+
X = np.hstack((X, 2 * np.random.random((X.shape[0], 36))))
3029

3130
# #############################################################################
32-
# Create a feature-selection transform and an instance of SVM that we
31+
# Create a feature-selection transform, a scaler and an instance of SVM that we
3332
# combine together to have a full-blown estimator
34-
35-
transform = SelectPercentile(chi2)
36-
37-
clf = Pipeline([('anova', transform), ('svc', SVC(gamma="auto"))])
33+
clf = Pipeline([('anova', SelectPercentile(chi2)),
34+
('scaler', StandardScaler()),
35+
('svc', SVC(gamma="auto"))])
3836

3937
# #############################################################################
4038
# Plot the cross-validation score as a function of percentile of features
@@ -44,17 +42,15 @@
4442

4543
for percentile in percentiles:
4644
clf.set_params(anova__percentile=percentile)
47-
# Compute cross-validation score using 1 CPU
48-
this_scores = cross_val_score(clf, X, y, cv=5, n_jobs=1)
45+
this_scores = cross_val_score(clf, X, y, cv=5)
4946
score_means.append(this_scores.mean())
5047
score_stds.append(this_scores.std())
5148

5249
plt.errorbar(percentiles, score_means, np.array(score_stds))
53-
5450
plt.title(
5551
'Performance of the SVM-Anova varying the percentile of features selected')
52+
plt.xticks(np.linspace(0, 100, 11, endpoint=True))
5653
plt.xlabel('Percentile')
57-
plt.ylabel('Prediction rate')
58-
54+
plt.ylabel('Accuracy Score')
5955
plt.axis('tight')
6056
plt.show()

dev/_downloads/scikit-learn-docs.pdf

7.98 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes
-187 Bytes
-187 Bytes
133 Bytes
133 Bytes

0 commit comments

Comments
 (0)