
Commit 058afb9

Pushing the docs to dev/ for branch: main, commit 57e3523888326e0223ae49c12fce3ce4d3b2d906
1 parent c8ce22b commit 058afb9

1,219 files changed: +5047 −4524 lines


dev/_downloads/010337852815f8103ac6cca38a812b3c/plot_roc_crossval.py

Lines changed: 12 additions & 9 deletions
@@ -30,16 +30,12 @@
 
 """
 
+# %%
+# Data IO and generation
+# ----------------------
 import numpy as np
-import matplotlib.pyplot as plt
-
-from sklearn import svm, datasets
-from sklearn.metrics import auc
-from sklearn.metrics import RocCurveDisplay
-from sklearn.model_selection import StratifiedKFold
 
-# #############################################################################
-# Data IO and generation
+from sklearn import datasets
 
 # Import some data to play with
 iris = datasets.load_iris()
@@ -52,8 +48,15 @@
 random_state = np.random.RandomState(0)
 X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]
 
-# #############################################################################
+# %%
 # Classification and ROC analysis
+# -------------------------------
+import matplotlib.pyplot as plt
+
+from sklearn import svm
+from sklearn.metrics import auc
+from sklearn.metrics import RocCurveDisplay
+from sklearn.model_selection import StratifiedKFold
 
 # Run classifier with cross-validation and plot ROC curves
 cv = StratifiedKFold(n_splits=6)

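The change above sets the pattern repeated across the files below: the old "# ###..." banner comments become sphinx-gallery notebook cells, where a "# %%" marker plus an underlined title opens a section and each import moves into the first section that uses it. A condensed sketch of the resulting script layout, assembled from the added lines of this diff:

# %%
# Data IO and generation
# ----------------------
import numpy as np

from sklearn import datasets

iris = datasets.load_iris()

# %%
# Classification and ROC analysis
# -------------------------------
import matplotlib.pyplot as plt

from sklearn import svm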
dev/_downloads/055e8313e28f2f3b5fd508054dfe5fe0/plot_roc_crossval.ipynb

Lines changed: 26 additions & 1 deletion
@@ -18,6 +18,31 @@
 "\n# Receiver Operating Characteristic (ROC) with cross validation\n\nExample of Receiver Operating Characteristic (ROC) metric to evaluate\nclassifier output quality using cross-validation.\n\nROC curves typically feature true positive rate on the Y axis, and false\npositive rate on the X axis. This means that the top left corner of the plot is\nthe \"ideal\" point - a false positive rate of zero, and a true positive rate of\none. This is not very realistic, but it does mean that a larger area under the\ncurve (AUC) is usually better.\n\nThe \"steepness\" of ROC curves is also important, since it is ideal to maximize\nthe true positive rate while minimizing the false positive rate.\n\nThis example shows the ROC response of different datasets, created from K-fold\ncross-validation. Taking all of these curves, it is possible to calculate the\nmean area under curve, and see the variance of the curve when the\ntraining set is split into different subsets. This roughly shows how the\nclassifier output is affected by changes in the training data, and how\ndifferent the splits generated by K-fold cross-validation are from one another.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>See also :func:`sklearn.metrics.roc_auc_score`,\n :func:`sklearn.model_selection.cross_val_score`,\n `sphx_glr_auto_examples_model_selection_plot_roc.py`,</p></div>\n"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Data IO and generation\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\n\nfrom sklearn import datasets\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nX, y = X[y != 2], y[y != 2]\nn_samples, n_features = X.shape\n\n# Add noisy features\nrandom_state = np.random.RandomState(0)\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Classification and ROC analysis\n\n"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -26,7 +51,7 @@
 },
 "outputs": [],
 "source": [
-"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import auc\nfrom sklearn.metrics import RocCurveDisplay\nfrom sklearn.model_selection import StratifiedKFold\n\n# #############################################################################\n# Data IO and generation\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nX, y = X[y != 2], y[y != 2]\nn_samples, n_features = X.shape\n\n# Add noisy features\nrandom_state = np.random.RandomState(0)\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# #############################################################################\n# Classification and ROC analysis\n\n# Run classifier with cross-validation and plot ROC curves\ncv = StratifiedKFold(n_splits=6)\nclassifier = svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n\ntprs = []\naucs = []\nmean_fpr = np.linspace(0, 1, 100)\n\nfig, ax = plt.subplots()\nfor i, (train, test) in enumerate(cv.split(X, y)):\n classifier.fit(X[train], y[train])\n viz = RocCurveDisplay.from_estimator(\n classifier,\n X[test],\n y[test],\n name=\"ROC fold {}\".format(i),\n alpha=0.3,\n lw=1,\n ax=ax,\n )\n interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n interp_tpr[0] = 0.0\n tprs.append(interp_tpr)\n aucs.append(viz.roc_auc)\n\nax.plot([0, 1], [0, 1], linestyle=\"--\", lw=2, color=\"r\", label=\"Chance\", alpha=0.8)\n\nmean_tpr = np.mean(tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(aucs)\nax.plot(\n mean_fpr,\n mean_tpr,\n color=\"b\",\n label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n lw=2,\n alpha=0.8,\n)\n\nstd_tpr = np.std(tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nax.fill_between(\n mean_fpr,\n tprs_lower,\n tprs_upper,\n color=\"grey\",\n alpha=0.2,\n label=r\"$\\pm$ 1 std. dev.\",\n)\n\nax.set(\n xlim=[-0.05, 1.05],\n ylim=[-0.05, 1.05],\n title=\"Receiver operating characteristic example\",\n)\nax.legend(loc=\"lower right\")\nplt.show()"
+"import matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.metrics import auc\nfrom sklearn.metrics import RocCurveDisplay\nfrom sklearn.model_selection import StratifiedKFold\n\n# Run classifier with cross-validation and plot ROC curves\ncv = StratifiedKFold(n_splits=6)\nclassifier = svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n\ntprs = []\naucs = []\nmean_fpr = np.linspace(0, 1, 100)\n\nfig, ax = plt.subplots()\nfor i, (train, test) in enumerate(cv.split(X, y)):\n classifier.fit(X[train], y[train])\n viz = RocCurveDisplay.from_estimator(\n classifier,\n X[test],\n y[test],\n name=\"ROC fold {}\".format(i),\n alpha=0.3,\n lw=1,\n ax=ax,\n )\n interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n interp_tpr[0] = 0.0\n tprs.append(interp_tpr)\n aucs.append(viz.roc_auc)\n\nax.plot([0, 1], [0, 1], linestyle=\"--\", lw=2, color=\"r\", label=\"Chance\", alpha=0.8)\n\nmean_tpr = np.mean(tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(aucs)\nax.plot(\n mean_fpr,\n mean_tpr,\n color=\"b\",\n label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n lw=2,\n alpha=0.8,\n)\n\nstd_tpr = np.std(tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nax.fill_between(\n mean_fpr,\n tprs_lower,\n tprs_upper,\n color=\"grey\",\n alpha=0.2,\n label=r\"$\\pm$ 1 std. dev.\",\n)\n\nax.set(\n xlim=[-0.05, 1.05],\n ylim=[-0.05, 1.05],\n title=\"Receiver operating characteristic example\",\n)\nax.legend(loc=\"lower right\")\nplt.show()"
 ]
 }
 ],

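The .ipynb diff above is the generated counterpart of the .py change: sphinx-gallery turns each "# %%" block of the script into a markdown cell (the rST title demoted to a "##" heading) followed by a code cell. A hand-written Python sketch of that correspondence, using the first cell added above; the mapping shown is illustrative, not sphinx-gallery's actual conversion code:

# The "# %%" block in the .py example script...
py_block = """\
# %%
# Data IO and generation
# ----------------------
import numpy as np
"""

# ...appears in the generated .ipynb as a markdown cell plus a code cell.
ipynb_cells = [
    {"cell_type": "markdown", "metadata": {},
     "source": ["## Data IO and generation\n\n"]},
    {"cell_type": "code", "execution_count": None,
     "metadata": {"collapsed": False}, "outputs": [],
     "source": ["import numpy as np"]},
]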
dev/_downloads/41973816d3932cd07b75d8825fd2c13d/plot_svm_anova.py

Lines changed: 20 additions & 13 deletions
@@ -10,26 +10,29 @@
 
 """
 
+# %%
+# Load some data to play with
+# ---------------------------
 import numpy as np
-import matplotlib.pyplot as plt
 from sklearn.datasets import load_iris
-from sklearn.feature_selection import SelectPercentile, chi2
-from sklearn.model_selection import cross_val_score
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import StandardScaler
-from sklearn.svm import SVC
-
 
-# #############################################################################
-# Import some data to play with
 X, y = load_iris(return_X_y=True)
+
 # Add non-informative features
-np.random.seed(0)
-X = np.hstack((X, 2 * np.random.random((X.shape[0], 36))))
+rng = np.random.RandomState(0)
+X = np.hstack((X, 2 * rng.random((X.shape[0], 36))))
+
+# %%
+# Create the pipeline
+# -------------------
+from sklearn.pipeline import Pipeline
+from sklearn.feature_selection import SelectPercentile, chi2
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
 
-# #############################################################################
 # Create a feature-selection transform, a scaler and an instance of SVM that we
 # combine together to have a full-blown estimator
+
 clf = Pipeline(
     [
         ("anova", SelectPercentile(chi2)),
@@ -38,8 +41,12 @@
     ]
 )
 
-# #############################################################################
+# %%
 # Plot the cross-validation score as a function of percentile of features
+# -----------------------------------------------------------------------
+import matplotlib.pyplot as plt
+from sklearn.model_selection import cross_val_score
+
 score_means = list()
 score_stds = list()
 percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)

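Beyond the cell markers, this diff makes one behavioral cleanup: the global np.random.seed(0) becomes a local rng = np.random.RandomState(0). A minimal sketch of the difference; only the rng lines come from the diff, the rest is illustrative:

import numpy as np

# Global seeding mutates process-wide state: any intervening call that
# draws from np.random shifts what the next line returns.
np.random.seed(0)
a = np.random.random(3)

# A dedicated generator owns its stream, so results stay reproducible
# no matter what other code does with np.random.
rng = np.random.RandomState(0)
b = rng.random(3)

assert np.allclose(a, b)  # same seed, same draws from the same stream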
dev/_downloads/6f4a6a0d8063b616c4aa4db2865de57c/plot_svm_anova.ipynb

Lines changed: 44 additions & 1 deletion
@@ -18,6 +18,49 @@
 "\n# SVM-Anova: SVM with univariate feature selection\n\nThis example shows how to perform univariate feature selection before running a\nSVC (support vector classifier) to improve the classification scores. We use\nthe iris dataset (4 features) and add 36 non-informative features. We can find\nthat our model achieves best performance when we select around 10% of features.\n"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Load some data to play with\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\nfrom sklearn.datasets import load_iris\n\nX, y = load_iris(return_X_y=True)\n\n# Add non-informative features\nrng = np.random.RandomState(0)\nX = np.hstack((X, 2 * rng.random((X.shape[0], 36))))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create the pipeline\n\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"from sklearn.pipeline import Pipeline\nfrom sklearn.feature_selection import SelectPercentile, chi2\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import SVC\n\n# Create a feature-selection transform, a scaler and an instance of SVM that we\n# combine together to have a full-blown estimator\n\nclf = Pipeline(\n    [\n        (\"anova\", SelectPercentile(chi2)),\n        (\"scaler\", StandardScaler()),\n        (\"svc\", SVC(gamma=\"auto\")),\n    ]\n)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Plot the cross-validation score as a function of percentile of features\n\n"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -26,7 +69,7 @@
 },
 "outputs": [],
 "source": [
-"import numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_iris\nfrom sklearn.feature_selection import SelectPercentile, chi2\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import SVC\n\n\n# #############################################################################\n# Import some data to play with\nX, y = load_iris(return_X_y=True)\n# Add non-informative features\nnp.random.seed(0)\nX = np.hstack((X, 2 * np.random.random((X.shape[0], 36))))\n\n# #############################################################################\n# Create a feature-selection transform, a scaler and an instance of SVM that we\n# combine together to have a full-blown estimator\nclf = Pipeline(\n    [\n        (\"anova\", SelectPercentile(chi2)),\n        (\"scaler\", StandardScaler()),\n        (\"svc\", SVC(gamma=\"auto\")),\n    ]\n)\n\n# #############################################################################\n# Plot the cross-validation score as a function of percentile of features\nscore_means = list()\nscore_stds = list()\npercentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)\n\nfor percentile in percentiles:\n    clf.set_params(anova__percentile=percentile)\n    this_scores = cross_val_score(clf, X, y)\n    score_means.append(this_scores.mean())\n    score_stds.append(this_scores.std())\n\nplt.errorbar(percentiles, score_means, np.array(score_stds))\nplt.title(\"Performance of the SVM-Anova varying the percentile of features selected\")\nplt.xticks(np.linspace(0, 100, 11, endpoint=True))\nplt.xlabel(\"Percentile\")\nplt.ylabel(\"Accuracy Score\")\nplt.axis(\"tight\")\nplt.show()"
+"import matplotlib.pyplot as plt\nfrom sklearn.model_selection import cross_val_score\n\nscore_means = list()\nscore_stds = list()\npercentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)\n\nfor percentile in percentiles:\n    clf.set_params(anova__percentile=percentile)\n    this_scores = cross_val_score(clf, X, y)\n    score_means.append(this_scores.mean())\n    score_stds.append(this_scores.std())\n\nplt.errorbar(percentiles, score_means, np.array(score_stds))\nplt.title(\"Performance of the SVM-Anova varying the percentile of features selected\")\nplt.xticks(np.linspace(0, 100, 11, endpoint=True))\nplt.xlabel(\"Percentile\")\nplt.ylabel(\"Accuracy Score\")\nplt.axis(\"tight\")\nplt.show()"
 ]
 }
 ],

dev/_downloads/788b8c55a85f84a55e652c6048c4f623/plot_bayesian_ridge.py

Lines changed: 29 additions & 15 deletions
@@ -24,14 +24,12 @@
 
 """
 
+# %%
+# Generate simulated data with Gaussian weights
+# ---------------------------------------------
 import numpy as np
-import matplotlib.pyplot as plt
 from scipy import stats
 
-from sklearn.linear_model import BayesianRidge, LinearRegression
-
-# #############################################################################
-# Generating simulated data with Gaussian weights
 np.random.seed(0)
 n_samples, n_features = 100, 100
 X = np.random.randn(n_samples, n_features)  # Create Gaussian data
@@ -40,6 +38,7 @@
 w = np.zeros(n_features)
 # Only keep 10 weights of interest
 relevant_features = np.random.randint(0, n_features, 10)
+
 for i in relevant_features:
     w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))
 # Create noise with a precision alpha of 50.
@@ -48,17 +47,22 @@
 # Create the target
 y = np.dot(X, w) + noise
 
-# #############################################################################
+# %%
 # Fit the Bayesian Ridge Regression and an OLS for comparison
+# -----------------------------------------------------------
+from sklearn.linear_model import BayesianRidge, LinearRegression
+
 clf = BayesianRidge(compute_score=True)
 clf.fit(X, y)
 
 ols = LinearRegression()
 ols.fit(X, y)
 
-# #############################################################################
-# Plot true weights, estimated weights, histogram of the weights, and
-# predictions with standard deviations
+# %%
+# Plot true weights and estimated weights
+# ---------------------------------------
+import matplotlib.pyplot as plt
+
 lw = 2
 plt.figure(figsize=(6, 5))
 plt.title("Weights of the model")
@@ -67,7 +71,11 @@
 plt.plot(ols.coef_, color="navy", linestyle="--", label="OLS estimate")
 plt.xlabel("Features")
 plt.ylabel("Values of the weights")
-plt.legend(loc="best", prop=dict(size=12))
+_ = plt.legend(loc="best", prop=dict(size=12))
+
+# %%
+# Plot histogram of the weights
+# -----------------------------
 
 plt.figure(figsize=(6, 5))
 plt.title("Histogram of the weights")
@@ -80,16 +88,23 @@
 )
 plt.ylabel("Features")
 plt.xlabel("Values of the weights")
-plt.legend(loc="upper left")
+_ = plt.legend(loc="upper left")
+
+# %%
+# Plot marginal log-likelihood
+# ----------------------------
 
 plt.figure(figsize=(6, 5))
 plt.title("Marginal log-likelihood")
 plt.plot(clf.scores_, color="navy", linewidth=lw)
 plt.ylabel("Score")
-plt.xlabel("Iterations")
+_ = plt.xlabel("Iterations")
+
+# %%
+# Plot some predictions for polynomial regression with standard deviations
+# ------------------------------------------------------------------------
 
 
-# Plotting some predictions for polynomial regression
 def f(x, noise_amount):
     y = np.sqrt(x) * np.sin(x)
     noise = np.random.normal(0, 1, len(x))
@@ -117,5 +132,4 @@ def f(x, noise_amount):
 plt.plot(X_plot, y_plot, color="gold", linewidth=lw, label="Ground Truth")
 plt.ylabel("Output y")
 plt.xlabel("Feature X")
-plt.legend(loc="lower left")
-plt.show()
+_ = plt.legend(loc="lower left")

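A recurring edit in this file binds plotting calls to a throwaway name, e.g. _ = plt.legend(...). When the last expression of a notebook cell is a bare call, the rendered output echoes the return value's repr (such as a matplotlib Legend object); assigning it to "_" suppresses that. A small illustrative sketch of the idiom (the figure here is made up for the example):

import matplotlib.pyplot as plt

plt.figure(figsize=(6, 5))
plt.plot([0, 1], [1, 0], label="example")

# As the final expression of a cell, a bare plt.legend(...) would print
# something like <matplotlib.legend.Legend object at 0x...> into the
# rendered page; binding the result to "_" discards that repr.
_ = plt.legend(loc="lower left")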