Skip to content

Commit 3d2243c

Browse files
committed
Pushing the docs to dev/ for branch: main, commit f5eb00ce903b11af74c1afc1f3115a5e840c668a
1 parent f253718 commit 3d2243c

File tree

1,225 files changed

+4644
-4491
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,225 files changed

+4644
-4491
lines changed
Binary file not shown.

dev/_downloads/27d42183163dfa32c3c487b21701b537/plot_cv_diabetes.ipynb

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,49 @@
1818
"\n# Cross-validation on diabetes Dataset Exercise\n\nA tutorial exercise which uses cross-validation with linear models.\n\nThis exercise is used in the `cv_estimators_tut` part of the\n`model_selection_tut` section of the `stat_learn_tut_index`.\n"
1919
]
2020
},
21+
{
22+
"cell_type": "markdown",
23+
"metadata": {},
24+
"source": [
25+
"## Load dataset and apply GridSearchCV\n\n"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": null,
31+
"metadata": {
32+
"collapsed": false
33+
},
34+
"outputs": [],
35+
"source": [
36+
"import matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn import datasets\nfrom sklearn.linear_model import Lasso\nfrom sklearn.model_selection import GridSearchCV\n\nX, y = datasets.load_diabetes(return_X_y=True)\nX = X[:150]\ny = y[:150]\n\nlasso = Lasso(random_state=0, max_iter=10000)\nalphas = np.logspace(-4, -0.5, 30)\n\ntuned_parameters = [{\"alpha\": alphas}]\nn_folds = 5\n\nclf = GridSearchCV(lasso, tuned_parameters, cv=n_folds, refit=False)\nclf.fit(X, y)\nscores = clf.cv_results_[\"mean_test_score\"]\nscores_std = clf.cv_results_[\"std_test_score\"]"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"metadata": {},
42+
"source": [
43+
"## Plot error lines showing +/- std. errors of the scores\n\n"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {
50+
"collapsed": false
51+
},
52+
"outputs": [],
53+
"source": [
54+
"plt.figure().set_size_inches(8, 6)\nplt.semilogx(alphas, scores)\n\nstd_error = scores_std / np.sqrt(n_folds)\n\nplt.semilogx(alphas, scores + std_error, \"b--\")\nplt.semilogx(alphas, scores - std_error, \"b--\")\n\n# alpha=0.2 controls the translucency of the fill color\nplt.fill_between(alphas, scores + std_error, scores - std_error, alpha=0.2)\n\nplt.ylabel(\"CV score +/- std error\")\nplt.xlabel(\"alpha\")\nplt.axhline(np.max(scores), linestyle=\"--\", color=\".5\")\nplt.xlim([alphas[0], alphas[-1]])"
55+
]
56+
},
57+
{
58+
"cell_type": "markdown",
59+
"metadata": {},
60+
"source": [
61+
"## Bonus: how much can you trust the selection of alpha?\n\n"
62+
]
63+
},
2164
{
2265
"cell_type": "code",
2366
"execution_count": null,
@@ -26,7 +69,7 @@
2669
},
2770
"outputs": [],
2871
"source": [
29-
"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.linear_model import LassoCV\nfrom sklearn.linear_model import Lasso\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import GridSearchCV\n\nX, y = datasets.load_diabetes(return_X_y=True)\nX = X[:150]\ny = y[:150]\n\nlasso = Lasso(random_state=0, max_iter=10000)\nalphas = np.logspace(-4, -0.5, 30)\n\ntuned_parameters = [{\"alpha\": alphas}]\nn_folds = 5\n\nclf = GridSearchCV(lasso, tuned_parameters, cv=n_folds, refit=False)\nclf.fit(X, y)\nscores = clf.cv_results_[\"mean_test_score\"]\nscores_std = clf.cv_results_[\"std_test_score\"]\nplt.figure().set_size_inches(8, 6)\nplt.semilogx(alphas, scores)\n\n# plot error lines showing +/- std. errors of the scores\nstd_error = scores_std / np.sqrt(n_folds)\n\nplt.semilogx(alphas, scores + std_error, \"b--\")\nplt.semilogx(alphas, scores - std_error, \"b--\")\n\n# alpha=0.2 controls the translucency of the fill color\nplt.fill_between(alphas, scores + std_error, scores - std_error, alpha=0.2)\n\nplt.ylabel(\"CV score +/- std error\")\nplt.xlabel(\"alpha\")\nplt.axhline(np.max(scores), linestyle=\"--\", color=\".5\")\nplt.xlim([alphas[0], alphas[-1]])\n\n# #############################################################################\n# Bonus: how much can you trust the selection of alpha?\n\n# To answer this question we use the LassoCV object that sets its alpha\n# parameter automatically from the data by internal cross-validation (i.e. it\n# performs cross-validation on the training data it receives).\n# We use external cross-validation to see how much the automatically obtained\n# alphas differ across different cross-validation folds.\nlasso_cv = LassoCV(alphas=alphas, random_state=0, max_iter=10000)\nk_fold = KFold(3)\n\nprint(\"Answer to the bonus question:\", \"how much can you trust the selection of alpha?\")\nprint()\nprint(\"Alpha parameters maximising the generalization score on different\")\nprint(\"subsets of the data:\")\nfor k, (train, test) in enumerate(k_fold.split(X, y)):\n lasso_cv.fit(X[train], y[train])\n print(\n \"[fold {0}] alpha: {1:.5f}, score: {2:.5f}\".format(\n k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])\n )\n )\nprint()\nprint(\"Answer: Not very much since we obtained different alphas for different\")\nprint(\"subsets of the data and moreover, the scores for these alphas differ\")\nprint(\"quite substantially.\")\n\nplt.show()"
72+
"# To answer this question we use the LassoCV object that sets its alpha\n# parameter automatically from the data by internal cross-validation (i.e. it\n# performs cross-validation on the training data it receives).\n# We use external cross-validation to see how much the automatically obtained\n# alphas differ across different cross-validation folds.\n\nfrom sklearn.linear_model import LassoCV\nfrom sklearn.model_selection import KFold\n\nlasso_cv = LassoCV(alphas=alphas, random_state=0, max_iter=10000)\nk_fold = KFold(3)\n\nprint(\"Answer to the bonus question:\", \"how much can you trust the selection of alpha?\")\nprint()\nprint(\"Alpha parameters maximising the generalization score on different\")\nprint(\"subsets of the data:\")\nfor k, (train, test) in enumerate(k_fold.split(X, y)):\n lasso_cv.fit(X[train], y[train])\n print(\n \"[fold {0}] alpha: {1:.5f}, score: {2:.5f}\".format(\n k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])\n )\n )\nprint()\nprint(\"Answer: Not very much since we obtained different alphas for different\")\nprint(\"subsets of the data and moreover, the scores for these alphas differ\")\nprint(\"quite substantially.\")\n\nplt.show()"
3073
]
3174
}
3275
],

dev/_downloads/428a26d23bc55d1c898a0e4361695ad0/plot_cv_diabetes.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,14 @@
1010
1111
"""
1212

13-
import numpy as np
13+
# %%
14+
# Load dataset and apply GridSearchCV
15+
# -----------------------------------
1416
import matplotlib.pyplot as plt
17+
import numpy as np
1518

1619
from sklearn import datasets
17-
from sklearn.linear_model import LassoCV
1820
from sklearn.linear_model import Lasso
19-
from sklearn.model_selection import KFold
2021
from sklearn.model_selection import GridSearchCV
2122

2223
X, y = datasets.load_diabetes(return_X_y=True)
@@ -33,10 +34,14 @@
3334
clf.fit(X, y)
3435
scores = clf.cv_results_["mean_test_score"]
3536
scores_std = clf.cv_results_["std_test_score"]
37+
38+
# %%
39+
# Plot error lines showing +/- std. errors of the scores
40+
# ------------------------------------------------------
41+
3642
plt.figure().set_size_inches(8, 6)
3743
plt.semilogx(alphas, scores)
3844

39-
# plot error lines showing +/- std. errors of the scores
4045
std_error = scores_std / np.sqrt(n_folds)
4146

4247
plt.semilogx(alphas, scores + std_error, "b--")
@@ -50,14 +55,19 @@
5055
plt.axhline(np.max(scores), linestyle="--", color=".5")
5156
plt.xlim([alphas[0], alphas[-1]])
5257

53-
# #############################################################################
58+
# %%
5459
# Bonus: how much can you trust the selection of alpha?
60+
# -----------------------------------------------------
5561

5662
# To answer this question we use the LassoCV object that sets its alpha
5763
# parameter automatically from the data by internal cross-validation (i.e. it
5864
# performs cross-validation on the training data it receives).
5965
# We use external cross-validation to see how much the automatically obtained
6066
# alphas differ across different cross-validation folds.
67+
68+
from sklearn.linear_model import LassoCV
69+
from sklearn.model_selection import KFold
70+
6171
lasso_cv = LassoCV(alphas=alphas, random_state=0, max_iter=10000)
6272
k_fold = KFold(3)
6373

Binary file not shown.

dev/_downloads/scikit-learn-docs.zip

-8.16 KB
Binary file not shown.
-208 Bytes
-189 Bytes
-99 Bytes
-16 Bytes
-71 Bytes

0 commit comments

Comments
 (0)