
Commit 5a645de

Pushing the docs to dev/ for branch: main, commit 700afd8f6fcdcaca104aa67a5fdf2d8cf720d790
1 parent 328f9ed

File tree

1,327 files changed (+7241 / -7238 lines)


dev/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 76945c2274df0cd94f7b8d2af037eb3c
+config: e6acfa7fb88be6e394ebbd45ff8893ff
 tags: 645f666f9bcd5a90fca523b33c5a78b7
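
Note: the config value above is the hash Sphinx compares between builds; per the file's own comment, a missing or changed hash forces a full rebuild. A minimal, illustrative sketch of that kind of check follows; the hashing scheme and the sample config dict are assumptions for illustration, not Sphinx's actual implementation.

import hashlib

def config_hash(config: dict) -> str:
    # Hash a stable rendering of the build configuration.
    # Illustrative only; Sphinx's real scheme may differ.
    rendered = repr(sorted(config.items())).encode()
    return hashlib.md5(rendered).hexdigest()

stored = "76945c2274df0cd94f7b8d2af037eb3c"  # hash from the previous .buildinfo
current = config_hash({"html_theme": "pydata_sphinx_theme"})  # hypothetical config
if current != stored:
    print("configuration changed: full rebuild needed")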

dev/_downloads/898b30acf62919d918478efbe526195f/plot_digits_pipe.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 },
 "outputs": [],
 "source": [
-"# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# Define a Standard Scaler to normalize inputs\nscaler = StandardScaler()\n\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[(\"scaler\", scaler), (\"pca\", pca), (\"logistic\", logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n# Parameters of pipelines can be set using '__' separated parameter names:\nparam_grid = {\n \"pca__n_components\": [5, 15, 30, 45, 60],\n \"logistic__C\": np.logspace(-4, 4, 4),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=2)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(\n np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, \"+\", linewidth=2\n)\nax0.set_ylabel(\"PCA explained variance ratio\")\n\nax0.axvline(\n search.best_estimator_.named_steps[\"pca\"].n_components,\n linestyle=\":\",\n label=\"n_components chosen\",\n)\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = \"param_pca__n_components\"\nbest_clfs = results.groupby(components_col)[\n [components_col, \"mean_test_score\", \"std_test_score\"]\n].apply(lambda g: g.nlargest(1, \"mean_test_score\"))\n\nbest_clfs.plot(\n x=components_col, y=\"mean_test_score\", yerr=\"std_test_score\", legend=False, ax=ax1\n)\nax1.set_ylabel(\"Classification accuracy (val)\")\nax1.set_xlabel(\"n_components\")\n\nplt.xlim(-1, 70)\n\nplt.tight_layout()\nplt.show()"
+"# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# Define a Standard Scaler to normalize inputs\nscaler = StandardScaler()\n\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[(\"scaler\", scaler), (\"pca\", pca), (\"logistic\", logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n# Parameters of pipelines can be set using '__' separated parameter names:\nparam_grid = {\n \"pca__n_components\": [5, 15, 30, 45, 60],\n \"logistic__C\": np.logspace(-4, 4, 4),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=2)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(\n np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, \"+\", linewidth=2\n)\nax0.set_ylabel(\"PCA explained variance ratio\")\n\nax0.axvline(\n search.best_estimator_.named_steps[\"pca\"].n_components,\n linestyle=\":\",\n label=\"n_components chosen\",\n)\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = \"param_pca__n_components\"\nbest_clfs = results.groupby(components_col)[\n [components_col, \"mean_test_score\", \"std_test_score\"]\n].apply(lambda g: g.nlargest(1, \"mean_test_score\"))\nax1.errorbar(\n best_clfs[components_col],\n best_clfs[\"mean_test_score\"],\n yerr=best_clfs[\"std_test_score\"],\n)\nax1.set_ylabel(\"Classification accuracy (val)\")\nax1.set_xlabel(\"n_components\")\n\nplt.xlim(-1, 70)\n\nplt.tight_layout()\nplt.show()"
 ]
 }
 ],
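
Note: the notebook source above relies on scikit-learn's '__'-separated parameter naming (e.g. "pca__n_components") to tune steps inside a Pipeline via GridSearchCV. A minimal runnable sketch of that convention, using toy random data instead of the digits set; the data and grid values here are made up for illustration:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 10))    # toy feature matrix
y = rng.integers(0, 2, size=100)  # toy binary labels

pipe = Pipeline([("pca", PCA()), ("logistic", LogisticRegression(max_iter=1000))])
# "<step name>__<parameter>" routes each grid value to the matching step.
grid = {"pca__n_components": [2, 5], "logistic__C": [0.1, 1.0]}
search = GridSearchCV(pipe, grid, cv=3).fit(X, y)
print(search.best_params_)  # keys use the same '__' naming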

dev/_downloads/ba89a400c6902f85c10199ff86947d23/plot_digits_pipe.py

Lines changed: 4 additions & 3 deletions
@@ -68,9 +68,10 @@
 best_clfs = results.groupby(components_col)[
     [components_col, "mean_test_score", "std_test_score"]
 ].apply(lambda g: g.nlargest(1, "mean_test_score"))
-
-best_clfs.plot(
-    x=components_col, y="mean_test_score", yerr="std_test_score", legend=False, ax=ax1
+ax1.errorbar(
+    best_clfs[components_col],
+    best_clfs["mean_test_score"],
+    yerr=best_clfs["std_test_score"],
 )
 ax1.set_ylabel("Classification accuracy (val)")
 ax1.set_xlabel("n_components")
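
Note: this hunk is the substantive change in the commit. The pandas DataFrame.plot(yerr=..., ax=ax1) call is replaced by a direct matplotlib Axes.errorbar call, which takes the columns explicitly instead of going through pandas' plotting backend. A self-contained sketch of the new call, with made-up scores standing in for the example's GridSearchCV cv_results_:

import matplotlib.pyplot as plt
import pandas as pd

# Hypothetical best-score-per-n_components table; in the example these
# values come from search.cv_results_.
best_clfs = pd.DataFrame({
    "param_pca__n_components": [5, 15, 30, 45, 60],
    "mean_test_score": [0.81, 0.90, 0.92, 0.92, 0.91],
    "std_test_score": [0.02, 0.01, 0.01, 0.02, 0.02],
})

fig, ax1 = plt.subplots()
ax1.errorbar(
    best_clfs["param_pca__n_components"],
    best_clfs["mean_test_score"],
    yerr=best_clfs["std_test_score"],
)
ax1.set_xlabel("n_components")
ax1.set_ylabel("Classification accuracy (val)")
plt.show()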

dev/_downloads/scikit-learn-docs.zip

8.3 KB (binary file not shown)
