Skip to content

Commit c99bb68

Browse files
committed
Pushing the docs to dev/ for branch: master, commit d1f58339b54063e12c880e66e08d91abc0c1912a
1 parent 1f20fb9 commit c99bb68

File tree

1,091 files changed

+2978
-2978
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,091 files changed

+2978
-2978
lines changed
-8 Bytes
Binary file not shown.
-8 Bytes
Binary file not shown.

dev/_downloads/plot_digits_pipe.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\nlogistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,\n max_iter=10000, tol=1e-5, random_state=0)\npca = PCA()\npipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n 'pca__n_components': [5, 20, 30, 40, 50, 64],\n 'logistic__alpha': np.logspace(-4, 4, 5),\n}\nsearch = GridSearchCV(pipe, param_grid)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(pca.explained_variance_ratio_, linewidth=2)\nax0.set_ylabel('PCA explained variance')\n\nax0.axvline(search.best_estimator_.named_steps['pca'].n_components,\n linestyle=':', label='n_components chosen')\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = 'param_pca__n_components'\nbest_clfs = results.groupby(components_col).apply(\n lambda g: g.nlargest(1, 'mean_test_score'))\n\nbest_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',\n legend=False, ax=ax1)\nax1.set_ylabel('Classification accuracy (val)')\nax1.set_xlabel('n_components')\n\nplt.tight_layout()\nplt.show()"
29+
"print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n 'pca__n_components': [5, 20, 30, 40, 50, 64],\n 'logistic__C': np.logspace(-4, 4, 5),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=-1)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(pca.explained_variance_ratio_, linewidth=2)\nax0.set_ylabel('PCA explained variance')\n\nax0.axvline(search.best_estimator_.named_steps['pca'].n_components,\n linestyle=':', label='n_components chosen')\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = 'param_pca__n_components'\nbest_clfs = results.groupby(components_col).apply(\n lambda g: g.nlargest(1, 'mean_test_score'))\n\nbest_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',\n legend=False, ax=ax1)\nax1.set_ylabel('Classification accuracy (val)')\nax1.set_xlabel('n_components')\n\nplt.tight_layout()\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_digits_pipe.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,26 +26,26 @@
2626

2727
from sklearn import datasets
2828
from sklearn.decomposition import PCA
29-
from sklearn.linear_model import SGDClassifier
29+
from sklearn.linear_model import LogisticRegression
3030
from sklearn.pipeline import Pipeline
3131
from sklearn.model_selection import GridSearchCV
3232

3333

3434
# Define a pipeline to search for the best combination of PCA truncation
3535
# and classifier regularization.
36-
logistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,
37-
max_iter=10000, tol=1e-5, random_state=0)
3836
pca = PCA()
37+
# set the tolerance to a large value to make the example faster
38+
logistic = LogisticRegression(max_iter=10000, tol=0.1)
3939
pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])
4040

4141
X_digits, y_digits = datasets.load_digits(return_X_y=True)
4242

4343
# Parameters of pipelines can be set using ‘__’ separated parameter names:
4444
param_grid = {
4545
'pca__n_components': [5, 20, 30, 40, 50, 64],
46-
'logistic__alpha': np.logspace(-4, 4, 5),
46+
'logistic__C': np.logspace(-4, 4, 5),
4747
}
48-
search = GridSearchCV(pipe, param_grid)
48+
search = GridSearchCV(pipe, param_grid, n_jobs=-1)
4949
search.fit(X_digits, y_digits)
5050
print("Best parameter (CV score=%0.3f):" % search.best_score_)
5151
print(search.best_params_)

dev/_downloads/scikit-learn-docs.pdf

-26.5 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes
-583 Bytes
-583 Bytes
-619 Bytes
-619 Bytes

0 commit comments

Comments
 (0)