solversa
diff --git a/‎dev/_downloads/auto_examples_jupyter.zip
-8 Bytes b/‎dev/_downloads/auto_examples_jupyter.zip
-8 Bytes
diff --git a/‎dev/_downloads/auto_examples_python.zip
-8 Bytes b/‎dev/_downloads/auto_examples_python.zip
-8 Bytes
diff --git a/‎dev/_downloads/plot_digits_pipe.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/plot_digits_pipe.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/plot_digits_pipe.py
Lines changed: 5 additions & 5 deletions b/‎dev/_downloads/plot_digits_pipe.py
Lines changed: 5 additions & 5 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.pdf
-26.5 KB b/‎dev/_downloads/scikit-learn-docs.pdf
-26.5 KB
diff --git a/‎dev/_images/iris.png
0 Bytes b/‎dev/_images/iris.png
0 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-583 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-583 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-583 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-583 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-619 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-619 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
-619 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
-619 Bytes
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\nlogistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,\n                         max_iter=10000, tol=1e-5, random_state=0)\npca = PCA()\npipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n    'pca__n_components': [5, 20, 30, 40, 50, 64],\n    'logistic__alpha': np.logspace(-4, 4, 5),\n}\nsearch = GridSearchCV(pipe, param_grid)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(pca.explained_variance_ratio_, linewidth=2)\nax0.set_ylabel('PCA explained variance')\n\nax0.axvline(search.best_estimator_.named_steps['pca'].n_components,\n            linestyle=':', label='n_components chosen')\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = 'param_pca__n_components'\nbest_clfs = results.groupby(components_col).apply(\n    lambda g: g.nlargest(1, 'mean_test_score'))\n\nbest_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',\n               legend=False, ax=ax1)\nax1.set_ylabel('Classification accuracy (val)')\nax1.set_xlabel('n_components')\n\nplt.tight_layout()\nplt.show()"
+        "print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n    'pca__n_components': [5, 20, 30, 40, 50, 64],\n    'logistic__C': np.logspace(-4, 4, 5),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=-1)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(pca.explained_variance_ratio_, linewidth=2)\nax0.set_ylabel('PCA explained variance')\n\nax0.axvline(search.best_estimator_.named_steps['pca'].n_components,\n            linestyle=':', label='n_components chosen')\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = 'param_pca__n_components'\nbest_clfs = results.groupby(components_col).apply(\n    lambda g: g.nlargest(1, 'mean_test_score'))\n\nbest_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',\n               legend=False, ax=ax1)\nax1.set_ylabel('Classification accuracy (val)')\nax1.set_xlabel('n_components')\n\nplt.tight_layout()\nplt.show()"
       ]
     }
   ],
 
@@ -26,26 +26,26 @@
 
 from sklearn import datasets
 from sklearn.decomposition import PCA
-from sklearn.linear_model import SGDClassifier
+from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import Pipeline
 from sklearn.model_selection import GridSearchCV
 
 
 # Define a pipeline to search for the best combination of PCA truncation
 # and classifier regularization.
-logistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,
-                         max_iter=10000, tol=1e-5, random_state=0)
 pca = PCA()
+# set the tolerance to a large value to make the example faster
+logistic = LogisticRegression(max_iter=10000, tol=0.1)
 pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])
 
 X_digits, y_digits = datasets.load_digits(return_X_y=True)
 
 # Parameters of pipelines can be set using ‘__’ separated parameter names:
 param_grid = {
     'pca__n_components': [5, 20, 30, 40, 50, 64],
-    'logistic__alpha': np.logspace(-4, 4, 5),
+    'logistic__C': np.logspace(-4, 4, 5),
 }
-search = GridSearchCV(pipe, param_grid)
+search = GridSearchCV(pipe, param_grid, n_jobs=-1)
 search.fit(X_digits, y_digits)
 print("Best parameter (CV score=%0.3f):" % search.best_score_)
 print(search.best_params_)
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\nlogistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,\n max_iter=10000, tol=1e-5, random_state=0)\npca = PCA()\npipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n 'pca__n_components': [5, 20, 30, 40, 50, 64],\n 'logistic__alpha': np.logspace(-4, 4, 5),\n}\nsearch = GridSearchCV(pipe, param_grid)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(pca.explained_variance_ratio_, linewidth=2)\nax0.set_ylabel('PCA explained variance')\n\nax0.axvline(search.best_estimator_.named_steps['pca'].n_components,\n linestyle=':', label='n_components chosen')\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = 'param_pca__n_components'\nbest_clfs = results.groupby(components_col).apply(\n lambda g: g.nlargest(1, 'mean_test_score'))\n\nbest_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',\n legend=False, ax=ax1)\nax1.set_ylabel('Classification accuracy (val)')\nax1.set_xlabel('n_components')\n\nplt.tight_layout()\nplt.show()"
	`29`	+ "print(__doc__)\n\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n 'pca__n_components': [5, 20, 30, 40, 50, 64],\n 'logistic__C': np.logspace(-4, 4, 5),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=-1)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(pca.explained_variance_ratio_, linewidth=2)\nax0.set_ylabel('PCA explained variance')\n\nax0.axvline(search.best_estimator_.named_steps['pca'].n_components,\n linestyle=':', label='n_components chosen')\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = 'param_pca__n_components'\nbest_clfs = results.groupby(components_col).apply(\n lambda g: g.nlargest(1, 'mean_test_score'))\n\nbest_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',\n legend=False, ax=ax1)\nax1.set_ylabel('Classification accuracy (val)')\nax1.set_xlabel('n_components')\n\nplt.tight_layout()\nplt.show()"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`