scikit-learn
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
141 Bytes b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
141 Bytes
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
145 Bytes b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
145 Bytes
diff --git a/‎dev/_downloads/898b30acf62919d918478efbe526195f/plot_digits_pipe.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/898b30acf62919d918478efbe526195f/plot_digits_pipe.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/ba89a400c6902f85c10199ff86947d23/plot_digits_pipe.py
Lines changed: 6 additions & 4 deletions b/‎dev/_downloads/ba89a400c6902f85c10199ff86947d23/plot_digits_pipe.py
Lines changed: 6 additions & 4 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.zip
14.7 KB b/‎dev/_downloads/scikit-learn-docs.zip
14.7 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
188 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
188 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-17 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-17 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-184 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-184 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
27 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
27 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
7 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
7 Bytes
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[(\"pca\", pca), (\"logistic\", logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n    \"pca__n_components\": [5, 15, 30, 45, 64],\n    \"logistic__C\": np.logspace(-4, 4, 4),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=2)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(\n    np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, \"+\", linewidth=2\n)\nax0.set_ylabel(\"PCA explained variance ratio\")\n\nax0.axvline(\n    search.best_estimator_.named_steps[\"pca\"].n_components,\n    linestyle=\":\",\n    label=\"n_components chosen\",\n)\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = \"param_pca__n_components\"\nbest_clfs = results.groupby(components_col).apply(\n    lambda g: g.nlargest(1, \"mean_test_score\")\n)\n\nbest_clfs.plot(\n    x=components_col, y=\"mean_test_score\", yerr=\"std_test_score\", legend=False, ax=ax1\n)\nax1.set_ylabel(\"Classification accuracy (val)\")\nax1.set_xlabel(\"n_components\")\n\nplt.xlim(-1, 70)\n\nplt.tight_layout()\nplt.show()"
+        "# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.preprocessing import StandardScaler\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# Define a Standard Scaler to normalize inputs\nscaler = StandardScaler()\n\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[(\"scaler\", scaler), (\"pca\", pca), (\"logistic\", logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n    \"pca__n_components\": [5, 15, 30, 45, 60],\n    \"logistic__C\": np.logspace(-4, 4, 4),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=2)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(\n    np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, \"+\", linewidth=2\n)\nax0.set_ylabel(\"PCA explained variance ratio\")\n\nax0.axvline(\n    search.best_estimator_.named_steps[\"pca\"].n_components,\n    linestyle=\":\",\n    label=\"n_components chosen\",\n)\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = \"param_pca__n_components\"\nbest_clfs = results.groupby(components_col).apply(\n    lambda g: g.nlargest(1, \"mean_test_score\")\n)\n\nbest_clfs.plot(\n    x=components_col, y=\"mean_test_score\", yerr=\"std_test_score\", legend=False, ax=ax1\n)\nax1.set_ylabel(\"Classification accuracy (val)\")\nax1.set_xlabel(\"n_components\")\n\nplt.xlim(-1, 70)\n\nplt.tight_layout()\nplt.show()"
       ]
     }
   ],
 
@@ -24,20 +24,22 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import Pipeline
 from sklearn.model_selection import GridSearchCV
-
+from sklearn.preprocessing import StandardScaler
 
 # Define a pipeline to search for the best combination of PCA truncation
 # and classifier regularization.
 pca = PCA()
+# Define a Standard Scaler to normalize inputs
+scaler = StandardScaler()
+
 # set the tolerance to a large value to make the example faster
 logistic = LogisticRegression(max_iter=10000, tol=0.1)
-pipe = Pipeline(steps=[("pca", pca), ("logistic", logistic)])
+pipe = Pipeline(steps=[("scaler", scaler), ("pca", pca), ("logistic", logistic)])
 
 X_digits, y_digits = datasets.load_digits(return_X_y=True)
-
 # Parameters of pipelines can be set using ‘__’ separated parameter names:
 param_grid = {
-    "pca__n_components": [5, 15, 30, 45, 64],
+    "pca__n_components": [5, 15, 30, 45, 60],
     "logistic__C": np.logspace(-4, 4, 4),
 }
 search = GridSearchCV(pipe, param_grid, n_jobs=2)
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\n\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[(\"pca\", pca), (\"logistic\", logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n \"pca__n_components\": [5, 15, 30, 45, 64],\n \"logistic__C\": np.logspace(-4, 4, 4),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=2)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(\n np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, \"+\", linewidth=2\n)\nax0.set_ylabel(\"PCA explained variance ratio\")\n\nax0.axvline(\n search.best_estimator_.named_steps[\"pca\"].n_components,\n linestyle=\":\",\n label=\"n_components chosen\",\n)\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = \"param_pca__n_components\"\nbest_clfs = results.groupby(components_col).apply(\n lambda g: g.nlargest(1, \"mean_test_score\")\n)\n\nbest_clfs.plot(\n x=components_col, y=\"mean_test_score\", yerr=\"std_test_score\", legend=False, ax=ax1\n)\nax1.set_ylabel(\"Classification accuracy (val)\")\nax1.set_xlabel(\"n_components\")\n\nplt.xlim(-1, 70)\n\nplt.tight_layout()\nplt.show()"
	`29`	+ "# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.preprocessing import StandardScaler\n\n# Define a pipeline to search for the best combination of PCA truncation\n# and classifier regularization.\npca = PCA()\n# Define a Standard Scaler to normalize inputs\nscaler = StandardScaler()\n\n# set the tolerance to a large value to make the example faster\nlogistic = LogisticRegression(max_iter=10000, tol=0.1)\npipe = Pipeline(steps=[(\"scaler\", scaler), (\"pca\", pca), (\"logistic\", logistic)])\n\nX_digits, y_digits = datasets.load_digits(return_X_y=True)\n# Parameters of pipelines can be set using \u2018__\u2019 separated parameter names:\nparam_grid = {\n \"pca__n_components\": [5, 15, 30, 45, 60],\n \"logistic__C\": np.logspace(-4, 4, 4),\n}\nsearch = GridSearchCV(pipe, param_grid, n_jobs=2)\nsearch.fit(X_digits, y_digits)\nprint(\"Best parameter (CV score=%0.3f):\" % search.best_score_)\nprint(search.best_params_)\n\n# Plot the PCA spectrum\npca.fit(X_digits)\n\nfig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))\nax0.plot(\n np.arange(1, pca.n_components_ + 1), pca.explained_variance_ratio_, \"+\", linewidth=2\n)\nax0.set_ylabel(\"PCA explained variance ratio\")\n\nax0.axvline(\n search.best_estimator_.named_steps[\"pca\"].n_components,\n linestyle=\":\",\n label=\"n_components chosen\",\n)\nax0.legend(prop=dict(size=12))\n\n# For each number of components, find the best classifier results\nresults = pd.DataFrame(search.cv_results_)\ncomponents_col = \"param_pca__n_components\"\nbest_clfs = results.groupby(components_col).apply(\n lambda g: g.nlargest(1, \"mean_test_score\")\n)\n\nbest_clfs.plot(\n x=components_col, y=\"mean_test_score\", yerr=\"std_test_score\", legend=False, ax=ax1\n)\nax1.set_ylabel(\"Classification accuracy (val)\")\nax1.set_xlabel(\"n_components\")\n\nplt.xlim(-1, 70)\n\nplt.tight_layout()\nplt.show()"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`