
Commit dc9ecaf

Pushing the docs to dev/ for branch: master, commit f1fa2ed15f4aa7f4f34e630abf3b16569e4b3527
1 parent e494c03 commit dc9ecaf

File tree

1,028 files changed: +3,102 / -3,125 lines

Two binary files changed (-181 Bytes and 0 Bytes); binary contents not shown.

dev/_downloads/plot_compare_reduction.ipynb

Lines changed: 1 addition & 12 deletions
@@ -33,7 +33,7 @@
 },
 "outputs": [],
 "source": [
-"# Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre\n\nfrom __future__ import print_function, division\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n ('reduce_dim', PCA()),\n ('classify', LinearSVC())\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n {\n 'reduce_dim': [PCA(iterated_power=7), NMF()],\n 'reduce_dim__n_components': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n {\n 'reduce_dim': [SelectKBest(chi2)],\n 'reduce_dim__k': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, cv=3, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.cv_results_['mean_test_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')"
+"# Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre\n\nfrom __future__ import print_function, division\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n ('reduce_dim', PCA()),\n ('classify', LinearSVC())\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n {\n 'reduce_dim': [PCA(iterated_power=7), NMF()],\n 'reduce_dim__n_components': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n {\n 'reduce_dim': [SelectKBest(chi2)],\n 'reduce_dim__k': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, cv=3, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.cv_results_['mean_test_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')\n\nplt.show()"
 ]
 },
 {
@@ -60,17 +60,6 @@
 "source": [
 "The ``PCA`` fitting is only computed at the evaluation of the first\nconfiguration of the ``C`` parameter of the ``LinearSVC`` classifier. The\nother configurations of ``C`` will trigger the loading of the cached ``PCA``\nestimator data, leading to save processing time. Therefore, the use of\ncaching the pipeline using ``memory`` is highly beneficial when fitting\na transformer is costly.\n\n"
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {
-"collapsed": false
-},
-"outputs": [],
-"source": [
-"plt.show()"
-]
 }
 ],
 "metadata": {

dev/_downloads/plot_compare_reduction.py

Lines changed: 2 additions & 2 deletions
@@ -87,6 +87,8 @@
 plt.ylim((0, 1))
 plt.legend(loc='upper left')
 
+plt.show()
+
 ###############################################################################
 # Caching transformers within a ``Pipeline``
 ###############################################################################
@@ -126,5 +128,3 @@
 # estimator data, leading to save processing time. Therefore, the use of
 # caching the pipeline using ``memory`` is highly beneficial when fitting
 # a transformer is costly.
-
-plt.show()
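
The comments in the hunk above describe the caching behavior, but the caching code itself lies outside this diff. A rough sketch of the pattern those comments refer to, assuming scikit-learn >= 0.19 (where ``Pipeline`` accepts a ``memory`` argument); the temporary cache directory is illustrative:

# Sketch (not part of this commit) of caching a Pipeline's transformers.
from tempfile import mkdtemp
from shutil import rmtree

from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

cachedir = mkdtemp()  # on-disk cache for fitted transformers
cached_pipe = Pipeline([('reduce_dim', PCA(n_components=8)),
                        ('classify', LinearSVC())],
                       memory=cachedir)

digits = load_digits()
# The first fit computes the PCA and stores it in the cache.
cached_pipe.fit(digits.data, digits.target)
# Refitting with a different C reuses the cached PCA instead of refitting it.
cached_pipe.set_params(classify__C=10).fit(digits.data, digits.target)

rmtree(cachedir)  # remove the cache directory when done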

dev/_downloads/scikit-learn-docs.pdf

-14.1 KB; binary file not shown.

Other binary files changed (names not shown): 530 Bytes, 530 Bytes, -60 Bytes, -60 Bytes, 366 Bytes.
