Skip to content

Commit 37e9c8a

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 08924c340ee61d4ae3db54daefdd68d11b470d0a
1 parent 2b43ee3 commit 37e9c8a

File tree

1,046 files changed

+3271
-3262
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,046 files changed

+3271
-3262
lines changed
153 Bytes
Binary file not shown.
150 Bytes
Binary file not shown.

dev/_downloads/plot_scaling_importance.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"from __future__ import print_function\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn import metrics\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nfrom sklearn.pipeline import make_pipeline\nprint(__doc__)\n\n# Code source: Tyler Lanigan <[email protected]>\n# Sebastian Raschka <[email protected]>\n\n# License: BSD 3 clause\n\nRANDOM_STATE = 42\nFIG_SIZE = (10, 7)\n\n\nfeatures, target = load_wine(return_X_y=True)\n\n# Make a train/test split using 30% test size\nX_train, X_test, y_train, y_test = train_test_split(features, target,\n test_size=0.30,\n random_state=RANDOM_STATE)\n\n# Fit to data and predict using pipelined GNB and PCA.\nunscaled_clf = make_pipeline(PCA(n_components=2), GaussianNB())\nunscaled_clf.fit(X_train, y_train)\npred_test = unscaled_clf.predict(X_test)\n\n# Fit to data and predict using pipelined scaling, GNB and PCA.\nstd_clf = make_pipeline(StandardScaler(), PCA(n_components=2), GaussianNB())\nstd_clf.fit(X_train, y_train)\npred_test_std = std_clf.predict(X_test)\n\n# Show prediction accuracies in scaled and unscaled data.\nprint('\\nPrediction accuracy for the normal test dataset with PCA')\nprint('{:.2%}\\n'.format(metrics.accuracy_score(y_test, pred_test)))\n\nprint('\\nPrediction accuracy for the standardized test dataset with PCA')\nprint('{:.2%}\\n'.format(metrics.accuracy_score(y_test, pred_test_std)))\n\n# Extract PCA from pipeline\npca = unscaled_clf.named_steps['pca']\npca_std = std_clf.named_steps['pca']\n\n# Show first principal components\nprint('\\nPC 1 without scaling:\\n', pca.components_[0])\nprint('\\nPC 1 with scaling:\\n', pca_std.components_[0])\n\n# Scale and use PCA on X_train data for visualization.\nscaler = std_clf.named_steps['standardscaler']\nX_train_std = pca_std.transform(scaler.transform(X_train))\n\n# visualize standardized vs. untouched dataset with PCA performed\nfig, (ax1, ax2) = plt.subplots(ncols=2, figsize=FIG_SIZE)\n\n\nfor l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):\n ax1.scatter(X_train[y_train == l, 0], X_train[y_train == l, 1],\n color=c,\n label='class %s' % l,\n alpha=0.5,\n marker=m\n )\n\nfor l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):\n ax2.scatter(X_train_std[y_train == l, 0], X_train_std[y_train == l, 1],\n color=c,\n label='class %s' % l,\n alpha=0.5,\n marker=m\n )\n\nax1.set_title('Training dataset after PCA')\nax2.set_title('Standardized training dataset after PCA')\n\nfor ax in (ax1, ax2):\n ax.set_xlabel('1st principal component')\n ax.set_ylabel('2nd principal component')\n ax.legend(loc='upper right')\n ax.grid()\n\nplt.tight_layout()\n\nplt.show()"
29+
"from __future__ import print_function\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn import metrics\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nfrom sklearn.pipeline import make_pipeline\nprint(__doc__)\n\n# Code source: Tyler Lanigan <[email protected]>\n# Sebastian Raschka <[email protected]>\n\n# License: BSD 3 clause\n\nRANDOM_STATE = 42\nFIG_SIZE = (10, 7)\n\n\nfeatures, target = load_wine(return_X_y=True)\n\n# Make a train/test split using 30% test size\nX_train, X_test, y_train, y_test = train_test_split(features, target,\n test_size=0.30,\n random_state=RANDOM_STATE)\n\n# Fit to data and predict using pipelined GNB and PCA.\nunscaled_clf = make_pipeline(PCA(n_components=2), GaussianNB())\nunscaled_clf.fit(X_train, y_train)\npred_test = unscaled_clf.predict(X_test)\n\n# Fit to data and predict using pipelined scaling, GNB and PCA.\nstd_clf = make_pipeline(StandardScaler(), PCA(n_components=2), GaussianNB())\nstd_clf.fit(X_train, y_train)\npred_test_std = std_clf.predict(X_test)\n\n# Show prediction accuracies in scaled and unscaled data.\nprint('\\nPrediction accuracy for the normal test dataset with PCA')\nprint('{:.2%}\\n'.format(metrics.accuracy_score(y_test, pred_test)))\n\nprint('\\nPrediction accuracy for the standardized test dataset with PCA')\nprint('{:.2%}\\n'.format(metrics.accuracy_score(y_test, pred_test_std)))\n\n# Extract PCA from pipeline\npca = unscaled_clf.named_steps['pca']\npca_std = std_clf.named_steps['pca']\n\n# Show first principal components\nprint('\\nPC 1 without scaling:\\n', pca.components_[0])\nprint('\\nPC 1 with scaling:\\n', pca_std.components_[0])\n\n# Use PCA without and with scale on X_train data for visualization.\nX_train_transformed = pca.transform(X_train)\nscaler = std_clf.named_steps['standardscaler']\nX_train_std_transformed = pca_std.transform(scaler.transform(X_train))\n\n# visualize standardized vs. untouched dataset with PCA performed\nfig, (ax1, ax2) = plt.subplots(ncols=2, figsize=FIG_SIZE)\n\n\nfor l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):\n ax1.scatter(X_train_transformed[y_train == l, 0],\n X_train_transformed[y_train == l, 1],\n color=c,\n label='class %s' % l,\n alpha=0.5,\n marker=m\n )\n\nfor l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):\n ax2.scatter(X_train_std_transformed[y_train == l, 0],\n X_train_std_transformed[y_train == l, 1],\n color=c,\n label='class %s' % l,\n alpha=0.5,\n marker=m\n )\n\nax1.set_title('Training dataset after PCA')\nax2.set_title('Standardized training dataset after PCA')\n\nfor ax in (ax1, ax2):\n ax.set_xlabel('1st principal component')\n ax.set_ylabel('2nd principal component')\n ax.legend(loc='upper right')\n ax.grid()\n\nplt.tight_layout()\n\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_scaling_importance.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,24 +93,27 @@
9393
print('\nPC 1 without scaling:\n', pca.components_[0])
9494
print('\nPC 1 with scaling:\n', pca_std.components_[0])
9595

96-
# Scale and use PCA on X_train data for visualization.
96+
# Use PCA without and with scale on X_train data for visualization.
97+
X_train_transformed = pca.transform(X_train)
9798
scaler = std_clf.named_steps['standardscaler']
98-
X_train_std = pca_std.transform(scaler.transform(X_train))
99+
X_train_std_transformed = pca_std.transform(scaler.transform(X_train))
99100

100101
# visualize standardized vs. untouched dataset with PCA performed
101102
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=FIG_SIZE)
102103

103104

104105
for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
105-
ax1.scatter(X_train[y_train == l, 0], X_train[y_train == l, 1],
106+
ax1.scatter(X_train_transformed[y_train == l, 0],
107+
X_train_transformed[y_train == l, 1],
106108
color=c,
107109
label='class %s' % l,
108110
alpha=0.5,
109111
marker=m
110112
)
111113

112114
for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
113-
ax2.scatter(X_train_std[y_train == l, 0], X_train_std[y_train == l, 1],
115+
ax2.scatter(X_train_std_transformed[y_train == l, 0],
116+
X_train_std_transformed[y_train == l, 1],
114117
color=c,
115118
label='class %s' % l,
116119
alpha=0.5,

dev/_downloads/scikit-learn-docs.pdf

12.8 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes
-755 Bytes

0 commit comments

Comments
 (0)