Skip to content

Commit 804ea85

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 5e193ec655d2b7634e35733e2fa0e5bef5e63d45
1 parent 67b24f6 commit 804ea85

File tree

1,222 files changed

+5183
-4154
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,222 files changed

+5183
-4154
lines changed
Binary file not shown.
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n===============================================================\nFactor Analysis (with rotation) to visualize patterns\n===============================================================\n\nInvestigating the Iris dataset, we see that sepal length, petal\nlength and petal width are highly correlated. Sepal width is\nless redundant. Matrix decomposition techniques can uncover\nthese latent patterns. Applying rotations to the resulting\ncomponents does not inherently improve the predictive value\nof the derived latent space, but can help visualise their\nstructure; here, for example, the varimax rotation, which\nis found by maximizing the squared variances of the weights,\nfinds a structure where the second component only loads\npositively on sepal width.\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"# Authors: Jona Sassenhagen\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.decomposition import FactorAnalysis, PCA\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.datasets import load_iris\n\nprint(__doc__)"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"Load Iris data\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"data = load_iris()\nX = StandardScaler().fit_transform(data[\"data\"])\nfeature_names = data[\"feature_names\"]"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"Plot correlation matrix of Iris features\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"ax = plt.axes()\n\nim = ax.imshow(np.corrcoef(X.T), cmap=\"RdBu_r\", vmin=-1, vmax=1)\n\nax.set_xticks([0, 1, 2, 3])\nax.set_xticklabels(list(feature_names), rotation=90)\nax.set_yticks([0, 1, 2, 3])\nax.set_yticklabels(list(feature_names))\n\nplt.colorbar(im).ax.set_ylabel(\"$r$\", rotation=0)\nax.set_title(\"Iris feature correlation matrix\")\nplt.tight_layout()"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"Run factor analysis with Varimax rotation\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"n_comps = 2\n\nmethods = [('PCA', PCA()),\n           ('Unrotated FA', FactorAnalysis()),\n           ('Varimax FA', FactorAnalysis(rotation='varimax'))]\nfig, axes = plt.subplots(ncols=len(methods), figsize=(10, 8))\n\nfor ax, (method, fa) in zip(axes, methods):\n    fa.set_params(n_components=n_comps)\n    fa.fit(X)\n\n    components = fa.components_.T\n    print(\"\\n\\n %s :\\n\" % method)\n    print(components)\n\n    vmax = np.abs(components).max()\n    ax.imshow(components, cmap=\"RdBu_r\", vmax=vmax, vmin=-vmax)\n    ax.set_yticks(np.arange(len(feature_names)))\n    # Axes.is_first_col() is no longer available in current Matplotlib;\n    # the panels form a single row, so the first axes is the left-most one.\n    if ax is axes[0]:\n        ax.set_yticklabels(feature_names)\n    else:\n        ax.set_yticklabels([])\n    ax.set_title(str(method))\n    ax.set_xticks([0, 1])\n    ax.set_xticklabels([\"Comp. 1\", \"Comp. 2\"])\nfig.suptitle(\"Factors\")\nplt.tight_layout()\nplt.show()"
84+
]
85+
}
86+
],
87+
"metadata": {
88+
"kernelspec": {
89+
"display_name": "Python 3",
90+
"language": "python",
91+
"name": "python3"
92+
},
93+
"language_info": {
94+
"codemirror_mode": {
95+
"name": "ipython",
96+
"version": 3
97+
},
98+
"file_extension": ".py",
99+
"mimetype": "text/x-python",
100+
"name": "python",
101+
"nbconvert_exporter": "python",
102+
"pygments_lexer": "ipython3",
103+
"version": "3.8.3"
104+
}
105+
},
106+
"nbformat": 4,
107+
"nbformat_minor": 0
108+
}
Binary file not shown.
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""
2+
===============================================================
3+
Factor Analysis (with rotation) to visualize patterns
4+
===============================================================
5+
6+
Investigating the Iris dataset, we see that sepal length, petal
7+
length and petal width are highly correlated. Sepal width is
8+
less redundant. Matrix decomposition techniques can uncover
9+
these latent patterns. Applying rotations to the resulting
10+
components does not inherently improve the predictive value
11+
of the derived latent space, but can help visualise their
12+
structure; here, for example, the varimax rotation, which
13+
is found by maximizing the squared variances of the weights,
14+
finds a structure where the second component only loads
15+
positively on sepal width.
16+
"""
17+
18+
# Authors: Jona Sassenhagen
19+
# License: BSD 3 clause
20+
21+
import matplotlib.pyplot as plt
22+
import numpy as np
23+
24+
from sklearn.decomposition import FactorAnalysis, PCA
25+
from sklearn.preprocessing import StandardScaler
26+
from sklearn.datasets import load_iris
27+
28+
print(__doc__)
29+
30+
# %%
# Load Iris data
#
# The raw measurements are passed through a StandardScaler before any
# decomposition is fitted; the feature names are kept for axis labels.
data = load_iris()
feature_names = data["feature_names"]
scaler = StandardScaler()
X = scaler.fit_transform(data["data"])
35+
36+
# %%
# Plot correlation matrix of Iris features
ax = plt.axes()

# np.corrcoef treats each row as a variable, so X is transposed to put the
# features on the rows. The colour scale is pinned to [-1, 1] so that zero
# correlation sits at the midpoint of the diverging colormap.
im = ax.imshow(np.corrcoef(X.T), cmap="RdBu_r", vmin=-1, vmax=1)

# Derive the tick positions from the feature list rather than hard-coding
# four of them, so labels and ticks can never get out of step.
tick_positions = np.arange(len(feature_names))
ax.set_xticks(tick_positions)
ax.set_xticklabels(list(feature_names), rotation=90)
ax.set_yticks(tick_positions)
ax.set_yticklabels(list(feature_names))

plt.colorbar(im).ax.set_ylabel("$r$", rotation=0)
ax.set_title("Iris feature correlation matrix")
plt.tight_layout()
50+
51+
# %%
# Run factor analysis with Varimax rotation
n_comps = 2

methods = [('PCA', PCA()),
           ('Unrotated FA', FactorAnalysis()),
           ('Varimax FA', FactorAnalysis(rotation='varimax'))]
fig, axes = plt.subplots(ncols=len(methods), figsize=(10, 8))

for ax, (method, fa) in zip(axes, methods):
    # Every estimator is fitted with the same number of components so the
    # panels are directly comparable.
    fa.set_params(n_components=n_comps)
    fa.fit(X)

    # Transpose so that rows are features and columns are components,
    # matching the y (features) / x (components) layout of the heatmap.
    components = fa.components_.T
    print("\n\n %s :\n" % method)
    print(components)

    # Symmetric colour limits keep a zero loading at the colormap midpoint.
    vmax = np.abs(components).max()
    ax.imshow(components, cmap="RdBu_r", vmax=vmax, vmin=-vmax)
    ax.set_yticks(np.arange(len(feature_names)))
    # Only the left-most panel carries the feature labels. The panels form a
    # single row, so the first axes is the first column; this avoids
    # Axes.is_first_col(), which is no longer available in current
    # Matplotlib releases.
    if ax is axes[0]:
        ax.set_yticklabels(feature_names)
    else:
        ax.set_yticklabels([])
    ax.set_title(str(method))
    # Tick positions and labels follow n_comps instead of assuming two.
    ax.set_xticks(np.arange(n_comps))
    ax.set_xticklabels(["Comp. %d" % (i + 1) for i in range(n_comps)])
fig.suptitle("Factors")
plt.tight_layout()
plt.show()

dev/_downloads/scikit-learn-docs.pdf

70.6 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes

0 commit comments

Comments
 (0)