Skip to content

Commit 0eaace6

Browse files
committed
Pushing the docs to dev/ for branch: main, commit cb73a1e2fedd6341034e24cb5ad277e9b3c33ef0
1 parent d4c109a commit 0eaace6

File tree

1,222 files changed

+4397
-4290
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,222 files changed

+4397
-4290
lines changed
Binary file not shown.

dev/_downloads/0c988b0c2bea0040fec13fe1055db95c/plot_pca_vs_fa_model_selection.ipynb

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,43 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Authors: Alexandre Gramfort\n# Denis A. Engemann\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg\n\nfrom sklearn.decomposition import PCA, FactorAnalysis\nfrom sklearn.covariance import ShrunkCovariance, LedoitWolf\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.model_selection import GridSearchCV\n\n# #############################################################################\n# Create the data\n\nn_samples, n_features, rank = 500, 25, 5\nsigma = 1.0\nrng = np.random.RandomState(42)\nU, _, _ = linalg.svd(rng.randn(n_features, n_features))\nX = np.dot(rng.randn(n_samples, rank), U[:, :rank].T)\n\n# Adding homoscedastic noise\nX_homo = X + sigma * rng.randn(n_samples, n_features)\n\n# Adding heteroscedastic noise\nsigmas = sigma * rng.rand(n_features) + sigma / 2.0\nX_hetero = X + rng.randn(n_samples, n_features) * sigmas\n\n# #############################################################################\n# Fit the models\n\nn_components = np.arange(0, n_features, 5) # options for n_components\n\n\ndef compute_scores(X):\n pca = PCA(svd_solver=\"full\")\n fa = FactorAnalysis()\n\n pca_scores, fa_scores = [], []\n for n in n_components:\n pca.n_components = n\n fa.n_components = n\n pca_scores.append(np.mean(cross_val_score(pca, X)))\n fa_scores.append(np.mean(cross_val_score(fa, X)))\n\n return pca_scores, fa_scores\n\n\ndef shrunk_cov_score(X):\n shrinkages = np.logspace(-2, 0, 30)\n cv = GridSearchCV(ShrunkCovariance(), {\"shrinkage\": shrinkages})\n return np.mean(cross_val_score(cv.fit(X).best_estimator_, X))\n\n\ndef lw_score(X):\n return np.mean(cross_val_score(LedoitWolf(), X))\n\n\nfor X, title in [(X_homo, \"Homoscedastic Noise\"), (X_hetero, \"Heteroscedastic Noise\")]:\n pca_scores, fa_scores = compute_scores(X)\n n_components_pca = n_components[np.argmax(pca_scores)]\n n_components_fa = n_components[np.argmax(fa_scores)]\n\n pca = PCA(svd_solver=\"full\", 
n_components=\"mle\")\n pca.fit(X)\n n_components_pca_mle = pca.n_components_\n\n print(\"best n_components by PCA CV = %d\" % n_components_pca)\n print(\"best n_components by FactorAnalysis CV = %d\" % n_components_fa)\n print(\"best n_components by PCA MLE = %d\" % n_components_pca_mle)\n\n plt.figure()\n plt.plot(n_components, pca_scores, \"b\", label=\"PCA scores\")\n plt.plot(n_components, fa_scores, \"r\", label=\"FA scores\")\n plt.axvline(rank, color=\"g\", label=\"TRUTH: %d\" % rank, linestyle=\"-\")\n plt.axvline(\n n_components_pca,\n color=\"b\",\n label=\"PCA CV: %d\" % n_components_pca,\n linestyle=\"--\",\n )\n plt.axvline(\n n_components_fa,\n color=\"r\",\n label=\"FactorAnalysis CV: %d\" % n_components_fa,\n linestyle=\"--\",\n )\n plt.axvline(\n n_components_pca_mle,\n color=\"k\",\n label=\"PCA MLE: %d\" % n_components_pca_mle,\n linestyle=\"--\",\n )\n\n # compare with other covariance estimators\n plt.axhline(\n shrunk_cov_score(X),\n color=\"violet\",\n label=\"Shrunk Covariance MLE\",\n linestyle=\"-.\",\n )\n plt.axhline(\n lw_score(X),\n color=\"orange\",\n label=\"LedoitWolf MLE\" % n_components_pca_mle,\n linestyle=\"-.\",\n )\n\n plt.xlabel(\"nb of components\")\n plt.ylabel(\"CV scores\")\n plt.legend(loc=\"lower right\")\n plt.title(title)\n\nplt.show()"
29+
"# Authors: Alexandre Gramfort\n# Denis A. Engemann\n# License: BSD 3 clause"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"## Create the data\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"import numpy as np\n\nfrom scipy import linalg\n\n# Problem dimensions: number of samples, number of observed features, and the\n# rank of the noise-free signal.\nn_samples = 500\nn_features = 25\nrank = 5\nsigma = 1.0\nrng = np.random.RandomState(42)\n\n# Noise-free low-rank data: random coefficients projected onto the first `rank`\n# left singular vectors of a random square matrix.\nU, _, _ = linalg.svd(rng.randn(n_features, n_features))\nX = rng.randn(n_samples, rank).dot(U[:, :rank].T)\n\n# Homoscedastic noise: one shared scale (sigma) for every feature\nX_homo = X + sigma * rng.randn(n_samples, n_features)\n\n# Heteroscedastic noise: a separate scale per feature\nsigmas = sigma / 2.0 + sigma * rng.rand(n_features)\nX_hetero = X + sigmas * rng.randn(n_samples, n_features)"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"## Fit the models\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"import matplotlib.pyplot as plt\n\nfrom sklearn.decomposition import PCA, FactorAnalysis\nfrom sklearn.covariance import ShrunkCovariance, LedoitWolf\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.model_selection import GridSearchCV\n\nn_components = np.arange(0, n_features, 5)  # options for n_components\n\n\ndef compute_scores(X):\n    \"\"\"Return (pca_scores, fa_scores): for each candidate in ``n_components``,\n    the mean ``cross_val_score`` of PCA and of FactorAnalysis fitted on ``X``.\n    \"\"\"\n    pca = PCA(svd_solver=\"full\")\n    fa = FactorAnalysis()\n\n    pca_scores, fa_scores = [], []\n    for n in n_components:\n        # The same estimator objects are reused; only n_components changes.\n        pca.n_components = n\n        fa.n_components = n\n        pca_scores.append(np.mean(cross_val_score(pca, X)))\n        fa_scores.append(np.mean(cross_val_score(fa, X)))\n\n    return pca_scores, fa_scores\n\n\ndef shrunk_cov_score(X):\n    \"\"\"Return the mean ``cross_val_score`` on ``X`` of the ShrunkCovariance\n    estimator whose shrinkage is selected by GridSearchCV.\n    \"\"\"\n    shrinkages = np.logspace(-2, 0, 30)\n    cv = GridSearchCV(ShrunkCovariance(), {\"shrinkage\": shrinkages})\n    return np.mean(cross_val_score(cv.fit(X).best_estimator_, X))\n\n\ndef lw_score(X):\n    \"\"\"Return the mean ``cross_val_score`` of a LedoitWolf estimator on ``X``.\"\"\"\n    return np.mean(cross_val_score(LedoitWolf(), X))\n\n\nfor X, title in [(X_homo, \"Homoscedastic Noise\"), (X_hetero, \"Heteroscedastic Noise\")]:\n    pca_scores, fa_scores = compute_scores(X)\n    n_components_pca = n_components[np.argmax(pca_scores)]\n    n_components_fa = n_components[np.argmax(fa_scores)]\n\n    pca = PCA(svd_solver=\"full\", n_components=\"mle\")\n    pca.fit(X)\n    n_components_pca_mle = pca.n_components_\n\n    print(\"best n_components by PCA CV = %d\" % n_components_pca)\n    print(\"best n_components by FactorAnalysis CV = %d\" % n_components_fa)\n    print(\"best n_components by PCA MLE = %d\" % n_components_pca_mle)\n\n    plt.figure()\n    plt.plot(n_components, pca_scores, \"b\", label=\"PCA scores\")\n    plt.plot(n_components, fa_scores, \"r\", label=\"FA scores\")\n    plt.axvline(rank, color=\"g\", label=\"TRUTH: %d\" % rank, linestyle=\"-\")\n    plt.axvline(\n        n_components_pca,\n        color=\"b\",\n        label=\"PCA CV: %d\" % n_components_pca,\n        linestyle=\"--\",\n    )\n    plt.axvline(\n        n_components_fa,\n        color=\"r\",\n        label=\"FactorAnalysis CV: %d\" % n_components_fa,\n        linestyle=\"--\",\n    )\n    plt.axvline(\n        n_components_pca_mle,\n        color=\"k\",\n        label=\"PCA MLE: %d\" % n_components_pca_mle,\n        linestyle=\"--\",\n    )\n\n    # compare with other covariance estimators\n    plt.axhline(\n        shrunk_cov_score(X),\n        color=\"violet\",\n        label=\"Shrunk Covariance MLE\",\n        linestyle=\"-.\",\n    )\n    plt.axhline(\n        lw_score(X),\n        color=\"orange\",\n        # Plain label: the text has no placeholder, so no % formatting is applied.\n        label=\"LedoitWolf MLE\",\n        linestyle=\"-.\",\n    )\n\n    plt.xlabel(\"nb of components\")\n    plt.ylabel(\"CV scores\")\n    plt.legend(loc=\"lower right\")\n    plt.title(title)\n\nplt.show()"
3066
]
3167
}
3268
],
Binary file not shown.

dev/_downloads/79ed9713970355da938b86bf77fcefa5/plot_pca_vs_fa_model_selection.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,13 @@
2929
# Denis A. Engemann
3030
# License: BSD 3 clause
3131

32-
import numpy as np
33-
import matplotlib.pyplot as plt
34-
from scipy import linalg
32+
# %%
33+
# Create the data
34+
# ---------------
3535

36-
from sklearn.decomposition import PCA, FactorAnalysis
37-
from sklearn.covariance import ShrunkCovariance, LedoitWolf
38-
from sklearn.model_selection import cross_val_score
39-
from sklearn.model_selection import GridSearchCV
36+
import numpy as np
4037

41-
# #############################################################################
42-
# Create the data
38+
from scipy import linalg
4339

4440
n_samples, n_features, rank = 500, 25, 5
4541
sigma = 1.0
@@ -54,8 +50,16 @@
5450
sigmas = sigma * rng.rand(n_features) + sigma / 2.0
5551
X_hetero = X + rng.randn(n_samples, n_features) * sigmas
5652

57-
# #############################################################################
53+
# %%
5854
# Fit the models
55+
# --------------
56+
57+
import matplotlib.pyplot as plt
58+
59+
from sklearn.decomposition import PCA, FactorAnalysis
60+
from sklearn.covariance import ShrunkCovariance, LedoitWolf
61+
from sklearn.model_selection import cross_val_score
62+
from sklearn.model_selection import GridSearchCV
5963

6064
n_components = np.arange(0, n_features, 5) # options for n_components
6165

dev/_downloads/scikit-learn-docs.zip

1.54 KB
Binary file not shown.
-25 Bytes
110 Bytes
-145 Bytes
-7 Bytes
-61 Bytes

0 commit comments

Comments
 (0)