scikit-learn
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
-113 Bytes b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
-113 Bytes
diff --git a/‎dev/_downloads/0c988b0c2bea0040fec13fe1055db95c/plot_pca_vs_fa_model_selection.ipynb
Lines changed: 37 additions & 1 deletion b/‎dev/_downloads/0c988b0c2bea0040fec13fe1055db95c/plot_pca_vs_fa_model_selection.ipynb
Lines changed: 37 additions & 1 deletion
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
412 Bytes b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
412 Bytes
diff --git a/‎dev/_downloads/79ed9713970355da938b86bf77fcefa5/plot_pca_vs_fa_model_selection.py
Lines changed: 14 additions & 10 deletions b/‎dev/_downloads/79ed9713970355da938b86bf77fcefa5/plot_pca_vs_fa_model_selection.py
Lines changed: 14 additions & 10 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.zip
1.54 KB b/‎dev/_downloads/scikit-learn-docs.zip
1.54 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-25 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-25 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
110 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
110 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
-145 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
-145 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
-7 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
-7 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_anomaly_comparison_001.png
-61 Bytes b/‎dev/_images/sphx_glr_plot_anomaly_comparison_001.png
-61 Bytes
@@ -26,7 +26,43 @@
       },
       "outputs": [],
       "source": [
-        "# Authors: Alexandre Gramfort\n#          Denis A. Engemann\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg\n\nfrom sklearn.decomposition import PCA, FactorAnalysis\nfrom sklearn.covariance import ShrunkCovariance, LedoitWolf\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.model_selection import GridSearchCV\n\n# #############################################################################\n# Create the data\n\nn_samples, n_features, rank = 500, 25, 5\nsigma = 1.0\nrng = np.random.RandomState(42)\nU, _, _ = linalg.svd(rng.randn(n_features, n_features))\nX = np.dot(rng.randn(n_samples, rank), U[:, :rank].T)\n\n# Adding homoscedastic noise\nX_homo = X + sigma * rng.randn(n_samples, n_features)\n\n# Adding heteroscedastic noise\nsigmas = sigma * rng.rand(n_features) + sigma / 2.0\nX_hetero = X + rng.randn(n_samples, n_features) * sigmas\n\n# #############################################################################\n# Fit the models\n\nn_components = np.arange(0, n_features, 5)  # options for n_components\n\n\ndef compute_scores(X):\n    pca = PCA(svd_solver=\"full\")\n    fa = FactorAnalysis()\n\n    pca_scores, fa_scores = [], []\n    for n in n_components:\n        pca.n_components = n\n        fa.n_components = n\n        pca_scores.append(np.mean(cross_val_score(pca, X)))\n        fa_scores.append(np.mean(cross_val_score(fa, X)))\n\n    return pca_scores, fa_scores\n\n\ndef shrunk_cov_score(X):\n    shrinkages = np.logspace(-2, 0, 30)\n    cv = GridSearchCV(ShrunkCovariance(), {\"shrinkage\": shrinkages})\n    return np.mean(cross_val_score(cv.fit(X).best_estimator_, X))\n\n\ndef lw_score(X):\n    return np.mean(cross_val_score(LedoitWolf(), X))\n\n\nfor X, title in [(X_homo, \"Homoscedastic Noise\"), (X_hetero, \"Heteroscedastic Noise\")]:\n    pca_scores, fa_scores = compute_scores(X)\n    n_components_pca = n_components[np.argmax(pca_scores)]\n    n_components_fa = n_components[np.argmax(fa_scores)]\n\n    pca = PCA(svd_solver=\"full\", n_components=\"mle\")\n    pca.fit(X)\n    n_components_pca_mle = pca.n_components_\n\n    print(\"best n_components by PCA CV = %d\" % n_components_pca)\n    print(\"best n_components by FactorAnalysis CV = %d\" % n_components_fa)\n    print(\"best n_components by PCA MLE = %d\" % n_components_pca_mle)\n\n    plt.figure()\n    plt.plot(n_components, pca_scores, \"b\", label=\"PCA scores\")\n    plt.plot(n_components, fa_scores, \"r\", label=\"FA scores\")\n    plt.axvline(rank, color=\"g\", label=\"TRUTH: %d\" % rank, linestyle=\"-\")\n    plt.axvline(\n        n_components_pca,\n        color=\"b\",\n        label=\"PCA CV: %d\" % n_components_pca,\n        linestyle=\"--\",\n    )\n    plt.axvline(\n        n_components_fa,\n        color=\"r\",\n        label=\"FactorAnalysis CV: %d\" % n_components_fa,\n        linestyle=\"--\",\n    )\n    plt.axvline(\n        n_components_pca_mle,\n        color=\"k\",\n        label=\"PCA MLE: %d\" % n_components_pca_mle,\n        linestyle=\"--\",\n    )\n\n    # compare with other covariance estimators\n    plt.axhline(\n        shrunk_cov_score(X),\n        color=\"violet\",\n        label=\"Shrunk Covariance MLE\",\n        linestyle=\"-.\",\n    )\n    plt.axhline(\n        lw_score(X),\n        color=\"orange\",\n        label=\"LedoitWolf MLE\" % n_components_pca_mle,\n        linestyle=\"-.\",\n    )\n\n    plt.xlabel(\"nb of components\")\n    plt.ylabel(\"CV scores\")\n    plt.legend(loc=\"lower right\")\n    plt.title(title)\n\nplt.show()"
+        "# Authors: Alexandre Gramfort\n#          Denis A. Engemann\n# License: BSD 3 clause"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Create the data\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np\n\nfrom scipy import linalg\n\nn_samples, n_features, rank = 500, 25, 5\nsigma = 1.0\nrng = np.random.RandomState(42)\nU, _, _ = linalg.svd(rng.randn(n_features, n_features))\nX = np.dot(rng.randn(n_samples, rank), U[:, :rank].T)\n\n# Adding homoscedastic noise\nX_homo = X + sigma * rng.randn(n_samples, n_features)\n\n# Adding heteroscedastic noise\nsigmas = sigma * rng.rand(n_features) + sigma / 2.0\nX_hetero = X + rng.randn(n_samples, n_features) * sigmas"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Fit the models\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n\nfrom sklearn.decomposition import PCA, FactorAnalysis\nfrom sklearn.covariance import ShrunkCovariance, LedoitWolf\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.model_selection import GridSearchCV\n\nn_components = np.arange(0, n_features, 5)  # options for n_components\n\n\ndef compute_scores(X):\n    pca = PCA(svd_solver=\"full\")\n    fa = FactorAnalysis()\n\n    pca_scores, fa_scores = [], []\n    for n in n_components:\n        pca.n_components = n\n        fa.n_components = n\n        pca_scores.append(np.mean(cross_val_score(pca, X)))\n        fa_scores.append(np.mean(cross_val_score(fa, X)))\n\n    return pca_scores, fa_scores\n\n\ndef shrunk_cov_score(X):\n    shrinkages = np.logspace(-2, 0, 30)\n    cv = GridSearchCV(ShrunkCovariance(), {\"shrinkage\": shrinkages})\n    return np.mean(cross_val_score(cv.fit(X).best_estimator_, X))\n\n\ndef lw_score(X):\n    return np.mean(cross_val_score(LedoitWolf(), X))\n\n\nfor X, title in [(X_homo, \"Homoscedastic Noise\"), (X_hetero, \"Heteroscedastic Noise\")]:\n    pca_scores, fa_scores = compute_scores(X)\n    n_components_pca = n_components[np.argmax(pca_scores)]\n    n_components_fa = n_components[np.argmax(fa_scores)]\n\n    pca = PCA(svd_solver=\"full\", n_components=\"mle\")\n    pca.fit(X)\n    n_components_pca_mle = pca.n_components_\n\n    print(\"best n_components by PCA CV = %d\" % n_components_pca)\n    print(\"best n_components by FactorAnalysis CV = %d\" % n_components_fa)\n    print(\"best n_components by PCA MLE = %d\" % n_components_pca_mle)\n\n    plt.figure()\n    plt.plot(n_components, pca_scores, \"b\", label=\"PCA scores\")\n    plt.plot(n_components, fa_scores, \"r\", label=\"FA scores\")\n    plt.axvline(rank, color=\"g\", label=\"TRUTH: %d\" % rank, linestyle=\"-\")\n    plt.axvline(\n        n_components_pca,\n        color=\"b\",\n        label=\"PCA CV: %d\" % n_components_pca,\n        linestyle=\"--\",\n    )\n    plt.axvline(\n        n_components_fa,\n        color=\"r\",\n        label=\"FactorAnalysis CV: %d\" % n_components_fa,\n        linestyle=\"--\",\n    )\n    plt.axvline(\n        n_components_pca_mle,\n        color=\"k\",\n        label=\"PCA MLE: %d\" % n_components_pca_mle,\n        linestyle=\"--\",\n    )\n\n    # compare with other covariance estimators\n    plt.axhline(\n        shrunk_cov_score(X),\n        color=\"violet\",\n        label=\"Shrunk Covariance MLE\",\n        linestyle=\"-.\",\n    )\n    plt.axhline(\n        lw_score(X),\n        color=\"orange\",\n        label=\"LedoitWolf MLE\" % n_components_pca_mle,\n        linestyle=\"-.\",\n    )\n\n    plt.xlabel(\"nb of components\")\n    plt.ylabel(\"CV scores\")\n    plt.legend(loc=\"lower right\")\n    plt.title(title)\n\nplt.show()"
       ]
     }
   ],
 
@@ -29,17 +29,13 @@
 #          Denis A. Engemann
 # License: BSD 3 clause
 
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy import linalg
+# %%
+# Create the data
+# ---------------
 
-from sklearn.decomposition import PCA, FactorAnalysis
-from sklearn.covariance import ShrunkCovariance, LedoitWolf
-from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import GridSearchCV
+import numpy as np
 
-# #############################################################################
-# Create the data
+from scipy import linalg
 
 n_samples, n_features, rank = 500, 25, 5
 sigma = 1.0
@@ -54,8 +50,16 @@
 sigmas = sigma * rng.rand(n_features) + sigma / 2.0
 X_hetero = X + rng.randn(n_samples, n_features) * sigmas
 
-# #############################################################################
+# %%
 # Fit the models
+# --------------
+
+import matplotlib.pyplot as plt
+
+from sklearn.decomposition import PCA, FactorAnalysis
+from sklearn.covariance import ShrunkCovariance, LedoitWolf
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import GridSearchCV
 
 n_components = np.arange(0, n_features, 5)  # options for n_components