scikit-learn
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
-219 Bytes b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
-219 Bytes
diff --git a/‎dev/_downloads/6c7cb9f528114f658d5f562073332c24/plot_feature_agglomeration_vs_univariate_selection.py
Lines changed: 14 additions & 6 deletions b/‎dev/_downloads/6c7cb9f528114f658d5f562073332c24/plot_feature_agglomeration_vs_univariate_selection.py
Lines changed: 14 additions & 6 deletions
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
2.14 KB b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
2.14 KB
diff --git a/‎dev/_downloads/fd3181da9f1988c60c583c95e97389f8/plot_feature_agglomeration_vs_univariate_selection.ipynb
Lines changed: 156 additions & 1 deletion b/‎dev/_downloads/fd3181da9f1988c60c583c95e97389f8/plot_feature_agglomeration_vs_univariate_selection.ipynb
Lines changed: 156 additions & 1 deletion
diff --git a/‎dev/_downloads/scikit-learn-docs.zip
4.23 KB b/‎dev/_downloads/scikit-learn-docs.zip
4.23 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-212 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-212 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-204 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-204 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-99 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-99 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
-107 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_004.png
-107 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
-62 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_thumb.png
-62 Bytes
@@ -17,6 +17,7 @@
 # Author: Alexandre Gramfort <[email protected]>
 # License: BSD 3 clause
 
+# %%
 import shutil
 import tempfile
 
@@ -33,15 +34,16 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import KFold
 
-# #############################################################################
-# Generate data
+# %%
+# Set parameters
 n_samples = 200
 size = 40  # image size
 roi_size = 15
 snr = 5.0
 np.random.seed(0)
-mask = np.ones([size, size], dtype=bool)
 
+# %%
+# Generate data
 coef = np.zeros((size, size))
 coef[0:roi_size, 0:roi_size] = -1.0
 coef[-roi_size:, -roi_size:] = 1.0
@@ -53,17 +55,21 @@
 X /= X.std(axis=0)
 
 y = np.dot(X, coef.ravel())
+
+# %%
+# add noise
 noise = np.random.randn(y.shape[0])
 noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)
-y += noise_coef * noise  # add noise
+y += noise_coef * noise
 
-# #############################################################################
+# %%
 # Compute the coefs of a Bayesian Ridge with GridSearch
 cv = KFold(2)  # cross-validation generator for model selection
 ridge = BayesianRidge()
 cachedir = tempfile.mkdtemp()
 mem = Memory(___location=cachedir, verbose=1)
 
+# %%
 # Ward agglomeration followed by BayesianRidge
 connectivity = grid_to_graph(n_x=size, n_y=size)
 ward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)
@@ -75,6 +81,7 @@
 coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
 coef_agglomeration_ = coef_.reshape(size, size)
 
+# %%
 # Anova univariate feature selection followed by BayesianRidge
 f_regression = mem.cache(feature_selection.f_regression)  # caching function
 anova = feature_selection.SelectPercentile(f_regression)
@@ -86,7 +93,7 @@
 coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))
 coef_selection_ = coef_.reshape(size, size)
 
-# #############################################################################
+# %%
 # Inverse the transformation to plot the results on an image
 plt.close("all")
 plt.figure(figsize=(7.3, 2.7))
@@ -102,5 +109,6 @@
 plt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)
 plt.show()
 
+# %%
 # Attempt to remove the temporary cachedir, but don't worry if it fails
 shutil.rmtree(cachedir, ignore_errors=True)
@@ -26,7 +26,162 @@
       },
       "outputs": [],
       "source": [
-        "# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nimport shutil\nimport tempfile\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg, ndimage\nfrom joblib import Memory\n\nfrom sklearn.feature_extraction.image import grid_to_graph\nfrom sklearn import feature_selection\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import KFold\n\n# #############################################################################\n# Generate data\nn_samples = 200\nsize = 40  # image size\nroi_size = 15\nsnr = 5.0\nnp.random.seed(0)\nmask = np.ones([size, size], dtype=bool)\n\ncoef = np.zeros((size, size))\ncoef[0:roi_size, 0:roi_size] = -1.0\ncoef[-roi_size:, -roi_size:] = 1.0\n\nX = np.random.randn(n_samples, size**2)\nfor x in X:  # smooth data\n    x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()\nX -= X.mean(axis=0)\nX /= X.std(axis=0)\n\ny = np.dot(X, coef.ravel())\nnoise = np.random.randn(y.shape[0])\nnoise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)\ny += noise_coef * noise  # add noise\n\n# #############################################################################\n# Compute the coefs of a Bayesian Ridge with GridSearch\ncv = KFold(2)  # cross-validation generator for model selection\nridge = BayesianRidge()\ncachedir = tempfile.mkdtemp()\nmem = Memory(___location=cachedir, verbose=1)\n\n# Ward agglomeration followed by BayesianRidge\nconnectivity = grid_to_graph(n_x=size, n_y=size)\nward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)\nclf = Pipeline([(\"ward\", ward), (\"ridge\", ridge)])\n# Select the optimal number of parcels with grid search\nclf = GridSearchCV(clf, {\"ward__n_clusters\": [10, 20, 30]}, n_jobs=1, cv=cv)\nclf.fit(X, y)  # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)\ncoef_agglomeration_ = coef_.reshape(size, size)\n\n# Anova univariate feature selection followed by BayesianRidge\nf_regression = mem.cache(feature_selection.f_regression)  # caching function\nanova = feature_selection.SelectPercentile(f_regression)\nclf = Pipeline([(\"anova\", anova), (\"ridge\", ridge)])\n# Select the optimal percentage of features with grid search\nclf = GridSearchCV(clf, {\"anova__percentile\": [5, 10, 20]}, cv=cv)\nclf.fit(X, y)  # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))\ncoef_selection_ = coef_.reshape(size, size)\n\n# #############################################################################\n# Inverse the transformation to plot the results on an image\nplt.close(\"all\")\nplt.figure(figsize=(7.3, 2.7))\nplt.subplot(1, 3, 1)\nplt.imshow(coef, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"True weights\")\nplt.subplot(1, 3, 2)\nplt.imshow(coef_selection_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Selection\")\nplt.subplot(1, 3, 3)\nplt.imshow(coef_agglomeration_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Agglomeration\")\nplt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)\nplt.show()\n\n# Attempt to remove the temporary cachedir, but don't worry if it fails\nshutil.rmtree(cachedir, ignore_errors=True)"
+        "# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import shutil\nimport tempfile\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg, ndimage\nfrom joblib import Memory\n\nfrom sklearn.feature_extraction.image import grid_to_graph\nfrom sklearn import feature_selection\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import KFold"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Set parameters\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "n_samples = 200\nsize = 40  # image size\nroi_size = 15\nsnr = 5.0\nnp.random.seed(0)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Generate data\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "coef = np.zeros((size, size))\ncoef[0:roi_size, 0:roi_size] = -1.0\ncoef[-roi_size:, -roi_size:] = 1.0\n\nX = np.random.randn(n_samples, size**2)\nfor x in X:  # smooth data\n    x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()\nX -= X.mean(axis=0)\nX /= X.std(axis=0)\n\ny = np.dot(X, coef.ravel())"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "add noise\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "noise = np.random.randn(y.shape[0])\nnoise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)\ny += noise_coef * noise"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Compute the coefs of a Bayesian Ridge with GridSearch\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "cv = KFold(2)  # cross-validation generator for model selection\nridge = BayesianRidge()\ncachedir = tempfile.mkdtemp()\nmem = Memory(___location=cachedir, verbose=1)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Ward agglomeration followed by BayesianRidge\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "connectivity = grid_to_graph(n_x=size, n_y=size)\nward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)\nclf = Pipeline([(\"ward\", ward), (\"ridge\", ridge)])\n# Select the optimal number of parcels with grid search\nclf = GridSearchCV(clf, {\"ward__n_clusters\": [10, 20, 30]}, n_jobs=1, cv=cv)\nclf.fit(X, y)  # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)\ncoef_agglomeration_ = coef_.reshape(size, size)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Anova univariate feature selection followed by BayesianRidge\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "f_regression = mem.cache(feature_selection.f_regression)  # caching function\nanova = feature_selection.SelectPercentile(f_regression)\nclf = Pipeline([(\"anova\", anova), (\"ridge\", ridge)])\n# Select the optimal percentage of features with grid search\nclf = GridSearchCV(clf, {\"anova__percentile\": [5, 10, 20]}, cv=cv)\nclf.fit(X, y)  # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))\ncoef_selection_ = coef_.reshape(size, size)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Inverse the transformation to plot the results on an image\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "plt.close(\"all\")\nplt.figure(figsize=(7.3, 2.7))\nplt.subplot(1, 3, 1)\nplt.imshow(coef, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"True weights\")\nplt.subplot(1, 3, 2)\nplt.imshow(coef_selection_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Selection\")\nplt.subplot(1, 3, 3)\nplt.imshow(coef_agglomeration_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Agglomeration\")\nplt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)\nplt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Attempt to remove the temporary cachedir, but don't worry if it fails\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "shutil.rmtree(cachedir, ignore_errors=True)"
       ]
     }
   ],