|
26 | 26 | },
|
27 | 27 | "outputs": [],
|
28 | 28 | "source": [
|
29 |
| - "# Author: Alexandre Gramfort < [email protected]>\n# License: BSD 3 clause\n\nimport shutil\nimport tempfile\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg, ndimage\nfrom joblib import Memory\n\nfrom sklearn.feature_extraction.image import grid_to_graph\nfrom sklearn import feature_selection\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import KFold\n\n# #############################################################################\n# Generate data\nn_samples = 200\nsize = 40 # image size\nroi_size = 15\nsnr = 5.0\nnp.random.seed(0)\nmask = np.ones([size, size], dtype=bool)\n\ncoef = np.zeros((size, size))\ncoef[0:roi_size, 0:roi_size] = -1.0\ncoef[-roi_size:, -roi_size:] = 1.0\n\nX = np.random.randn(n_samples, size**2)\nfor x in X: # smooth data\n x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()\nX -= X.mean(axis=0)\nX /= X.std(axis=0)\n\ny = np.dot(X, coef.ravel())\nnoise = np.random.randn(y.shape[0])\nnoise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)\ny += noise_coef * noise # add noise\n\n# #############################################################################\n# Compute the coefs of a Bayesian Ridge with GridSearch\ncv = KFold(2) # cross-validation generator for model selection\nridge = BayesianRidge()\ncachedir = tempfile.mkdtemp()\nmem = Memory(___location=cachedir, verbose=1)\n\n# Ward agglomeration followed by BayesianRidge\nconnectivity = grid_to_graph(n_x=size, n_y=size)\nward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)\nclf = Pipeline([(\"ward\", ward), (\"ridge\", ridge)])\n# Select the optimal number of parcels with grid search\nclf = GridSearchCV(clf, {\"ward__n_clusters\": [10, 20, 30]}, n_jobs=1, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)\ncoef_agglomeration_ = coef_.reshape(size, size)\n\n# Anova univariate feature selection followed by BayesianRidge\nf_regression = mem.cache(feature_selection.f_regression) # caching function\nanova = feature_selection.SelectPercentile(f_regression)\nclf = Pipeline([(\"anova\", anova), (\"ridge\", ridge)])\n# Select the optimal percentage of features with grid search\nclf = GridSearchCV(clf, {\"anova__percentile\": [5, 10, 20]}, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))\ncoef_selection_ = coef_.reshape(size, size)\n\n# #############################################################################\n# Inverse the transformation to plot the results on an image\nplt.close(\"all\")\nplt.figure(figsize=(7.3, 2.7))\nplt.subplot(1, 3, 1)\nplt.imshow(coef, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"True weights\")\nplt.subplot(1, 3, 2)\nplt.imshow(coef_selection_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Selection\")\nplt.subplot(1, 3, 3)\nplt.imshow(coef_agglomeration_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Agglomeration\")\nplt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)\nplt.show()\n\n# Attempt to remove the temporary cachedir, but don't worry if it fails\nshutil.rmtree(cachedir, ignore_errors=True)" |
| 29 | + "# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause" |
| 30 | + ] |
| 31 | + }, |
| 32 | + { |
| 33 | + "cell_type": "code", |
| 34 | + "execution_count": null, |
| 35 | + "metadata": { |
| 36 | + "collapsed": false |
| 37 | + }, |
| 38 | + "outputs": [], |
| 39 | + "source": [ |
| 40 | + "import shutil\nimport tempfile\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg, ndimage\nfrom joblib import Memory\n\nfrom sklearn.feature_extraction.image import grid_to_graph\nfrom sklearn import feature_selection\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import KFold" |
| 41 | + ] |
| 42 | + }, |
| 43 | + { |
| 44 | + "cell_type": "markdown", |
| 45 | + "metadata": {}, |
| 46 | + "source": [ |
| 47 | + "Set parameters\n\n" |
| 48 | + ] |
| 49 | + }, |
| 50 | + { |
| 51 | + "cell_type": "code", |
| 52 | + "execution_count": null, |
| 53 | + "metadata": { |
| 54 | + "collapsed": false |
| 55 | + }, |
| 56 | + "outputs": [], |
| 57 | + "source": [ |
| 58 | + "n_samples = 200\nsize = 40 # image size\nroi_size = 15\nsnr = 5.0\nnp.random.seed(0)" |
| 59 | + ] |
| 60 | + }, |
| 61 | + { |
| 62 | + "cell_type": "markdown", |
| 63 | + "metadata": {}, |
| 64 | + "source": [ |
| 65 | + "Generate data\n\n" |
| 66 | + ] |
| 67 | + }, |
| 68 | + { |
| 69 | + "cell_type": "code", |
| 70 | + "execution_count": null, |
| 71 | + "metadata": { |
| 72 | + "collapsed": false |
| 73 | + }, |
| 74 | + "outputs": [], |
| 75 | + "source": [ |
| 76 | + "coef = np.zeros((size, size))\ncoef[0:roi_size, 0:roi_size] = -1.0\ncoef[-roi_size:, -roi_size:] = 1.0\n\nX = np.random.randn(n_samples, size**2)\nfor x in X: # smooth data\n x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()\nX -= X.mean(axis=0)\nX /= X.std(axis=0)\n\ny = np.dot(X, coef.ravel())" |
| 77 | + ] |
| 78 | + }, |
| 79 | + { |
| 80 | + "cell_type": "markdown", |
| 81 | + "metadata": {}, |
| 82 | + "source": [ |
| 83 | + "add noise\n\n" |
| 84 | + ] |
| 85 | + }, |
| 86 | + { |
| 87 | + "cell_type": "code", |
| 88 | + "execution_count": null, |
| 89 | + "metadata": { |
| 90 | + "collapsed": false |
| 91 | + }, |
| 92 | + "outputs": [], |
| 93 | + "source": [ |
| 94 | + "noise = np.random.randn(y.shape[0])\nnoise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)\ny += noise_coef * noise" |
| 95 | + ] |
| 96 | + }, |
| 97 | + { |
| 98 | + "cell_type": "markdown", |
| 99 | + "metadata": {}, |
| 100 | + "source": [ |
| 101 | + "Compute the coefs of a Bayesian Ridge with GridSearch\n\n" |
| 102 | + ] |
| 103 | + }, |
| 104 | + { |
| 105 | + "cell_type": "code", |
| 106 | + "execution_count": null, |
| 107 | + "metadata": { |
| 108 | + "collapsed": false |
| 109 | + }, |
| 110 | + "outputs": [], |
| 111 | + "source": [ |
| 112 | + "cv = KFold(2) # cross-validation generator for model selection\nridge = BayesianRidge()\ncachedir = tempfile.mkdtemp()\nmem = Memory(___location=cachedir, verbose=1)" |
| 113 | + ] |
| 114 | + }, |
| 115 | + { |
| 116 | + "cell_type": "markdown", |
| 117 | + "metadata": {}, |
| 118 | + "source": [ |
| 119 | + "Ward agglomeration followed by BayesianRidge\n\n" |
| 120 | + ] |
| 121 | + }, |
| 122 | + { |
| 123 | + "cell_type": "code", |
| 124 | + "execution_count": null, |
| 125 | + "metadata": { |
| 126 | + "collapsed": false |
| 127 | + }, |
| 128 | + "outputs": [], |
| 129 | + "source": [ |
| 130 | + "connectivity = grid_to_graph(n_x=size, n_y=size)\nward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)\nclf = Pipeline([(\"ward\", ward), (\"ridge\", ridge)])\n# Select the optimal number of parcels with grid search\nclf = GridSearchCV(clf, {\"ward__n_clusters\": [10, 20, 30]}, n_jobs=1, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)\ncoef_agglomeration_ = coef_.reshape(size, size)" |
| 131 | + ] |
| 132 | + }, |
| 133 | + { |
| 134 | + "cell_type": "markdown", |
| 135 | + "metadata": {}, |
| 136 | + "source": [ |
| 137 | + "Anova univariate feature selection followed by BayesianRidge\n\n" |
| 138 | + ] |
| 139 | + }, |
| 140 | + { |
| 141 | + "cell_type": "code", |
| 142 | + "execution_count": null, |
| 143 | + "metadata": { |
| 144 | + "collapsed": false |
| 145 | + }, |
| 146 | + "outputs": [], |
| 147 | + "source": [ |
| 148 | + "f_regression = mem.cache(feature_selection.f_regression) # caching function\nanova = feature_selection.SelectPercentile(f_regression)\nclf = Pipeline([(\"anova\", anova), (\"ridge\", ridge)])\n# Select the optimal percentage of features with grid search\nclf = GridSearchCV(clf, {\"anova__percentile\": [5, 10, 20]}, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))\ncoef_selection_ = coef_.reshape(size, size)" |
| 149 | + ] |
| 150 | + }, |
| 151 | + { |
| 152 | + "cell_type": "markdown", |
| 153 | + "metadata": {}, |
| 154 | + "source": [ |
| 155 | + "Inverse the transformation to plot the results on an image\n\n" |
| 156 | + ] |
| 157 | + }, |
| 158 | + { |
| 159 | + "cell_type": "code", |
| 160 | + "execution_count": null, |
| 161 | + "metadata": { |
| 162 | + "collapsed": false |
| 163 | + }, |
| 164 | + "outputs": [], |
| 165 | + "source": [ |
| 166 | + "plt.close(\"all\")\nplt.figure(figsize=(7.3, 2.7))\nplt.subplot(1, 3, 1)\nplt.imshow(coef, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"True weights\")\nplt.subplot(1, 3, 2)\nplt.imshow(coef_selection_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Selection\")\nplt.subplot(1, 3, 3)\nplt.imshow(coef_agglomeration_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Agglomeration\")\nplt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)\nplt.show()" |
| 167 | + ] |
| 168 | + }, |
| 169 | + { |
| 170 | + "cell_type": "markdown", |
| 171 | + "metadata": {}, |
| 172 | + "source": [ |
| 173 | + "Attempt to remove the temporary cachedir, but don't worry if it fails\n\n" |
| 174 | + ] |
| 175 | + }, |
| 176 | + { |
| 177 | + "cell_type": "code", |
| 178 | + "execution_count": null, |
| 179 | + "metadata": { |
| 180 | + "collapsed": false |
| 181 | + }, |
| 182 | + "outputs": [], |
| 183 | + "source": [ |
| 184 | + "shutil.rmtree(cachedir, ignore_errors=True)" |
30 | 185 | ]
|
31 | 186 | }
|
32 | 187 | ],
|
|
0 commit comments