Skip to content

Commit ca6ed1a

Browse files
committed
Pushing the docs to dev/ for branch: main, commit f9d74236e26f6169b32e23887f30879c32ac76c7
1 parent 8ec166a commit ca6ed1a

File tree

1,217 files changed

+4897
-4442
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,217 files changed

+4897
-4442
lines changed
Binary file not shown.

dev/_downloads/6c7cb9f528114f658d5f562073332c24/plot_feature_agglomeration_vs_univariate_selection.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# Author: Alexandre Gramfort <[email protected]>
1818
# License: BSD 3 clause
1919

20+
# %%
2021
import shutil
2122
import tempfile
2223

@@ -33,15 +34,16 @@
3334
from sklearn.model_selection import GridSearchCV
3435
from sklearn.model_selection import KFold
3536

36-
# #############################################################################
37-
# Generate data
37+
# %%
38+
# Set parameters
3839
n_samples = 200
3940
size = 40 # image size
4041
roi_size = 15
4142
snr = 5.0
4243
np.random.seed(0)
43-
mask = np.ones([size, size], dtype=bool)
4444

45+
# %%
46+
# Generate data
4547
coef = np.zeros((size, size))
4648
coef[0:roi_size, 0:roi_size] = -1.0
4749
coef[-roi_size:, -roi_size:] = 1.0
@@ -53,17 +55,21 @@
5355
X /= X.std(axis=0)
5456

5557
y = np.dot(X, coef.ravel())
58+
59+
# %%
60+
# add noise
5661
noise = np.random.randn(y.shape[0])
5762
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)
58-
y += noise_coef * noise # add noise
63+
y += noise_coef * noise
5964

60-
# #############################################################################
65+
# %%
6166
# Compute the coefs of a Bayesian Ridge with GridSearch
6267
cv = KFold(2) # cross-validation generator for model selection
6368
ridge = BayesianRidge()
6469
cachedir = tempfile.mkdtemp()
6570
mem = Memory(location=cachedir, verbose=1)
6671

72+
# %%
6773
# Ward agglomeration followed by BayesianRidge
6874
connectivity = grid_to_graph(n_x=size, n_y=size)
6975
ward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)
@@ -75,6 +81,7 @@
7581
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
7682
coef_agglomeration_ = coef_.reshape(size, size)
7783

84+
# %%
7885
# Anova univariate feature selection followed by BayesianRidge
7986
f_regression = mem.cache(feature_selection.f_regression) # caching function
8087
anova = feature_selection.SelectPercentile(f_regression)
@@ -86,7 +93,7 @@
8693
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))
8794
coef_selection_ = coef_.reshape(size, size)
8895

89-
# #############################################################################
96+
# %%
9097
# Inverse the transformation to plot the results on an image
9198
plt.close("all")
9299
plt.figure(figsize=(7.3, 2.7))
@@ -102,5 +109,6 @@
102109
plt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)
103110
plt.show()
104111

112+
# %%
105113
# Attempt to remove the temporary cachedir, but don't worry if it fails
106114
shutil.rmtree(cachedir, ignore_errors=True)
Binary file not shown.

dev/_downloads/fd3181da9f1988c60c583c95e97389f8/plot_feature_agglomeration_vs_univariate_selection.ipynb

Lines changed: 156 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,162 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nimport shutil\nimport tempfile\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg, ndimage\nfrom joblib import Memory\n\nfrom sklearn.feature_extraction.image import grid_to_graph\nfrom sklearn import feature_selection\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import KFold\n\n# #############################################################################\n# Generate data\nn_samples = 200\nsize = 40 # image size\nroi_size = 15\nsnr = 5.0\nnp.random.seed(0)\nmask = np.ones([size, size], dtype=bool)\n\ncoef = np.zeros((size, size))\ncoef[0:roi_size, 0:roi_size] = -1.0\ncoef[-roi_size:, -roi_size:] = 1.0\n\nX = np.random.randn(n_samples, size**2)\nfor x in X: # smooth data\n x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()\nX -= X.mean(axis=0)\nX /= X.std(axis=0)\n\ny = np.dot(X, coef.ravel())\nnoise = np.random.randn(y.shape[0])\nnoise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)\ny += noise_coef * noise # add noise\n\n# #############################################################################\n# Compute the coefs of a Bayesian Ridge with GridSearch\ncv = KFold(2) # cross-validation generator for model selection\nridge = BayesianRidge()\ncachedir = tempfile.mkdtemp()\nmem = Memory(location=cachedir, verbose=1)\n\n# Ward agglomeration followed by BayesianRidge\nconnectivity = grid_to_graph(n_x=size, n_y=size)\nward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)\nclf = Pipeline([(\"ward\", ward), (\"ridge\", ridge)])\n# Select the optimal number of parcels with grid search\nclf = GridSearchCV(clf, {\"ward__n_clusters\": [10, 20, 30]}, n_jobs=1, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = 
clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)\ncoef_agglomeration_ = coef_.reshape(size, size)\n\n# Anova univariate feature selection followed by BayesianRidge\nf_regression = mem.cache(feature_selection.f_regression) # caching function\nanova = feature_selection.SelectPercentile(f_regression)\nclf = Pipeline([(\"anova\", anova), (\"ridge\", ridge)])\n# Select the optimal percentage of features with grid search\nclf = GridSearchCV(clf, {\"anova__percentile\": [5, 10, 20]}, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))\ncoef_selection_ = coef_.reshape(size, size)\n\n# #############################################################################\n# Inverse the transformation to plot the results on an image\nplt.close(\"all\")\nplt.figure(figsize=(7.3, 2.7))\nplt.subplot(1, 3, 1)\nplt.imshow(coef, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"True weights\")\nplt.subplot(1, 3, 2)\nplt.imshow(coef_selection_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Selection\")\nplt.subplot(1, 3, 3)\nplt.imshow(coef_agglomeration_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Agglomeration\")\nplt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)\nplt.show()\n\n# Attempt to remove the temporary cachedir, but don't worry if it fails\nshutil.rmtree(cachedir, ignore_errors=True)"
29+
"# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {
36+
"collapsed": false
37+
},
38+
"outputs": [],
39+
"source": [
40+
"import shutil\nimport tempfile\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg, ndimage\nfrom joblib import Memory\n\nfrom sklearn.feature_extraction.image import grid_to_graph\nfrom sklearn import feature_selection\nfrom sklearn.cluster import FeatureAgglomeration\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import KFold"
41+
]
42+
},
43+
{
44+
"cell_type": "markdown",
45+
"metadata": {},
46+
"source": [
47+
"Set parameters\n\n"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"metadata": {
54+
"collapsed": false
55+
},
56+
"outputs": [],
57+
"source": [
58+
"n_samples = 200\nsize = 40 # image size\nroi_size = 15\nsnr = 5.0\nnp.random.seed(0)"
59+
]
60+
},
61+
{
62+
"cell_type": "markdown",
63+
"metadata": {},
64+
"source": [
65+
"Generate data\n\n"
66+
]
67+
},
68+
{
69+
"cell_type": "code",
70+
"execution_count": null,
71+
"metadata": {
72+
"collapsed": false
73+
},
74+
"outputs": [],
75+
"source": [
76+
"coef = np.zeros((size, size))\ncoef[0:roi_size, 0:roi_size] = -1.0\ncoef[-roi_size:, -roi_size:] = 1.0\n\nX = np.random.randn(n_samples, size**2)\nfor x in X: # smooth data\n x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()\nX -= X.mean(axis=0)\nX /= X.std(axis=0)\n\ny = np.dot(X, coef.ravel())"
77+
]
78+
},
79+
{
80+
"cell_type": "markdown",
81+
"metadata": {},
82+
"source": [
83+
"add noise\n\n"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"metadata": {
90+
"collapsed": false
91+
},
92+
"outputs": [],
93+
"source": [
94+
"noise = np.random.randn(y.shape[0])\nnoise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)\ny += noise_coef * noise"
95+
]
96+
},
97+
{
98+
"cell_type": "markdown",
99+
"metadata": {},
100+
"source": [
101+
"Compute the coefs of a Bayesian Ridge with GridSearch\n\n"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": null,
107+
"metadata": {
108+
"collapsed": false
109+
},
110+
"outputs": [],
111+
"source": [
112+
"cv = KFold(2) # cross-validation generator for model selection\nridge = BayesianRidge()\ncachedir = tempfile.mkdtemp()\nmem = Memory(location=cachedir, verbose=1)"
113+
]
114+
},
115+
{
116+
"cell_type": "markdown",
117+
"metadata": {},
118+
"source": [
119+
"Ward agglomeration followed by BayesianRidge\n\n"
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": null,
125+
"metadata": {
126+
"collapsed": false
127+
},
128+
"outputs": [],
129+
"source": [
130+
"connectivity = grid_to_graph(n_x=size, n_y=size)\nward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)\nclf = Pipeline([(\"ward\", ward), (\"ridge\", ridge)])\n# Select the optimal number of parcels with grid search\nclf = GridSearchCV(clf, {\"ward__n_clusters\": [10, 20, 30]}, n_jobs=1, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)\ncoef_agglomeration_ = coef_.reshape(size, size)"
131+
]
132+
},
133+
{
134+
"cell_type": "markdown",
135+
"metadata": {},
136+
"source": [
137+
"Anova univariate feature selection followed by BayesianRidge\n\n"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {
144+
"collapsed": false
145+
},
146+
"outputs": [],
147+
"source": [
148+
"f_regression = mem.cache(feature_selection.f_regression) # caching function\nanova = feature_selection.SelectPercentile(f_regression)\nclf = Pipeline([(\"anova\", anova), (\"ridge\", ridge)])\n# Select the optimal percentage of features with grid search\nclf = GridSearchCV(clf, {\"anova__percentile\": [5, 10, 20]}, cv=cv)\nclf.fit(X, y) # set the best parameters\ncoef_ = clf.best_estimator_.steps[-1][1].coef_\ncoef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))\ncoef_selection_ = coef_.reshape(size, size)"
149+
]
150+
},
151+
{
152+
"cell_type": "markdown",
153+
"metadata": {},
154+
"source": [
155+
"Inverse the transformation to plot the results on an image\n\n"
156+
]
157+
},
158+
{
159+
"cell_type": "code",
160+
"execution_count": null,
161+
"metadata": {
162+
"collapsed": false
163+
},
164+
"outputs": [],
165+
"source": [
166+
"plt.close(\"all\")\nplt.figure(figsize=(7.3, 2.7))\nplt.subplot(1, 3, 1)\nplt.imshow(coef, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"True weights\")\nplt.subplot(1, 3, 2)\nplt.imshow(coef_selection_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Selection\")\nplt.subplot(1, 3, 3)\nplt.imshow(coef_agglomeration_, interpolation=\"nearest\", cmap=plt.cm.RdBu_r)\nplt.title(\"Feature Agglomeration\")\nplt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)\nplt.show()"
167+
]
168+
},
169+
{
170+
"cell_type": "markdown",
171+
"metadata": {},
172+
"source": [
173+
"Attempt to remove the temporary cachedir, but don't worry if it fails\n\n"
174+
]
175+
},
176+
{
177+
"cell_type": "code",
178+
"execution_count": null,
179+
"metadata": {
180+
"collapsed": false
181+
},
182+
"outputs": [],
183+
"source": [
184+
"shutil.rmtree(cachedir, ignore_errors=True)"
30185
]
31186
}
32187
],

dev/_downloads/scikit-learn-docs.zip

4.23 KB
Binary file not shown.
-212 Bytes
-204 Bytes
-99 Bytes
-107 Bytes
-62 Bytes

0 commit comments

Comments
 (0)