
Commit f791dc3

Pushing the docs to dev/ for branch: master, commit 5bf37e8425719bd069919887966bfcb233747e66
1 parent 1b311a7 commit f791dc3

File tree: 1,213 files changed (+4841 / -3870 lines)

Lines changed: 110 additions & 68 deletions
@@ -1,100 +1,142 @@
 """
-===================================================
-Feature selection using SelectFromModel and LassoCV
-===================================================
+============================================
+Model-based and sequential feature selection
+============================================
 
-Use SelectFromModel meta-transformer along with Lasso to select the best
-couple of features from the diabetes dataset.
+This example illustrates and compares two approaches for feature selection:
+:class:`~sklearn.feature_selection.SelectFromModel` which is based on feature
+importance, and
+:class:`~sklearn.feature_selection.SequentialFeatureSelector` which relies
+on a greedy approach.
 
-Since the L1 norm promotes sparsity of features we might be interested in
-selecting only a subset of the most interesting features from the dataset. This
-example shows how to select two the most interesting features from the diabetes
-dataset.
-
-Diabetes dataset consists of 10 variables (features) collected from 442
-diabetes patients. This example shows how to use SelectFromModel and LassoCv to
-find the best two features predicting disease progression after one year from
-the baseline.
+We use the Diabetes dataset, which consists of 10 features collected from 442
+diabetes patients.
 
 Authors: `Manoj Kumar <[email protected]>`_,
-`Maria Telenczuk <https://github.com/maikia>`_
+`Maria Telenczuk <https://github.com/maikia>`_, Nicolas Hug.
 
 License: BSD 3 clause
 """
 
 print(__doc__)
 
-import matplotlib.pyplot as plt
-import numpy as np
-
-from sklearn.datasets import load_diabetes
-from sklearn.feature_selection import SelectFromModel
-from sklearn.linear_model import LassoCV
 
 # %%
-# Load the data
-# ---------------------------------------------------------
+# Loading the data
+# ----------------
 #
-# First, let's load the diabetes dataset which is available from within
-# sklearn. Then, we will look what features are collected for the diabates
-# patients:
+# We first load the diabetes dataset which is available from within
+# scikit-learn, and print its description:
+from sklearn.datasets import load_diabetes
 
 diabetes = load_diabetes()
+X, y = diabetes.data, diabetes.target
+print(diabetes.DESCR)
 
-X = diabetes.data
-y = diabetes.target
+# %%
+# Feature importance from coefficients
+# ------------------------------------
+#
+# To get an idea of the importance of the features, we are going to use the
+# :class:`~sklearn.linear_model.LassoCV` estimator. The features with the
+# highest absolute `coef_` value are considered the most important.
+# We can observe the coefficients directly without needing to scale them (or
+# scale the data) because from the description above, we know that the features
+# were already standardized.
+# For a more complete example on the interpretations of the coefficients of
+# linear models, you may refer to
+# :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`.
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.linear_model import LassoCV
 
-feature_names = diabetes.feature_names
-print(feature_names)
+lasso = LassoCV().fit(X, y)
+importance = np.abs(lasso.coef_)
+feature_names = np.array(diabetes.feature_names)
+plt.bar(height=importance, x=feature_names)
+plt.title("Feature importances via coefficients")
+plt.show()
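Side note, not part of the diff: the threshold trick in the next hunk leans on the ranking of these coefficients. Assuming the `importance` and `feature_names` arrays created in the added lines above (and `numpy` imported as `np`), a quick way to inspect that ranking is:

    order = np.argsort(importance)[::-1]   # feature indices, largest |coef_| first
    for name, value in zip(feature_names[order], importance[order]):
        print(f"{name}: {value:.3f}")      # the discussion further down implies bmi ranks third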
 
 # %%
-# Find importance of the features
-# ---------------------------------------------------------
+# Selecting features based on importance
+# --------------------------------------
 #
-# To decide on the importance of the features we are going to use LassoCV
-# estimator. The features with the highest absolute `coef_` value are
-# considered the most important
+# Now we want to select the two features which are the most important according
+# to the coefficients. The :class:`~sklearn.feature_selection.SelectFromModel`
+# is meant just for that. :class:`~sklearn.feature_selection.SelectFromModel`
+# accepts a `threshold` parameter and will select the features whose importance
+# (defined by the coefficients) are above this threshold.
+#
+# Since we want to select only 2 features, we will set this threshold slightly
+# above the coefficient of the third most important feature.
+from sklearn.feature_selection import SelectFromModel
+from time import time
 
-clf = LassoCV().fit(X, y)
-importance = np.abs(clf.coef_)
-print(importance)
+threshold = np.sort(importance)[-3] + 0.01
+
+tic = time()
+sfm = SelectFromModel(lasso, threshold=threshold).fit(X, y)
+toc = time()
+print("Features selected by SelectFromModel: "
+      f"{feature_names[sfm.get_support()]}")
+print(f"Done in {toc - tic:.3f}s")
 
 # %%
-# Select from the model features with the higest score
-# ---------------------------------------------------------
+# Selecting features with Sequential Feature Selection
+# ----------------------------------------------------
+#
+# Another way of selecting features is to use
+# :class:`~sklearn.feature_selection.SequentialFeatureSelector`
+# (SFS). SFS is a greedy procedure where, at each iteration, we choose the best
+# new feature to add to our selected features based on a cross-validation score.
+# That is, we start with 0 features and choose the best single feature with the
+# highest score. The procedure is repeated until we reach the desired number of
+# selected features.
 #
-# Now we want to select the two features which are the most important.
-# SelectFromModel() allows for setting the threshold. Only the features with
-# the `coef_` higher than the threshold will remain. Here, we want to set the
-# threshold slightly above the third highest `coef_` calculated by LassoCV()
-# from our data.
+# We can also go in the reverse direction (backward SFS), *i.e.* start with all
+# the features and greedily choose features to remove one by one. We illustrate
+# both approaches here.
 
-idx_third = importance.argsort()[-3]
-threshold = importance[idx_third] + 0.01
+from sklearn.feature_selection import SequentialFeatureSelector
 
-idx_features = (-importance).argsort()[:2]
-name_features = np.array(feature_names)[idx_features]
-print('Selected features: {}'.format(name_features))
+tic_fwd = time()
+sfs_forward = SequentialFeatureSelector(lasso, n_features_to_select=2,
+                                        direction='forward').fit(X, y)
+toc_fwd = time()
 
-sfm = SelectFromModel(clf, threshold=threshold)
-sfm.fit(X, y)
-X_transform = sfm.transform(X)
+tic_bwd = time()
+sfs_backward = SequentialFeatureSelector(lasso, n_features_to_select=2,
+                                         direction='backward').fit(X, y)
+toc_bwd = time()
 
-n_features = sfm.transform(X).shape[1]
+print("Features selected by forward sequential selection: "
+      f"{feature_names[sfs_forward.get_support()]}")
+print(f"Done in {toc_fwd - tic_fwd:.3f}s")
+print("Features selected by backward sequential selection: "
+      f"{feature_names[sfs_backward.get_support()]}")
+print(f"Done in {toc_bwd - tic_bwd:.3f}s")
 
 # %%
-# Plot the two most important features
-# ---------------------------------------------------------
+# Discussion
+# ----------
 #
-# Finally we will plot the selected two features from the data.
-
-plt.title(
-    "Features from diabets using SelectFromModel with "
-    "threshold %0.3f." % sfm.threshold)
-feature1 = X_transform[:, 0]
-feature2 = X_transform[:, 1]
-plt.plot(feature1, feature2, 'r.')
-plt.xlabel("First feature: {}".format(name_features[0]))
-plt.ylabel("Second feature: {}".format(name_features[1]))
-plt.ylim([np.min(feature2), np.max(feature2)])
-plt.show()
+# Interestingly, forward and backward selection have selected the same set of
+# features. In general, this isn't the case and the two methods would lead to
+# different results.
+#
+# We also note that the features selected by SFS differ from those selected by
+# feature importance: SFS selects `bmi` instead of `s1`. This does sound
+# reasonable though, since `bmi` corresponds to the third most important
+# feature according to the coefficients. It is quite remarkable considering
+# that SFS makes no use of the coefficients at all.
+#
+# To finish with, we should note that
+# :class:`~sklearn.feature_selection.SelectFromModel` is significantly faster
+# than SFS. Indeed, :class:`~sklearn.feature_selection.SelectFromModel` only
+# needs to fit a model once, while SFS needs to cross-validate many different
+# models for each of the iterations. SFS however works with any model, while
+# :class:`~sklearn.feature_selection.SelectFromModel` requires the underlying
+# estimator to expose a `coef_` attribute or a `feature_importances_`
+# attribute. The forward SFS is faster than the backward SFS because it only
+# needs to perform `n_features_to_select = 2` iterations, while the backward
+# SFS needs to perform `n_features - n_features_to_select = 8` iterations.
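Side note, not part of the diff: the speed gap described in the discussion can be made concrete with a rough count of model fits. Assuming the default 5-fold cross-validation of `SequentialFeatureSelector` (an estimate, not a measurement):

    n_features, n_select, cv = 10, 2, 5

    # Forward SFS: iteration k evaluates each of the (n_features - k) remaining candidates.
    forward_fits = sum(n_features - k for k in range(n_select)) * cv                # (10 + 9) * 5 = 95
    # Backward SFS: same per-iteration cost, but n_features - n_select = 8 iterations.
    backward_fits = sum(n_features - k for k in range(n_features - n_select)) * cv  # 52 * 5 = 260

    print(forward_fits, backward_fits)
    # SelectFromModel, by contrast, fits its estimator once (LassoCV's own internal
    # cross-validation over the regularization path aside).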

dev/_downloads/f1e887db7b101f4c858db7db12e9c7e2/plot_select_from_model_diabetes.ipynb

Lines changed: 17 additions & 10 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Feature selection using SelectFromModel and LassoCV\n\n\nUse SelectFromModel meta-transformer along with Lasso to select the best\ncouple of features from the diabetes dataset.\n\nSince the L1 norm promotes sparsity of features we might be interested in\nselecting only a subset of the most interesting features from the dataset. This\nexample shows how to select two the most interesting features from the diabetes\ndataset.\n\nDiabetes dataset consists of 10 variables (features) collected from 442\ndiabetes patients. This example shows how to use SelectFromModel and LassoCv to\nfind the best two features predicting disease progression after one year from\nthe baseline.\n\nAuthors: `Manoj Kumar <[email protected]>`_,\n`Maria Telenczuk <https://github.com/maikia>`_\n\nLicense: BSD 3 clause\n"
+"\n# Model-based and sequential feature selection\n\n\nThis example illustrates and compares two approaches for feature selection:\n:class:`~sklearn.feature_selection.SelectFromModel` which is based on feature\nimportance, and\n:class:`~sklearn.feature_selection.SequentialFeatureSelector` which relies\non a greedy approach.\n\nWe use the Diabetes dataset, which consists of 10 features collected from 442\ndiabetes patients.\n\nAuthors: `Manoj Kumar <[email protected]>`_,\n`Maria Telenczuk <https://github.com/maikia>`_, Nicolas Hug.\n\nLicense: BSD 3 clause\n"
 ]
 },
 {
@@ -26,14 +26,14 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import load_diabetes\nfrom sklearn.feature_selection import SelectFromModel\nfrom sklearn.linear_model import LassoCV"
+"print(__doc__)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Load the data\n---------------------------------------------------------\n\nFirst, let's load the diabetes dataset which is available from within\nsklearn. Then, we will look what features are collected for the diabates\npatients:\n\n"
+"Loading the data\n----------------\n\nWe first load the diabetes dataset which is available from within\nscikit-learn, and print its description:\n\n"
 ]
 },
 {
@@ -44,14 +44,14 @@
 },
 "outputs": [],
 "source": [
-"diabetes = load_diabetes()\n\nX = diabetes.data\ny = diabetes.target\n\nfeature_names = diabetes.feature_names\nprint(feature_names)"
+"from sklearn.datasets import load_diabetes\n\ndiabetes = load_diabetes()\nX, y = diabetes.data, diabetes.target\nprint(diabetes.DESCR)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Find importance of the features\n---------------------------------------------------------\n\nTo decide on the importance of the features we are going to use LassoCV\nestimator. The features with the highest absolute `coef_` value are\nconsidered the most important\n\n"
+"Feature importance from coefficients\n------------------------------------\n\nTo get an idea of the importance of the features, we are going to use the\n:class:`~sklearn.linear_model.LassoCV` estimator. The features with the\nhighest absolute `coef_` value are considered the most important.\nWe can observe the coefficients directly without needing to scale them (or\nscale the data) because from the description above, we know that the features\nwere already standardized.\nFor a more complete example on the interpretations of the coefficients of\nlinear models, you may refer to\n`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`.\n\n"
 ]
 },
 {
@@ -62,14 +62,14 @@
 },
 "outputs": [],
 "source": [
-"clf = LassoCV().fit(X, y)\nimportance = np.abs(clf.coef_)\nprint(importance)"
+"import matplotlib.pyplot as plt\nimport numpy as np\nfrom sklearn.linear_model import LassoCV\n\nlasso = LassoCV().fit(X, y)\nimportance = np.abs(lasso.coef_)\nfeature_names = np.array(diabetes.feature_names)\nplt.bar(height=importance, x=feature_names)\nplt.title(\"Feature importances via coefficients\")\nplt.show()"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Select from the model features with the higest score\n---------------------------------------------------------\n\nNow we want to select the two features which are the most important.\nSelectFromModel() allows for setting the threshold. Only the features with\nthe `coef_` higher than the threshold will remain. Here, we want to set the\nthreshold slightly above the third highest `coef_` calculated by LassoCV()\nfrom our data.\n\n"
+"Selecting features based on importance\n--------------------------------------\n\nNow we want to select the two features which are the most important according\nto the coefficients. The :class:`~sklearn.feature_selection.SelectFromModel`\nis meant just for that. :class:`~sklearn.feature_selection.SelectFromModel`\naccepts a `threshold` parameter and will select the features whose importance\n(defined by the coefficients) are above this threshold.\n\nSince we want to select only 2 features, we will set this threshold slightly\nabove the coefficient of the third most important feature.\n\n"
 ]
 },
 {
@@ -80,14 +80,14 @@
 },
 "outputs": [],
 "source": [
-"idx_third = importance.argsort()[-3]\nthreshold = importance[idx_third] + 0.01\n\nidx_features = (-importance).argsort()[:2]\nname_features = np.array(feature_names)[idx_features]\nprint('Selected features: {}'.format(name_features))\n\nsfm = SelectFromModel(clf, threshold=threshold)\nsfm.fit(X, y)\nX_transform = sfm.transform(X)\n\nn_features = sfm.transform(X).shape[1]"
+"from sklearn.feature_selection import SelectFromModel\nfrom time import time\n\nthreshold = np.sort(importance)[-3] + 0.01\n\ntic = time()\nsfm = SelectFromModel(lasso, threshold=threshold).fit(X, y)\ntoc = time()\nprint(\"Features selected by SelectFromModel: \"\n      f\"{feature_names[sfm.get_support()]}\")\nprint(f\"Done in {toc - tic:.3f}s\")"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Plot the two most important features\n---------------------------------------------------------\n\nFinally we will plot the selected two features from the data.\n\n"
+"Selecting features with Sequential Feature Selection\n----------------------------------------------------\n\nAnother way of selecting features is to use\n:class:`~sklearn.feature_selection.SequentialFeatureSelector`\n(SFS). SFS is a greedy procedure where, at each iteration, we choose the best\nnew feature to add to our selected features based on a cross-validation score.\nThat is, we start with 0 features and choose the best single feature with the\nhighest score. The procedure is repeated until we reach the desired number of\nselected features.\n\nWe can also go in the reverse direction (backward SFS), *i.e.* start with all\nthe features and greedily choose features to remove one by one. We illustrate\nboth approaches here.\n\n"
 ]
 },
 {
@@ -98,7 +98,14 @@
 },
 "outputs": [],
 "source": [
-"plt.title(\n    \"Features from diabets using SelectFromModel with \"\n    \"threshold %0.3f.\" % sfm.threshold)\nfeature1 = X_transform[:, 0]\nfeature2 = X_transform[:, 1]\nplt.plot(feature1, feature2, 'r.')\nplt.xlabel(\"First feature: {}\".format(name_features[0]))\nplt.ylabel(\"Second feature: {}\".format(name_features[1]))\nplt.ylim([np.min(feature2), np.max(feature2)])\nplt.show()"
+"from sklearn.feature_selection import SequentialFeatureSelector\n\ntic_fwd = time()\nsfs_forward = SequentialFeatureSelector(lasso, n_features_to_select=2,\n                                        direction='forward').fit(X, y)\ntoc_fwd = time()\n\ntic_bwd = time()\nsfs_backward = SequentialFeatureSelector(lasso, n_features_to_select=2,\n                                         direction='backward').fit(X, y)\ntoc_bwd = time()\n\nprint(\"Features selected by forward sequential selection: \"\n      f\"{feature_names[sfs_forward.get_support()]}\")\nprint(f\"Done in {toc_fwd - tic_fwd:.3f}s\")\nprint(\"Features selected by backward sequential selection: \"\n      f\"{feature_names[sfs_backward.get_support()]}\")\nprint(f\"Done in {toc_bwd - tic_bwd:.3f}s\")"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Discussion\n----------\n\nInterestingly, forward and backward selection have selected the same set of\nfeatures. In general, this isn't the case and the two methods would lead to\ndifferent results.\n\nWe also note that the features selected by SFS differ from those selected by\nfeature importance: SFS selects `bmi` instead of `s1`. This does sound\nreasonable though, since `bmi` corresponds to the third most important\nfeature according to the coefficients. It is quite remarkable considering\nthat SFS makes no use of the coefficients at all.\n\nTo finish with, we should note that\n:class:`~sklearn.feature_selection.SelectFromModel` is significantly faster\nthan SFS. Indeed, :class:`~sklearn.feature_selection.SelectFromModel` only\nneeds to fit a model once, while SFS needs to cross-validate many different\nmodels for each of the iterations. SFS however works with any model, while\n:class:`~sklearn.feature_selection.SelectFromModel` requires the underlying\nestimator to expose a `coef_` attribute or a `feature_importances_`\nattribute. The forward SFS is faster than the backward SFS because it only\nneeds to perform `n_features_to_select = 2` iterations, while the backward\nSFS needs to perform `n_features - n_features_to_select = 8` iterations.\n\n"
 ]
 }
 ],

dev/_downloads/scikit-learn-docs.pdf (4.18 KB)
Binary file not shown.

dev/_images/iris.png (0 Bytes)

0 commit comments
