
Commit db39f78

Pushing the docs to dev/ for branch: master, commit be027e0b316097ad0cea9956aa6829ae969d8a62
1 parent 8f3e3b9 commit db39f78

1,205 files changed: 4,026 additions and 3,701 deletions


dev/_downloads/7ee55c12f8d3eb1dd8d2005d9dd7b6f1/plot_release_highlights_0_22_0.py

Lines changed: 64 additions & 1 deletion
@@ -12,13 +12,36 @@
 
 To install the latest version (with pip)::
 
-    pip install -U scikit-learn --upgrade
+    pip install --upgrade scikit-learn
 
 or with conda::
 
     conda install scikit-learn
 """
 
+##############################################################################
+# KNN Based Imputation
+# ------------------------------------
+# We now support imputation for completing missing values using k-Nearest
+# Neighbors.
+#
+# Each sample's missing values are imputed using the mean value from the
+# ``n_neighbors`` nearest neighbors found in the training set. Two samples are
+# close if the features that neither is missing are close.
+# By default, a Euclidean distance metric
+# that supports missing values,
+# :func:`~metrics.nan_euclidean_distances`, is used to find the nearest
+# neighbors.
+#
+# Read more in the :ref:`User Guide <knnimpute>`.
+
+import numpy as np
+from sklearn.impute import KNNImputer
+
+X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
+imputer = KNNImputer(n_neighbors=2)
+print(imputer.fit_transform(X))
+
 ##############################################################################
 # Permutation-based feature importance
 # ------------------------------------
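The hunk above leans on :func:`~metrics.nan_euclidean_distances` to find neighbors. As a hedged aside, not part of this commit, here is a minimal sketch of that metric in isolation on the same toy data, assuming scikit-learn >= 0.22 (the release where it was introduced):

    # Sketch (editor's illustration, not from the commit): the NaN-aware
    # distance KNNImputer uses to pick the ``n_neighbors`` donor samples.
    import numpy as np
    from sklearn.metrics.pairwise import nan_euclidean_distances

    X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
    # Distances are computed over the coordinates both samples have observed,
    # then rescaled to compensate for the coordinates that are missing.
    print(nan_euclidean_distances(X, X))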
@@ -149,3 +172,43 @@
     # recomputed.
     estimator.set_params(isomap__n_neighbors=5)
     estimator.fit(X)
+
+############################################################################
+# Stacking Classifier and Regressor
+# ---------------------------------
+# :class:`~ensemble.StackingClassifier` and
+# :class:`~ensemble.StackingRegressor`
+# allow you to have a stack of estimators with a final classifier or
+# a regressor.
+# Stacked generalization consists of stacking the outputs of individual
+# estimators and using a classifier to compute the final prediction. Stacking
+# leverages the strength of each individual estimator by using their outputs
+# as the input of a final estimator.
+# Base estimators are fitted on the full ``X``, while
+# the final estimator is trained on cross-validated predictions of the
+# base estimators obtained with ``cross_val_predict``.
+#
+# Read more in the :ref:`User Guide <stacking>`.
+
+from sklearn.datasets import load_iris
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.svm import LinearSVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import make_pipeline
+from sklearn.ensemble import StackingClassifier
+from sklearn.model_selection import train_test_split
+
+X, y = load_iris(return_X_y=True)
+estimators = [
+    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
+    ('svr', make_pipeline(StandardScaler(),
+                          LinearSVC(random_state=42)))
+]
+clf = StackingClassifier(
+    estimators=estimators, final_estimator=LogisticRegression()
+)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, stratify=y, random_state=42
+)
+clf.fit(X_train, y_train).score(X_test, y_test)
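The docstring above notes that the final estimator is trained on cross-validated predictions of the base estimators. A rough hand-rolled equivalent using ``cross_val_predict`` may make that clearer; this is an editor's sketch under assumed defaults (5-fold CV, probability meta-features), not the library's exact implementation:

    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_predict

    X, y = load_iris(return_X_y=True)
    base = RandomForestClassifier(n_estimators=10, random_state=42)
    # Out-of-fold probabilities from the base estimator become the
    # meta-features, so the final estimator never sees leaked predictions.
    meta_features = cross_val_predict(base, X, y, cv=5, method='predict_proba')
    final = LogisticRegression().fit(meta_features, y)
    # The base estimator is then refit on the full X for prediction time.
    base.fit(X, y)
    print(final.predict(base.predict_proba(X[:5])))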

dev/_downloads/c101b602d0b3510ef47dd19d64a4a92b/plot_release_highlights_0_22_0.ipynb

Lines changed: 37 additions & 1 deletion
@@ -15,7 +15,25 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n========================================\nRelease Highlights for scikit-learn 0.22\n========================================\n\n.. currentmodule:: sklearn\n\nWe are pleased to announce the release of scikit-learn 0.22, which comes\nwith many bug fixes and new features! We detail below a few of the major\nfeatures of this release. For an exhaustive list of all the changes, please\nrefer to the `release notes <changes_0_22>`.\n\nTo install the latest version (with pip)::\n\n    pip install -U scikit-learn --upgrade\n\nor with conda::\n\n    conda install scikit-learn\n"
+"\n========================================\nRelease Highlights for scikit-learn 0.22\n========================================\n\n.. currentmodule:: sklearn\n\nWe are pleased to announce the release of scikit-learn 0.22, which comes\nwith many bug fixes and new features! We detail below a few of the major\nfeatures of this release. For an exhaustive list of all the changes, please\nrefer to the `release notes <changes_0_22>`.\n\nTo install the latest version (with pip)::\n\n    pip install --upgrade scikit-learn\n\nor with conda::\n\n    conda install scikit-learn\n"
+],
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"KNN Based Imputation\n------------------------------------\nWe now support imputation for completing missing values using k-Nearest\nNeighbors.\n\nEach sample's missing values are imputed using the mean value from the\n``n_neighbors`` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\nBy default, a Euclidean distance metric\nthat supports missing values,\n:func:`~metrics.nan_euclidean_distances`, is used to find the nearest\nneighbors.\n\nRead more in the `User Guide <knnimpute>`.\n\n"
+],
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\nfrom sklearn.impute import KNNImputer\n\nX = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\nimputer = KNNImputer(n_neighbors=2)\nprint(imputer.fit_transform(X))"
 ]
 },
 {
@@ -125,6 +143,24 @@
 "source": [
 "from tempfile import TemporaryDirectory\nfrom sklearn.neighbors import KNeighborsTransformer\nfrom sklearn.manifold import Isomap\nfrom sklearn.pipeline import make_pipeline\n\nwith TemporaryDirectory(prefix=\"sklearn_cache_\") as tmpdir:\n    estimator = make_pipeline(\n        KNeighborsTransformer(n_neighbors=10, mode='distance'),\n        Isomap(n_neighbors=10, metric='precomputed'),\n        memory=tmpdir)\n    estimator.fit(X)\n\n    # We can decrease the number of neighbors and the graph will not be\n    # recomputed.\n    estimator.set_params(isomap__n_neighbors=5)\n    estimator.fit(X)"
 ]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Stacking Classifier and Regressor\n---------------------------------\n:class:`~ensemble.StackingClassifier` and\n:class:`~ensemble.StackingRegressor`\nallow you to have a stack of estimators with a final classifier or\na regressor.\nStacked generalization consists of stacking the outputs of individual\nestimators and using a classifier to compute the final prediction. Stacking\nleverages the strength of each individual estimator by using their outputs\nas the input of a final estimator.\nBase estimators are fitted on the full ``X``, while\nthe final estimator is trained on cross-validated predictions of the\nbase estimators obtained with ``cross_val_predict``.\n\nRead more in the `User Guide <stacking>`.\n\n"
+],
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"from sklearn.datasets import load_iris\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import LinearSVC\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.model_selection import train_test_split\n\nX, y = load_iris(return_X_y=True)\nestimators = [\n    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n    ('svr', make_pipeline(StandardScaler(),\n                          LinearSVC(random_state=42)))\n]\nclf = StackingClassifier(\n    estimators=estimators, final_estimator=LogisticRegression()\n)\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, stratify=y, random_state=42\n)\nclf.fit(X_train, y_train).score(X_test, y_test)"
+]
 }
 ],
 "metadata": {

dev/_downloads/scikit-learn-docs.pdf (binary, -7.45 KB)

dev/_images/iris.png (binary, 0 bytes)
