Commit dc1d144

Pushing the docs to dev/ for branch: main, commit 24d58668da03239cb1a45a1bd713d5485478a675

1 parent (16a7dfb); commit dc1d144

File tree: 1,227 files changed (+6,897, −5,311 lines)


dev/.buildinfo

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: fb929b3c6a9413164e1ac03fbe586798
+config: 44afcf8dd215cc5d065a44ea3a818dd0
 tags: 645f666f9bcd5a90fca523b33c5a78b7
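The changed config hash records that the Sphinx configuration differs from the previous build. As a minimal sketch of the idea (an illustration only, not Sphinx's actual implementation), the fingerprint can be thought of as a digest over the sorted configuration values:

import hashlib

def config_fingerprint(config: dict) -> str:
    # Serialize the configuration deterministically, then hash it; any
    # changed value yields a new fingerprint and hence a full rebuild.
    stable = ",".join(f"{key}={value!r}" for key, value in sorted(config.items()))
    return hashlib.md5(stable.encode("utf-8")).hexdigest()

print(config_fingerprint({"project": "scikit-learn", "language": "en"}))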
Lines changed: 163 additions & 0 deletions

@@ -0,0 +1,163 @@
# flake8: noqa
"""
=======================================
Release Highlights for scikit-learn 1.1
=======================================

.. currentmodule:: sklearn

We are pleased to announce the release of scikit-learn 1.1! Many bug fixes
and improvements were added, as well as some new key features. We detail
below a few of the major features of this release. **For an exhaustive list of
all the changes**, please refer to the :ref:`release notes <changes_1_1>`.

To install the latest version (with pip)::

    pip install --upgrade scikit-learn

or with conda::

    conda install -c conda-forge scikit-learn

"""

# %%
# Quantile loss in :class:`ensemble.HistGradientBoostingRegressor`
# ----------------------------------------------------------------
# :class:`ensemble.HistGradientBoostingRegressor` can model quantiles with
# `loss="quantile"` and the new parameter `quantile`.
from sklearn.datasets import make_regression
from sklearn.ensemble import HistGradientBoostingRegressor
import numpy as np
import matplotlib.pyplot as plt

# Simple regression function for X * cos(X)
rng = np.random.RandomState(42)
X_1d = np.linspace(0, 10, num=2000)
X = X_1d.reshape(-1, 1)
y = X_1d * np.cos(X_1d) + rng.normal(scale=X_1d / 3)

quantiles = [0.95, 0.5, 0.05]
parameters = dict(loss="quantile", max_bins=32, max_iter=50)
hist_quantiles = {
    f"quantile={quantile:.2f}": HistGradientBoostingRegressor(
        **parameters, quantile=quantile
    ).fit(X, y)
    for quantile in quantiles
}

fig, ax = plt.subplots()
ax.plot(X_1d, y, "o", alpha=0.5, markersize=1)
for quantile, hist in hist_quantiles.items():
    ax.plot(X_1d, hist.predict(X), label=quantile)
ax.legend(loc="lower left")

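# %%
# A quick sanity check (a minimal sketch reusing the models fitted above):
# the empirical coverage of each quantile model should sit close to its
# nominal level.
for name, hist in hist_quantiles.items():
    # Fraction of targets falling below the predicted quantile curve.
    frac_below = np.mean(y < hist.predict(X))
    print(f"{name}: empirical coverage = {frac_below:.3f}")
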
# %%
# `get_feature_names_out` Available in all Transformers
# -----------------------------------------------------
# :term:`get_feature_names_out` is now available in all Transformers. This enables
# :class:`pipeline.Pipeline` to construct the output feature names for more complex
# pipelines:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression

X, y = fetch_openml("titanic", version=1, as_frame=True, return_X_y=True)
numeric_features = ["age", "fare"]
numeric_transformer = make_pipeline(SimpleImputer(strategy="median"), StandardScaler())
categorical_features = ["embarked", "pclass"]

preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        (
            "cat",
            OneHotEncoder(handle_unknown="ignore", sparse=False),
            categorical_features,
        ),
    ],
    verbose_feature_names_out=False,
)
log_reg = make_pipeline(preprocessor, SelectKBest(k=7), LogisticRegression())
log_reg.fit(X, y)

# %%
# Here we slice the pipeline to include all the steps but the last one. The output
# feature names of this pipeline slice are the features put into logistic
# regression. These names correspond directly to the coefficients in the logistic
# regression:
import pandas as pd

log_reg_input_features = log_reg[:-1].get_feature_names_out()
pd.Series(log_reg[-1].coef_.ravel(), index=log_reg_input_features).plot.bar()

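# %%
# The same method works on any fitted step in isolation (a minimal sketch
# reusing the `preprocessor` fitted inside the pipeline above), which helps
# when inspecting intermediate feature names:
preprocessor.get_feature_names_out()
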
# %%
# Grouping infrequent categories in :class:`OneHotEncoder`
# --------------------------------------------------------
# :class:`OneHotEncoder` supports aggregating infrequent categories into a single
# output for each feature. The parameters to enable the gathering of infrequent
# categories are `min_frequency` and `max_categories`. See the
# :ref:`User Guide <one_hot_encoder_infrequent_categories>` for more details.
from sklearn.preprocessing import OneHotEncoder
import numpy as np

X = np.array(
    [["dog"] * 5 + ["cat"] * 20 + ["rabbit"] * 10 + ["snake"] * 3], dtype=object
).T
enc = OneHotEncoder(min_frequency=6, sparse=False).fit(X)
enc.infrequent_categories_

# %%
# Since dog and snake are infrequent categories, they are grouped together when
# transformed:
encoded = enc.transform(np.array([["dog"], ["snake"], ["cat"], ["rabbit"]]))
pd.DataFrame(encoded, columns=enc.get_feature_names_out())

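# %%
# A minimal sketch of the other knob mentioned above: `max_categories` caps
# the number of output features per column, grouping the remainder into the
# infrequent bucket (`enc_max` is just an illustrative name).
enc_max = OneHotEncoder(max_categories=3, sparse=False).fit(X)
enc_max.get_feature_names_out()
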
# %%
# Performance improvements
# ------------------------
# Reductions on pairwise distances for dense float64 datasets have been
# refactored to better take advantage of non-blocking thread parallelism. For
# example, :meth:`neighbors.NearestNeighbors.kneighbors` and
# :meth:`neighbors.NearestNeighbors.radius_neighbors` can respectively be up
# to ×20 and ×5 faster than before (a rough timing sketch follows at the end
# of this section). In summary, the following functions and estimators now
# benefit from improved performance:
#
# - :func:`metrics.pairwise_distances_argmin`
# - :func:`metrics.pairwise_distances_argmin_min`
# - :class:`cluster.AffinityPropagation`
# - :class:`cluster.Birch`
# - :class:`cluster.MeanShift`
# - :class:`cluster.OPTICS`
# - :class:`cluster.SpectralClustering`
# - :func:`feature_selection.mutual_info_regression`
# - :class:`neighbors.KNeighborsClassifier`
# - :class:`neighbors.KNeighborsRegressor`
# - :class:`neighbors.RadiusNeighborsClassifier`
# - :class:`neighbors.RadiusNeighborsRegressor`
# - :class:`neighbors.LocalOutlierFactor`
# - :class:`neighbors.NearestNeighbors`
# - :class:`manifold.Isomap`
# - :class:`manifold.LocallyLinearEmbedding`
# - :class:`manifold.TSNE`
# - :func:`manifold.trustworthiness`
# - :class:`semi_supervised.LabelPropagation`
# - :class:`semi_supervised.LabelSpreading`
#
# To learn more about the technical details of this work, you can read
# `this suite of blog posts <https://blog.scikit-learn.org/technical/performances/>`_.
#
# Moreover, the computation of loss functions has been refactored using
# Cython, resulting in performance improvements for the following estimators:
#
# - :class:`linear_model.LogisticRegression`
# - :class:`linear_model.GammaRegressor`
# - :class:`linear_model.PoissonRegressor`
# - :class:`linear_model.TweedieRegressor`
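
# %%
# A rough timing sketch for the pairwise-distance speedups (illustrative
# only; absolute timings depend on hardware and the number of threads):
from time import perf_counter
from sklearn.neighbors import NearestNeighbors

X_dense = rng.random_sample((20_000, 50))
nn = NearestNeighbors(n_neighbors=10).fit(X_dense)
tic = perf_counter()
nn.kneighbors(X_dense)
print(f"kneighbors on 20000 x 50 float64 data: {perf_counter() - tic:.2f}s")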
Lines changed: 140 additions & 0 deletions

@@ -0,0 +1,140 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Release Highlights for scikit-learn 1.1\n\n.. currentmodule:: sklearn\n\nWe are pleased to announce the release of scikit-learn 1.1! Many bug fixes\nand improvements were added, as well as some new key features. We detail\nbelow a few of the major features of this release. **For an exhaustive list of\nall the changes**, please refer to the `release notes <changes_1_1>`.\n\nTo install the latest version (with pip)::\n\n    pip install --upgrade scikit-learn\n\nor with conda::\n\n    conda install -c conda-forge scikit-learn\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Quantile loss in :class:`ensemble.HistGradientBoostingRegressor`\n:class:`ensemble.HistGradientBoostingRegressor` can model quantiles with\n`loss=\"quantile\"` and the new parameter `quantile`.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from sklearn.datasets import make_regression\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Simple regression function for X * cos(X)\nrng = np.random.RandomState(42)\nX_1d = np.linspace(0, 10, num=2000)\nX = X_1d.reshape(-1, 1)\ny = X_1d * np.cos(X_1d) + rng.normal(scale=X_1d / 3)\n\nquantiles = [0.95, 0.5, 0.05]\nparameters = dict(loss=\"quantile\", max_bins=32, max_iter=50)\nhist_quantiles = {\n    f\"quantile={quantile:.2f}\": HistGradientBoostingRegressor(\n        **parameters, quantile=quantile\n    ).fit(X, y)\n    for quantile in quantiles\n}\n\nfig, ax = plt.subplots()\nax.plot(X_1d, y, \"o\", alpha=0.5, markersize=1)\nfor quantile, hist in hist_quantiles.items():\n    ax.plot(X_1d, hist.predict(X), label=quantile)\nax.legend(loc=\"lower left\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## `get_feature_names_out` Available in all Transformers\n:term:`get_feature_names_out` is now available in all Transformers. This enables\n:class:`pipeline.Pipeline` to construct the output feature names for more complex\npipelines:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from sklearn.compose import ColumnTransformer\nfrom sklearn.preprocessing import OneHotEncoder, StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.feature_selection import SelectKBest\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.linear_model import LogisticRegression\n\nX, y = fetch_openml(\"titanic\", version=1, as_frame=True, return_X_y=True)\nnumeric_features = [\"age\", \"fare\"]\nnumeric_transformer = make_pipeline(SimpleImputer(strategy=\"median\"), StandardScaler())\ncategorical_features = [\"embarked\", \"pclass\"]\n\npreprocessor = ColumnTransformer(\n    [\n        (\"num\", numeric_transformer, numeric_features),\n        (\n            \"cat\",\n            OneHotEncoder(handle_unknown=\"ignore\", sparse=False),\n            categorical_features,\n        ),\n    ],\n    verbose_feature_names_out=False,\n)\nlog_reg = make_pipeline(preprocessor, SelectKBest(k=7), LogisticRegression())\nlog_reg.fit(X, y)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Here we slice the pipeline to include all the steps but the last one. The output\nfeature names of this pipeline slice are the features put into logistic\nregression. These names correspond directly to the coefficients in the logistic\nregression:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n\nlog_reg_input_features = log_reg[:-1].get_feature_names_out()\npd.Series(log_reg[-1].coef_.ravel(), index=log_reg_input_features).plot.bar()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Grouping infrequent categories in :class:`OneHotEncoder`\n:class:`OneHotEncoder` supports aggregating infrequent categories into a single\noutput for each feature. The parameters to enable the gathering of infrequent\ncategories are `min_frequency` and `max_categories`. See the\n`User Guide <one_hot_encoder_infrequent_categories>` for more details.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from sklearn.preprocessing import OneHotEncoder\nimport numpy as np\n\nX = np.array(\n    [[\"dog\"] * 5 + [\"cat\"] * 20 + [\"rabbit\"] * 10 + [\"snake\"] * 3], dtype=object\n).T\nenc = OneHotEncoder(min_frequency=6, sparse=False).fit(X)\nenc.infrequent_categories_"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Since dog and snake are infrequent categories, they are grouped together when\ntransformed:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "encoded = enc.transform(np.array([[\"dog\"], [\"snake\"], [\"cat\"], [\"rabbit\"]]))\npd.DataFrame(encoded, columns=enc.get_feature_names_out())"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Performance improvements\nReductions on pairwise distances for dense float64 datasets have been refactored\nto better take advantage of non-blocking thread parallelism. For example,\n:meth:`neighbors.NearestNeighbors.kneighbors` and\n:meth:`neighbors.NearestNeighbors.radius_neighbors` can respectively be up to \u00d720 and\n\u00d75 faster than before. In summary, the following functions and estimators\nnow benefit from improved performance:\n\n- :func:`metrics.pairwise_distances_argmin`\n- :func:`metrics.pairwise_distances_argmin_min`\n- :class:`cluster.AffinityPropagation`\n- :class:`cluster.Birch`\n- :class:`cluster.MeanShift`\n- :class:`cluster.OPTICS`\n- :class:`cluster.SpectralClustering`\n- :func:`feature_selection.mutual_info_regression`\n- :class:`neighbors.KNeighborsClassifier`\n- :class:`neighbors.KNeighborsRegressor`\n- :class:`neighbors.RadiusNeighborsClassifier`\n- :class:`neighbors.RadiusNeighborsRegressor`\n- :class:`neighbors.LocalOutlierFactor`\n- :class:`neighbors.NearestNeighbors`\n- :class:`manifold.Isomap`\n- :class:`manifold.LocallyLinearEmbedding`\n- :class:`manifold.TSNE`\n- :func:`manifold.trustworthiness`\n- :class:`semi_supervised.LabelPropagation`\n- :class:`semi_supervised.LabelSpreading`\n\nTo learn more about the technical details of this work, you can read\n`this suite of blog posts <https://blog.scikit-learn.org/technical/performances/>`_.\n\nMoreover, the computation of loss functions has been refactored using\nCython, resulting in performance improvements for the following estimators:\n\n- :class:`linear_model.LogisticRegression`\n- :class:`linear_model.GammaRegressor`\n- :class:`linear_model.PoissonRegressor`\n- :class:`linear_model.TweedieRegressor`\n\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
dev/_downloads/scikit-learn-docs.zip
113 KB
Binary file not shown.