|
# flake8: noqa
"""
========================================
Release Highlights for scikit-learn 0.23
========================================

.. currentmodule:: sklearn

We are pleased to announce the release of scikit-learn 0.23! Many bug fixes
and improvements were added, as well as some new key features. We detail
below a few of the major features of this release. **For an exhaustive list of
all the changes**, please refer to the :ref:`release notes <changes_0_23>`.

To install the latest version (with pip)::

    pip install --upgrade scikit-learn

or with conda::

    conda install scikit-learn
"""

##############################################################################
# Generalized Linear Models, and Poisson loss for gradient boosting
# -----------------------------------------------------------------
# Long-awaited Generalized Linear Models with non-normal loss functions are now
# available. In particular, three new regressors were implemented:
# :class:`~sklearn.linear_model.PoissonRegressor`,
# :class:`~sklearn.linear_model.GammaRegressor`, and
# :class:`~sklearn.linear_model.TweedieRegressor`. The Poisson regressor can be
# used to model non-negative integer counts or relative frequencies. Read more
# in the :ref:`User Guide <Generalized_linear_regression>`. Additionally,
# :class:`~sklearn.ensemble.HistGradientBoostingRegressor` supports a new
# 'poisson' loss as well.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PoissonRegressor
# this import is required to enable the still-experimental
# HistGradientBoostingRegressor:
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

n_samples, n_features = 1000, 20
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features)
# non-negative integer target correlated with X[:, 5], with many zeros:
y = rng.poisson(lam=np.exp(X[:, 5]) / 2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
glm = PoissonRegressor()
gbdt = HistGradientBoostingRegressor(loss='poisson', learning_rate=.01)
glm.fit(X_train, y_train)
gbdt.fit(X_train, y_train)
print(glm.score(X_test, y_test))   # D^2, the fraction of deviance explained
print(gbdt.score(X_test, y_test))  # R^2 of the gradient boosting model
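
# As a minimal sketch, the same data can also be fitted with the new
# TweedieRegressor mentioned above (the `power` value below is only an
# illustration, not a recommendation). With 1 < power < 2, it models a
# compound Poisson-Gamma distribution and accepts the non-negative targets
# generated above:
from sklearn.linear_model import TweedieRegressor

tweedie = TweedieRegressor(power=1.5)
tweedie.fit(X_train, y_train)
print(tweedie.score(X_test, y_test))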
##############################################################################
# Rich HTML representation for estimators
# ---------------------------------------
# Estimators can now be rendered in HTML in notebooks by enabling the
# `display='diagram'` option. This is particularly useful to visualize
# pipelines and composite estimators. Click on the entries to expand and see
# details.
from sklearn import set_config
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LogisticRegression
set_config(display='diagram')

num_proc = make_pipeline(SimpleImputer(strategy='median'), StandardScaler())

cat_proc = make_pipeline(
    SimpleImputer(strategy='constant', fill_value='missing'),
    OneHotEncoder(handle_unknown='ignore'))

preprocessor = make_column_transformer((num_proc, ('feat1', 'feat3')),
                                       (cat_proc, ('feat0', 'feat2')))

clf = make_pipeline(preprocessor, LogisticRegression())
clf
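
##############################################################################
# The same diagram can also be obtained as a plain HTML string. This is a
# minimal sketch using :func:`~sklearn.utils.estimator_html_repr`, e.g. to
# embed the diagram in a custom report:
from sklearn.utils import estimator_html_repr

html_snippet = estimator_html_repr(clf)
print(html_snippet[:80])  # beginning of the generated HTML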
##############################################################################
# Scalability and stability improvements to KMeans
# ------------------------------------------------
# The :class:`~sklearn.cluster.KMeans` estimator was entirely re-worked, and it
# is now significantly faster and more stable. In addition, the Elkan algorithm
# is now compatible with sparse matrices. The estimator uses OpenMP based
# parallelism instead of relying on joblib, so the `n_jobs` parameter has no
# effect anymore. For more details on how to control the number of threads,
# please refer to our :ref:`parallelism` notes.
import scipy.sparse
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import completeness_score

rng = np.random.RandomState(0)
X, y = make_blobs(random_state=rng)
X = scipy.sparse.csr_matrix(X)
X_train, X_test, _, y_test = train_test_split(X, y, random_state=rng)
kmeans = KMeans(algorithm='elkan').fit(X_train)
# completeness_score expects the ground-truth labels as the first argument:
print(completeness_score(y_test, kmeans.predict(X_test)))
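
# The number of threads used by KMeans can be capped at runtime. As a sketch
# of one option, the threadpoolctl package (which scikit-learn itself relies
# on) provides a context manager; the limit of 2 threads below is an
# arbitrary illustration. Setting the OMP_NUM_THREADS environment variable
# before starting Python is another option.
from threadpoolctl import threadpool_limits

with threadpool_limits(limits=2):
    KMeans(algorithm='elkan').fit(X_train)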
##############################################################################
# Improvements to the histogram-based Gradient Boosting estimators
# ----------------------------------------------------------------
# Various improvements were made to
# :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
# :class:`~sklearn.ensemble.HistGradientBoostingRegressor`. On top of the
# Poisson loss mentioned above, these estimators now support :ref:`sample
# weights <sw_hgbdt>`. Also, an automatic early-stopping criterion was added:
# early-stopping is enabled by default when the number of samples exceeds 10k.
# Finally, users can now define :ref:`monotonic constraints
# <monotonic_cst_gbdt>` to constrain the predictions based on the variations of
# specific features. In the following example, we construct a target that is
# generally positively correlated with the first feature, with some noise.
# Applying monotonic constraints allows the prediction to capture the global
# effect of the first feature, instead of fitting the noise.
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.inspection import plot_partial_dependence
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

n_samples = 500
rng = np.random.RandomState(0)
X = rng.randn(n_samples, 2)
noise = rng.normal(loc=0.0, scale=0.01, size=n_samples)
y = (5 * X[:, 0] + np.sin(10 * np.pi * X[:, 0]) - noise)

gbdt_no_cst = HistGradientBoostingRegressor().fit(X, y)
# a value of 1 enforces a monotonically increasing constraint on the
# corresponding feature, 0 means no constraint:
gbdt_cst = HistGradientBoostingRegressor(monotonic_cst=[1, 0]).fit(X, y)

disp = plot_partial_dependence(
    gbdt_no_cst, X, features=[0], feature_names=['feature 0'],
    line_kw={'linewidth': 4, 'label': 'unconstrained'})
plot_partial_dependence(gbdt_cst, X, features=[0],
                        line_kw={'linewidth': 4, 'label': 'constrained'},
                        ax=disp.axes_)
disp.axes_[0, 0].plot(X[:, 0], y, 'o', alpha=.5, zorder=-1, label='samples')
disp.axes_[0, 0].set_ylim(-3, 3)
disp.axes_[0, 0].set_xlim(-1, 1)
plt.legend()
plt.show()
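
# The sample weight and early stopping features mentioned above can be
# sketched as follows (the weights and `early_stopping=True` are arbitrary
# illustrations, not recommendations): each sample's weight scales its
# contribution to the loss, and `n_iter_` reports how many boosting
# iterations were actually performed.
sample_weight = np.ones(n_samples)
sample_weight[::2] = 2  # give every other sample twice the weight
gbdt_sw = HistGradientBoostingRegressor(early_stopping=True, random_state=0)
gbdt_sw.fit(X, y, sample_weight=sample_weight)
print(gbdt_sw.n_iter_)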
##############################################################################
# Sample-weight support for Lasso and ElasticNet
# ----------------------------------------------
# The two linear regressors :class:`~sklearn.linear_model.Lasso` and
# :class:`~sklearn.linear_model.ElasticNet` now support sample weights.

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
import numpy as np

n_samples, n_features = 1000, 20
rng = np.random.RandomState(0)
X, y = make_regression(n_samples, n_features, random_state=rng)
sample_weight = rng.rand(n_samples)
X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(
    X, y, sample_weight, random_state=rng)
reg = Lasso()
reg.fit(X_train, y_train, sample_weight=sw_train)
print(reg.score(X_test, y_test, sample_weight=sw_test))
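
# ElasticNet accepts sample weights through the same interface. A minimal
# sketch reusing the split above (`alpha` and `l1_ratio` are arbitrary
# illustrative values, not recommendations):
from sklearn.linear_model import ElasticNet

enet = ElasticNet(alpha=0.1, l1_ratio=0.5)
enet.fit(X_train, y_train, sample_weight=sw_train)
print(enet.score(X_test, y_test, sample_weight=sw_test))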