
Commit 96e539f

Pushing the docs to 0.23/ for branch: 0.23.X, commit 22a7d5bc722b0430908f202e3ea40aa2ba1a0361
1 parent 3f6d86d commit 96e539f

File tree

1,339 files changed: +12,210 and −7,003 lines changed


0.23/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 39d2e4864eb47d9365787dcc7c68a004
+config: 3d4ccbdbd4613be0a63487194504dc50
 tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

0.23/_downloads/713967aff070bffaf8a2f98a0b9c0d95/plot_partial_dependence_visualization_api.py

Lines changed: 0 additions & 1 deletion
@@ -98,7 +98,6 @@
 # which will plot the partial dependence curves of each model on the same axes.
 # The length of the axes list must be equal to the number of plots drawn.

-# Sets this image as the thumbnail for sphinx gallery
 # sphinx_gallery_thumbnail_number = 4
 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
 tree_disp.plot(ax=[ax1, ax2], line_kw={"label": "Decision Tree"})

0.23/_downloads/931b876dec5cf70d716a97fbe44370c7/plot_partial_dependence_visualization_api.ipynb

Lines changed: 1 addition & 1 deletion
@@ -116,7 +116,7 @@
 },
 "outputs": [],
 "source": [
-"# Sets this image as the thumbnail for sphinx gallery\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))\ntree_disp.plot(ax=[ax1, ax2], line_kw={\"label\": \"Decision Tree\"})\nmlp_disp.plot(ax=[ax1, ax2], line_kw={\"label\": \"Multi-layer Perceptron\",\n \"c\": \"red\"})\nax1.legend()\nax2.legend()"
+"fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))\ntree_disp.plot(ax=[ax1, ax2], line_kw={\"label\": \"Decision Tree\"})\nmlp_disp.plot(ax=[ax1, ax2], line_kw={\"label\": \"Multi-layer Perceptron\",\n \"c\": \"red\"})\nax1.legend()\nax2.legend()"
 ]
 },
 {
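
Context for the two hunks above: sphinx-gallery selects an example's thumbnail via the `# sphinx_gallery_thumbnail_number = N` configuration comment, so the prose comment removed here was redundant and is dropped while the configuration comment stays. A minimal sketch of the convention (illustrative only, not part of this commit):

# A sphinx-gallery example script that produces several figures.
import matplotlib.pyplot as plt

# sphinx_gallery_thumbnail_number = 2

# Figure 1: an intermediate plot, not used as the thumbnail.
plt.figure()
plt.plot([0, 1], [0, 1])

# Figure 2: selected as the gallery thumbnail by the comment above.
plt.figure()
plt.plot([0, 1], [1, 0])

plt.show()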
Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
# flake8: noqa
"""
========================================
Release Highlights for scikit-learn 0.23
========================================

.. currentmodule:: sklearn

We are pleased to announce the release of scikit-learn 0.23! Many bug fixes
and improvements were added, as well as some new key features. We detail
below a few of the major features of this release. **For an exhaustive list of
all the changes**, please refer to the :ref:`release notes <changes_0_23>`.

To install the latest version (with pip)::

    pip install --upgrade scikit-learn

or with conda::

    conda install scikit-learn
"""

##############################################################################
# Generalized Linear Models, and Poisson loss for gradient boosting
# -----------------------------------------------------------------
# Long-awaited Generalized Linear Models with non-normal loss functions are now
# available. In particular, three new regressors were implemented:
# :class:`~sklearn.linear_model.PoissonRegressor`,
# :class:`~sklearn.linear_model.GammaRegressor`, and
# :class:`~sklearn.linear_model.TweedieRegressor`. The Poisson regressor can be
# used to model positive integer counts, or relative frequencies. Read more in
# the :ref:`User Guide <Generalized_linear_regression>`. Additionally,
# :class:`~sklearn.ensemble.HistGradientBoostingRegressor` supports a new
# 'poisson' loss as well.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PoissonRegressor
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

n_samples, n_features = 1000, 20
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features)
# positive integer target correlated with X[:, 5] with many zeros:
y = rng.poisson(lam=np.exp(X[:, 5]) / 2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
glm = PoissonRegressor()
gbdt = HistGradientBoostingRegressor(loss='poisson', learning_rate=.01)
glm.fit(X_train, y_train)
gbdt.fit(X_train, y_train)
print(glm.score(X_test, y_test))
print(gbdt.score(X_test, y_test))

##############################################################################
# Rich HTML representation for estimators
# ---------------------------------------
# Estimators can now be rendered in HTML in notebooks by enabling the
# `display='diagram'` option. This is particularly useful to visualize
# pipelines and composite estimators. Click on the entries to expand and see
# details.
from sklearn import set_config
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LogisticRegression
set_config(display='diagram')

num_proc = make_pipeline(SimpleImputer(strategy='median'), StandardScaler())

cat_proc = make_pipeline(
    SimpleImputer(strategy='constant', fill_value='missing'),
    OneHotEncoder(handle_unknown='ignore'))

preprocessor = make_column_transformer((num_proc, ('feat1', 'feat3')),
                                       (cat_proc, ('feat0', 'feat2')))

clf = make_pipeline(preprocessor, LogisticRegression())
clf

##############################################################################
# Scalability and stability improvements to KMeans
# ------------------------------------------------
# The :class:`~sklearn.cluster.KMeans` estimator was entirely re-worked, and it
# is now significantly faster and more stable. In addition, the Elkan algorithm
# is now compatible with sparse matrices. The estimator uses OpenMP-based
# parallelism instead of relying on joblib, so the `n_jobs` parameter has no
# effect anymore. For more details on how to control the number of threads,
# please refer to our :ref:`parallelism` notes.
import scipy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import completeness_score

rng = np.random.RandomState(0)
X, y = make_blobs(random_state=rng)
X = scipy.sparse.csr_matrix(X)
X_train, X_test, _, y_test = train_test_split(X, y, random_state=rng)
kmeans = KMeans(algorithm='elkan').fit(X_train)
print(completeness_score(kmeans.predict(X_test), y_test))

##############################################################################
# Improvements to the histogram-based Gradient Boosting estimators
# ----------------------------------------------------------------
# Various improvements were made to
# :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
# :class:`~sklearn.ensemble.HistGradientBoostingRegressor`. On top of the
# Poisson loss mentioned above, these estimators now support :ref:`sample
# weights <sw_hgbdt>`. Also, an automatic early-stopping criterion was added:
# early stopping is enabled by default when the number of samples exceeds 10k.
# Finally, users can now define :ref:`monotonic constraints
# <monotonic_cst_gbdt>` to constrain the predictions based on the variations of
# specific features. In the following example, we construct a target that is
# generally positively correlated with the first feature, with some noise.
# Applying monotonic constraints allows the prediction to capture the global
# effect of the first feature, instead of fitting the noise.
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.inspection import plot_partial_dependence
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

n_samples = 500
rng = np.random.RandomState(0)
X = rng.randn(n_samples, 2)
noise = rng.normal(loc=0.0, scale=0.01, size=n_samples)
y = (5 * X[:, 0] + np.sin(10 * np.pi * X[:, 0]) - noise)

gbdt_no_cst = HistGradientBoostingRegressor().fit(X, y)
gbdt_cst = HistGradientBoostingRegressor(monotonic_cst=[1, 0]).fit(X, y)

disp = plot_partial_dependence(
    gbdt_no_cst, X, features=[0], feature_names=['feature 0'],
    line_kw={'linewidth': 4, 'label': 'unconstrained'})
plot_partial_dependence(gbdt_cst, X, features=[0],
                        line_kw={'linewidth': 4, 'label': 'constrained'}, ax=disp.axes_)
disp.axes_[0, 0].plot(X[:, 0], y, 'o', alpha=.5, zorder=-1, label='samples')
disp.axes_[0, 0].set_ylim(-3, 3); disp.axes_[0, 0].set_xlim(-1, 1)
plt.legend()
plt.show()

##############################################################################
# Sample-weight support for Lasso and ElasticNet
# ----------------------------------------------
# The two linear regressors :class:`~sklearn.linear_model.Lasso` and
# :class:`~sklearn.linear_model.ElasticNet` now support sample weights.

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
import numpy as np

n_samples, n_features = 1000, 20
rng = np.random.RandomState(0)
X, y = make_regression(n_samples, n_features, random_state=rng)
sample_weight = rng.rand(n_samples)
X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(
    X, y, sample_weight, random_state=rng)
reg = Lasso()
reg.fit(X_train, y_train, sample_weight=sw_train)
print(reg.score(X_test, y_test, sw_test))
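
The histogram-based Gradient Boosting section in the file above mentions automatic early stopping and sample-weight support but only demonstrates monotonic constraints. A minimal sketch of the other two features (illustrative only, not part of the committed file; assumes the 0.23 `early_stopping`, `validation_fraction` and `n_iter_no_change` parameters):

import numpy as np
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

rng = np.random.RandomState(0)
# More than 10k samples, so early stopping kicks in with the default 'auto' setting.
X = rng.randn(20000, 5)
y = X[:, 0] + 0.1 * rng.randn(20000)
# Per-sample weights, now honoured by fit().
sample_weight = rng.rand(20000)

# A fraction of the data is held out to monitor the loss and stop early.
gbdt = HistGradientBoostingRegressor(early_stopping='auto',
                                     validation_fraction=0.1,
                                     n_iter_no_change=10)
gbdt.fit(X, y, sample_weight=sample_weight)
print(gbdt.n_iter_)  # number of boosting iterations actually run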
Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n========================================\nRelease Highlights for scikit-learn 0.23\n========================================\n\n.. currentmodule:: sklearn\n\nWe are pleased to announce the release of scikit-learn 0.23! Many bug fixes\nand improvements were added, as well as some new key features. We detail\nbelow a few of the major features of this release. **For an exhaustive list of\nall the changes**, please refer to the `release notes <changes_0_23>`.\n\nTo install the latest version (with pip)::\n\n    pip install --upgrade scikit-learn\n\nor with conda::\n\n    conda install scikit-learn\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Generalized Linear Models, and Poisson loss for gradient boosting\n-----------------------------------------------------------------\nLong-awaited Generalized Linear Models with non-normal loss functions are now\navailable. In particular, three new regressors were implemented:\n:class:`~sklearn.linear_model.PoissonRegressor`,\n:class:`~sklearn.linear_model.GammaRegressor`, and\n:class:`~sklearn.linear_model.TweedieRegressor`. The Poisson regressor can be\nused to model positive integer counts, or relative frequencies. Read more in\nthe `User Guide <Generalized_linear_regression>`. Additionally,\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor` supports a new\n'poisson' loss as well.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import PoissonRegressor\nfrom sklearn.experimental import enable_hist_gradient_boosting # noqa\nfrom sklearn.ensemble import HistGradientBoostingRegressor\n\nn_samples, n_features = 1000, 20\nrng = np.random.RandomState(0)\nX = rng.randn(n_samples, n_features)\n# positive integer target correlated with X[:, 5] with many zeros:\ny = rng.poisson(lam=np.exp(X[:, 5]) / 2)\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)\nglm = PoissonRegressor()\ngbdt = HistGradientBoostingRegressor(loss='poisson', learning_rate=.01)\nglm.fit(X_train, y_train)\ngbdt.fit(X_train, y_train)\nprint(glm.score(X_test, y_test))\nprint(gbdt.score(X_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rich HTML representation for estimators\n---------------------------------------\nEstimators can now be rendered in HTML in notebooks by enabling the\n`display='diagram'` option. This is particularly useful to visualize\npipelines and composite estimators. Click on the entries to expand and see\ndetails.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn import set_config\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import OneHotEncoder, StandardScaler\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.compose import make_column_transformer\nfrom sklearn.linear_model import LogisticRegression\nset_config(display='diagram')\n\nnum_proc = make_pipeline(SimpleImputer(strategy='median'), StandardScaler())\n\ncat_proc = make_pipeline(\n    SimpleImputer(strategy='constant', fill_value='missing'),\n    OneHotEncoder(handle_unknown='ignore'))\n\npreprocessor = make_column_transformer((num_proc, ('feat1', 'feat3')),\n                                       (cat_proc, ('feat0', 'feat2')))\n\nclf = make_pipeline(preprocessor, LogisticRegression())\nclf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Scalability and stability improvements to KMeans\n------------------------------------------------\nThe :class:`~sklearn.cluster.KMeans` estimator was entirely re-worked, and it\nis now significantly faster and more stable. In addition, the Elkan algorithm\nis now compatible with sparse matrices. The estimator uses OpenMP-based\nparallelism instead of relying on joblib, so the `n_jobs` parameter has no\neffect anymore. For more details on how to control the number of threads,\nplease refer to our `parallelism` notes.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import scipy\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.cluster import KMeans\nfrom sklearn.datasets import make_blobs\nfrom sklearn.metrics import completeness_score\n\nrng = np.random.RandomState(0)\nX, y = make_blobs(random_state=rng)\nX = scipy.sparse.csr_matrix(X)\nX_train, X_test, _, y_test = train_test_split(X, y, random_state=rng)\nkmeans = KMeans(algorithm='elkan').fit(X_train)\nprint(completeness_score(kmeans.predict(X_test), y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Improvements to the histogram-based Gradient Boosting estimators\n----------------------------------------------------------------\nVarious improvements were made to\n:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor`. On top of the\nPoisson loss mentioned above, these estimators now support `sample\nweights <sw_hgbdt>`. Also, an automatic early-stopping criterion was added:\nearly stopping is enabled by default when the number of samples exceeds 10k.\nFinally, users can now define `monotonic constraints\n<monotonic_cst_gbdt>` to constrain the predictions based on the variations of\nspecific features. In the following example, we construct a target that is\ngenerally positively correlated with the first feature, with some noise.\nApplying monotonic constraints allows the prediction to capture the global\neffect of the first feature, instead of fitting the noise.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\nfrom matplotlib import pyplot as plt\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.inspection import plot_partial_dependence\nfrom sklearn.experimental import enable_hist_gradient_boosting # noqa\nfrom sklearn.ensemble import HistGradientBoostingRegressor\n\nn_samples = 500\nrng = np.random.RandomState(0)\nX = rng.randn(n_samples, 2)\nnoise = rng.normal(loc=0.0, scale=0.01, size=n_samples)\ny = (5 * X[:, 0] + np.sin(10 * np.pi * X[:, 0]) - noise)\n\ngbdt_no_cst = HistGradientBoostingRegressor().fit(X, y)\ngbdt_cst = HistGradientBoostingRegressor(monotonic_cst=[1, 0]).fit(X, y)\n\ndisp = plot_partial_dependence(\n    gbdt_no_cst, X, features=[0], feature_names=['feature 0'],\n    line_kw={'linewidth': 4, 'label': 'unconstrained'})\nplot_partial_dependence(gbdt_cst, X, features=[0],\n                        line_kw={'linewidth': 4, 'label': 'constrained'}, ax=disp.axes_)\ndisp.axes_[0, 0].plot(X[:, 0], y, 'o', alpha=.5, zorder=-1, label='samples')\ndisp.axes_[0, 0].set_ylim(-3, 3); disp.axes_[0, 0].set_xlim(-1, 1)\nplt.legend()\nplt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sample-weight support for Lasso and ElasticNet\n----------------------------------------------\nThe two linear regressors :class:`~sklearn.linear_model.Lasso` and\n:class:`~sklearn.linear_model.ElasticNet` now support sample weights.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\nfrom sklearn.datasets import make_regression\nfrom sklearn.linear_model import Lasso\nimport numpy as np\n\nn_samples, n_features = 1000, 20\nrng = np.random.RandomState(0)\nX, y = make_regression(n_samples, n_features, random_state=rng)\nsample_weight = rng.rand(n_samples)\nX_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(\n    X, y, sample_weight, random_state=rng)\nreg = Lasso()\nreg.fit(X_train, y_train, sample_weight=sw_train)\nprint(reg.score(X_test, y_test, sw_test))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
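
The KMeans section in the files above notes that `n_jobs` no longer controls parallelism because the work is done with OpenMP threads, and points to the parallelism notes. A minimal sketch of one way to cap the thread count (illustrative only; assumes the threadpoolctl package is available, otherwise the OMP_NUM_THREADS environment variable can be set before starting Python):

import numpy as np
from threadpoolctl import threadpool_limits
from sklearn.cluster import KMeans

X = np.random.RandomState(0).randn(1000, 10)

# Cap the OpenMP thread pool used by KMeans at 2 threads for this block only.
with threadpool_limits(limits=2, user_api='openmp'):
    KMeans(n_clusters=8, algorithm='elkan').fit(X)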
Binary file not shown.

0.23/_downloads/scikit-learn-docs.pdf

87.6 KB
Binary file not shown.

0.23/_images/iris.png

0 Bytes
-571 Bytes
