
Commit db39f78

Pushing the docs to dev/ for branch: master, commit be027e0b316097ad0cea9956aa6829ae969d8a62
1 parent 8f3e3b9 commit db39f78

1,205 files changed: 4,026 additions and 3,701 deletions


dev/_downloads/7ee55c12f8d3eb1dd8d2005d9dd7b6f1/plot_release_highlights_0_22_0.py

Lines changed: 64 additions & 1 deletion
@@ -12,13 +12,36 @@
 
 To install the latest version (with pip)::
 
-    pip install -U scikit-learn --upgrade
+    pip install --upgrade scikit-learn
 
 or with conda::
 
     conda install scikit-learn
 """
 
+##############################################################################
+# KNN Based Imputation
+# ------------------------------------
+# We now support imputation for completing missing values using k-Nearest
+# Neighbors.
+#
+# Each sample's missing values are imputed using the mean value from the
+# ``n_neighbors`` nearest neighbors found in the training set. Two samples are
+# close if the features that neither is missing are close.
+# By default, a Euclidean distance metric
+# that supports missing values,
+# :func:`~metrics.nan_euclidean_distances`, is used to find the nearest
+# neighbors.
+#
+# Read more in the :ref:`User Guide <knnimpute>`.
+
+import numpy as np
+from sklearn.impute import KNNImputer
+
+X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
+imputer = KNNImputer(n_neighbors=2)
+print(imputer.fit_transform(X))
+
 ##############################################################################
 # Permutation-based feature importance
 # ------------------------------------
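The hunk above leans on :func:`~metrics.nan_euclidean_distances` to find neighbors. As a hedged aside, not part of this commit, here is a minimal sketch of that metric in isolation on the same toy data, assuming scikit-learn >= 0.22 (the release where it was introduced):

    # Sketch (editor's illustration, not from the commit): the NaN-aware
    # distance KNNImputer uses to pick the ``n_neighbors`` donor samples.
    import numpy as np
    from sklearn.metrics.pairwise import nan_euclidean_distances

    X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
    # Distances are computed over the coordinates both samples have observed,
    # then rescaled to compensate for the coordinates that are missing.
    print(nan_euclidean_distances(X, X))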
@@ -149,3 +172,43 @@
     # recomputed.
     estimator.set_params(isomap__n_neighbors=5)
     estimator.fit(X)
+
+############################################################################
+# Stacking Classifier and Regressor
+# ---------------------------------
+# :class:`~ensemble.StackingClassifier` and
+# :class:`~ensemble.StackingRegressor`
+# allow you to have a stack of estimators with a final classifier or
+# a regressor.
+# Stacked generalization consists of stacking the outputs of individual
+# estimators and using a classifier to compute the final prediction. Stacking
+# leverages the strength of each individual estimator by using their outputs
+# as the input of a final estimator.
+# Base estimators are fitted on the full ``X``, while
+# the final estimator is trained on cross-validated predictions of the
+# base estimators obtained with ``cross_val_predict``.
+#
+# Read more in the :ref:`User Guide <stacking>`.
+
+from sklearn.datasets import load_iris
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.svm import LinearSVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import make_pipeline
+from sklearn.ensemble import StackingClassifier
+from sklearn.model_selection import train_test_split
+
+X, y = load_iris(return_X_y=True)
+estimators = [
+    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
+    ('svr', make_pipeline(StandardScaler(),
+                          LinearSVC(random_state=42)))
+]
+clf = StackingClassifier(
+    estimators=estimators, final_estimator=LogisticRegression()
+)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, stratify=y, random_state=42
+)
+clf.fit(X_train, y_train).score(X_test, y_test)
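The docstring above notes that the final estimator is trained on cross-validated predictions of the base estimators. A rough hand-rolled equivalent using ``cross_val_predict`` may make that clearer; this is an editor's sketch under assumed defaults (5-fold CV, probability meta-features), not the library's exact implementation:

    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_predict

    X, y = load_iris(return_X_y=True)
    base = RandomForestClassifier(n_estimators=10, random_state=42)
    # Out-of-fold probabilities from the base estimator become the
    # meta-features, so the final estimator never sees leaked predictions.
    meta_features = cross_val_predict(base, X, y, cv=5, method='predict_proba')
    final = LogisticRegression().fit(meta_features, y)
    # The base estimator is then refit on the full X for prediction time.
    base.fit(X, y)
    print(final.predict(base.predict_proba(X[:5])))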

dev/_downloads/c101b602d0b3510ef47dd19d64a4a92b/plot_release_highlights_0_22_0.ipynb

Lines changed: 37 additions & 1 deletion
@@ -15,7 +15,25 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n========================================\nRelease Highlights for scikit-learn 0.22\n========================================\n\n.. currentmodule:: sklearn\n\nWe are pleased to announce the release of scikit-learn 0.22, which comes\nwith many bug fixes and new features! We detail below a few of the major\nfeatures of this release. For an exhaustive list of all the changes, please\nrefer to the `release notes <changes_0_22>`.\n\nTo install the latest version (with pip)::\n\n    pip install -U scikit-learn --upgrade\n\nor with conda::\n\n    conda install scikit-learn\n"
+"\n========================================\nRelease Highlights for scikit-learn 0.22\n========================================\n\n.. currentmodule:: sklearn\n\nWe are pleased to announce the release of scikit-learn 0.22, which comes\nwith many bug fixes and new features! We detail below a few of the major\nfeatures of this release. For an exhaustive list of all the changes, please\nrefer to the `release notes <changes_0_22>`.\n\nTo install the latest version (with pip)::\n\n    pip install --upgrade scikit-learn\n\nor with conda::\n\n    conda install scikit-learn\n"
+],
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"KNN Based Imputation\n------------------------------------\nWe now support imputation for completing missing values using k-Nearest\nNeighbors.\n\nEach sample's missing values are imputed using the mean value from the\n``n_neighbors`` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\nBy default, a Euclidean distance metric\nthat supports missing values,\n:func:`~metrics.nan_euclidean_distances`, is used to find the nearest\nneighbors.\n\nRead more in the `User Guide <knnimpute>`.\n\n"
+],
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\nfrom sklearn.impute import KNNImputer\n\nX = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\nimputer = KNNImputer(n_neighbors=2)\nprint(imputer.fit_transform(X))"
 ]
 },
 {
@@ -125,6 +143,24 @@
 "source": [
 "from tempfile import TemporaryDirectory\nfrom sklearn.neighbors import KNeighborsTransformer\nfrom sklearn.manifold import Isomap\nfrom sklearn.pipeline import make_pipeline\n\nwith TemporaryDirectory(prefix=\"sklearn_cache_\") as tmpdir:\n    estimator = make_pipeline(\n        KNeighborsTransformer(n_neighbors=10, mode='distance'),\n        Isomap(n_neighbors=10, metric='precomputed'),\n        memory=tmpdir)\n    estimator.fit(X)\n\n    # We can decrease the number of neighbors and the graph will not be\n    # recomputed.\n    estimator.set_params(isomap__n_neighbors=5)\n    estimator.fit(X)"
 ]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Stacking Classifier and Regressor\n---------------------------------\n:class:`~ensemble.StackingClassifier` and\n:class:`~ensemble.StackingRegressor`\nallow you to have a stack of estimators with a final classifier or\na regressor.\nStacked generalization consists of stacking the outputs of individual\nestimators and using a classifier to compute the final prediction. Stacking\nleverages the strength of each individual estimator by using their outputs\nas the input of a final estimator.\nBase estimators are fitted on the full ``X``, while\nthe final estimator is trained on cross-validated predictions of the\nbase estimators obtained with ``cross_val_predict``.\n\nRead more in the `User Guide <stacking>`.\n\n"
+],
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"from sklearn.datasets import load_iris\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import LinearSVC\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.model_selection import train_test_split\n\nX, y = load_iris(return_X_y=True)\nestimators = [\n    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n    ('svr', make_pipeline(StandardScaler(),\n                          LinearSVC(random_state=42)))\n]\nclf = StackingClassifier(\n    estimators=estimators, final_estimator=LogisticRegression()\n)\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, stratify=y, random_state=42\n)\nclf.fit(X_train, y_train).score(X_test, y_test)"
+]
 }
 ],
 "metadata": {

dev/_downloads/scikit-learn-docs.pdf (binary, -7.45 KB)

dev/_images/iris.png (binary, 0 bytes)
