Skip to content

Commit ae7028e

Browse files
committed
Pushing the docs to 1.4/ for branch: 1.4.X, commit 46b5f541138458803e39f9ce5810878849e4ecf7
1 parent 853084e commit ae7028e

File tree

1,741 files changed

+15883
-15950
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,741 files changed

+15883
-15950
lines changed

1.4/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: 2f03b9ed6a366461c62ef145db2ca4a5
3+
config: c086ce46b402dc6decef6d3cb80277c2
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

1.4/_downloads/18eb95af29bd5554020a8428b3ceac54/plot_cluster_iris.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"\n# K-means Clustering\n\nThe plot shows:\n\n- top left: What a K-means algorithm would yield using 8 clusters.\n\n- top right: What the effect of a bad initialization is\n on the classification process: By setting n_init to only 1\n (default is 10), the amount of times that the algorithm will\n be run with different centroid seeds is reduced.\n\n- bottom left: What using eight clusters would deliver.\n\n- bottom right: The ground truth.\n"
7+
"\n# K-means Clustering\n\nThe plot shows:\n\n- top left: What a K-means algorithm would yield using 8 clusters.\n\n- top right: What using three clusters would deliver.\n\n- bottom left: What the effect of a bad initialization is\n on the classification process: By setting n_init to only 1\n (default is 10), the number of times that the algorithm will\n be run with different centroid seeds is reduced.\n\n- bottom right: The ground truth.\n"
88
]
99
},
1010
{
@@ -15,7 +15,7 @@
1515
},
1616
"outputs": [],
1717
"source": [
18-
"# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\n\n# Though the following import is not directly being used, it is required\n# for 3D projection to work with matplotlib < 3.2\nimport mpl_toolkits.mplot3d # noqa: F401\nimport numpy as np\n\nfrom sklearn import datasets\nfrom sklearn.cluster import KMeans\n\nnp.random.seed(5)\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nestimators = [\n (\"k_means_iris_8\", KMeans(n_clusters=8)),\n (\"k_means_iris_3\", KMeans(n_clusters=3)),\n (\"k_means_iris_bad_init\", KMeans(n_clusters=3, n_init=1, init=\"random\")),\n]\n\nfig = plt.figure(figsize=(10, 8))\ntitles = [\"8 clusters\", \"3 clusters\", \"3 clusters, bad initialization\"]\nfor idx, ((name, est), title) in enumerate(zip(estimators, titles)):\n ax = fig.add_subplot(2, 2, idx + 1, projection=\"3d\", elev=48, azim=134)\n est.fit(X)\n labels = est.labels_\n\n ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(float), edgecolor=\"k\")\n\n ax.xaxis.set_ticklabels([])\n ax.yaxis.set_ticklabels([])\n ax.zaxis.set_ticklabels([])\n ax.set_xlabel(\"Petal width\")\n ax.set_ylabel(\"Sepal length\")\n ax.set_zlabel(\"Petal length\")\n ax.set_title(title)\n\n# Plot the ground truth\nax = fig.add_subplot(2, 2, 4, projection=\"3d\", elev=48, azim=134)\n\nfor name, label in [(\"Setosa\", 0), (\"Versicolour\", 1), (\"Virginica\", 2)]:\n ax.text3D(\n X[y == label, 3].mean(),\n X[y == label, 0].mean(),\n X[y == label, 2].mean() + 2,\n name,\n horizontalalignment=\"center\",\n bbox=dict(alpha=0.2, edgecolor=\"w\", facecolor=\"w\"),\n )\n# Reorder the labels to have colors matching the cluster results\ny = np.choose(y, [1, 2, 0]).astype(float)\nax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor=\"k\")\n\nax.xaxis.set_ticklabels([])\nax.yaxis.set_ticklabels([])\nax.zaxis.set_ticklabels([])\nax.set_xlabel(\"Petal width\")\nax.set_ylabel(\"Sepal 
length\")\nax.set_zlabel(\"Petal length\")\nax.set_title(\"Ground Truth\")\n\nplt.subplots_adjust(wspace=0.25, hspace=0.25)\nplt.show()"
18+
"# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\n\n# Though the following import is not directly being used, it is required\n# for 3D projection to work with matplotlib < 3.2\nimport mpl_toolkits.mplot3d # noqa: F401\nimport numpy as np\n\nfrom sklearn import datasets\nfrom sklearn.cluster import KMeans\n\nnp.random.seed(5)\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nestimators = [\n (\"k_means_iris_8\", KMeans(n_clusters=8)),\n (\"k_means_iris_3\", KMeans(n_clusters=3)),\n (\"k_means_iris_bad_init\", KMeans(n_clusters=3, n_init=1, init=\"random\")),\n]\n\nfig = plt.figure(figsize=(10, 8))\ntitles = [\"8 clusters\", \"3 clusters\", \"3 clusters, bad initialization\"]\nfor idx, ((name, est), title) in enumerate(zip(estimators, titles)):\n ax = fig.add_subplot(2, 2, idx + 1, projection=\"3d\", elev=48, azim=134)\n est.fit(X)\n labels = est.labels_\n\n ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(float), edgecolor=\"k\")\n\n ax.xaxis.set_ticklabels([])\n ax.yaxis.set_ticklabels([])\n ax.zaxis.set_ticklabels([])\n ax.set_xlabel(\"Petal width\")\n ax.set_ylabel(\"Sepal length\")\n ax.set_zlabel(\"Petal length\")\n ax.set_title(title)\n\n# Plot the ground truth\nax = fig.add_subplot(2, 2, 4, projection=\"3d\", elev=48, azim=134)\n\nfor name, label in [(\"Setosa\", 0), (\"Versicolour\", 1), (\"Virginica\", 2)]:\n ax.text3D(\n X[y == label, 3].mean(),\n X[y == label, 0].mean(),\n X[y == label, 2].mean() + 2,\n name,\n horizontalalignment=\"center\",\n bbox=dict(alpha=0.2, edgecolor=\"w\", facecolor=\"w\"),\n )\n\nax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor=\"k\")\n\nax.xaxis.set_ticklabels([])\nax.yaxis.set_ticklabels([])\nax.zaxis.set_ticklabels([])\nax.set_xlabel(\"Petal width\")\nax.set_ylabel(\"Sepal length\")\nax.set_zlabel(\"Petal length\")\nax.set_title(\"Ground Truth\")\n\nplt.subplots_adjust(wspace=0.25, hspace=0.25)\nplt.show()"
1919
]
2020
}
2121
],

1.4/_downloads/3c3c738275484acc54821615bf72894a/plot_permutation_importance.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
2001. <10.1023/A:1010933404324>`
2525
2626
"""
27-
# %%
28-
import numpy as np
2927

3028
# %%
3129
# Data Loading and Feature Engineering
@@ -40,6 +38,8 @@
4038
# values as records).
4139
# - ``random_cat`` is a low cardinality categorical variable (3 possible
4240
# values).
41+
import numpy as np
42+
4343
from sklearn.datasets import fetch_openml
4444
from sklearn.model_selection import train_test_split
4545

1.4/_downloads/48802d222a21d57b36b5e7a61adb770c/plot_cv_digits.ipynb

Lines changed: 0 additions & 43 deletions
This file was deleted.

1.4/_downloads/53490cdb42c3c07ba8cccd1c4ed4dca4/plot_release_highlights_1_4_0.ipynb

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,18 @@
5858
},
5959
"outputs": [],
6060
"source": [
61-
"import polars as pl\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.preprocessing import OneHotEncoder\nfrom sklearn.compose import ColumnTransformer\n\ndf = pl.DataFrame(\n {\"height\": [120, 140, 150, 110, 100], \"pet\": [\"dog\", \"cat\", \"dog\", \"cat\", \"cat\"]}\n)\npreprocessor = ColumnTransformer(\n [\n (\"numerical\", StandardScaler(), [\"height\"]),\n (\"categorical\", OneHotEncoder(sparse_output=False), [\"pet\"]),\n ],\n verbose_feature_names_out=False,\n)\npreprocessor.set_output(transform=\"polars\")\n\ndf_out = preprocessor.fit_transform(df)\nprint(f\"Output type: {type(df_out)}\")"
61+
"import polars as pl\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.preprocessing import OneHotEncoder\nfrom sklearn.compose import ColumnTransformer\n\ndf = pl.DataFrame(\n {\"height\": [120, 140, 150, 110, 100], \"pet\": [\"dog\", \"cat\", \"dog\", \"cat\", \"cat\"]}\n)\npreprocessor = ColumnTransformer(\n [\n (\"numerical\", StandardScaler(), [\"height\"]),\n (\"categorical\", OneHotEncoder(sparse_output=False), [\"pet\"]),\n ],\n verbose_feature_names_out=False,\n)\npreprocessor.set_output(transform=\"polars\")\n\ndf_out = preprocessor.fit_transform(df)\ndf_out"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": null,
67+
"metadata": {
68+
"collapsed": false
69+
},
70+
"outputs": [],
71+
"source": [
72+
"print(f\"Output type: {type(df_out)}\")"
6273
]
6374
},
6475
{
@@ -137,7 +148,7 @@
137148
"cell_type": "markdown",
138149
"metadata": {},
139150
"source": [
140-
"## Metadata Routing Support\nMany meta-estimators and cross-validation routines now support metadata\nrouting, which are listed in the `user guide\n<_metadata_routing_models>`. For instance, this is how you can do a nested\ncross-validation with sample weights and :class:`~model_selection.GroupKFold`:\n\n"
151+
"## Metadata Routing Support\nMany meta-estimators and cross-validation routines now support metadata\nrouting, which are listed in the `user guide\n<metadata_routing_models>`. For instance, this is how you can do a nested\ncross-validation with sample weights and :class:`~model_selection.GroupKFold`:\n\n"
141152
]
142153
},
143154
{
@@ -150,6 +161,24 @@
150161
"source": [
151162
"import sklearn\nfrom sklearn.metrics import get_scorer\nfrom sklearn.datasets import make_regression\nfrom sklearn.linear_model import Lasso\nfrom sklearn.model_selection import GridSearchCV, cross_validate, GroupKFold\n\n# For now by default metadata routing is disabled, and need to be explicitly\n# enabled.\nsklearn.set_config(enable_metadata_routing=True)\n\nn_samples = 100\nX, y = make_regression(n_samples=n_samples, n_features=5, noise=0.5)\nrng = np.random.RandomState(7)\ngroups = rng.randint(0, 10, size=n_samples)\nsample_weights = rng.rand(n_samples)\nestimator = Lasso().set_fit_request(sample_weight=True)\nhyperparameter_grid = {\"alpha\": [0.1, 0.5, 1.0, 2.0]}\nscoring_inner_cv = get_scorer(\"neg_mean_squared_error\").set_score_request(\n sample_weight=True\n)\ninner_cv = GroupKFold(n_splits=5)\n\ngrid_search = GridSearchCV(\n estimator=estimator,\n param_grid=hyperparameter_grid,\n cv=inner_cv,\n scoring=scoring_inner_cv,\n)\n\nouter_cv = GroupKFold(n_splits=5)\nscorers = {\n \"mse\": get_scorer(\"neg_mean_squared_error\").set_score_request(sample_weight=True)\n}\nresults = cross_validate(\n grid_search,\n X,\n y,\n cv=outer_cv,\n scoring=scorers,\n return_estimator=True,\n params={\"sample_weight\": sample_weights, \"groups\": groups},\n)\nprint(\"cv error on test sets:\", results[\"test_mse\"])\n\n# Setting the flag to the default `False` to avoid interference with other\n# scripts.\nsklearn.set_config(enable_metadata_routing=False)"
152163
]
164+
},
165+
{
166+
"cell_type": "markdown",
167+
"metadata": {},
168+
"source": [
169+
"## Improved memory and runtime efficiency for PCA on sparse data\nPCA is now able to handle sparse matrices natively for the `arpack`\nsolver by leveraging `scipy.sparse.linalg.LinearOperator` to avoid\nmaterializing large sparse matrices when performing the\neigenvalue decomposition of the data set covariance matrix.\n\n\n"
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {
176+
"collapsed": false
177+
},
178+
"outputs": [],
179+
"source": [
180+
"from sklearn.decomposition import PCA\nimport scipy.sparse as sp\nfrom time import time\n\nX_sparse = sp.random(m=1000, n=1000, random_state=0)\nX_dense = X_sparse.toarray()\n\nt0 = time()\nPCA(n_components=10, svd_solver=\"arpack\").fit(X_sparse)\ntime_sparse = time() - t0\n\nt0 = time()\nPCA(n_components=10, svd_solver=\"arpack\").fit(X_dense)\ntime_dense = time() - t0\n\nprint(f\"Speedup: {time_dense / time_sparse:.1f}x\")"
181+
]
153182
}
154183
],
155184
"metadata": {

1.4/_downloads/6304a55e6fa4d75c8e8d11b4ea9a8679/plot_pca_3d.py

Lines changed: 0 additions & 99 deletions
This file was deleted.
Binary file not shown.

1.4/_downloads/720e4861bf00cd09b55ae64187ea58be/plot_pca_3d.ipynb

Lines changed: 0 additions & 79 deletions
This file was deleted.

0 commit comments

Comments
 (0)