Skip to content

Commit 81c2f5e

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 6a3517e0ca41fb3bc8e375dd4fea7e1bb2f906ff
1 parent f6238db commit 81c2f5e

File tree

1,328 files changed

+7461
-7289
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,328 files changed

+7461
-7289
lines changed

dev/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: dac8c3fe1a779cbc4a2e7650e8445792
3+
config: d7564201336e2c7517e53f9b29700ccb
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

dev/_downloads/53490cdb42c3c07ba8cccd1c4ed4dca4/plot_release_highlights_1_4_0.ipynb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,24 @@
161161
"source": [
162162
"import sklearn\nfrom sklearn.metrics import get_scorer\nfrom sklearn.datasets import make_regression\nfrom sklearn.linear_model import Lasso\nfrom sklearn.model_selection import GridSearchCV, cross_validate, GroupKFold\n\n# For now by default metadata routing is disabled, and need to be explicitly\n# enabled.\nsklearn.set_config(enable_metadata_routing=True)\n\nn_samples = 100\nX, y = make_regression(n_samples=n_samples, n_features=5, noise=0.5)\nrng = np.random.RandomState(7)\ngroups = rng.randint(0, 10, size=n_samples)\nsample_weights = rng.rand(n_samples)\nestimator = Lasso().set_fit_request(sample_weight=True)\nhyperparameter_grid = {\"alpha\": [0.1, 0.5, 1.0, 2.0]}\nscoring_inner_cv = get_scorer(\"neg_mean_squared_error\").set_score_request(\n sample_weight=True\n)\ninner_cv = GroupKFold(n_splits=5)\n\ngrid_search = GridSearchCV(\n estimator=estimator,\n param_grid=hyperparameter_grid,\n cv=inner_cv,\n scoring=scoring_inner_cv,\n)\n\nouter_cv = GroupKFold(n_splits=5)\nscorers = {\n \"mse\": get_scorer(\"neg_mean_squared_error\").set_score_request(sample_weight=True)\n}\nresults = cross_validate(\n grid_search,\n X,\n y,\n cv=outer_cv,\n scoring=scorers,\n return_estimator=True,\n params={\"sample_weight\": sample_weights, \"groups\": groups},\n)\nprint(\"cv error on test sets:\", results[\"test_mse\"])\n\n# Setting the flag to the default `False` to avoid interference with other\n# scripts.\nsklearn.set_config(enable_metadata_routing=False)"
163163
]
164+
},
165+
{
166+
"cell_type": "markdown",
167+
"metadata": {},
168+
"source": [
169+
"## Improved memory and runtime efficiency for PCA on sparse data\nPCA is now able to handle sparse matrices natively for the `arpack`\nsolver by leveraging `scipy.sparse.linalg.LinearOperator` to avoid\nmaterializing large sparse matrices when performing the\neigenvalue decomposition of the data set covariance matrix.\n\n\n"
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {
176+
"collapsed": false
177+
},
178+
"outputs": [],
179+
"source": [
180+
"from sklearn.decomposition import PCA\nimport scipy.sparse as sp\nfrom time import time\n\nX_sparse = sp.random(m=1000, n=1000, random_state=0)\nX_dense = X_sparse.toarray()\n\nt0 = time()\nPCA(n_components=10, svd_solver=\"arpack\").fit(X_sparse)\ntime_sparse = time() - t0\n\nt0 = time()\nPCA(n_components=10, svd_solver=\"arpack\").fit(X_dense)\ntime_dense = time() - t0\n\nprint(f\"Speedup: {time_dense / time_sparse:.1f}x\")"
181+
]
164182
}
165183
],
166184
"metadata": {
Binary file not shown.

dev/_downloads/c15cce0dbcd8722cb5638987eff985c0/plot_release_highlights_1_4_0.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,28 @@
207207
# Setting the flag to the default `False` to avoid interference with other
208208
# scripts.
209209
sklearn.set_config(enable_metadata_routing=False)
210+
211+
# %%
212+
# Improved memory and runtime efficiency for PCA on sparse data
213+
# -------------------------------------------------------------
214+
# PCA is now able to handle sparse matrices natively for the `arpack`
215+
# solver by leveraging `scipy.sparse.linalg.LinearOperator` to avoid
216+
# materializing large sparse matrices when performing the
217+
# eigenvalue decomposition of the data set covariance matrix.
218+
#
219+
from sklearn.decomposition import PCA
220+
import scipy.sparse as sp
221+
from time import time
222+
223+
X_sparse = sp.random(m=1000, n=1000, random_state=0)
224+
X_dense = X_sparse.toarray()
225+
226+
t0 = time()
227+
PCA(n_components=10, svd_solver="arpack").fit(X_sparse)
228+
time_sparse = time() - t0
229+
230+
t0 = time()
231+
PCA(n_components=10, svd_solver="arpack").fit(X_dense)
232+
time_dense = time() - t0
233+
234+
print(f"Speedup: {time_dense / time_sparse:.1f}x")

dev/_downloads/scikit-learn-docs.zip

7.96 KB
Binary file not shown.
287 Bytes
419 Bytes
213 Bytes
-2 Bytes

0 commit comments

Comments
 (0)