
Commit a506912

Pushing the docs to dev/ for branch: main, commit ae2ccbf69a7dbc8398bd9c8443ff265f8f687a13
1 parent cff7074 commit a506912

File tree: 1,318 files changed (+5798 additions, -5928 deletions)


dev/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: f1c77509c248640c80fa34f784c7d838
+config: d13a003d9102e8060d36407acfd1a1c6
 tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

dev/_downloads/43e84df0b93ff974da370e8da900f2ee/plot_discretization_strategies.py

Lines changed: 3 additions & 1 deletion
@@ -76,7 +76,9 @@
     i += 1
     # transform the dataset with KBinsDiscretizer
     for strategy in strategies:
-        enc = KBinsDiscretizer(n_bins=4, encode="ordinal", strategy=strategy)
+        enc = KBinsDiscretizer(
+            n_bins=4, encode="ordinal", strategy=strategy, subsample=200_000
+        )
         enc.fit(X)
         grid_encoded = enc.transform(grid)
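
The examples in this commit pin KBinsDiscretizer's `subsample` argument explicitly, presumably to silence the FutureWarning about its changing default for the "uniform" and "kmeans" strategies. A minimal sketch of the updated call, assuming scikit-learn >= 1.3 and illustrative random data:

    import numpy as np
    from sklearn.preprocessing import KBinsDiscretizer

    # Illustrative 2-D data; any float array works.
    X = np.random.RandomState(42).uniform(-3, 3, size=(200, 2))

    # Passing subsample explicitly keeps behaviour stable across the
    # pending default change (and silences the FutureWarning).
    enc = KBinsDiscretizer(
        n_bins=4, encode="ordinal", strategy="kmeans", subsample=200_000
    )
    X_binned = enc.fit_transform(X)  # each value replaced by its bin index (0..3)
    print(enc.bin_edges_[0])  # learned bin edges for the first feature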

dev/_downloads/50040ae12dd16e7d2e79135d7793c17e/plot_release_highlights_0_22_0.py

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@
 X, y = load_iris(return_X_y=True)
 estimators = [
     ("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
-    ("svr", make_pipeline(StandardScaler(), LinearSVC(random_state=42))),
+    ("svr", make_pipeline(StandardScaler(), LinearSVC(dual="auto", random_state=42))),
 ]
 clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
 X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
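
Here `dual="auto"` is passed to LinearSVC, presumably because the old implicit `dual=True` default is deprecated; "auto" lets the estimator choose the primal or dual optimization problem from the shape of the data. A minimal sketch, assuming scikit-learn >= 1.3 (where `dual="auto"` is accepted):

    from sklearn.datasets import load_iris
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import LinearSVC

    X, y = load_iris(return_X_y=True)
    # dual="auto" defers the primal-vs-dual choice to the estimator and
    # avoids the FutureWarning raised when dual is left unset.
    svc = make_pipeline(StandardScaler(), LinearSVC(dual="auto", random_state=42))
    print(svc.fit(X, y).score(X, y))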

dev/_downloads/5bb71b0b2052531cacf3736b4d2b3a92/plot_face_compress.py

Lines changed: 10 additions & 2 deletions
@@ -77,7 +77,11 @@
 
 n_bins = 8
 encoder = KBinsDiscretizer(
-    n_bins=n_bins, encode="ordinal", strategy="uniform", random_state=0
+    n_bins=n_bins,
+    encode="ordinal",
+    strategy="uniform",
+    random_state=0,
+    subsample=200_000,
 )
 compressed_raccoon_uniform = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
     raccoon_face.shape
@@ -122,7 +126,11 @@
 # find a more optimal mapping.
 
 encoder = KBinsDiscretizer(
-    n_bins=n_bins, encode="ordinal", strategy="kmeans", random_state=0
+    n_bins=n_bins,
+    encode="ordinal",
+    strategy="kmeans",
+    random_state=0,
+    subsample=200_000,
 )
 compressed_raccoon_kmeans = encoder.fit_transform(raccoon_face.reshape(-1, 1)).reshape(
     raccoon_face.shape
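
The same `subsample` pinning, applied here to the image-compression example. The pattern flattens the image into one column of pixel intensities, bins them, and restores the 2-D shape; a minimal sketch with stand-in data (scikit-learn >= 1.3 assumed; a random array replaces the real raccoon_face image):

    import numpy as np
    from sklearn.preprocessing import KBinsDiscretizer

    image = np.random.RandomState(0).rand(64, 64)  # stand-in for raccoon_face

    encoder = KBinsDiscretizer(
        n_bins=8,
        encode="ordinal",
        strategy="uniform",
        random_state=0,
        subsample=200_000,
    )
    # Flatten to (n_pixels, 1), bin each pixel, then restore the image shape.
    compressed = encoder.fit_transform(image.reshape(-1, 1)).reshape(image.shape)
    print(np.unique(compressed))  # 8 ordinal gray levels: 0.0 ... 7.0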
Binary file not shown.

dev/_downloads/892d774326b523935a603b8700193195/plot_ard.py

Lines changed: 3 additions & 3 deletions
@@ -58,8 +58,8 @@
 from sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression
 
 olr = LinearRegression().fit(X, y)
-brr = BayesianRidge(compute_score=True, n_iter=30).fit(X, y)
-ard = ARDRegression(compute_score=True, n_iter=30).fit(X, y)
+brr = BayesianRidge(compute_score=True, max_iter=30).fit(X, y)
+ard = ARDRegression(compute_score=True, max_iter=30).fit(X, y)
 df = pd.DataFrame(
     {
         "Weights of true generative process": true_weights,
@@ -117,7 +117,7 @@
 
 # %%
 # Indeed, both models minimize the log-likelihood up to an arbitrary cutoff
-# defined by the `n_iter` parameter.
+# defined by the `max_iter` parameter.
 #
 # Bayesian regressions with polynomial feature expansion
 # ======================================================
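
Both hunks swap the deprecated `n_iter` keyword for `max_iter`, which matches the iteration-cap name used by other scikit-learn estimators. A minimal sketch, assuming scikit-learn >= 1.3 (where `max_iter` exists on these two estimators) and synthetic data:

    import numpy as np
    from sklearn.linear_model import ARDRegression, BayesianRidge

    rng = np.random.RandomState(0)
    X = rng.randn(100, 10)
    y = X @ rng.randn(10) + 0.1 * rng.randn(100)

    # max_iter replaces n_iter; compute_score=True records the log marginal
    # likelihood at each iteration in the scores_ attribute.
    brr = BayesianRidge(compute_score=True, max_iter=30).fit(X, y)
    ard = ARDRegression(compute_score=True, max_iter=30).fit(X, y)
    print(len(brr.scores_), len(ard.scores_))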

dev/_downloads/adc9be3b7acc279025dad9ee4ce92038/plot_discretization_strategies.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Tom Dupr\u00e9 la Tour\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.preprocessing import KBinsDiscretizer\n\nstrategies = [\"uniform\", \"quantile\", \"kmeans\"]\n\nn_samples = 200\ncenters_0 = np.array([[0, 0], [0, 5], [2, 4], [8, 8]])\ncenters_1 = np.array([[0, 0], [3, 1]])\n\n# construct the datasets\nrandom_state = 42\nX_list = [\n    np.random.RandomState(random_state).uniform(-3, 3, size=(n_samples, 2)),\n    make_blobs(\n        n_samples=[\n            n_samples // 10,\n            n_samples * 4 // 10,\n            n_samples // 10,\n            n_samples * 4 // 10,\n        ],\n        cluster_std=0.5,\n        centers=centers_0,\n        random_state=random_state,\n    )[0],\n    make_blobs(\n        n_samples=[n_samples // 5, n_samples * 4 // 5],\n        cluster_std=0.5,\n        centers=centers_1,\n        random_state=random_state,\n    )[0],\n]\n\nfigure = plt.figure(figsize=(14, 9))\ni = 1\nfor ds_cnt, X in enumerate(X_list):\n    ax = plt.subplot(len(X_list), len(strategies) + 1, i)\n    ax.scatter(X[:, 0], X[:, 1], edgecolors=\"k\")\n    if ds_cnt == 0:\n        ax.set_title(\"Input data\", size=14)\n\n    xx, yy = np.meshgrid(\n        np.linspace(X[:, 0].min(), X[:, 0].max(), 300),\n        np.linspace(X[:, 1].min(), X[:, 1].max(), 300),\n    )\n    grid = np.c_[xx.ravel(), yy.ravel()]\n\n    ax.set_xlim(xx.min(), xx.max())\n    ax.set_ylim(yy.min(), yy.max())\n    ax.set_xticks(())\n    ax.set_yticks(())\n\n    i += 1\n    # transform the dataset with KBinsDiscretizer\n    for strategy in strategies:\n        enc = KBinsDiscretizer(n_bins=4, encode=\"ordinal\", strategy=strategy)\n        enc.fit(X)\n        grid_encoded = enc.transform(grid)\n\n        ax = plt.subplot(len(X_list), len(strategies) + 1, i)\n\n        # horizontal stripes\n        horizontal = grid_encoded[:, 0].reshape(xx.shape)\n        ax.contourf(xx, yy, horizontal, alpha=0.5)\n        # vertical stripes\n        vertical = grid_encoded[:, 1].reshape(xx.shape)\n        ax.contourf(xx, yy, vertical, alpha=0.5)\n\n        ax.scatter(X[:, 0], X[:, 1], edgecolors=\"k\")\n        ax.set_xlim(xx.min(), xx.max())\n        ax.set_ylim(yy.min(), yy.max())\n        ax.set_xticks(())\n        ax.set_yticks(())\n        if ds_cnt == 0:\n            ax.set_title(\"strategy='%s'\" % (strategy,), size=14)\n\n        i += 1\n\nplt.tight_layout()\nplt.show()"
+"# Author: Tom Dupr\u00e9 la Tour\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.preprocessing import KBinsDiscretizer\n\nstrategies = [\"uniform\", \"quantile\", \"kmeans\"]\n\nn_samples = 200\ncenters_0 = np.array([[0, 0], [0, 5], [2, 4], [8, 8]])\ncenters_1 = np.array([[0, 0], [3, 1]])\n\n# construct the datasets\nrandom_state = 42\nX_list = [\n    np.random.RandomState(random_state).uniform(-3, 3, size=(n_samples, 2)),\n    make_blobs(\n        n_samples=[\n            n_samples // 10,\n            n_samples * 4 // 10,\n            n_samples // 10,\n            n_samples * 4 // 10,\n        ],\n        cluster_std=0.5,\n        centers=centers_0,\n        random_state=random_state,\n    )[0],\n    make_blobs(\n        n_samples=[n_samples // 5, n_samples * 4 // 5],\n        cluster_std=0.5,\n        centers=centers_1,\n        random_state=random_state,\n    )[0],\n]\n\nfigure = plt.figure(figsize=(14, 9))\ni = 1\nfor ds_cnt, X in enumerate(X_list):\n    ax = plt.subplot(len(X_list), len(strategies) + 1, i)\n    ax.scatter(X[:, 0], X[:, 1], edgecolors=\"k\")\n    if ds_cnt == 0:\n        ax.set_title(\"Input data\", size=14)\n\n    xx, yy = np.meshgrid(\n        np.linspace(X[:, 0].min(), X[:, 0].max(), 300),\n        np.linspace(X[:, 1].min(), X[:, 1].max(), 300),\n    )\n    grid = np.c_[xx.ravel(), yy.ravel()]\n\n    ax.set_xlim(xx.min(), xx.max())\n    ax.set_ylim(yy.min(), yy.max())\n    ax.set_xticks(())\n    ax.set_yticks(())\n\n    i += 1\n    # transform the dataset with KBinsDiscretizer\n    for strategy in strategies:\n        enc = KBinsDiscretizer(\n            n_bins=4, encode=\"ordinal\", strategy=strategy, subsample=200_000\n        )\n        enc.fit(X)\n        grid_encoded = enc.transform(grid)\n\n        ax = plt.subplot(len(X_list), len(strategies) + 1, i)\n\n        # horizontal stripes\n        horizontal = grid_encoded[:, 0].reshape(xx.shape)\n        ax.contourf(xx, yy, horizontal, alpha=0.5)\n        # vertical stripes\n        vertical = grid_encoded[:, 1].reshape(xx.shape)\n        ax.contourf(xx, yy, vertical, alpha=0.5)\n\n        ax.scatter(X[:, 0], X[:, 1], edgecolors=\"k\")\n        ax.set_xlim(xx.min(), xx.max())\n        ax.set_ylim(yy.min(), yy.max())\n        ax.set_xticks(())\n        ax.set_yticks(())\n        if ds_cnt == 0:\n            ax.set_title(\"strategy='%s'\" % (strategy,), size=14)\n\n        i += 1\n\nplt.tight_layout()\nplt.show()"
 ]
 }
 ],

dev/_downloads/c5d41d4d7d1dab3e49804c2e2c4222e8/plot_ard.ipynb

Lines changed: 2 additions & 2 deletions
@@ -51,7 +51,7 @@
 },
 "outputs": [],
 "source": [
-"import pandas as pd\n\nfrom sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression\n\nolr = LinearRegression().fit(X, y)\nbrr = BayesianRidge(compute_score=True, n_iter=30).fit(X, y)\nard = ARDRegression(compute_score=True, n_iter=30).fit(X, y)\ndf = pd.DataFrame(\n    {\n        \"Weights of true generative process\": true_weights,\n        \"ARDRegression\": ard.coef_,\n        \"BayesianRidge\": brr.coef_,\n        \"LinearRegression\": olr.coef_,\n    }\n)"
+"import pandas as pd\n\nfrom sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression\n\nolr = LinearRegression().fit(X, y)\nbrr = BayesianRidge(compute_score=True, max_iter=30).fit(X, y)\nard = ARDRegression(compute_score=True, max_iter=30).fit(X, y)\ndf = pd.DataFrame(\n    {\n        \"Weights of true generative process\": true_weights,\n        \"ARDRegression\": ard.coef_,\n        \"BayesianRidge\": brr.coef_,\n        \"LinearRegression\": olr.coef_,\n    }\n)"
 ]
 },
 {
@@ -101,7 +101,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Indeed, both models minimize the log-likelihood up to an arbitrary cutoff\ndefined by the `n_iter` parameter.\n\n## Bayesian regressions with polynomial feature expansion\nGenerate synthetic dataset\n--------------------------\nWe create a target that is a non-linear function of the input feature.\nNoise following a standard uniform distribution is added.\n\n"
+"Indeed, both models minimize the log-likelihood up to an arbitrary cutoff\ndefined by the `max_iter` parameter.\n\n## Bayesian regressions with polynomial feature expansion\nGenerate synthetic dataset\n--------------------------\nWe create a target that is a non-linear function of the input feature.\nNoise following a standard uniform distribution is added.\n\n"
 ]
 },
 {

dev/_downloads/df790541d4c6bdebcc75018a2459467a/plot_release_highlights_0_22_0.ipynb

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@
 },
 "outputs": [],
 "source": [
-"from sklearn.datasets import load_iris\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import LinearSVC\n\nX, y = load_iris(return_X_y=True)\nestimators = [\n    (\"rf\", RandomForestClassifier(n_estimators=10, random_state=42)),\n    (\"svr\", make_pipeline(StandardScaler(), LinearSVC(random_state=42))),\n]\nclf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())\nX_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)\nclf.fit(X_train, y_train).score(X_test, y_test)"
+"from sklearn.datasets import load_iris\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import LinearSVC\n\nX, y = load_iris(return_X_y=True)\nestimators = [\n    (\"rf\", RandomForestClassifier(n_estimators=10, random_state=42)),\n    (\"svr\", make_pipeline(StandardScaler(), LinearSVC(dual=\"auto\", random_state=42))),\n]\nclf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())\nX_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)\nclf.fit(X_train, y_train).score(X_test, y_test)"
 ]
 },
 {
