Skip to content

Commit 56cf00e

Browse files
committed
Pushing the docs to dev/ for branch: main, commit e3b4b9ce6fbe96f5e4166ead0f45d755f1759291
1 parent ca2a676 commit 56cf00e

File tree

1,247 files changed

+4432
-4437
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,247 files changed

+4432
-4437
lines changed
Binary file not shown.

dev/_downloads/49cd91d05440a1c88b074430761aeb76/plot_cv_indices.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"from sklearn.model_selection import (\n TimeSeriesSplit,\n KFold,\n ShuffleSplit,\n StratifiedKFold,\n GroupShuffleSplit,\n GroupKFold,\n StratifiedShuffleSplit,\n StratifiedGroupKFold,\n)\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.patches import Patch\n\nnp.random.seed(1338)\ncmap_data = plt.cm.Paired\ncmap_cv = plt.cm.coolwarm\nn_splits = 4"
29+
"from sklearn.model_selection import (\n TimeSeriesSplit,\n KFold,\n ShuffleSplit,\n StratifiedKFold,\n GroupShuffleSplit,\n GroupKFold,\n StratifiedShuffleSplit,\n StratifiedGroupKFold,\n)\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.patches import Patch\n\nrng = np.random.RandomState(1338)\ncmap_data = plt.cm.Paired\ncmap_cv = plt.cm.coolwarm\nn_splits = 4"
3030
]
3131
},
3232
{
@@ -44,7 +44,7 @@
4444
},
4545
"outputs": [],
4646
"source": [
47-
"# Generate the class/group data\nn_points = 100\nX = np.random.randn(100, 10)\n\npercentiles_classes = [0.1, 0.3, 0.6]\ny = np.hstack([[ii] * int(100 * perc) for ii, perc in enumerate(percentiles_classes)])\n\n# Evenly spaced groups repeated once\ngroups = np.hstack([[ii] * 10 for ii in range(10)])\n\n\ndef visualize_groups(classes, groups, name):\n # Visualize dataset groups\n fig, ax = plt.subplots()\n ax.scatter(\n range(len(groups)),\n [0.5] * len(groups),\n c=groups,\n marker=\"_\",\n lw=50,\n cmap=cmap_data,\n )\n ax.scatter(\n range(len(groups)),\n [3.5] * len(groups),\n c=classes,\n marker=\"_\",\n lw=50,\n cmap=cmap_data,\n )\n ax.set(\n ylim=[-1, 5],\n yticks=[0.5, 3.5],\n yticklabels=[\"Data\\ngroup\", \"Data\\nclass\"],\n xlabel=\"Sample index\",\n )\n\n\nvisualize_groups(y, groups, \"no groups\")"
47+
"# Generate the class/group data\nn_points = 100\nX = rng.randn(100, 10)\n\npercentiles_classes = [0.1, 0.3, 0.6]\ny = np.hstack([[ii] * int(100 * perc) for ii, perc in enumerate(percentiles_classes)])\n\n# Generate uneven groups\ngroup_prior = rng.dirichlet([2] * 10)\ngroups = np.repeat(np.arange(10), rng.multinomial(100, group_prior))\n\n\ndef visualize_groups(classes, groups, name):\n # Visualize dataset groups\n fig, ax = plt.subplots()\n ax.scatter(\n range(len(groups)),\n [0.5] * len(groups),\n c=groups,\n marker=\"_\",\n lw=50,\n cmap=cmap_data,\n )\n ax.scatter(\n range(len(groups)),\n [3.5] * len(groups),\n c=classes,\n marker=\"_\",\n lw=50,\n cmap=cmap_data,\n )\n ax.set(\n ylim=[-1, 5],\n yticks=[0.5, 3.5],\n yticklabels=[\"Data\\ngroup\", \"Data\\nclass\"],\n xlabel=\"Sample index\",\n )\n\n\nvisualize_groups(y, groups, \"no groups\")"
4848
]
4949
},
5050
{
@@ -98,7 +98,7 @@
9898
},
9999
"outputs": [],
100100
"source": [
101-
"# To better demonstrate the difference, we will assign samples to groups\n# unevenly:\n\nuneven_groups = np.sort(np.random.randint(0, 10, n_points))\n\ncvs = [StratifiedKFold, GroupKFold, StratifiedGroupKFold]\n\nfor cv in cvs:\n fig, ax = plt.subplots(figsize=(6, 3))\n plot_cv_indices(cv(n_splits), X, y, uneven_groups, ax, n_splits)\n ax.legend(\n [Patch(color=cmap_cv(0.8)), Patch(color=cmap_cv(0.02))],\n [\"Testing set\", \"Training set\"],\n loc=(1.02, 0.8),\n )\n # Make the legend fit\n plt.tight_layout()\n fig.subplots_adjust(right=0.7)"
101+
"cvs = [StratifiedKFold, GroupKFold, StratifiedGroupKFold]\n\nfor cv in cvs:\n fig, ax = plt.subplots(figsize=(6, 3))\n plot_cv_indices(cv(n_splits), X, y, groups, ax, n_splits)\n ax.legend(\n [Patch(color=cmap_cv(0.8)), Patch(color=cmap_cv(0.02))],\n [\"Testing set\", \"Training set\"],\n loc=(1.02, 0.8),\n )\n # Make the legend fit\n plt.tight_layout()\n fig.subplots_adjust(right=0.7)"
102102
]
103103
},
104104
{
Binary file not shown.

dev/_downloads/f1caa332331b42f32518c03ec8a71341/plot_cv_indices.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import matplotlib.pyplot as plt
2727
from matplotlib.patches import Patch
2828

29-
np.random.seed(1338)
29+
rng = np.random.RandomState(1338)
3030
cmap_data = plt.cm.Paired
3131
cmap_cv = plt.cm.coolwarm
3232
n_splits = 4
@@ -47,13 +47,14 @@
4747

4848
# Generate the class/group data
4949
n_points = 100
50-
X = np.random.randn(100, 10)
50+
X = rng.randn(100, 10)
5151

5252
percentiles_classes = [0.1, 0.3, 0.6]
5353
y = np.hstack([[ii] * int(100 * perc) for ii, perc in enumerate(percentiles_classes)])
5454

55-
# Evenly spaced groups repeated once
56-
groups = np.hstack([[ii] * 10 for ii in range(10)])
55+
# Generate uneven groups
56+
group_prior = rng.dirichlet([2] * 10)
57+
groups = np.repeat(np.arange(10), rng.multinomial(100, group_prior))
5758

5859

5960
def visualize_groups(classes, groups, name):
@@ -158,17 +159,11 @@ def plot_cv_indices(cv, X, y, group, ax, n_splits, lw=10):
158159
# different folds.
159160
# - ``StratifiedGroupKFold`` to keep the constraint of ``GroupKFold`` while
160161
# attempting to return stratified folds.
161-
162-
# To better demonstrate the difference, we will assign samples to groups
163-
# unevenly:
164-
165-
uneven_groups = np.sort(np.random.randint(0, 10, n_points))
166-
167162
cvs = [StratifiedKFold, GroupKFold, StratifiedGroupKFold]
168163

169164
for cv in cvs:
170165
fig, ax = plt.subplots(figsize=(6, 3))
171-
plot_cv_indices(cv(n_splits), X, y, uneven_groups, ax, n_splits)
166+
plot_cv_indices(cv(n_splits), X, y, groups, ax, n_splits)
172167
ax.legend(
173168
[Patch(color=cmap_cv(0.8)), Patch(color=cmap_cv(0.02))],
174169
["Testing set", "Training set"],

dev/_downloads/scikit-learn-docs.zip

-2.91 KB
Binary file not shown.
3 Bytes
138 Bytes
32 Bytes
-357 Bytes
-5 Bytes

0 commit comments

Comments
 (0)