
Commit c34f6b5

Pushing the docs to dev/ for branch: main, commit b948fdba24a4f5064485627ab1d2b934026312b6

1 parent: 0cd2340

File tree: 1,296 files changed (+5801 −5790 lines)


dev/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: c65caea7dd935ad3829460e9f374daab
+config: bccc80489749706009befbc2448fc488
 tags: 645f666f9bcd5a90fca523b33c5a78b7

dev/_downloads/4f6558a73e0c79834afc005bac34dc13/plot_target_encoder_cross_val.py

Lines changed: 14 additions & 14 deletions
@@ -1,16 +1,16 @@
 """
-==========================================
-Target Encoder's Internal Cross Validation
-==========================================
+=======================================
+Target Encoder's Internal Cross fitting
+=======================================
 
 .. currentmodule:: sklearn.preprocessing
 
 The :class:`TargetEnocoder` replaces each category of a categorical feature with
 the mean of the target variable for that category. This method is useful
 in cases where there is a strong relationship between the categorical feature
 and the target. To prevent overfitting, :meth:`TargetEncoder.fit_transform` uses
-interval cross validation to encode the training data to be used by a downstream
-model. In this example, we demonstrate the importance of the cross validation
+an internal cross fitting scheme to encode the training data to be used by a
+downstream model. In this example, we demonstrate the importance of the cross fitting
 procedure to prevent overfitting.
 """

@@ -49,11 +49,11 @@
 
 # %%
 # The uninformative feature with high cardinality is generated so that is independent of
-# the target variable. We will show that target encoding without cross validation will
+# the target variable. We will show that target encoding without cross fitting will
 # cause catastrophic overfitting for the downstream regressor. These high cardinality
 # features are basically unique identifiers for samples which should generally be
 # removed from machine learning dataset. In this example, we generate them to show how
-# :class:`TargetEncoder`'s default cross validation behavior mitigates the overfitting
+# :class:`TargetEncoder`'s default cross fitting behavior mitigates the overfitting
 # issue automatically.
 X_near_unique_categories = rng.choice(
     int(0.9 * n_samples), size=n_samples, replace=True

@@ -79,7 +79,7 @@
 # ==========================
 # In this section, we train a ridge regressor on the dataset with and without
 # encoding and explore the influence of target encoder with and without the
-# interval cross validation. First, we see the Ridge model trained on the
+# internal cross fitting. First, we see the Ridge model trained on the
 # raw features will have low performance, because the order of the informative
 # feature is not informative:
 import sklearn

@@ -96,7 +96,7 @@
 
 # %%
 # Next, we create a pipeline with the target encoder and ridge model. The pipeline
-# uses :meth:`TargetEncoder.fit_transform` which uses cross validation. We see that
+# uses :meth:`TargetEncoder.fit_transform` which uses cross fitting. We see that
 # the model fits the data well and generalizes to the test set:
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import TargetEncoder

@@ -120,11 +120,11 @@
 _ = coefs_cv.plot(kind="barh")
 
 # %%
-# While :meth:`TargetEncoder.fit_transform` uses an interval cross validation,
-# :meth:`TargetEncoder.transform` itself does not perform any cross validation.
+# While :meth:`TargetEncoder.fit_transform` uses an internal cross fitting scheme,
+# :meth:`TargetEncoder.transform` itself does not perform any cross fitting.
 # It uses the aggregation of the complete training set to transform the categorical
 # features. Thus, we can use :meth:`TargetEncoder.fit` followed by
-# :meth:`TargetEncoder.transform` to disable the cross validation. This encoding
+# :meth:`TargetEncoder.transform` to disable the cross fitting. This encoding
 # is then passed to the ridge model.
 target_encoder = TargetEncoder(random_state=0)
 target_encoder.fit(X_train, y_train)

@@ -154,8 +154,8 @@
 # %%
 # Conclusion
 # ==========
-# This example demonstrates the importance of :class:`TargetEncoder`'s interval cross
-# validation. It is important to use :meth:`TargetEncoder.fit_transform` to encode
+# This example demonstrates the importance of :class:`TargetEncoder`'s internal cross
+# fitting. It is important to use :meth:`TargetEncoder.fit_transform` to encode
 # training data before passing it to a machine learning model. When a
 # :class:`TargetEncoder` is a part of a :class:`~sklearn.pipeline.Pipeline` and the
 # pipeline is fitted, the pipeline will correctly call

dev/_downloads/7b414ce0c39e11cf961fd4fa23008246/plot_target_encoder.ipynb

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n# Comparing Target Encoder with Other Encoders\n\n.. currentmodule:: sklearn.preprocessing\n\nThe :class:`TargetEncoder` uses the value of the target to encode each\ncategorical feature. In this example, we will compare three different approaches\nfor handling categorical features: :class:`TargetEncoder`,\n:class:`OrdinalEncoder`, :class:`OneHotEncoder` and dropping the category.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>`fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a\n cross-validation scheme is used in `fit_transform` for encoding. See the\n `User Guide <target_encoder>`. for details.</p></div>\n"
+    "\n# Comparing Target Encoder with Other Encoders\n\n.. currentmodule:: sklearn.preprocessing\n\nThe :class:`TargetEncoder` uses the value of the target to encode each\ncategorical feature. In this example, we will compare three different approaches\nfor handling categorical features: :class:`TargetEncoder`,\n:class:`OrdinalEncoder`, :class:`OneHotEncoder` and dropping the category.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>`fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a\n cross fitting scheme is used in `fit_transform` for encoding. See the\n `User Guide <target_encoder>`. for details.</p></div>\n"
    ]
   },
   {

dev/_downloads/c3f95dc25241c64632f9c3378fd4e89b/plot_target_encoder_cross_val.ipynb

Lines changed: 6 additions & 6 deletions
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n# Target Encoder's Internal Cross Validation\n\n.. currentmodule:: sklearn.preprocessing\n\nThe :class:`TargetEnocoder` replaces each category of a categorical feature with\nthe mean of the target variable for that category. This method is useful\nin cases where there is a strong relationship between the categorical feature\nand the target. To prevent overfitting, :meth:`TargetEncoder.fit_transform` uses\ninterval cross validation to encode the training data to be used by a downstream\nmodel. In this example, we demonstrate the importance of the cross validation\nprocedure to prevent overfitting.\n"
+    "\n# Target Encoder's Internal Cross fitting\n\n.. currentmodule:: sklearn.preprocessing\n\nThe :class:`TargetEnocoder` replaces each category of a categorical feature with\nthe mean of the target variable for that category. This method is useful\nin cases where there is a strong relationship between the categorical feature\nand the target. To prevent overfitting, :meth:`TargetEncoder.fit_transform` uses\nan internal cross fitting scheme to encode the training data to be used by a\ndownstream model. In this example, we demonstrate the importance of the cross fitting\nprocedure to prevent overfitting.\n"
    ]
   },
   {

@@ -47,7 +47,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The uninformative feature with high cardinality is generated so that is independent of\nthe target variable. We will show that target encoding without cross validation will\ncause catastrophic overfitting for the downstream regressor. These high cardinality\nfeatures are basically unique identifiers for samples which should generally be\nremoved from machine learning dataset. In this example, we generate them to show how\n:class:`TargetEncoder`'s default cross validation behavior mitigates the overfitting\nissue automatically.\n\n"
+    "The uninformative feature with high cardinality is generated so that is independent of\nthe target variable. We will show that target encoding without cross fitting will\ncause catastrophic overfitting for the downstream regressor. These high cardinality\nfeatures are basically unique identifiers for samples which should generally be\nremoved from machine learning dataset. In this example, we generate them to show how\n:class:`TargetEncoder`'s default cross fitting behavior mitigates the overfitting\nissue automatically.\n\n"
    ]
   },
   {

@@ -83,7 +83,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Training a Ridge Regressor\nIn this section, we train a ridge regressor on the dataset with and without\nencoding and explore the influence of target encoder with and without the\ninterval cross validation. First, we see the Ridge model trained on the\nraw features will have low performance, because the order of the informative\nfeature is not informative:\n\n"
+    "## Training a Ridge Regressor\nIn this section, we train a ridge regressor on the dataset with and without\nencoding and explore the influence of target encoder with and without the\ninternal cross fitting. First, we see the Ridge model trained on the\nraw features will have low performance, because the order of the informative\nfeature is not informative:\n\n"
    ]
   },
   {

@@ -101,7 +101,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Next, we create a pipeline with the target encoder and ridge model. The pipeline\nuses :meth:`TargetEncoder.fit_transform` which uses cross validation. We see that\nthe model fits the data well and generalizes to the test set:\n\n"
+    "Next, we create a pipeline with the target encoder and ridge model. The pipeline\nuses :meth:`TargetEncoder.fit_transform` which uses cross fitting. We see that\nthe model fits the data well and generalizes to the test set:\n\n"
    ]
   },
   {

@@ -137,7 +137,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "While :meth:`TargetEncoder.fit_transform` uses an interval cross validation,\n:meth:`TargetEncoder.transform` itself does not perform any cross validation.\nIt uses the aggregation of the complete training set to transform the categorical\nfeatures. Thus, we can use :meth:`TargetEncoder.fit` followed by\n:meth:`TargetEncoder.transform` to disable the cross validation. This encoding\nis then passed to the ridge model.\n\n"
+    "While :meth:`TargetEncoder.fit_transform` uses an internal cross fitting scheme,\n:meth:`TargetEncoder.transform` itself does not perform any cross fitting.\nIt uses the aggregation of the complete training set to transform the categorical\nfeatures. Thus, we can use :meth:`TargetEncoder.fit` followed by\n:meth:`TargetEncoder.transform` to disable the cross fitting. This encoding\nis then passed to the ridge model.\n\n"
    ]
   },
   {

@@ -191,7 +191,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Conclusion\nThis example demonstrates the importance of :class:`TargetEncoder`'s interval cross\nvalidation. It is important to use :meth:`TargetEncoder.fit_transform` to encode\ntraining data before passing it to a machine learning model. When a\n:class:`TargetEncoder` is a part of a :class:`~sklearn.pipeline.Pipeline` and the\npipeline is fitted, the pipeline will correctly call\n:meth:`TargetEncoder.fit_transform` and pass the encoding along.\n\n"
+    "## Conclusion\nThis example demonstrates the importance of :class:`TargetEncoder`'s internal cross\nfitting. It is important to use :meth:`TargetEncoder.fit_transform` to encode\ntraining data before passing it to a machine learning model. When a\n:class:`TargetEncoder` is a part of a :class:`~sklearn.pipeline.Pipeline` and the\npipeline is fitted, the pipeline will correctly call\n:meth:`TargetEncoder.fit_transform` and pass the encoding along.\n\n"
    ]
   }
  ],

dev/_downloads/c62ac915428f3a173ccfc19ab3de33bd/plot_target_encoder.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 
 .. note::
   `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a
-  cross-validation scheme is used in `fit_transform` for encoding. See the
+  cross fitting scheme is used in `fit_transform` for encoding. See the
   :ref:`User Guide <target_encoder>`. for details.
 """
 
dev/_downloads/scikit-learn-docs.zip

804 Bytes changed (binary file not shown)

Further binary files changed: −20 Bytes, −23 Bytes (not shown)
