20 | 20 | """
21 | 21 |
22 | 22 | ##############################################################################
23 | | -# KNN Based Imputation
24 | | -# ------------------------------------
25 | | -# We now support imputation for completing missing values using k-Nearest
26 | | -# Neighbors.
| 23 | +# New plotting API
| 24 | +# ----------------
27 | 25 | #
28 | | -# Each sample's missing values are imputed using the mean value from
29 | | -# ``n_neighbors`` nearest neighbors found in the training set. Two samples are
30 | | -# close if the features that neither is missing are close.
31 | | -# By default, a euclidean distance metric
32 | | -# that supports missing values,
33 | | -# :func:`~metrics.nan_euclidean_distances`, is used to find the nearest
34 | | -# neighbors.
| 26 | +# A new plotting API is available for creating visualizations. This new API
| 27 | +# allows for quickly adjusting the visuals of a plot without involving any
| 28 | +# recomputation. It is also possible to add different plots to the same
| 29 | +# figure. See more examples in the :ref:`User Guide <visualizations>`.
| 30 | +
| 31 | +from sklearn.model_selection import train_test_split
| 32 | +from sklearn.svm import SVC
| 33 | +from sklearn.metrics import plot_roc_curve
| 34 | +from sklearn.ensemble import RandomForestClassifier
| 35 | +from sklearn.datasets import make_classification
| 36 | +import matplotlib.pyplot as plt
| 37 | +
| 38 | +X, y = make_classification(random_state=0)
| 39 | +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
| 40 | +
| 41 | +svc = SVC(random_state=42)
| 42 | +svc.fit(X_train, y_train)
| 43 | +rfc = RandomForestClassifier(random_state=42)
| 44 | +rfc.fit(X_train, y_train)
| 45 | +
| 46 | +svc_disp = plot_roc_curve(svc, X_test, y_test)
| 47 | +rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=svc_disp.ax_)
| 48 | +rfc_disp.figure_.suptitle("ROC curve comparison")
| 49 | +
| 50 | +plt.show()
| 51 | +
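A minimal sketch of the "no recomputation" point above, continuing the snippet just added; it assumes the ``RocCurveDisplay`` objects returned by ``plot_roc_curve`` expose a ``plot`` method that redraws the stored curve (the styling keywords are purely illustrative):

# Redraw the stored curves with new styling; nothing is refitted or re-predicted.
_, ax = plt.subplots()
svc_disp.plot(ax=ax, name="SVC", alpha=0.8)
rfc_disp.plot(ax=ax, name="Random Forest", linestyle="--")
plt.show()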
| 52 | +############################################################################
| 53 | +# Stacking Classifier and Regressor
| 54 | +# ---------------------------------
| 55 | +# :class:`~ensemble.StackingClassifier` and
| 56 | +# :class:`~ensemble.StackingRegressor`
| 57 | +# allow you to have a stack of estimators with a final classifier or
| 58 | +# a regressor.
| 59 | +# Stacked generalization consists in stacking the output of individual
| 60 | +# estimators and using a classifier to compute the final prediction. Stacking
| 61 | +# leverages the strength of each individual estimator by using their output
| 62 | +# as the input of a final estimator.
| 63 | +# Base estimators are fitted on the full ``X`` while
| 64 | +# the final estimator is trained on cross-validated predictions of the
| 65 | +# base estimators, obtained with ``cross_val_predict``.
35 | 66 | #
36 | | -# Read more in the :ref:`User Guide <knnimpute>`.
| 67 | +# Read more in the :ref:`User Guide <stacking>`.
37 | 68 |
38 | | -import numpy as np
39 | | -from sklearn.impute import KNNImputer
| 69 | +from sklearn.datasets import load_iris
| 70 | +from sklearn.svm import LinearSVC
| 71 | +from sklearn.linear_model import LogisticRegression
| 72 | +from sklearn.preprocessing import StandardScaler
| 73 | +from sklearn.pipeline import make_pipeline
| 74 | +from sklearn.ensemble import StackingClassifier
| 75 | +from sklearn.model_selection import train_test_split
40 | 76 |
41 | | -X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
42 | | -imputer = KNNImputer(n_neighbors=2)
43 | | -print(imputer.fit_transform(X))
| 77 | +X, y = load_iris(return_X_y=True)
| 78 | +estimators = [
| 79 | +    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
| 80 | +    ('svr', make_pipeline(StandardScaler(),
| 81 | +                          LinearSVC(random_state=42)))
| 82 | +]
| 83 | +clf = StackingClassifier(
| 84 | +    estimators=estimators, final_estimator=LogisticRegression()
| 85 | +)
| 86 | +X_train, X_test, y_train, y_test = train_test_split(
| 87 | +    X, y, stratify=y, random_state=42
| 88 | +)
| 89 | +clf.fit(X_train, y_train).score(X_test, y_test)
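The regressor counterpart mentioned above follows the same pattern. A minimal sketch, with dataset and estimator choices that are illustrative rather than taken from this diff:

from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import RidgeCV

# Stack a gradient boosting regressor and a ridge model, blended by RidgeCV.
X_diab, y_diab = load_diabetes(return_X_y=True)
reg = StackingRegressor(
    estimators=[('gbr', GradientBoostingRegressor(random_state=42)),
                ('ridge', RidgeCV())],
    final_estimator=RidgeCV())
X_tr, X_te, y_tr, y_te = train_test_split(X_diab, y_diab, random_state=42)
reg.fit(X_tr, y_tr)
print(reg.score(X_te, y_te))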
44 | 90 |
45 | 91 | ##############################################################################
46 | 92 | # Permutation-based feature importance
50 | 96 | # estimate of the importance of each feature, for any fitted estimator:
51 | 97
52 | 98 | from sklearn.ensemble import RandomForestClassifier
53 | | -from sklearn.datasets import make_classification
54 | 99 | from sklearn.inspection import permutation_importance
55 | | -import matplotlib.pyplot as plt
56 | 100
57 | 101 | X, y = make_classification(random_state=0, n_features=5, n_informative=3)
58 | 102 | rf = RandomForestClassifier(random_state=0).fit(X, y)
87 | 131 | gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y)
88 | 132 | print(gbdt.predict(X))
89 | 133
90 | | -##############################################################################
91 | | -# New plotting API
92 | | -# ----------------
93 | | -#
94 | | -# A new plotting API is available for creating visualizations. This new API
95 | | -# allows for quickly adjusting the visuals of a plot without involving any
96 | | -# recomputation. It is also possible to add different plots to the same
97 | | -# figure. See more examples in the :ref:`User Guide <visualizations>`.
| 134 | +############################################################################
| 135 | +# Precomputed sparse nearest neighbors graph
| 136 | +# ------------------------------------------
| 137 | +# Most estimators based on nearest neighbors graphs now accept precomputed
| 138 | +# sparse graphs as input, to reuse the same graph for multiple estimator fits.
| 139 | +# To use this feature in a pipeline, one can use the `memory` parameter, along
| 140 | +# with one of the two new transformers,
| 141 | +# :class:`neighbors.KNeighborsTransformer` and
| 142 | +# :class:`neighbors.RadiusNeighborsTransformer`. The precomputation
| 143 | +# can also be performed by custom estimators to use alternative
| 144 | +# implementations, such as approximate nearest neighbors methods.
| 145 | +# See more details in the :ref:`User Guide <neighbors_transformer>`.
98 | 146 |
99 | | -from sklearn.model_selection import train_test_split
100 | | -from sklearn.svm import SVC
101 | | -from sklearn.metrics import plot_roc_curve
| 147 | +from tempfile import TemporaryDirectory
| 148 | +from sklearn.neighbors import KNeighborsTransformer
| 149 | +from sklearn.manifold import Isomap
| 150 | +from sklearn.pipeline import make_pipeline
102 | 151 |
103 | 152 | X, y = make_classification(random_state=0)
104 | | -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
105 | 153
106 | | -svc = SVC(random_state=42)
107 | | -svc.fit(X_train, y_train)
108 | | -rfc = RandomForestClassifier(random_state=42)
109 | | -rfc.fit(X_train, y_train)
| 154 | +with TemporaryDirectory(prefix="sklearn_cache_") as tmpdir:
| 155 | +    estimator = make_pipeline(
| 156 | +        KNeighborsTransformer(n_neighbors=10, mode='distance'),
| 157 | +        Isomap(n_neighbors=10, metric='precomputed'),
| 158 | +        memory=tmpdir)
| 159 | +    estimator.fit(X)
110 | 160 |
111 | | -svc_disp = plot_roc_curve(svc, X_test, y_test)
112 | | -rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=svc_disp.ax_)
113 | | -rfc_disp.figure_.suptitle("ROC curve comparison")
| 161 | +    # We can decrease the number of neighbors and the graph will not be
| 162 | +    # recomputed.
| 163 | +    estimator.set_params(isomap__n_neighbors=5)
| 164 | +    estimator.fit(X)
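A rough sketch of feeding a precomputed sparse graph straight to an estimator, outside the pipeline/caching approach above; it assumes ``DBSCAN`` accepts a sparse distance matrix with ``metric='precomputed'``, and the ``eps`` value is arbitrary, chosen only for illustration:

from sklearn.cluster import DBSCAN

# Build the sparse k-neighbors distance graph once, then reuse it directly.
graph = KNeighborsTransformer(n_neighbors=10, mode='distance').fit_transform(X)
clustering = DBSCAN(eps=3.0, min_samples=5, metric='precomputed').fit(graph)
print(clustering.labels_[:10])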
114 | 165 |
115 | | -plt.show()
| 166 | +##############################################################################
| 167 | +# KNN Based Imputation
| 168 | +# ------------------------------------
| 169 | +# We now support imputation for completing missing values using k-Nearest
| 170 | +# Neighbors.
| 171 | +#
| 172 | +# Each sample's missing values are imputed using the mean value from
| 173 | +# ``n_neighbors`` nearest neighbors found in the training set. Two samples are
| 174 | +# close if the features that neither is missing are close.
| 175 | +# By default, a euclidean distance metric
| 176 | +# that supports missing values,
| 177 | +# :func:`~metrics.nan_euclidean_distances`, is used to find the nearest
| 178 | +# neighbors.
| 179 | +#
| 180 | +# Read more in the :ref:`User Guide <knnimpute>`.
| 181 | +
| 182 | +import numpy as np
| 183 | +from sklearn.impute import KNNImputer
| 184 | +
| 185 | +X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
| 186 | +imputer = KNNImputer(n_neighbors=2)
| 187 | +print(imputer.fit_transform(X))
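A small follow-on sketch, assuming the ``weights`` parameter of :class:`~impute.KNNImputer`: neighbor values can be weighted by inverse distance instead of averaged uniformly, reusing the toy matrix above.

# Weight each neighbor's contribution by inverse distance rather than uniformly.
imputer_w = KNNImputer(n_neighbors=2, weights="distance")
print(imputer_w.fit_transform(X))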
116 | 188 |
117 | 189 | #############################################################################
118 | 190 | # Tree pruning
143 | 215 | titanic = fetch_openml('titanic', version=1, as_frame=True)
144 | 216 | print(titanic.data.head()[['pclass', 'embarked']])
145 | 217
146 | | -############################################################################
147 | | -# Precomputed sparse nearest neighbors graph
148 | | -# ------------------------------------------
149 | | -# Most estimators based on nearest neighbors graphs now accept precomputed
150 | | -# sparse graphs as input, to reuse the same graph for multiple estimator fits.
151 | | -# To use this feature in a pipeline, one can use the `memory` parameter, along
152 | | -# with one of the two new transformers,
153 | | -# :class:`neighbors.KNeighborsTransformer` and
154 | | -# :class:`neighbors.RadiusNeighborsTransformer`. The precomputation
155 | | -# can also be performed by custom estimators to use alternative
156 | | -# implementations, such as approximate nearest neighbors methods.
157 | | -# See more details in the :ref:`User Guide <neighbors_transformer>`.
158 | | -
159 | | -from tempfile import TemporaryDirectory
160 | | -from sklearn.neighbors import KNeighborsTransformer
161 | | -from sklearn.manifold import Isomap
162 | | -from sklearn.pipeline import make_pipeline
163 | | -
164 | | -with TemporaryDirectory(prefix="sklearn_cache_") as tmpdir:
165 | | -    estimator = make_pipeline(
166 | | -        KNeighborsTransformer(n_neighbors=10, mode='distance'),
167 | | -        Isomap(n_neighbors=10, metric='precomputed'),
168 | | -        memory=tmpdir)
169 | | -    estimator.fit(X)
170 | | -
171 | | -    # We can decrease the number of neighbors and the graph will not be
172 | | -    # recomputed.
173 | | -    estimator.set_params(isomap__n_neighbors=5)
174 | | -    estimator.fit(X)
175 | | -
176 | | -############################################################################
177 | | -# Stacking Classifier and Regressor
178 | | -# ---------------------------------
179 | | -# :class:`~ensemble.StackingClassifier` and
180 | | -# :class:`~ensemble.StackingRegressor`
181 | | -# allow you to have a stack of estimators with a final classifier or
182 | | -# a regressor.
183 | | -# Stacked generalization consists in stacking the output of individual
184 | | -# estimators and use a classifier to compute the final prediction. Stacking
185 | | -# allows to use the strength of each individual estimator by using their output
186 | | -# as input of a final estimator.
187 | | -# Base estimators are fitted on the full ``X`` while
188 | | -# the final estimator is trained using cross-validated predictions of the
189 | | -# base estimators using ``cross_val_predict``.
190 | | -#
191 | | -# Read more in the :ref:`User Guide <stacking>`.
192 | | -
193 | | -from sklearn.datasets import load_iris
194 | | -from sklearn.ensemble import RandomForestClassifier
195 | | -from sklearn.svm import LinearSVC
196 | | -from sklearn.linear_model import LogisticRegression
197 | | -from sklearn.preprocessing import StandardScaler
198 | | -from sklearn.pipeline import make_pipeline
199 | | -from sklearn.ensemble import StackingClassifier
200 | | -from sklearn.model_selection import train_test_split
201 | | -
202 | | -X, y = load_iris(return_X_y=True)
203 | | -estimators = [
204 | | -    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
205 | | -    ('svr', make_pipeline(StandardScaler(),
206 | | -                          LinearSVC(random_state=42)))
207 | | -]
208 | | -clf = StackingClassifier(
209 | | -    estimators=estimators, final_estimator=LogisticRegression()
210 | | -)
211 | | -X_train, X_test, y_train, y_test = train_test_split(
212 | | -    X, y, stratify=y, random_state=42
213 | | -)
214 | | -clf.fit(X_train, y_train).score(X_test, y_test)
215 | | -
216 | 218 | ############################################################################
217 | 219 | # Checking scikit-learn compatibility of an estimator
218 | 220 | # ---------------------------------------------------