scikit-learn
diff --git a/‎dev/_downloads/auto_examples_jupyter.zip
17 Bytes b/‎dev/_downloads/auto_examples_jupyter.zip
17 Bytes
diff --git a/‎dev/_downloads/auto_examples_python.zip
17 Bytes b/‎dev/_downloads/auto_examples_python.zip
17 Bytes
diff --git a/‎dev/_downloads/plot_missing_values.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/plot_missing_values.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/plot_missing_values.py
Lines changed: 4 additions & 4 deletions b/‎dev/_downloads/plot_missing_values.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.pdf
5.49 KB b/‎dev/_downloads/scikit-learn-docs.pdf
5.49 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
329 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
329 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
329 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
329 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
5 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
5 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
5 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
5 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-402 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
-402 Bytes
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import Imputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n                                      dtype=np.bool),\n                             np.ones(n_missing_samples,\n                                     dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", Imputer(missing_values=0,\n                                          strategy=\"mean\",\n                                          axis=0)),\n                      (\"forest\", RandomForestRegressor(random_state=0,\n                                                       n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
+        "import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n                                      dtype=np.bool),\n                             np.ones(n_missing_samples,\n                                     dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", SimpleImputer(missing_values=0,\n                                                strategy=\"mean\",\n                                                axis=0)),\n                      (\"forest\", RandomForestRegressor(random_state=0,\n                                                       n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
       ]
     }
   ],
 
@@ -28,7 +28,7 @@
 from sklearn.datasets import load_boston
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import Imputer
+from sklearn.impute import SimpleImputer
 from sklearn.model_selection import cross_val_score
 
 rng = np.random.RandomState(0)
@@ -64,9 +64,9 @@
 X_missing = X_full.copy()
 X_missing[np.where(missing_samples)[0], missing_features] = 0
 y_missing = y_full.copy()
-estimator = Pipeline([("imputer", Imputer(missing_values=0,
-                                          strategy="mean",
-                                          axis=0)),
+estimator = Pipeline([("imputer", SimpleImputer(missing_values=0,
+                                                strategy="mean",
+                                                axis=0)),
                       ("forest", RandomForestRegressor(random_state=0,
                                                        n_estimators=100))])
 score = cross_val_score(estimator, X_missing, y_missing).mean()
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import Imputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n dtype=np.bool),\n np.ones(n_missing_samples,\n dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", Imputer(missing_values=0,\n strategy=\"mean\",\n axis=0)),\n (\"forest\", RandomForestRegressor(random_state=0,\n n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
	`29`	+ "import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n dtype=np.bool),\n np.ones(n_missing_samples,\n dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", SimpleImputer(missing_values=0,\n strategy=\"mean\",\n axis=0)),\n (\"forest\", RandomForestRegressor(random_state=0,\n n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`