Skip to content

Commit cf015ee

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 71b29acbfb2f19de5f5244be47f93260b3846e96
1 parent 8c1d1ce commit cf015ee

File tree

1,051 files changed

+4047
-3180
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,051 files changed

+4047
-3180
lines changed
17 Bytes
Binary file not shown.
17 Bytes
Binary file not shown.

dev/_downloads/plot_missing_values.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import Imputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n dtype=np.bool),\n np.ones(n_missing_samples,\n dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", Imputer(missing_values=0,\n strategy=\"mean\",\n axis=0)),\n (\"forest\", RandomForestRegressor(random_state=0,\n n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
29+
"import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n dtype=np.bool),\n np.ones(n_missing_samples,\n dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", SimpleImputer(missing_values=0,\n strategy=\"mean\",\n axis=0)),\n (\"forest\", RandomForestRegressor(random_state=0,\n n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
3030
]
3131
}
3232
],

dev/_downloads/plot_missing_values.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from sklearn.datasets import load_boston
2929
from sklearn.ensemble import RandomForestRegressor
3030
from sklearn.pipeline import Pipeline
31-
from sklearn.preprocessing import Imputer
31+
from sklearn.impute import SimpleImputer
3232
from sklearn.model_selection import cross_val_score
3333

3434
rng = np.random.RandomState(0)
@@ -64,9 +64,9 @@
6464
X_missing = X_full.copy()
6565
X_missing[np.where(missing_samples)[0], missing_features] = 0
6666
y_missing = y_full.copy()
67-
estimator = Pipeline([("imputer", Imputer(missing_values=0,
68-
strategy="mean",
69-
axis=0)),
67+
estimator = Pipeline([("imputer", SimpleImputer(missing_values=0,
68+
strategy="mean",
69+
axis=0)),
7070
("forest", RandomForestRegressor(random_state=0,
7171
n_estimators=100))])
7272
score = cross_val_score(estimator, X_missing, y_missing).mean()

dev/_downloads/scikit-learn-docs.pdf

5.49 KB
Binary file not shown.
329 Bytes
329 Bytes
5 Bytes
5 Bytes
-402 Bytes

0 commit comments

Comments
 (0)