Skip to content

Commit 6b15fbb

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 52aaf8269235d4965022b8ec970243bdcb59c9a7
1 parent 5e8c017 commit 6b15fbb

File tree

1,026 files changed

+3143
-3122
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,026 files changed

+3143
-3122
lines changed
-51 Bytes
Binary file not shown.
-50 Bytes
Binary file not shown.

dev/_downloads/plot_missing_values.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import Imputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n dtype=np.bool),\n np.ones(n_missing_samples,\n dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", Imputer(missing_values=0,\n strategy=\"mean\",\n axis=0)),\n (\"forest\", RandomForestRegressor(random_state=0,\n n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
29+
"import numpy as np\n\nfrom sklearn.datasets import load_boston\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import Imputer\nfrom sklearn.model_selection import cross_val_score\n\nrng = np.random.RandomState(0)\n\ndataset = load_boston()\nX_full, y_full = dataset.data, dataset.target\nn_samples = X_full.shape[0]\nn_features = X_full.shape[1]\n\n# Estimate the score on the entire dataset, with no missing values\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_full, y_full).mean()\nprint(\"Score with the entire dataset = %.2f\" % score)\n\n# Add missing values in 75% of the lines\nmissing_rate = 0.75\nn_missing_samples = int(np.floor(n_samples * missing_rate))\nmissing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,\n dtype=np.bool),\n np.ones(n_missing_samples,\n dtype=np.bool)))\nrng.shuffle(missing_samples)\nmissing_features = rng.randint(0, n_features, n_missing_samples)\n\n# Estimate the score without the lines containing missing values\nX_filtered = X_full[~missing_samples, :]\ny_filtered = y_full[~missing_samples]\nestimator = RandomForestRegressor(random_state=0, n_estimators=100)\nscore = cross_val_score(estimator, X_filtered, y_filtered).mean()\nprint(\"Score without the samples containing missing values = %.2f\" % score)\n\n# Estimate the score after imputation of the missing values\nX_missing = X_full.copy()\nX_missing[np.where(missing_samples)[0], missing_features] = 0\ny_missing = y_full.copy()\nestimator = Pipeline([(\"imputer\", Imputer(missing_values=0,\n strategy=\"mean\")),\n (\"forest\", RandomForestRegressor(random_state=0,\n n_estimators=100))])\nscore = cross_val_score(estimator, X_missing, y_missing).mean()\nprint(\"Score after imputation of the missing values = %.2f\" % score)"
3030
]
3131
}
3232
],

dev/_downloads/plot_missing_values.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@
6565
X_missing[np.where(missing_samples)[0], missing_features] = 0
6666
y_missing = y_full.copy()
6767
estimator = Pipeline([("imputer", Imputer(missing_values=0,
68-
strategy="mean",
69-
axis=0)),
68+
strategy="mean")),
7069
("forest", RandomForestRegressor(random_state=0,
7170
n_estimators=100))])
7271
score = cross_val_score(estimator, X_missing, y_missing).mean()

dev/_downloads/scikit-learn-docs.pdf

-62.2 KB
Binary file not shown.
-201 Bytes
-201 Bytes
26 Bytes
26 Bytes
278 Bytes

0 commit comments

Comments
 (0)