Skip to content

Commit 4fc0d24

Browse files
committed
Pushing the docs to dev/ for branch: main, commit eb309c471a9b312918892832ad343b008bad3b71
1 parent 08b23da commit 4fc0d24

File tree

1,239 files changed

+4317
-4308
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,239 files changed

+4317
-4308
lines changed
Binary file not shown.
Binary file not shown.

dev/_downloads/98345ee267d0372eda8faf906905730e/plot_missing_values.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,10 @@
5252

5353
X_diabetes, y_diabetes = load_diabetes(return_X_y=True)
5454
X_california, y_california = fetch_california_housing(return_X_y=True)
55-
X_california = X_california[:400]
56-
y_california = y_california[:400]
55+
X_california = X_california[:300]
56+
y_california = y_california[:300]
57+
X_diabetes = X_diabetes[:300]
58+
y_diabetes = y_diabetes[:300]
5759

5860

5961
def add_missing_values(X_full, y_full):
@@ -98,7 +100,7 @@ def add_missing_values(X_full, y_full):
98100
from sklearn.pipeline import make_pipeline
99101

100102

101-
N_SPLITS = 5
103+
N_SPLITS = 4
102104
regressor = RandomForestRegressor(random_state=0)
103105

104106
# %%
@@ -231,7 +233,8 @@ def get_impute_iterative(X_missing, y_missing):
231233
missing_values=np.nan,
232234
add_indicator=True,
233235
random_state=0,
234-
n_nearest_features=5,
236+
n_nearest_features=3,
237+
max_iter=1,
235238
sample_posterior=True,
236239
)
237240
iterative_impute_scores = get_scores_for_imputer(imputer, X_missing, y_missing)

dev/_downloads/a440a8b10138c855100ed5820fdb36b6/plot_missing_values.ipynb

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444
},
4545
"outputs": [],
4646
"source": [
47-
"import numpy as np\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.datasets import load_diabetes\n\n\nrng = np.random.RandomState(42)\n\nX_diabetes, y_diabetes = load_diabetes(return_X_y=True)\nX_california, y_california = fetch_california_housing(return_X_y=True)\nX_california = X_california[:400]\ny_california = y_california[:400]\n\n\ndef add_missing_values(X_full, y_full):\n n_samples, n_features = X_full.shape\n\n # Add missing values in 75% of the lines\n missing_rate = 0.75\n n_missing_samples = int(n_samples * missing_rate)\n\n missing_samples = np.zeros(n_samples, dtype=bool)\n missing_samples[:n_missing_samples] = True\n\n rng.shuffle(missing_samples)\n missing_features = rng.randint(0, n_features, n_missing_samples)\n X_missing = X_full.copy()\n X_missing[missing_samples, missing_features] = np.nan\n y_missing = y_full.copy()\n\n return X_missing, y_missing\n\n\nX_miss_california, y_miss_california = add_missing_values(X_california, y_california)\n\nX_miss_diabetes, y_miss_diabetes = add_missing_values(X_diabetes, y_diabetes)"
47+
"import numpy as np\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.datasets import load_diabetes\n\n\nrng = np.random.RandomState(42)\n\nX_diabetes, y_diabetes = load_diabetes(return_X_y=True)\nX_california, y_california = fetch_california_housing(return_X_y=True)\nX_california = X_california[:300]\ny_california = y_california[:300]\nX_diabetes = X_diabetes[:300]\ny_diabetes = y_diabetes[:300]\n\n\ndef add_missing_values(X_full, y_full):\n n_samples, n_features = X_full.shape\n\n # Add missing values in 75% of the lines\n missing_rate = 0.75\n n_missing_samples = int(n_samples * missing_rate)\n\n missing_samples = np.zeros(n_samples, dtype=bool)\n missing_samples[:n_missing_samples] = True\n\n rng.shuffle(missing_samples)\n missing_features = rng.randint(0, n_features, n_missing_samples)\n X_missing = X_full.copy()\n X_missing[missing_samples, missing_features] = np.nan\n y_missing = y_full.copy()\n\n return X_missing, y_missing\n\n\nX_miss_california, y_miss_california = add_missing_values(X_california, y_california)\n\nX_miss_diabetes, y_miss_diabetes = add_missing_values(X_diabetes, y_diabetes)"
4848
]
4949
},
5050
{
@@ -62,7 +62,7 @@
6262
},
6363
"outputs": [],
6464
"source": [
65-
"rng = np.random.RandomState(0)\n\nfrom sklearn.ensemble import RandomForestRegressor\n\n# To use the experimental IterativeImputer, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import make_pipeline\n\n\nN_SPLITS = 5\nregressor = RandomForestRegressor(random_state=0)"
65+
"rng = np.random.RandomState(0)\n\nfrom sklearn.ensemble import RandomForestRegressor\n\n# To use the experimental IterativeImputer, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import make_pipeline\n\n\nN_SPLITS = 4\nregressor = RandomForestRegressor(random_state=0)"
6666
]
6767
},
6868
{
@@ -170,7 +170,7 @@
170170
},
171171
"outputs": [],
172172
"source": [
173-
"def get_impute_iterative(X_missing, y_missing):\n imputer = IterativeImputer(\n missing_values=np.nan,\n add_indicator=True,\n random_state=0,\n n_nearest_features=5,\n sample_posterior=True,\n )\n iterative_impute_scores = get_scores_for_imputer(imputer, X_missing, y_missing)\n return iterative_impute_scores.mean(), iterative_impute_scores.std()\n\n\nmses_california[4], stds_california[4] = get_impute_iterative(\n X_miss_california, y_miss_california\n)\nmses_diabetes[4], stds_diabetes[4] = get_impute_iterative(\n X_miss_diabetes, y_miss_diabetes\n)\nx_labels.append(\"Iterative Imputation\")\n\nmses_diabetes = mses_diabetes * -1\nmses_california = mses_california * -1"
173+
"def get_impute_iterative(X_missing, y_missing):\n imputer = IterativeImputer(\n missing_values=np.nan,\n add_indicator=True,\n random_state=0,\n n_nearest_features=3,\n max_iter=1,\n sample_posterior=True,\n )\n iterative_impute_scores = get_scores_for_imputer(imputer, X_missing, y_missing)\n return iterative_impute_scores.mean(), iterative_impute_scores.std()\n\n\nmses_california[4], stds_california[4] = get_impute_iterative(\n X_miss_california, y_miss_california\n)\nmses_diabetes[4], stds_diabetes[4] = get_impute_iterative(\n X_miss_diabetes, y_miss_diabetes\n)\nx_labels.append(\"Iterative Imputation\")\n\nmses_diabetes = mses_diabetes * -1\nmses_california = mses_california * -1"
174174
]
175175
},
176176
{

dev/_downloads/scikit-learn-docs.zip

7.93 KB
Binary file not shown.
186 Bytes
-204 Bytes
-93 Bytes
-130 Bytes
71 Bytes

0 commit comments

Comments
 (0)