|
44 | 44 | },
|
45 | 45 | "outputs": [],
|
46 | 46 | "source": [
|
47 |
| - "import numpy as np\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.datasets import load_diabetes\n\n\nrng = np.random.RandomState(42)\n\nX_diabetes, y_diabetes = load_diabetes(return_X_y=True)\nX_california, y_california = fetch_california_housing(return_X_y=True)\nX_california = X_california[:400]\ny_california = y_california[:400]\n\n\ndef add_missing_values(X_full, y_full):\n n_samples, n_features = X_full.shape\n\n # Add missing values in 75% of the lines\n missing_rate = 0.75\n n_missing_samples = int(n_samples * missing_rate)\n\n missing_samples = np.zeros(n_samples, dtype=bool)\n missing_samples[:n_missing_samples] = True\n\n rng.shuffle(missing_samples)\n missing_features = rng.randint(0, n_features, n_missing_samples)\n X_missing = X_full.copy()\n X_missing[missing_samples, missing_features] = np.nan\n y_missing = y_full.copy()\n\n return X_missing, y_missing\n\n\nX_miss_california, y_miss_california = add_missing_values(X_california, y_california)\n\nX_miss_diabetes, y_miss_diabetes = add_missing_values(X_diabetes, y_diabetes)" |
| 47 | + "import numpy as np\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.datasets import load_diabetes\n\n\nrng = np.random.RandomState(42)\n\nX_diabetes, y_diabetes = load_diabetes(return_X_y=True)\nX_california, y_california = fetch_california_housing(return_X_y=True)\nX_california = X_california[:300]\ny_california = y_california[:300]\nX_diabetes = X_diabetes[:300]\ny_diabetes = y_diabetes[:300]\n\n\ndef add_missing_values(X_full, y_full):\n n_samples, n_features = X_full.shape\n\n # Add missing values in 75% of the lines\n missing_rate = 0.75\n n_missing_samples = int(n_samples * missing_rate)\n\n missing_samples = np.zeros(n_samples, dtype=bool)\n missing_samples[:n_missing_samples] = True\n\n rng.shuffle(missing_samples)\n missing_features = rng.randint(0, n_features, n_missing_samples)\n X_missing = X_full.copy()\n X_missing[missing_samples, missing_features] = np.nan\n y_missing = y_full.copy()\n\n return X_missing, y_missing\n\n\nX_miss_california, y_miss_california = add_missing_values(X_california, y_california)\n\nX_miss_diabetes, y_miss_diabetes = add_missing_values(X_diabetes, y_diabetes)" |
48 | 48 | ]
|
49 | 49 | },
|
50 | 50 | {
|
|
62 | 62 | },
|
63 | 63 | "outputs": [],
|
64 | 64 | "source": [
|
65 |
| - "rng = np.random.RandomState(0)\n\nfrom sklearn.ensemble import RandomForestRegressor\n\n# To use the experimental IterativeImputer, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import make_pipeline\n\n\nN_SPLITS = 5\nregressor = RandomForestRegressor(random_state=0)" |
| 65 | + "rng = np.random.RandomState(0)\n\nfrom sklearn.ensemble import RandomForestRegressor\n\n# To use the experimental IterativeImputer, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import make_pipeline\n\n\nN_SPLITS = 4\nregressor = RandomForestRegressor(random_state=0)" |
66 | 66 | ]
|
67 | 67 | },
|
68 | 68 | {
|
|
170 | 170 | },
|
171 | 171 | "outputs": [],
|
172 | 172 | "source": [
|
173 |
| - "def get_impute_iterative(X_missing, y_missing):\n imputer = IterativeImputer(\n missing_values=np.nan,\n add_indicator=True,\n random_state=0,\n n_nearest_features=5,\n sample_posterior=True,\n )\n iterative_impute_scores = get_scores_for_imputer(imputer, X_missing, y_missing)\n return iterative_impute_scores.mean(), iterative_impute_scores.std()\n\n\nmses_california[4], stds_california[4] = get_impute_iterative(\n X_miss_california, y_miss_california\n)\nmses_diabetes[4], stds_diabetes[4] = get_impute_iterative(\n X_miss_diabetes, y_miss_diabetes\n)\nx_labels.append(\"Iterative Imputation\")\n\nmses_diabetes = mses_diabetes * -1\nmses_california = mses_california * -1" |
| 173 | + "def get_impute_iterative(X_missing, y_missing):\n imputer = IterativeImputer(\n missing_values=np.nan,\n add_indicator=True,\n random_state=0,\n n_nearest_features=3,\n max_iter=1,\n sample_posterior=True,\n )\n iterative_impute_scores = get_scores_for_imputer(imputer, X_missing, y_missing)\n return iterative_impute_scores.mean(), iterative_impute_scores.std()\n\n\nmses_california[4], stds_california[4] = get_impute_iterative(\n X_miss_california, y_miss_california\n)\nmses_diabetes[4], stds_diabetes[4] = get_impute_iterative(\n X_miss_diabetes, y_miss_diabetes\n)\nx_labels.append(\"Iterative Imputation\")\n\nmses_diabetes = mses_diabetes * -1\nmses_california = mses_california * -1" |
174 | 174 | ]
|
175 | 175 | },
|
176 | 176 | {
|
|
0 commit comments