Skip to content

Commit d575bc1

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 84a7a7a80249be8d7881777a26f43cb29b2d7fe4
1 parent 06aaa50 commit d575bc1

File tree

1,366 files changed

+4853
-4791
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,366 files changed

+4853
-4791
lines changed
Binary file not shown.
Binary file not shown.

dev/_downloads/7012baed63b9a27f121bae611b8285c2/plot_cyclical_feature_engineering.ipynb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -297,7 +297,7 @@
297297
},
298298
"outputs": [],
299299
"source": [
300-
"from sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import OrdinalEncoder\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nfrom sklearn.model_selection import cross_validate\n\n\ncategorical_columns = [\n \"weather\",\n \"season\",\n \"holiday\",\n \"workingday\",\n]\ncategories = [\n [\"clear\", \"misty\", \"rain\"],\n [\"spring\", \"summer\", \"fall\", \"winter\"],\n [\"False\", \"True\"],\n [\"False\", \"True\"],\n]\nordinal_encoder = OrdinalEncoder(categories=categories)\n\n\ngbrt_pipeline = make_pipeline(\n ColumnTransformer(\n transformers=[\n (\"categorical\", ordinal_encoder, categorical_columns),\n ],\n remainder=\"passthrough\",\n ),\n HistGradientBoostingRegressor(\n categorical_features=range(4),\n ),\n)"
300+
"from sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import OrdinalEncoder\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nfrom sklearn.model_selection import cross_validate\n\n\ncategorical_columns = [\n \"weather\",\n \"season\",\n \"holiday\",\n \"workingday\",\n]\ncategories = [\n [\"clear\", \"misty\", \"rain\"],\n [\"spring\", \"summer\", \"fall\", \"winter\"],\n [\"False\", \"True\"],\n [\"False\", \"True\"],\n]\nordinal_encoder = OrdinalEncoder(categories=categories)\n\n\ngbrt_pipeline = make_pipeline(\n ColumnTransformer(\n transformers=[\n (\"categorical\", ordinal_encoder, categorical_columns),\n ],\n remainder=\"passthrough\",\n # Use short feature names to make it easier to specify the categorical\n # variables in the HistGradientBoostingRegressor in the next\n # step of the pipeline.\n verbose_feature_names_out=False,\n ),\n HistGradientBoostingRegressor(\n categorical_features=categorical_columns,\n ),\n).set_output(transform=\"pandas\")"
301301
]
302302
},
303303
{

dev/_downloads/9fcbbc59ab27a20d07e209a711ac4f50/plot_cyclical_feature_engineering.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -209,11 +209,15 @@
209209
("categorical", ordinal_encoder, categorical_columns),
210210
],
211211
remainder="passthrough",
212+
# Use short feature names to make it easier to specify the categorical
213+
# variables in the HistGradientBoostingRegressor in the next
214+
# step of the pipeline.
215+
verbose_feature_names_out=False,
212216
),
213217
HistGradientBoostingRegressor(
214-
categorical_features=range(4),
218+
categorical_features=categorical_columns,
215219
),
216-
)
220+
).set_output(transform="pandas")
217221

218222
# %%
219223
#

dev/_downloads/acc6f0183d4b7293ae5914724f55bc28/plot_gradient_boosting_categorical.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,8 @@
6262
X = X[categorical_columns_subset + numerical_columns_subset]
6363
X[categorical_columns_subset] = X[categorical_columns_subset].astype("category")
6464

65-
n_categorical_features = X.select_dtypes(include="category").shape[1]
65+
categorical_columns = X.select_dtypes(include="category").columns
66+
n_categorical_features = len(categorical_columns)
6667
n_numerical_features = X.select_dtypes(include="number").shape[1]
6768

6869
print(f"Number of samples: {X.shape[0]}")
@@ -122,6 +123,10 @@
122123
make_column_selector(dtype_include="category"),
123124
),
124125
remainder="passthrough",
126+
# Use short feature names to make it easier to specify the categorical
127+
# variables in the HistGradientBoostingRegressor in the next step
128+
# of the pipeline.
129+
verbose_feature_names_out=False,
125130
)
126131

127132
hist_ordinal = make_pipeline(
@@ -146,13 +151,13 @@
146151
# The ordinal encoder will first output the categorical features, and then the
147152
# continuous (passed-through) features
148153

149-
categorical_mask = [True] * n_categorical_features + [False] * n_numerical_features
150154
hist_native = make_pipeline(
151155
ordinal_encoder,
152156
HistGradientBoostingRegressor(
153-
random_state=42, categorical_features=categorical_mask
157+
random_state=42,
158+
categorical_features=categorical_columns,
154159
),
155-
)
160+
).set_output(transform="pandas")
156161

157162
# %%
158163
# Model comparison

dev/_downloads/cd5de29451c4f8624f47d18def81839c/plot_gradient_boosting_categorical.ipynb

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
},
3434
"outputs": [],
3535
"source": [
36-
"from sklearn.datasets import fetch_openml\n\nX, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True, parser=\"pandas\")\n\n# Select only a subset of features of X to make the example faster to run\ncategorical_columns_subset = [\n \"BldgType\",\n \"GarageFinish\",\n \"LotConfig\",\n \"Functional\",\n \"MasVnrType\",\n \"HouseStyle\",\n \"FireplaceQu\",\n \"ExterCond\",\n \"ExterQual\",\n \"PoolQC\",\n]\n\nnumerical_columns_subset = [\n \"3SsnPorch\",\n \"Fireplaces\",\n \"BsmtHalfBath\",\n \"HalfBath\",\n \"GarageCars\",\n \"TotRmsAbvGrd\",\n \"BsmtFinSF1\",\n \"BsmtFinSF2\",\n \"GrLivArea\",\n \"ScreenPorch\",\n]\n\nX = X[categorical_columns_subset + numerical_columns_subset]\nX[categorical_columns_subset] = X[categorical_columns_subset].astype(\"category\")\n\nn_categorical_features = X.select_dtypes(include=\"category\").shape[1]\nn_numerical_features = X.select_dtypes(include=\"number\").shape[1]\n\nprint(f\"Number of samples: {X.shape[0]}\")\nprint(f\"Number of features: {X.shape[1]}\")\nprint(f\"Number of categorical features: {n_categorical_features}\")\nprint(f\"Number of numerical features: {n_numerical_features}\")"
36+
"from sklearn.datasets import fetch_openml\n\nX, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True, parser=\"pandas\")\n\n# Select only a subset of features of X to make the example faster to run\ncategorical_columns_subset = [\n \"BldgType\",\n \"GarageFinish\",\n \"LotConfig\",\n \"Functional\",\n \"MasVnrType\",\n \"HouseStyle\",\n \"FireplaceQu\",\n \"ExterCond\",\n \"ExterQual\",\n \"PoolQC\",\n]\n\nnumerical_columns_subset = [\n \"3SsnPorch\",\n \"Fireplaces\",\n \"BsmtHalfBath\",\n \"HalfBath\",\n \"GarageCars\",\n \"TotRmsAbvGrd\",\n \"BsmtFinSF1\",\n \"BsmtFinSF2\",\n \"GrLivArea\",\n \"ScreenPorch\",\n]\n\nX = X[categorical_columns_subset + numerical_columns_subset]\nX[categorical_columns_subset] = X[categorical_columns_subset].astype(\"category\")\n\ncategorical_columns = X.select_dtypes(include=\"category\").columns\nn_categorical_features = len(categorical_columns)\nn_numerical_features = X.select_dtypes(include=\"number\").shape[1]\n\nprint(f\"Number of samples: {X.shape[0]}\")\nprint(f\"Number of features: {X.shape[1]}\")\nprint(f\"Number of categorical features: {n_categorical_features}\")\nprint(f\"Number of numerical features: {n_numerical_features}\")"
3737
]
3838
},
3939
{
@@ -87,7 +87,7 @@
8787
},
8888
"outputs": [],
8989
"source": [
90-
"from sklearn.preprocessing import OrdinalEncoder\nimport numpy as np\n\nordinal_encoder = make_column_transformer(\n (\n OrdinalEncoder(handle_unknown=\"use_encoded_value\", unknown_value=np.nan),\n make_column_selector(dtype_include=\"category\"),\n ),\n remainder=\"passthrough\",\n)\n\nhist_ordinal = make_pipeline(\n ordinal_encoder, HistGradientBoostingRegressor(random_state=42)\n)"
90+
"from sklearn.preprocessing import OrdinalEncoder\nimport numpy as np\n\nordinal_encoder = make_column_transformer(\n (\n OrdinalEncoder(handle_unknown=\"use_encoded_value\", unknown_value=np.nan),\n make_column_selector(dtype_include=\"category\"),\n ),\n remainder=\"passthrough\",\n # Use short feature names to make it easier to specify the categorical\n # variables in the HistGradientBoostingRegressor in the next step\n # of the pipeline.\n verbose_feature_names_out=False,\n)\n\nhist_ordinal = make_pipeline(\n ordinal_encoder, HistGradientBoostingRegressor(random_state=42)\n)"
9191
]
9292
},
9393
{
@@ -105,7 +105,7 @@
105105
},
106106
"outputs": [],
107107
"source": [
108-
"# The ordinal encoder will first output the categorical features, and then the\n# continuous (passed-through) features\n\ncategorical_mask = [True] * n_categorical_features + [False] * n_numerical_features\nhist_native = make_pipeline(\n ordinal_encoder,\n HistGradientBoostingRegressor(\n random_state=42, categorical_features=categorical_mask\n ),\n)"
108+
"# The ordinal encoder will first output the categorical features, and then the\n# continuous (passed-through) features\n\nhist_native = make_pipeline(\n ordinal_encoder,\n HistGradientBoostingRegressor(\n random_state=42,\n categorical_features=categorical_columns,\n ),\n).set_output(transform=\"pandas\")"
109109
]
110110
},
111111
{

0 commit comments

Comments (0)