Skip to content

Commit 13c102b

Browse files
committed
Pushing the docs to dev/ for branch: main, commit e883b4b8c0b2ca858e6d7ddff52f6fa76981411f
1 parent 1c9a42e commit 13c102b

File tree

1,318 files changed

+7164
-7161
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,318 files changed

+7164
-7161
lines changed

dev/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: b8552c1840e582d8069bb272f008b1c5
3+
config: a8992306144a58818a4d64d1a9e413cf
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.
Binary file not shown.

dev/_downloads/86c888008757148890daaf43d664fa71/plot_tweedie_regression_insurance_claims.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,15 +222,16 @@ def score_estimator(
222222

223223
df = load_mtpl2()
224224

225-
# Note: filter out claims with zero amount, as the severity model
226-
# requires strictly positive target values.
227-
df.loc[(df["ClaimAmount"] == 0) & (df["ClaimNb"] >= 1), "ClaimNb"] = 0
228225

229226
# Correct for unreasonable observations (that might be data error)
230227
# and a few exceptionally large claim amounts
231228
df["ClaimNb"] = df["ClaimNb"].clip(upper=4)
232229
df["Exposure"] = df["Exposure"].clip(upper=1)
233230
df["ClaimAmount"] = df["ClaimAmount"].clip(upper=200000)
231+
# If the claim amount is 0, then we do not count it as a claim. The loss function
232+
# used by the severity model needs strictly positive claim amounts. This way
233+
# frequency and severity are more consistent with each other.
234+
df.loc[(df["ClaimAmount"] == 0) & (df["ClaimNb"] >= 1), "ClaimNb"] = 0
234235

235236
log_scale_transformer = make_pipeline(
236237
FunctionTransformer(func=np.log), StandardScaler()

dev/_downloads/a97bf662e52d471b04e1ab480c0ad7f2/plot_tweedie_regression_insurance_claims.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
},
4545
"outputs": [],
4646
"source": [
47-
"from sklearn.compose import ColumnTransformer\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import (\n FunctionTransformer,\n KBinsDiscretizer,\n OneHotEncoder,\n StandardScaler,\n)\n\ndf = load_mtpl2()\n\n# Note: filter out claims with zero amount, as the severity model\n# requires strictly positive target values.\ndf.loc[(df[\"ClaimAmount\"] == 0) & (df[\"ClaimNb\"] >= 1), \"ClaimNb\"] = 0\n\n# Correct for unreasonable observations (that might be data error)\n# and a few exceptionally large claim amounts\ndf[\"ClaimNb\"] = df[\"ClaimNb\"].clip(upper=4)\ndf[\"Exposure\"] = df[\"Exposure\"].clip(upper=1)\ndf[\"ClaimAmount\"] = df[\"ClaimAmount\"].clip(upper=200000)\n\nlog_scale_transformer = make_pipeline(\n FunctionTransformer(func=np.log), StandardScaler()\n)\n\ncolumn_trans = ColumnTransformer(\n [\n (\n \"binned_numeric\",\n KBinsDiscretizer(n_bins=10, subsample=int(2e5), random_state=0),\n [\"VehAge\", \"DrivAge\"],\n ),\n (\n \"onehot_categorical\",\n OneHotEncoder(),\n [\"VehBrand\", \"VehPower\", \"VehGas\", \"Region\", \"Area\"],\n ),\n (\"passthrough_numeric\", \"passthrough\", [\"BonusMalus\"]),\n (\"log_scaled_numeric\", log_scale_transformer, [\"Density\"]),\n ],\n remainder=\"drop\",\n)\nX = column_trans.fit_transform(df)\n\n# Insurances companies are interested in modeling the Pure Premium, that is\n# the expected total claim amount per unit of exposure for each policyholder\n# in their portfolio:\ndf[\"PurePremium\"] = df[\"ClaimAmount\"] / df[\"Exposure\"]\n\n# This can be indirectly approximated by a 2-step modeling: the product of the\n# Frequency times the average claim amount per claim:\ndf[\"Frequency\"] = df[\"ClaimNb\"] / df[\"Exposure\"]\ndf[\"AvgClaimAmount\"] = df[\"ClaimAmount\"] / np.fmax(df[\"ClaimNb\"], 1)\n\nwith pd.option_context(\"display.max_columns\", 15):\n print(df[df.ClaimAmount > 0].head())"
47+
"from sklearn.compose import ColumnTransformer\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import (\n FunctionTransformer,\n KBinsDiscretizer,\n OneHotEncoder,\n StandardScaler,\n)\n\ndf = load_mtpl2()\n\n\n# Correct for unreasonable observations (that might be data error)\n# and a few exceptionally large claim amounts\ndf[\"ClaimNb\"] = df[\"ClaimNb\"].clip(upper=4)\ndf[\"Exposure\"] = df[\"Exposure\"].clip(upper=1)\ndf[\"ClaimAmount\"] = df[\"ClaimAmount\"].clip(upper=200000)\n# If the claim amount is 0, then we do not count it as a claim. The loss function\n# used by the severity model needs strictly positive claim amounts. This way\n# frequency and severity are more consistent with each other.\ndf.loc[(df[\"ClaimAmount\"] == 0) & (df[\"ClaimNb\"] >= 1), \"ClaimNb\"] = 0\n\nlog_scale_transformer = make_pipeline(\n FunctionTransformer(func=np.log), StandardScaler()\n)\n\ncolumn_trans = ColumnTransformer(\n [\n (\n \"binned_numeric\",\n KBinsDiscretizer(n_bins=10, subsample=int(2e5), random_state=0),\n [\"VehAge\", \"DrivAge\"],\n ),\n (\n \"onehot_categorical\",\n OneHotEncoder(),\n [\"VehBrand\", \"VehPower\", \"VehGas\", \"Region\", \"Area\"],\n ),\n (\"passthrough_numeric\", \"passthrough\", [\"BonusMalus\"]),\n (\"log_scaled_numeric\", log_scale_transformer, [\"Density\"]),\n ],\n remainder=\"drop\",\n)\nX = column_trans.fit_transform(df)\n\n# Insurances companies are interested in modeling the Pure Premium, that is\n# the expected total claim amount per unit of exposure for each policyholder\n# in their portfolio:\ndf[\"PurePremium\"] = df[\"ClaimAmount\"] / df[\"Exposure\"]\n\n# This can be indirectly approximated by a 2-step modeling: the product of the\n# Frequency times the average claim amount per claim:\ndf[\"Frequency\"] = df[\"ClaimNb\"] / df[\"Exposure\"]\ndf[\"AvgClaimAmount\"] = df[\"ClaimAmount\"] / np.fmax(df[\"ClaimNb\"], 1)\n\nwith pd.option_context(\"display.max_columns\", 15):\n print(df[df.ClaimAmount > 0].head())"
4848
]
4949
},
5050
{

0 commit comments

Comments
 (0)