Skip to content

Commit cf39e6a

Browse files
committed
Examples for case 0a
1 parent 6772931 commit cf39e6a

File tree

1 file changed

+93
-1
lines changed

1 file changed

+93
-1
lines changed

slep006/cases_opt0a.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,98 @@
1+
import numpy as np
2+
13
from defs import (accuracy, group_cv, make_scorer, SelectKBest,
24
LogisticRegressionCV, cross_validate,
35
make_pipeline, X, y, my_groups, my_weights,
46
my_other_weights)
57

6-
# TODO
8+
# %%
9+
# Case A: weighted scoring and fitting
10+
11+
12+
# Column position (counted from the end of the augmented X) that holds the
# cross-validation group labels; used as the default ``groups_idx`` of
# WrappedGroupCV.
GROUPS_IDX = -1
13+
# Column position (counted from the end of the augmented X) that holds the
# per-sample weights read by the wrapped estimator and the wrapped scorer.
WEIGHT_IDX = -2
14+
15+
16+
def unwrap_X(X, n_props=2):
    """Return the feature columns of an augmented data matrix.

    In this file the sample properties (weights, groups) are appended to the
    feature matrix as trailing columns so they can be smuggled through
    ``cross_validate``. This helper strips those trailing columns again.

    Parameters
    ----------
    X : 2-D array
        Augmented matrix whose last ``n_props`` columns are sample
        properties rather than features.
    n_props : int, default=2
        Number of trailing property columns to drop. The default matches the
        weight + group layout used by the cases in this file.

    Returns
    -------
    2-D array
        ``X`` without its last ``n_props`` columns.

    Raises
    ------
    ValueError
        If ``n_props`` is negative or exceeds the number of columns of ``X``.
    """
    n_columns = X.shape[1]
    if not 0 <= n_props <= n_columns:
        raise ValueError(
            "n_props must be between 0 and %d, got %r" % (n_columns, n_props)
        )
    # Slice by an explicit stop index: ``X[:, :-n_props]`` would return an
    # empty array for n_props == 0.
    return X[:, :n_columns - n_props]
18+
19+
20+
class WrappedGroupCV:
    """Adapter that lets a group-aware splitter read its groups from ``X``.

    The group labels are expected to be stored as a column of the augmented
    data matrix. On every call the adapter extracts that column, strips the
    property columns with ``unwrap_X`` and delegates to ``base_cv``.

    Parameters
    ----------
    base_cv : object
        Splitter to delegate to; must provide ``split(X, y, groups=...)``
        and ``get_n_splits(X, y, groups=...)``.
    groups_idx : int, default=GROUPS_IDX
        Column index of the group labels inside the augmented ``X``.
    """

    def __init__(self, base_cv, groups_idx=GROUPS_IDX):
        self.base_cv = base_cv
        self.groups_idx = groups_idx

    def _groups_from(self, X):
        """Return the group-label column stored in the augmented ``X``."""
        return X[:, self.groups_idx]

    def split(self, X, y, groups=None):
        """Delegate to ``base_cv.split`` using the groups embedded in ``X``.

        The ``groups`` argument exists only for signature compatibility; it
        is ignored in favour of the column stored in ``X``.
        """
        return self.base_cv.split(unwrap_X(X), y,
                                  groups=self._groups_from(X))

    def get_n_splits(self, X, y, groups=None):
        """Return the number of splits reported by ``base_cv``.

        The ``groups`` argument is ignored, as in ``split``.
        """
        # Delegate to get_n_splits, not split: callers expect a count here,
        # not a generator of index pairs.
        return self.base_cv.get_n_splits(unwrap_X(X), y,
                                         groups=self._groups_from(X))
32+
33+
34+
# Splitter instance shared by all cases below: wraps the project-level
# ``group_cv`` so that group labels are taken from a column of X.
wrapped_group_cv = WrappedGroupCV(group_cv)
35+
36+
37+
class WrappedLogisticRegressionCV(LogisticRegressionCV):
    """LogisticRegressionCV that takes its fit weights from a column of X."""

    def fit(self, X, y):
        """Fit on the unwrapped features, weighted by the weight column.

        ``X`` is the augmented matrix; the column at ``WEIGHT_IDX`` is passed
        to the parent ``fit`` as ``sample_weight``.
        """
        features = unwrap_X(X)
        weights = X[:, WEIGHT_IDX]
        return super().fit(features, y, sample_weight=weights)
40+
41+
42+
# Accuracy scorer created with request_props=['sample_weight'].
# NOTE(review): presumably this makes the scorer request sample_weight at
# scoring time -- confirm against make_scorer in defs.
weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])
43+
44+
45+
def wrapped_weighted_acc(est, X, y, sample_weight=None):
    """Score ``est`` with ``weighted_acc`` using weights embedded in ``X``.

    ``X`` is the augmented matrix. The ``sample_weight`` argument is accepted
    but not used: the weights always come from the ``WEIGHT_IDX`` column.
    """
    features = unwrap_X(X)
    embedded_weights = X[:, WEIGHT_IDX]
    return weighted_acc(est, features, y, sample_weight=embedded_weights)
47+
48+
49+
# Weights and groups travel as the two trailing columns of X; the wrapped
# estimator, splitter and scorer each pull out the column they need.
lr = WrappedLogisticRegressionCV(
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
).set_props_request(['sample_weight'])
# column_stack accepts 1-D weight/group vectors as well as 2-D columns,
# whereas hstack requires every input to have the same dimensionality as X.
cross_validate(lr, np.column_stack([X, my_weights, my_groups]), y,
               cv=wrapped_group_cv,
               scoring=wrapped_weighted_acc)
56+
57+
# %%
58+
# Case B: weighted scoring and unweighted fitting
59+
60+
class UnweightedWrappedLogisticRegressionCV(LogisticRegressionCV):
    """LogisticRegressionCV that ignores the weight column when fitting."""

    def fit(self, X, y):
        """Fit on the unwrapped features without passing sample weights."""
        features = unwrap_X(X)
        return super().fit(features, y)
63+
64+
65+
# Fitting ignores the weight column, while the wrapped scorer still reads it.
lr = UnweightedWrappedLogisticRegressionCV(
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
).set_props_request(['sample_weight'])
# column_stack accepts 1-D weight/group vectors as well as 2-D columns,
# whereas hstack requires every input to have the same dimensionality as X.
cross_validate(lr, np.column_stack([X, my_weights, my_groups]), y,
               cv=wrapped_group_cv,
               scoring=wrapped_weighted_acc)
72+
73+
74+
# %%
75+
# Case C: unweighted feature selection
76+
77+
class UnweightedWrappedSelectKBest(SelectKBest):
    """SelectKBest whose ``fit`` sees only the unwrapped feature columns."""

    # NOTE(review): only ``fit`` is overridden here; any other inherited
    # method receives X exactly as the caller passes it -- confirm that this
    # is the intended behaviour inside a pipeline.
    def fit(self, X, y):
        """Fit the selector on the unwrapped features, without weights."""
        features = unwrap_X(X)
        return super().fit(features, y)
80+
81+
82+
# Weighted classifier behind an unweighted feature-selection step.
# NOTE(review): the inner scorer here is ``weighted_acc`` while the outer
# cross_validate call uses ``wrapped_weighted_acc`` -- confirm that this
# difference is intentional.
lr = WrappedLogisticRegressionCV(
    cv=wrapped_group_cv,
    scoring=weighted_acc,
).set_props_request(['sample_weight'])
sel = UnweightedWrappedSelectKBest()
pipe = make_pipeline(sel, lr)
# column_stack accepts 1-D weight/group vectors as well as 2-D columns,
# whereas hstack requires every input to have the same dimensionality as X.
cross_validate(pipe, np.column_stack([X, my_weights, my_groups]), y,
               cv=wrapped_group_cv,
               scoring=wrapped_weighted_acc)
91+
92+
# %%
93+
# Case D: different scoring and fitting weights
94+
95+
# Column position reserved for a separate scoring-weight column (Case D).
# Not referenced by any code in this file yet; see the TODO that follows.
SCORING_WEIGHT_IDX = -3
96+
97+
# TODO: proceed from here. Note that this change implies the need to add
98+
# a parameter to unwrap_X, since we will now append an additional column to X.

0 commit comments

Comments
 (0)