Examples for case 0b; fix get_n_splits and scorer wrapping in case 0a

jnothman · jnothman · commit 1eb2642beecf · 2020-08-04T22:51:35.000+10:00
diff --git a/slep006/cases_opt0a.py b/slep006/cases_opt0a.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from defs import (accuracy, group_cv, make_scorer, SelectKBest,
+from defs import (accuracy, group_cv, get_scorer, SelectKBest,
                   LogisticRegressionCV, cross_validate,
                   make_pipeline, X, y, my_groups, my_weights,
                   my_other_weights)
@@ -28,7 +28,7 @@ def split(self, X, y, groups=None):
 
     def get_n_splits(self, X, y, groups=None):
         groups = X[:, self.groups_idx]
-        return self.base_cv.split(unwrap_X(X), y, groups=groups)
+        return self.base_cv.get_n_splits(unwrap_X(X), y, groups=groups)
 
 
 wrapped_group_cv = WrappedGroupCV(group_cv)
@@ -39,11 +39,11 @@ def fit(self, X, y):
         return super().fit(unwrap_X(X), y, sample_weight=X[:, WEIGHT_IDX])
 
 
-weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])
+acc_scorer = get_scorer('accuracy')
 
 
 def wrapped_weighted_acc(est, X, y, sample_weight=None):
-    return weighted_acc(est, unwrap_X(X), y, sample_weight=X[:, WEIGHT_IDX])
+    return acc_scorer(est, unwrap_X(X), y, sample_weight=X[:, WEIGHT_IDX])
 
 
 lr = WrappedLogisticRegressionCV(
@@ -81,7 +81,7 @@ def fit(self, X, y):
 
 lr = WrappedLogisticRegressionCV(
     cv=wrapped_group_cv,
-    scoring=weighted_acc,
+    scoring=wrapped_weighted_acc,
 ).set_props_request(['sample_weight'])
 sel = UnweightedWrappedSelectKBest()
 pipe = make_pipeline(sel, lr)
diff --git a/slep006/cases_opt0b.py b/slep006/cases_opt0b.py
@@ -1,7 +1,91 @@
 import pandas as pd
-from defs import (accuracy, group_cv, make_scorer, SelectKBest,
+from defs import (accuracy, group_cv, get_scorer, SelectKBest,
                   LogisticRegressionCV, cross_validate,
                   make_pipeline, X, y, my_groups, my_weights,
                   my_other_weights)
 
-# TODO
+X = pd.DataFrame(X)
+MY_GROUPS = pd.Series(my_groups)
+MY_WEIGHTS = pd.Series(my_weights)
+MY_OTHER_WEIGHTS = pd.Series(my_other_weights)
+
+# %%
+# Case A: weighted scoring and fitting
+
+
+class WrappedGroupCV:
+    def __init__(self, base_cv):
+        self.base_cv = base_cv
+
+    def split(self, X, y, groups=None):
+        return self.base_cv.split(X, y, groups=MY_GROUPS.loc[X.index])
+
+    def get_n_splits(self, X, y, groups=None):
+        return self.base_cv.get_n_splits(X, y, groups=MY_GROUPS.loc[X.index])
+
+
+wrapped_group_cv = WrappedGroupCV(group_cv)
+
+
+class WeightedLogisticRegressionCV(LogisticRegressionCV):
+    def fit(self, X, y):
+        return super().fit(X, y, sample_weight=MY_WEIGHTS.loc[X.index])
+
+
+acc_scorer = get_scorer('accuracy')
+
+
+def wrapped_weighted_acc(est, X, y, sample_weight=None):
+    return acc_scorer(est, X, y, sample_weight=MY_WEIGHTS.loc[X.index])
+
+
+lr = WeightedLogisticRegressionCV(
+    cv=wrapped_group_cv,
+    scoring=wrapped_weighted_acc,
+).set_props_request(['sample_weight'])
+cross_validate(lr, X, y,
+               cv=wrapped_group_cv,
+               scoring=wrapped_weighted_acc)
+
+# %%
+# Case B: weighted scoring and unweighted fitting
+
+lr = LogisticRegressionCV(
+    cv=wrapped_group_cv,
+    scoring=wrapped_weighted_acc,
+).set_props_request(['sample_weight'])
+cross_validate(lr, X, y,
+               cv=wrapped_group_cv,
+               scoring=wrapped_weighted_acc)
+
+
+# %%
+# Case C: unweighted feature selection
+
+lr = WeightedLogisticRegressionCV(
+    cv=wrapped_group_cv,
+    scoring=wrapped_weighted_acc,
+).set_props_request(['sample_weight'])
+sel = SelectKBest()
+pipe = make_pipeline(sel, lr)
+cross_validate(pipe, X, y,
+               cv=wrapped_group_cv,
+               scoring=wrapped_weighted_acc)
+
+# %%
+# Case D: different scoring and fitting weights
+
+
+def other_weighted_acc(est, X, y, sample_weight=None):
+    return acc_scorer(est, X, y, sample_weight=MY_OTHER_WEIGHTS.loc[X.index])
+
+
+lr = WeightedLogisticRegressionCV(
+    cv=wrapped_group_cv,
+    scoring=other_weighted_acc,
+).set_props_request(['sample_weight'])
+sel = SelectKBest()
+pipe = make_pipeline(sel, lr)
+cross_validate(pipe, X, y,
+               cv=wrapped_group_cv,
+               scoring=other_weighted_acc)