Add variant for solution 4

jnothman · jnothman · commit 70f7a951893c · 2020-08-04T22:51:35.000+10:00
diff --git a/slep006/cases_opt4b.py b/slep006/cases_opt4b.py
@@ -0,0 +1,78 @@
+from defs import (accuracy, group_cv, make_scorer, SelectKBest,
+                  LogisticRegressionCV, cross_validate,
+                  make_pipeline, X, y, my_groups, my_weights,
+                  my_other_weights)
+
+# %%
+# Case A: weighted scoring and fitting
+
+# Here we presume that GroupKFold requests `groups` by default.
+# We need to explicitly request weights in make_scorer and for
+# LogisticRegressionCV. Both of these consumers understand the meaning
+# of the key "sample_weight".
+
+weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])
+lr = LogisticRegressionCV(
+    cv=group_cv,
+    scoring=weighted_acc,
+).request_sample_weight(fit=['sample_weight'])
+cross_validate(lr, X, y, cv=group_cv,
+               props={'sample_weight': my_weights, 'groups': my_groups},
+               scoring=weighted_acc)
+
+# Error handling: if props={'sample_eight': my_weights, ...} were passed,
+# cross_validate would raise an error, since the misspelt key 'sample_eight'
+# is not requested by any of its children.
+
+# %%
+# Case B: weighted scoring and unweighted fitting
+
+# Since LogisticRegressionCV requires that weights explicitly be requested,
+# removing that request means the fitting is unweighted.
+
+weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])
+lr = LogisticRegressionCV(
+    cv=group_cv,
+    scoring=weighted_acc,
+)
+cross_validate(lr, X, y, cv=group_cv,
+               props={'sample_weight': my_weights, 'groups': my_groups},
+               scoring=weighted_acc)
+
+# %%
+# Case C: unweighted feature selection
+
+# Like LogisticRegressionCV, SelectKBest needs to request weights explicitly.
+# Here it does not request them.
+
+weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])
+lr = LogisticRegressionCV(
+    cv=group_cv,
+    scoring=weighted_acc,
+).request_sample_weight(fit=['sample_weight'])
+sel = SelectKBest()
+pipe = make_pipeline(sel, lr)
+cross_validate(pipe, X, y, cv=group_cv,
+               props={'sample_weight': my_weights, 'groups': my_groups},
+               scoring=weighted_acc)
+
+# %%
+# Case D: different scoring and fitting weights
+
+# Despite make_scorer and LogisticRegressionCV both expecting a key
+# sample_weight, we can use aliases to pass different weights to different
+# consumers.
+
+weighted_acc = make_scorer(accuracy,
+                           request_props={'scoring_weight': 'sample_weight'})
+lr = LogisticRegressionCV(
+    cv=group_cv,
+    scoring=weighted_acc,
+).request_sample_weight(fit='fitting_weight')
+cross_validate(lr, X, y, cv=group_cv,
+               props={
+                    'scoring_weight': my_weights,
+                    'fitting_weight': my_other_weights,
+                    'groups': my_groups,
+               },
+               scoring=weighted_acc)
diff --git a/slep006/proposal.rst b/slep006/proposal.rst
@@ -79,7 +79,7 @@ Other related issues include: :issue:`1574`, :issue:`2630`, :issue:`3524`,
 :issue:`4632`, :issue:`4652`, :issue:`4660`, :issue:`4696`, :issue:`6322`,
 :issue:`7112`, :issue:`7646`, :issue:`7723`, :issue:`8127`, :issue:`8158`,
 :issue:`8710`, :issue:`8950`, :issue:`11429`, :issue:`12052`, :issue:`15282`,
-:issues:`15370`, :issue:`15425`.
+:issue:`15370`, :issue:`15425`, :issue:`18028`.
 
 Desiderata
 ----------
@@ -368,6 +368,14 @@ Disadvantages:
   `set_props_request` method (instead of the `request_props` constructor
   parameter approach) such that all legacy base estimators are
   automatically equipped.
+* Aliasing is a bit confusing in this design, in that the consumer still
+  accepts the fit param by its original name (e.g. `sample_weight`) even if
+  its request specifies a different key to be given to the router (e.g.
+  `fit_sample_weight`). This design has the advantage that the handling of
+  props within a consumer is simple and unchanged; the complexity lies in
+  how the router forwards the data to the consumer, which may be conceptually
+  difficult for users to understand. (This may be acceptable for an advanced
+  feature.)
 * For estimators to be cloned, this request information needs to be cloned with
   it. This implies one of: the request information be stored as a constructor
   paramerter; or `clone` is extended to explicitly copy request information.
@@ -389,6 +397,22 @@ Test cases:
 
 .. literalinclude:: cases_opt4.py
 
+Extensions and alternatives to the syntax considered while working on
+:pr:`16079`:
+
+* `set_props_request` and `get_props_request` have lists of props requested
+  **for each method**, i.e. fit, score, transform, predict and perhaps others.
+* `set_props_request` could be replaced by one method (or parameter) per prop
+  consumed, representing that prop's routing. For example, an estimator that
+  consumes `sample_weight` would have a `request_sample_weight` method. One of
+  the difficulties of this approach is automatically introducing
+  `request_sample_weight` into classes inheriting from BaseEstimator without
+  too much magic (e.g. meta-classes, which might be the simplest solution).
+
+These are demonstrated together in the following:
+
+.. literalinclude:: cases_opt4b.py
+
 Naming
 ------