make list-like hover_data more robust

nicolaskruchten · nicolaskruchten · commit f8f08803f92f · 2020-05-24T13:35:20.000-04:00
diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -1022,6 +1022,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                 args["hover_data"][k] = (True, args["hover_data"][k])
             if not isinstance(args["hover_data"][k], tuple):
                 args["hover_data"][k] = (args["hover_data"][k], None)
+            if df_provided and args["hover_data"][k][1] is not None and k in df_input:
+                raise ValueError(
+                    "Ambiguous input: values for '%s' appear both in hover_data and data_frame"
+                    % k
+                )
     # Loop over possible arguments
     for field_name in all_attrables:
         # Massaging variables
@@ -1074,19 +1079,36 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                     and hover_data_is_dict
                     and args["hover_data"][str(argument)][1] is not None
                 ):
+                    # hover_data supplies the values for this key directly;
+                    # the earlier check ensures no name clash with data_frame
                     col_name = str(argument)
-                    df_output[col_name] = args["hover_data"][col_name][1]
-                    continue
-
-                if not df_provided:
+                    real_argument = args["hover_data"][col_name][1]
+
+                    if length and len(real_argument) != length:
+                        raise ValueError(
+                            "All arguments should have the same length. "
+                            "The length of hover_data key `%s` is %d, whereas the "
+                            "length of previously-processed arguments %s is %d"
+                            % (
+                                argument,
+                                len(real_argument),
+                                str(list(df_output.columns)),
+                                length,
+                            )
+                        )
+                    if hasattr(real_argument, "values"):
+                        df_output[col_name] = real_argument.values
+                    else:
+                        df_output[col_name] = np.array(real_argument)
+                elif not df_provided:
                     raise ValueError(
                         "String or int arguments are only possible when a "
                         "DataFrame or an array is provided in the `data_frame` "
                         "argument. No DataFrame was provided, but argument "
                         "'%s' is of type str or int." % field
                     )
                 # Check validity of column name
-                if argument not in df_input.columns:
+                elif argument not in df_input.columns:
                     if wide_mode and argument in (value_name, var_name):
                         continue
                     else:
@@ -1098,20 +1120,21 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         if argument == "index":
                             err_msg += "\n To use the index, pass it in directly as `df.index`."
                         raise ValueError(err_msg)
-                if length and len(df_input[argument]) != length:
+                elif length and len(df_input[argument]) != length:
                     raise ValueError(
                         "All arguments should have the same length. "
                         "The length of column argument `df[%s]` is %d, whereas the "
-                        "length of previous arguments %s is %d"
+                        "length of  previously-processed arguments %s is %d"
                         % (
                             field,
                             len(df_input[argument]),
                             str(list(df_output.columns)),
                             length,
                         )
                     )
-                col_name = str(argument)
-                df_output[col_name] = df_input[argument].values
+                else:
+                    col_name = str(argument)
+                    df_output[col_name] = df_input[argument].values
             # ----------------- argument is a column / array / list.... -------
             else:
                 if df_provided and hasattr(argument, "name"):
@@ -1137,7 +1160,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                     raise ValueError(
                         "All arguments should have the same length. "
                         "The length of argument `%s` is %d, whereas the "
-                        "length of previous arguments %s is %d"
+                        "length of  previously-processed arguments %s is %d"
                         % (field, len(argument), str(list(df_output.columns)), length)
                     )
                 if hasattr(argument, "values"):
diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py
@@ -2,7 +2,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-import plotly.graph_objects as go
 from collections import OrderedDict  # an OrderedDict is needed for Python 2
 
 
@@ -74,24 +73,69 @@ def test_newdatain_hover_data():
 
 
 def test_fail_wrong_column():
-    with pytest.raises(ValueError):
-        fig = px.scatter(
+    with pytest.raises(ValueError) as err_msg:
+        px.scatter(
             {"a": [1, 2], "b": [3, 4], "c": [2, 1]},
             x="a",
             y="b",
             hover_data={"d": True},
         )
-    with pytest.raises(ValueError):
-        fig = px.scatter(
+    assert (
+        "Value of 'hover_data_0' is not the name of a column in 'data_frame'."
+        in str(err_msg.value)
+    )
+    with pytest.raises(ValueError) as err_msg:
+        px.scatter(
             {"a": [1, 2], "b": [3, 4], "c": [2, 1]},
             x="a",
             y="b",
             hover_data={"d": ":.1f"},
         )
-    with pytest.raises(ValueError):
-        fig = px.scatter(
+    assert (
+        "Value of 'hover_data_0' is not the name of a column in 'data_frame'."
+        in str(err_msg.value)
+    )
+    with pytest.raises(ValueError) as err_msg:
+        px.scatter(
+            {"a": [1, 2], "b": [3, 4], "c": [2, 1]},
+            x="a",
+            y="b",
+            hover_data={"d": [3, 4, 5]},  # d is too long
+        )
+    assert (
+        "All arguments should have the same length. The length of hover_data key `d` is 3"
+        in str(err_msg.value)
+    )
+    with pytest.raises(ValueError) as err_msg:
+        px.scatter(
+            {"a": [1, 2], "b": [3, 4], "c": [2, 1]},
+            x="a",
+            y="b",
+            hover_data={"d": (True, [3, 4, 5])},  # d is too long
+        )
+    assert (
+        "All arguments should have the same length. The length of hover_data key `d` is 3"
+        in str(err_msg.value)
+    )
+    with pytest.raises(ValueError) as err_msg:
+        px.scatter(
+            {"a": [1, 2], "b": [3, 4], "c": [2, 1]},
+            x="a",
+            y="b",
+            hover_data={"c": [3, 4]},
+        )
+    assert (
+        "Ambiguous input: values for 'c' appear both in hover_data and data_frame"
+        in str(err_msg.value)
+    )
+    with pytest.raises(ValueError) as err_msg:
+        px.scatter(
             {"a": [1, 2], "b": [3, 4], "c": [2, 1]},
             x="a",
             y="b",
-            hover_data={"d": (True, [3, 4, 5])},
+            hover_data={"c": (True, [3, 4])},
         )
+    assert (
+        "Ambiguous input: values for 'c' appear both in hover_data and data_frame"
+        in str(err_msg.value)
+    )
diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py
@@ -675,7 +675,8 @@ def append_special_case(df_in, args_in, args_expect, df_expect):
         dict(c=[7, 8, 7, 8], d=["a", "a", "b", "b"], value=[1, 2, 3, 4])
     ),
 )
-# y = columns
+
+# y = columns subset
 df = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=[7, 8])
 df.index.name = "c"
 df.columns.name = "d"
@@ -686,6 +687,27 @@ def append_special_case(df_in, args_in, args_expect, df_expect):
     df_expect=pd.DataFrame(dict(c=[7, 8], variable=["a", "a"], value=[1, 2])),
 )
 
+# list-like hover_data
+df = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=[7, 8])
+df.index.name = "c"
+df.columns.name = "d"
+append_special_case(
+    df_in=df,
+    args_in=dict(x=None, y=None, color=None, hover_data=dict(new=[5, 6])),
+    args_expect=dict(
+        x="c",
+        y="value",
+        color="d",
+        orientation="v",
+        hover_data=dict(new=(True, [5, 6])),
+    ),
+    df_expect=pd.DataFrame(
+        dict(
+            c=[7, 8, 7, 8], d=["a", "a", "b", "b"], new=[5, 6, 5, 6], value=[1, 2, 3, 4]
+        )
+    ),
+)
+
 
 @pytest.mark.parametrize("df_in, args_in, args_expect, df_expect", special_cases)
 def test_wide_mode_internal_special_cases(df_in, args_in, args_expect, df_expect):