
Commit dae37ed

BUG: don't coerce reductions in a groupby always to datetimes; only when we have
actual Timestamps in the data (GH5788, GH5789)
1 parent b6ec4e2 · commit dae37ed

File tree

2 files changed: +37 -3 lines


pandas/core/groupby.py

Lines changed: 6 additions & 2 deletions

@@ -2268,8 +2268,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                     columns=columns).convert_objects(convert_dates=cd, convert_numeric=True)
 
             else:
-                return Series(values, index=key_index).convert_objects(
-                    convert_dates='coerce',convert_numeric=True)
+                # only coerce dates if we find at least 1 datetime
+                cd = False
+                if any([ isinstance(v,Timestamp) for v in values ]):
+                    cd = 'coerce'
+                return Series(values, index=key_index).convert_objects(convert_dates=cd)
+
         else:
             # Handle cases like BinGrouper
             return self._concat_objects(keys, values,
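For readers skimming the diff, the guard it introduces can be illustrated with a small standalone sketch: coerce the wrapped reduction to datetimes only when at least one reduced value is an actual Timestamp, and otherwise leave the values alone. The helper name wrap_reduction below is hypothetical, and because convert_objects has since been removed from pandas, pd.to_datetime(..., errors='coerce') stands in for it here.

from pandas import Series, Timestamp
import pandas as pd

def wrap_reduction(values, key_index):
    # Hypothetical stand-in for the patched branch of _wrap_applied_output:
    # coerce to datetimes only when the reduction actually produced Timestamps.
    result = Series(values, index=key_index)
    if any(isinstance(v, Timestamp) for v in values):
        # plays the role of convert_objects(convert_dates='coerce') on modern pandas
        result = pd.to_datetime(result, errors='coerce')
    return result

# Plain strings are left untouched (the GH5789 symptom) ...
print(wrap_reduction(['00:00', '02:00'], ['2011.05.16', '2011.05.17']).dtype)  # object
# ... while genuine Timestamps still come back as a datetime Series (GH5788).
print(wrap_reduction([Timestamp('2011-05-16'), Timestamp('2011-05-17')], ['a', 'b']).dtype)  # datetime64[ns]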

pandas/tests/test_groupby.py

Lines changed: 31 additions & 1 deletion

@@ -28,7 +28,7 @@
 import pandas.core.nanops as nanops
 
 import pandas.util.testing as tm
-
+import pandas as pd
 
 def commonSetUp(self):
     self.dateRange = bdate_range('1/1/2005', periods=250)

@@ -481,6 +481,36 @@ def test_apply_describe_bug(self):
         grouped = self.mframe.groupby(level='first')
         result = grouped.describe()  # it works!
 
+    def test_apply_issues(self):
+        # GH 5788
+
+        s="""2011.05.16,00:00,1.40893
+2011.05.16,01:00,1.40760
+2011.05.16,02:00,1.40750
+2011.05.16,03:00,1.40649
+2011.05.17,02:00,1.40893
+2011.05.17,03:00,1.40760
+2011.05.17,04:00,1.40750
+2011.05.17,05:00,1.40649
+2011.05.18,02:00,1.40893
+2011.05.18,03:00,1.40760
+2011.05.18,04:00,1.40750
+2011.05.18,05:00,1.40649"""
+
+        df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value'], parse_dates=[['date', 'time']])
+        df = df.set_index('date_time')
+
+        expected = df.groupby(df.index.date).idxmax()
+        result = df.groupby(df.index.date).apply(lambda x: x.idxmax())
+        assert_frame_equal(result,expected)
+
+        # GH 5789
+        # don't auto coerce dates
+        df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value'])
+        expected = Series(['00:00','02:00','02:00'],index=['2011.05.16','2011.05.17','2011.05.18'])
+        result = df.groupby('date').apply(lambda x: x['time'][x['value'].idxmax()])
+        assert_series_equal(result,expected)
+
     def test_len(self):
         df = tm.makeTimeDataFrame()
         grouped = df.groupby([lambda x: x.year,
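As a quick sanity check outside the test suite, the GH 5789 half of the new test boils down to something like the snippet below (abbreviated data; on very recent pandas the groupby(...).apply call may emit a deprecation warning about the grouping column, but the behaviour is the same): because the applied reduction returns plain time strings, the grouped result should keep dtype object rather than being coerced to datetimes.

from io import StringIO
import pandas as pd

s = """2011.05.16,00:00,1.40893
2011.05.16,01:00,1.40760
2011.05.17,02:00,1.40893
2011.05.17,03:00,1.40760
2011.05.18,02:00,1.40893
2011.05.18,03:00,1.40760"""

df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value'])

# Pick, per date, the time at which 'value' is largest; the result is a
# Series of strings indexed by date.
result = df.groupby('date').apply(lambda x: x['time'][x['value'].idxmax()])
print(result)
print(result.dtype)  # object, not datetime64[ns]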
