
Commit dae37ed

BUG: don't coerce reductions in a groupby always to datetimes; only when we have
actual Timestamps in the data (GH5788, GH5789)
1 parent b6ec4e2 · commit dae37ed

File tree

2 files changed: +37 -3 lines


pandas/core/groupby.py

Lines changed: 6 additions & 2 deletions

@@ -2268,8 +2268,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                     columns=columns).convert_objects(convert_dates=cd, convert_numeric=True)
 
             else:
-                return Series(values, index=key_index).convert_objects(
-                    convert_dates='coerce',convert_numeric=True)
+                # only coerce dates if we find at least 1 datetime
+                cd = False
+                if any([ isinstance(v,Timestamp) for v in values ]):
+                    cd = 'coerce'
+                return Series(values, index=key_index).convert_objects(convert_dates=cd)
+
         else:
             # Handle cases like BinGrouper
             return self._concat_objects(keys, values,
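For readers skimming the diff, the guard it introduces can be illustrated with a small standalone sketch: coerce the wrapped reduction to datetimes only when at least one reduced value is an actual Timestamp, and otherwise leave the values alone. The helper name wrap_reduction below is hypothetical, and because convert_objects has since been removed from pandas, pd.to_datetime(..., errors='coerce') stands in for it here.

from pandas import Series, Timestamp
import pandas as pd

def wrap_reduction(values, key_index):
    # Hypothetical stand-in for the patched branch of _wrap_applied_output:
    # coerce to datetimes only when the reduction actually produced Timestamps.
    result = Series(values, index=key_index)
    if any(isinstance(v, Timestamp) for v in values):
        # plays the role of convert_objects(convert_dates='coerce') on modern pandas
        result = pd.to_datetime(result, errors='coerce')
    return result

# Plain strings are left untouched (the GH5789 symptom) ...
print(wrap_reduction(['00:00', '02:00'], ['2011.05.16', '2011.05.17']).dtype)  # object
# ... while genuine Timestamps still come back as a datetime Series (GH5788).
print(wrap_reduction([Timestamp('2011-05-16'), Timestamp('2011-05-17')], ['a', 'b']).dtype)  # datetime64[ns]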

pandas/tests/test_groupby.py

Lines changed: 31 additions & 1 deletion

@@ -28,7 +28,7 @@
 import pandas.core.nanops as nanops
 
 import pandas.util.testing as tm
-
+import pandas as pd
 
 def commonSetUp(self):
     self.dateRange = bdate_range('1/1/2005', periods=250)

@@ -481,6 +481,36 @@ def test_apply_describe_bug(self):
         grouped = self.mframe.groupby(level='first')
         result = grouped.describe()  # it works!
 
+    def test_apply_issues(self):
+        # GH 5788
+
+        s="""2011.05.16,00:00,1.40893
+2011.05.16,01:00,1.40760
+2011.05.16,02:00,1.40750
+2011.05.16,03:00,1.40649
+2011.05.17,02:00,1.40893
+2011.05.17,03:00,1.40760
+2011.05.17,04:00,1.40750
+2011.05.17,05:00,1.40649
+2011.05.18,02:00,1.40893
+2011.05.18,03:00,1.40760
+2011.05.18,04:00,1.40750
+2011.05.18,05:00,1.40649"""
+
+        df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value'], parse_dates=[['date', 'time']])
+        df = df.set_index('date_time')
+
+        expected = df.groupby(df.index.date).idxmax()
+        result = df.groupby(df.index.date).apply(lambda x: x.idxmax())
+        assert_frame_equal(result,expected)
+
+        # GH 5789
+        # don't auto coerce dates
+        df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value'])
+        expected = Series(['00:00','02:00','02:00'],index=['2011.05.16','2011.05.17','2011.05.18'])
+        result = df.groupby('date').apply(lambda x: x['time'][x['value'].idxmax()])
+        assert_series_equal(result,expected)
+
     def test_len(self):
         df = tm.makeTimeDataFrame()
         grouped = df.groupby([lambda x: x.year,
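As a quick sanity check outside the test suite, the GH 5789 half of the new test boils down to something like the snippet below (abbreviated data; on very recent pandas the groupby(...).apply call may emit a deprecation warning about the grouping column, but the behaviour is the same): because the applied reduction returns plain time strings, the grouped result should keep dtype object rather than being coerced to datetimes.

from io import StringIO
import pandas as pd

s = """2011.05.16,00:00,1.40893
2011.05.16,01:00,1.40760
2011.05.17,02:00,1.40893
2011.05.17,03:00,1.40760
2011.05.18,02:00,1.40893
2011.05.18,03:00,1.40760"""

df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value'])

# Pick, per date, the time at which 'value' is largest; the result is a
# Series of strings indexed by date.
result = df.groupby('date').apply(lambda x: x['time'][x['value'].idxmax()])
print(result)
print(result.dtype)  # object, not datetime64[ns]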
