Skip to content

Commit 331671f

Browse files
Propagate group_keys in DataFrameGroupBy (#1174)
1 parent 77d0f89 commit 331671f

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

dask_expr/_groupby.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,7 @@ def __getitem__(self, key):
16211621
return SeriesGroupBy(
16221622
self.obj,
16231623
by=self.by,
1624+
group_keys=self.group_keys,
16241625
slice=key,
16251626
sort=self.sort,
16261627
dropna=self.dropna,
@@ -2194,6 +2195,7 @@ def __init__(
21942195
self,
21952196
obj,
21962197
by,
2198+
group_keys=True,
21972199
sort=None,
21982200
observed=None,
21992201
dropna=None,
@@ -2218,7 +2220,13 @@ def __init__(
22182220
obj._meta.groupby(by, **_as_dict("observed", observed))
22192221

22202222
super().__init__(
2221-
obj, by=by, slice=slice, observed=observed, dropna=dropna, sort=sort
2223+
obj,
2224+
by=by,
2225+
group_keys=group_keys,
2226+
slice=slice,
2227+
observed=observed,
2228+
dropna=dropna,
2229+
sort=sort,
22222230
)
22232231

22242232
@derived_from(pd.core.groupby.SeriesGroupBy)

dask_expr/tests/test_groupby.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,3 +1048,16 @@ def test_groupby_index_modified_divisions():
10481048
df.groupby(df.index.dt.date).count(),
10491049
pdf.groupby(pdf.index.date).count(),
10501050
)
1051+
1052+
1053+
def test_groupby_getitem_apply_group_keys():
1054+
pdf = pd.DataFrame(
1055+
{
1056+
"A": [0, 1] * 4,
1057+
"B": [1] * 8,
1058+
}
1059+
)
1060+
df = from_pandas(pdf, npartitions=4)
1061+
result = df.groupby("A", group_keys=False).B.apply(lambda x: x, meta=("B", int))
1062+
expected = pdf.groupby("A", group_keys=False).B.apply(lambda x: x)
1063+
assert_eq(result, expected)

0 commit comments

Comments
 (0)