
Commit e3b0b5d

Fix bug in ~Series.describe where the median percentile is included when the percentiles argument is passed

1 parent: c0371ce

4 files changed (+40, -5 lines)


doc/source/whatsnew/v3.0.0.rst (+1)

@@ -838,6 +838,7 @@ Other
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
 - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
 - Bug in :meth:`MultiIndex.fillna` error message was referring to ``isna`` instead of ``fillna`` (:issue:`60974`)
+- Bug in :meth:`Series.describe` where the median percentile is included when the ``percentiles`` argument is passed (:issue:`60550`)
 - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
 - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
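
The whatsnew entry above corresponds to a user-visible behaviour change in describe. A minimal sketch of the difference, based on the removed median-forcing code and the test added below (exact statistic values depend on the data and are omitted here):

import pandas as pd

s = pd.Series(range(10))

# Before this commit, _refine_percentiles forced 0.5 into the requested
# percentiles, so a 50% row appeared even when it was not asked for.
# With the fix, only the requested percentiles are reported.
s.describe(percentiles=[0.2])  # index: count, mean, std, min, 20%, max

# An empty list now yields a summary with no percentile rows at all.
s.describe(percentiles=[])     # index: count, mean, std, min, max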

pandas/core/methods/describe.py (+6, -5)

@@ -229,10 +229,15 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
 
     formatted_percentiles = format_percentiles(percentiles)
 
+    if len(percentiles) == 0:
+        quantiles = []
+    else:
+        quantiles = series.quantile(percentiles).tolist()
+
     stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
     d = (
         [series.count(), series.mean(), series.std(), series.min()]
-        + series.quantile(percentiles).tolist()
+        + quantiles
         + [series.max()]
     )
     # GH#48340 - always return float on non-complex numeric data
@@ -354,10 +359,6 @@ def _refine_percentiles(
     # get them all to be in [0, 1]
     validate_percentile(percentiles)
 
-    # median should always be included
-    if 0.5 not in percentiles:
-        percentiles.append(0.5)
-
     percentiles = np.asarray(percentiles)
 
     # sort and check for duplicates
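
For readers who want to trace the new control flow without the surrounding pandas internals, here is a simplified, standalone sketch of the patched describe_numeric_1d logic. The percentile formatting is replaced by a plain f-string stand-in, so this is an approximation of the diff above, not the actual pandas implementation:

import pandas as pd

def describe_numeric_1d_sketch(series: pd.Series, percentiles: list) -> pd.Series:
    # Simplified stand-in for pandas.io.formats.format.format_percentiles.
    formatted_percentiles = [f"{p:.0%}" for p in percentiles]

    # The guard added in this commit: skip Series.quantile entirely when
    # no percentiles were requested.
    if len(percentiles) == 0:
        quantiles = []
    else:
        quantiles = series.quantile(percentiles).tolist()

    stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
    d = (
        [series.count(), series.mean(), series.std(), series.min()]
        + quantiles
        + [series.max()]
    )
    return pd.Series(d, index=stat_index, name=series.name)

s = pd.Series(range(10), name="a")
describe_numeric_1d_sketch(s, [])     # count, mean, std, min, max
describe_numeric_1d_sketch(s, [0.2])  # count, mean, std, min, 20%, max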

pandas/io/formats/format.py (+3)

@@ -1565,6 +1565,9 @@ def format_percentiles(
     >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
     ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
     """
+    if len(percentiles) == 0:
+        return []
+
     percentiles = np.asarray(percentiles)
 
     # It checks for np.nan as well
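
A quick way to see the effect of the new early return. Note that format_percentiles is a private helper, so the import path below is an internal detail that may move between pandas versions:

from pandas.io.formats.format import format_percentiles

format_percentiles([0.25, 0.5, 0.75])  # ['25%', '50%', '75%']
# With the guard added above, an empty input returns immediately instead of
# reaching the array-based formatting further down in the function:
format_percentiles([])  # []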

pandas/tests/frame/methods/test_describe.py (+30)

@@ -413,3 +413,33 @@ def test_describe_exclude_pa_dtype(self):
             dtype=pd.ArrowDtype(pa.float64()),
         )
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("percentiles", [None, [], [0.2]])
+    def test_refine_percentiles(self, percentiles):
+        """
+        Test that the percentiles are returned correctly depending on the `percentiles`
+        argument.
+        - The default behavior is to return the 25th, 50th, and 75th percentiles
+        - If `percentiles` is an empty list, no percentiles are returned
+        - If `percentiles` is a non-empty list, only those percentiles are returned
+        """
+        # GH#60550
+        df = DataFrame({"a": np.arange(0, 10, 1)})
+
+        result = df.describe(percentiles=percentiles)
+
+        if percentiles is None:
+            percentiles = [0.25, 0.5, 0.75]
+
+        expected = Series(
+            {
+                "count": len(df.a),
+                "mean": df.a.mean(),
+                "std": df.a.std(),
+                "min": df.a.min(),
+                **{f"{p:.0%}": df.a.quantile(p) for p in percentiles},
+                "max": df.a.max(),
+            },
+        ).to_frame(name="a")
+
+        tm.assert_frame_equal(result, expected)
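
To exercise just this case in a pandas development checkout, something like python -m pytest pandas/tests/frame/methods/test_describe.py -k test_refine_percentiles should run only the three parametrized variants of the new test.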
