Change sorts to stable instead of mergesort

Jeffrharr · Jeffrharr · commit 84f829110b99 · 2025-03-13T16:03:29.000-06:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -68,6 +68,7 @@ Other enhancements
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
+- :meth:`Series.nlargest` has improved performance when there are duplicate values in the index (:issue:`55767`)
 - :meth:`Series.str.get_dummies` now accepts a  ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
@@ -148,6 +149,8 @@ These improvements also fixed certain bugs in groupby:
 - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`)
 - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`)
 
+- :meth:`Series.nlargest` now sorts internally with ``kind="stable"`` instead of ``kind="mergesort"``
+
 .. _whatsnew_300.notable_bug_fixes.notable_bug_fix2:
 
 notable_bug_fix2
diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py
@@ -119,18 +119,18 @@ def compute(self, method: str) -> Series:
         original_index: Index = self.obj.index
         cur_series = self.obj.reset_index(drop=True)
 
-        dropped = cur_series.dropna()
-        nan_index = cur_series.drop(dropped.index)
-
         # slow method
         if n >= len(cur_series):
             ascending = method == "nsmallest"
             final_series = cur_series.sort_values(
-                ascending=ascending, kind="mergesort"
+                ascending=ascending, kind="stable"
             ).head(n)
             final_series.index = original_index.take(final_series.index)
             return final_series
 
+        dropped = cur_series.dropna()
+        nan_index = cur_series.drop(dropped.index)
+
         # fast method
         new_dtype = dropped.dtype
 
@@ -291,4 +291,4 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
 
         ascending = method == "nsmallest"
 
-        return frame.sort_values(columns, ascending=ascending, kind="mergesort")
+        return frame.sort_values(columns, ascending=ascending, kind="stable")
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
@@ -153,11 +153,11 @@ def test_nlargest_n_duplicate_index(self, n, order, request):
             index=[0, 0, 1, 1, 1],
         )
         result = df.nsmallest(n, order)
-        expected = df.sort_values(order, kind="mergesort").head(n)
+        expected = df.sort_values(order, kind="stable").head(n)
         tm.assert_frame_equal(result, expected)
 
         result = df.nlargest(n, order)
-        expected = df.sort_values(order, ascending=False, kind="mergesort").head(n)
+        expected = df.sort_values(order, ascending=False, kind="stable").head(n)
         if Version(np.__version__) >= Version("1.25") and (
             (order == ["a"] and n in (1, 2, 3, 4)) or ((order == ["a", "b"]) and n == 5)
         ):