33#include <listobject.h>
44#include <stdlib.h>
55#include <time.h>
6+ #include <math.h>
67
78#ifndef PY_SSIZE_T_CLEAN
89#define PY_SSIZE_T_CLEAN
1213#define SELECTLIB_VERSION "1.0.0"
1314#endif
1415
16+ /* Forward declaration for heapselect so that it can be used
17+ in quickselect's fallback if the iteration limit is exceeded.
18+ */
19+ static PyObject * selectlib_heapselect (PyObject * self , PyObject * args , PyObject * kwargs );
20+
1521/*
1622 Helper function that compares two PyObject*s using the < operator.
1723 Returns 1 if a < b, 0 if not, or -1 if an error occurred.
@@ -43,10 +49,10 @@ swap_items(PyObject *list, Py_ssize_t i, Py_ssize_t j, PyObject **keys)
4349}
4450
4551/*
46- Standard in- place three- way partition (Dutch National Flag style) based on a given pivot.
52+ Standard in-place three-way partition (Dutch National Flag style) based on a given pivot.
4753 Rearranges the list (and keys array if present) so that all elements whose key is less than
4854 pivot come first, followed by those equal to pivot, then those greater.
49- Upon return, *low is the first index of the "equal" section and *mid is one past the end.
55+ Upon return, *low is the first index of the "equal" section and *mid is one past its end.
5056*/
5157static int
5258partition_by_pivot (PyObject * list , PyObject * * keys , Py_ssize_t n , PyObject * pivot ,
@@ -78,9 +84,11 @@ partition_by_pivot(PyObject *list, PyObject **keys, Py_ssize_t n, PyObject *pivo
7884}
7985
8086/*
81- Original in-place quickselect implementation.
82- It partitions the list (and keys array if provided) so that
83- the element at index k is in its final sorted position.
87+ Original in‐place quickselect implementation with an added iteration counter.
88+ It partitions the list (and keys array if provided) so that the element at index k
89+ is in its final sorted position.
90+ If the number of iterations exceeds 4× the expected maximum recursion depth,
91+ the function returns -2 to signal that a fallback is desired.
8492*/
8593static int
8694quickselect_inplace (PyObject * list , PyObject * * keys ,
@@ -91,13 +99,17 @@ quickselect_inplace(PyObject *list, PyObject **keys,
9199 srand ((unsigned )time (NULL ));
92100 seeded = 1 ;
93101 }
102+ int iterations = 0 ;
103+ /* Compute a max iteration limit: 4 times (1 + log₂(n)) */
104+ double log_val = log ((double )(right - left + 1 )) / log (2.0 );
105+ long max_iter = 4 * (1 + (long )log_val );
94106
95107 while (left < right ) {
108+ iterations ++ ;
109+ if (iterations > max_iter )
110+ return -2 ;
96111 Py_ssize_t pivot_index = left + rand () % (right - left + 1 );
97112 Py_ssize_t pos ;
98- /* Use partition function similar to before.
99- We reuse the swap_items function and keys if available.
100- */
101113 /* Move pivot to the end */
102114 swap_items (list , pivot_index , right , keys );
103115 PyObject * pivot_val = keys ? keys [right ] : PyList_GET_ITEM (list , right );
@@ -125,7 +137,7 @@ quickselect_inplace(PyObject *list, PyObject **keys,
125137
126138/*
127139 quickselect(values: list[Any], index: int, key=None) -> None
128- Partition the list in- place so that the element at the given index is in its
140+ Partition the list in-place so that the element at the given index is in its
129141 final sorted position. An optional key function may be provided.
130142*/
131143static PyObject *
@@ -180,15 +192,23 @@ selectlib_quickselect(PyObject *self, PyObject *args, PyObject *kwargs)
180192 }
181193 }
182194
183- if (n > 0 ) {
184- if (quickselect_inplace (values , keys , 0 , n - 1 , target_index ) < 0 ) {
185- if (keys ) {
186- for (Py_ssize_t i = 0 ; i < n ; i ++ )
187- Py_DECREF (keys [i ]);
188- PyMem_Free (keys );
189- }
190- return NULL ;
195+ int ret = quickselect_inplace (values , keys , 0 , n - 1 , target_index );
196+ if (ret == -2 ) {
197+ /* Exceeded iteration limit; use heapselect fallback. */
198+ if (keys ) {
199+ for (Py_ssize_t i = 0 ; i < n ; i ++ )
200+ Py_DECREF (keys [i ]);
201+ PyMem_Free (keys );
202+ }
203+ return selectlib_heapselect (self , args , kwargs );
204+ }
205+ else if (ret < 0 ) {
206+ if (keys ) {
207+ for (Py_ssize_t i = 0 ; i < n ; i ++ )
208+ Py_DECREF (keys [i ]);
209+ PyMem_Free (keys );
191210 }
211+ return NULL ;
192212 }
193213 if (keys ) {
194214 for (Py_ssize_t i = 0 ; i < n ; i ++ )
@@ -199,7 +219,7 @@ selectlib_quickselect(PyObject *self, PyObject *args, PyObject *kwargs)
199219 Py_RETURN_NONE ;
200220}
201221
202- /* ---------- New heapselect implementation ---------- */
222+ /* ---------- heapselect implementation ---------- */
203223
204224/* Structure to hold an element for the heap.
205225 Each HeapItem contains a pointer to the list element (value) and
@@ -252,9 +272,9 @@ build_max_heap(HeapItem *heap, Py_ssize_t heap_size)
252272
253273/*
254274 heapselect(values: list[Any], index: int, key=None) -> None
255- Partition the list in- place so that the element at the given index (k) is in its
275+ Partition the list in-place so that the element at the given index (k) is in its
256276 final sorted position. This implementation uses a heap strategy (specifically,
257- building a fixed- size max-heap on the first k+1 elements, then processing the rest)
277+ building a fixed-size max-heap on the first k+1 elements, then processing the rest)
258278 to determine the kth smallest element.
259279*/
260280static PyObject *
@@ -342,7 +362,6 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
342362 PyObject * current_key = use_key ? keys [i ] : PyList_GET_ITEM (values , i );
343363 int cmp = less_than (current_key , heap [0 ].key );
344364 if (cmp < 0 ) {
345- /* Propagate error */
346365 PyMem_Free (heap );
347366 if (keys ) {
348367 for (Py_ssize_t j = 0 ; j < n ; j ++ )
@@ -352,25 +371,28 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
352371 return NULL ;
353372 }
354373 if (cmp == 1 ) { /* current < heap root */
355- /* Replace the root with current and re-heapify */
356374 heap [0 ].value = PyList_GET_ITEM (values , i );
357375 heap [0 ].key = current_key ;
358376 max_heapify (heap , heap_size , 0 );
359377 }
360378 }
361379
362- /* The kth smallest element candidate is now at the root of the heap. */
363- PyObject * pivot = heap [0 ].value ;
364- PyObject * pivot_key = heap [0 ].key ;
365-
380+ /* Save the pivot value and its key (if in use) from the heap’s root */
381+ PyObject * pivot ;
382+ PyObject * pivot_key = NULL ;
383+ if (use_key ) {
384+ pivot_key = heap [0 ].key ;
385+ pivot = heap [0 ].value ;
386+ } else {
387+ pivot = heap [0 ].value ;
388+ }
366389 PyMem_Free (heap );
367390
368- /* Partition the entire list around the pivot value (using pivot_key).
369- After partitioning, the block of elements equal to the pivot should contain
370- the target_index.
391+ /* Partition the entire list around the pivot.
392+ If a key function is in use, pass the computed pivot_key.
371393 */
372394 Py_ssize_t low , mid ;
373- if (partition_by_pivot (values , keys , n , pivot_key , & low , & mid ) < 0 ) {
395+ if (partition_by_pivot (values , keys , n , use_key ? pivot_key : pivot , & low , & mid ) < 0 ) {
374396 if (keys ) {
375397 for (Py_ssize_t i = 0 ; i < n ; i ++ )
376398 Py_DECREF (keys [i ]);
@@ -379,9 +401,6 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
379401 return NULL ;
380402 }
381403
382- /* Verify that target_index lies in the "equal" partition.
383- (In a correct run this must be true.)
384- */
385404 if (!(target_index >= low && target_index < mid )) {
386405 if (keys ) {
387406 for (Py_ssize_t i = 0 ; i < n ; i ++ )
@@ -401,6 +420,98 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
401420 Py_RETURN_NONE ;
402421}
403422
423+ /*
424+ nth_element(values: list[Any], index: int, key=None) -> None
425+ Partition the list in‐place so that the element at the given index is in its
426+ final sorted position. This interface adapts the selection algorithm as follows:
427+ • If index is less than (len(values) >> 3), the heapselect method is used.
428+ • Otherwise, quickselect is attempted. If quickselect exceeds 4× the expected
429+ recursion depth (detected via iteration count), the routine falls back to heapselect.
430+ */
431+ static PyObject *
432+ selectlib_nth_element (PyObject * self , PyObject * args , PyObject * kwargs )
433+ {
434+ static char * kwlist [] = {"values" , "index" , "key" , NULL };
435+ PyObject * values ;
436+ Py_ssize_t target_index ;
437+ PyObject * key = Py_None ;
438+
439+ if (!PyArg_ParseTupleAndKeywords (args , kwargs , "On|O:nth_element" ,
440+ kwlist , & values , & target_index , & key ))
441+ return NULL ;
442+
443+ if (!PyList_Check (values )) {
444+ PyErr_SetString (PyExc_TypeError , "values must be a list" );
445+ return NULL ;
446+ }
447+ Py_ssize_t n = PyList_Size (values );
448+ if (n == 0 || target_index < 0 || target_index >= n ) {
449+ PyErr_SetString (PyExc_IndexError , "index out of range" );
450+ return NULL ;
451+ }
452+
453+ /* If target_index is small compared to n, use heapselect directly */
454+ if (target_index < (n >> 3 )) {
455+ return selectlib_heapselect (self , args , kwargs );
456+ }
457+
458+ int use_key = 0 ;
459+ if (key != Py_None ) {
460+ if (!PyCallable_Check (key )) {
461+ PyErr_SetString (PyExc_TypeError , "key must be callable" );
462+ return NULL ;
463+ }
464+ use_key = 1 ;
465+ }
466+
467+ PyObject * * keys = NULL ;
468+ if (use_key ) {
469+ keys = PyMem_New (PyObject * , n );
470+ if (keys == NULL ) {
471+ PyErr_NoMemory ();
472+ return NULL ;
473+ }
474+ for (Py_ssize_t i = 0 ; i < n ; i ++ ) {
475+ PyObject * item = PyList_GET_ITEM (values , i );
476+ PyObject * keyval = PyObject_CallFunctionObjArgs (key , item , NULL );
477+ if (keyval == NULL ) {
478+ for (Py_ssize_t j = 0 ; j < i ; j ++ )
479+ Py_DECREF (keys [j ]);
480+ PyMem_Free (keys );
481+ return NULL ;
482+ }
483+ keys [i ] = keyval ;
484+ }
485+ }
486+
487+ int ret ;
488+ ret = quickselect_inplace (values , keys , 0 , n - 1 , target_index );
489+ if (ret == -2 ) {
490+ /* Exceeded iteration threshold; fall back to heapselect. */
491+ if (keys ) {
492+ for (Py_ssize_t i = 0 ; i < n ; i ++ )
493+ Py_DECREF (keys [i ]);
494+ PyMem_Free (keys );
495+ }
496+ return selectlib_heapselect (self , args , kwargs );
497+ } else if (ret < 0 ) {
498+ if (keys ) {
499+ for (Py_ssize_t i = 0 ; i < n ; i ++ )
500+ Py_DECREF (keys [i ]);
501+ PyMem_Free (keys );
502+ }
503+ return NULL ;
504+ }
505+
506+ if (keys ) {
507+ for (Py_ssize_t i = 0 ; i < n ; i ++ )
508+ Py_DECREF (keys [i ]);
509+ PyMem_Free (keys );
510+ }
511+
512+ Py_RETURN_NONE ;
513+ }
514+
404515/* ---------- Module method definitions ---------- */
405516static PyMethodDef selectlib_methods [] = {
406517 {"quickselect" , (PyCFunction )selectlib_quickselect ,
@@ -411,13 +522,18 @@ static PyMethodDef selectlib_methods[] = {
411522 METH_VARARGS | METH_KEYWORDS ,
412523 "heapselect(values: list[Any], index: int, key=None) -> None\n\n"
413524 "Partition the list in-place using a heap strategy so that the element at the given index is in its final sorted position." },
525+ {"nth_element" , (PyCFunction )selectlib_nth_element ,
526+ METH_VARARGS | METH_KEYWORDS ,
527+ "nth_element(values: list[Any], index: int, key=None) -> None\n\n"
528+ "Partition the list in-place so that the element at the given index is in its final sorted position. "
529+ "Uses heapselect if the target index is less than (len(values) >> 3) or if quickselect exceeds its iteration limit." },
414530 {NULL , NULL , 0 , NULL }
415531};
416532
417533static struct PyModuleDef selectlibmodule = {
418534 PyModuleDef_HEAD_INIT ,
419535 "selectlib" ,
420- "Module that implements the quickselect and heapselect algorithms." ,
536+ "Module that implements the quickselect, heapselect, and nth_element algorithms." ,
421537 -1 ,
422538 selectlib_methods ,
423539};
0 commit comments