Skip to content

Commit 7647408

Browse files
committed
Add nth_element
1 parent f742699 commit 7647408

File tree

2 files changed

+151
-34
lines changed

2 files changed

+151
-34
lines changed

selectlib.c

Lines changed: 150 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <listobject.h>
44
#include <stdlib.h>
55
#include <time.h>
6+
#include <math.h>
67

78
#ifndef PY_SSIZE_T_CLEAN
89
#define PY_SSIZE_T_CLEAN
@@ -12,6 +13,11 @@
1213
#define SELECTLIB_VERSION "1.0.0"
1314
#endif
1415

16+
/* Forward declaration for heapselect so that it can be used
17+
in quickselect's fallback if the iteration limit is exceeded.
18+
*/
19+
static PyObject * selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs);
20+
1521
/*
1622
Helper function that compares two PyObject*s using the < operator.
1723
Returns 1 if a < b, 0 if not, or -1 if an error occurred.
@@ -43,10 +49,10 @@ swap_items(PyObject *list, Py_ssize_t i, Py_ssize_t j, PyObject **keys)
4349
}
4450

4551
/*
46-
Standard in-place three-way partition (Dutch National Flag style) based on a given pivot.
52+
Standard in-place three-way partition (Dutch National Flag style) based on a given pivot.
4753
Rearranges the list (and keys array if present) so that all elements whose key is less than
4854
pivot come first, followed by those equal to pivot, then those greater.
49-
Upon return, *low is the first index of the "equal" section and *mid is one past the end.
55+
Upon return, *low is the first index of the "equal" section and *mid is one past its end.
5056
*/
5157
static int
5258
partition_by_pivot(PyObject *list, PyObject **keys, Py_ssize_t n, PyObject *pivot,
@@ -78,9 +84,11 @@ partition_by_pivot(PyObject *list, PyObject **keys, Py_ssize_t n, PyObject *pivo
7884
}
7985

8086
/*
81-
Original in-place quickselect implementation.
82-
It partitions the list (and keys array if provided) so that
83-
the element at index k is in its final sorted position.
87+
In-place quickselect implementation with an added iteration counter.
88+
It partitions the list (and keys array if provided) so that the element at index k
89+
is in its final sorted position.
90+
If the number of iterations exceeds 4× the expected maximum recursion depth,
91+
the function returns -2 to signal that a fallback is desired.
8492
*/
8593
static int
8694
quickselect_inplace(PyObject *list, PyObject **keys,
@@ -91,13 +99,17 @@ quickselect_inplace(PyObject *list, PyObject **keys,
9199
srand((unsigned)time(NULL));
92100
seeded = 1;
93101
}
102+
int iterations = 0;
103+
/* Compute the iteration limit: 4 * (1 + floor(log2(n))), where n = right - left + 1. */
104+
double log_val = log((double)(right - left + 1)) / log(2.0);
105+
long max_iter = 4 * (1 + (long)log_val);
94106

95107
while (left < right) {
108+
iterations++;
109+
if (iterations > max_iter)
110+
return -2;
96111
Py_ssize_t pivot_index = left + rand() % (right - left + 1);
97112
Py_ssize_t pos;
98-
/* Use partition function similar to before.
99-
We reuse the swap_items function and keys if available.
100-
*/
101113
/* Move pivot to the end */
102114
swap_items(list, pivot_index, right, keys);
103115
PyObject *pivot_val = keys ? keys[right] : PyList_GET_ITEM(list, right);
@@ -125,7 +137,7 @@ quickselect_inplace(PyObject *list, PyObject **keys,
125137

126138
/*
127139
quickselect(values: list[Any], index: int, key=None) -> None
128-
Partition the list in-place so that the element at the given index is in its
140+
Partition the list in-place so that the element at the given index is in its
129141
final sorted position. An optional key function may be provided.
130142
*/
131143
static PyObject *
@@ -180,15 +192,23 @@ selectlib_quickselect(PyObject *self, PyObject *args, PyObject *kwargs)
180192
}
181193
}
182194

183-
if (n > 0) {
184-
if (quickselect_inplace(values, keys, 0, n - 1, target_index) < 0) {
185-
if (keys) {
186-
for (Py_ssize_t i = 0; i < n; i++)
187-
Py_DECREF(keys[i]);
188-
PyMem_Free(keys);
189-
}
190-
return NULL;
195+
int ret = quickselect_inplace(values, keys, 0, n - 1, target_index);
196+
if (ret == -2) {
197+
/* Exceeded iteration limit; use heapselect fallback. */
198+
if (keys) {
199+
for (Py_ssize_t i = 0; i < n; i++)
200+
Py_DECREF(keys[i]);
201+
PyMem_Free(keys);
202+
}
203+
return selectlib_heapselect(self, args, kwargs);
204+
}
205+
else if (ret < 0) {
206+
if (keys) {
207+
for (Py_ssize_t i = 0; i < n; i++)
208+
Py_DECREF(keys[i]);
209+
PyMem_Free(keys);
191210
}
211+
return NULL;
192212
}
193213
if (keys) {
194214
for (Py_ssize_t i = 0; i < n; i++)
@@ -199,7 +219,7 @@ selectlib_quickselect(PyObject *self, PyObject *args, PyObject *kwargs)
199219
Py_RETURN_NONE;
200220
}
201221

202-
/* ---------- New heapselect implementation ---------- */
222+
/* ---------- heapselect implementation ---------- */
203223

204224
/* Structure to hold an element for the heap.
205225
Each HeapItem contains a pointer to the list element (value) and
@@ -252,9 +272,9 @@ build_max_heap(HeapItem *heap, Py_ssize_t heap_size)
252272

253273
/*
254274
heapselect(values: list[Any], index: int, key=None) -> None
255-
Partition the list in-place so that the element at the given index (k) is in its
275+
Partition the list in-place so that the element at the given index (k) is in its
256276
final sorted position. This implementation uses a heap strategy (specifically,
257-
building a fixed-size max-heap on the first k+1 elements, then processing the rest)
277+
building a fixed-size max-heap on the first k+1 elements, then processing the rest)
258278
to determine the kth smallest element.
259279
*/
260280
static PyObject *
@@ -342,7 +362,6 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
342362
PyObject *current_key = use_key ? keys[i] : PyList_GET_ITEM(values, i);
343363
int cmp = less_than(current_key, heap[0].key);
344364
if (cmp < 0) {
345-
/* Propagate error */
346365
PyMem_Free(heap);
347366
if (keys) {
348367
for (Py_ssize_t j = 0; j < n; j++)
@@ -352,25 +371,28 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
352371
return NULL;
353372
}
354373
if (cmp == 1) { /* current < heap root */
355-
/* Replace the root with current and re-heapify */
356374
heap[0].value = PyList_GET_ITEM(values, i);
357375
heap[0].key = current_key;
358376
max_heapify(heap, heap_size, 0);
359377
}
360378
}
361379

362-
/* The kth smallest element candidate is now at the root of the heap. */
363-
PyObject *pivot = heap[0].value;
364-
PyObject *pivot_key = heap[0].key;
365-
380+
/* Save the pivot value and its key (if in use) from the heap’s root */
381+
PyObject *pivot;
382+
PyObject *pivot_key = NULL;
383+
if (use_key) {
384+
pivot_key = heap[0].key;
385+
pivot = heap[0].value;
386+
} else {
387+
pivot = heap[0].value;
388+
}
366389
PyMem_Free(heap);
367390

368-
/* Partition the entire list around the pivot value (using pivot_key).
369-
After partitioning, the block of elements equal to the pivot should contain
370-
the target_index.
391+
/* Partition the entire list around the pivot.
392+
If a key function is in use, pass the computed pivot_key.
371393
*/
372394
Py_ssize_t low, mid;
373-
if (partition_by_pivot(values, keys, n, pivot_key, &low, &mid) < 0) {
395+
if (partition_by_pivot(values, keys, n, use_key ? pivot_key : pivot, &low, &mid) < 0) {
374396
if (keys) {
375397
for (Py_ssize_t i = 0; i < n; i++)
376398
Py_DECREF(keys[i]);
@@ -379,9 +401,6 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
379401
return NULL;
380402
}
381403

382-
/* Verify that target_index lies in the "equal" partition.
383-
(In a correct run this must be true.)
384-
*/
385404
if (!(target_index >= low && target_index < mid)) {
386405
if (keys) {
387406
for (Py_ssize_t i = 0; i < n; i++)
@@ -401,6 +420,98 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
401420
Py_RETURN_NONE;
402421
}
403422

423+
/*
424+
nth_element(values: list[Any], index: int, key=None) -> None
425+
Partition the list in‐place so that the element at the given index is in its
426+
final sorted position. This interface adapts the selection algorithm as follows:
427+
• If index is less than (len(values) >> 3), the heapselect method is used.
428+
• Otherwise, quickselect is attempted. If quickselect exceeds 4× the expected
429+
recursion depth (detected via iteration count), the routine falls back to heapselect.
430+
*/
431+
static PyObject *
432+
selectlib_nth_element(PyObject *self, PyObject *args, PyObject *kwargs)
433+
{
434+
static char *kwlist[] = {"values", "index", "key", NULL};
435+
PyObject *values;
436+
Py_ssize_t target_index;
437+
PyObject *key = Py_None;
438+
439+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "On|O:nth_element",
440+
kwlist, &values, &target_index, &key))
441+
return NULL;
442+
443+
if (!PyList_Check(values)) {
444+
PyErr_SetString(PyExc_TypeError, "values must be a list");
445+
return NULL;
446+
}
447+
Py_ssize_t n = PyList_Size(values);
448+
if (n == 0 || target_index < 0 || target_index >= n) {
449+
PyErr_SetString(PyExc_IndexError, "index out of range");
450+
return NULL;
451+
}
452+
453+
/* If target_index is small compared to n, use heapselect directly */
454+
if (target_index < (n >> 3)) {
455+
return selectlib_heapselect(self, args, kwargs);
456+
}
457+
458+
int use_key = 0;
459+
if (key != Py_None) {
460+
if (!PyCallable_Check(key)) {
461+
PyErr_SetString(PyExc_TypeError, "key must be callable");
462+
return NULL;
463+
}
464+
use_key = 1;
465+
}
466+
467+
PyObject **keys = NULL;
468+
if (use_key) {
469+
keys = PyMem_New(PyObject *, n);
470+
if (keys == NULL) {
471+
PyErr_NoMemory();
472+
return NULL;
473+
}
474+
for (Py_ssize_t i = 0; i < n; i++) {
475+
PyObject *item = PyList_GET_ITEM(values, i);
476+
PyObject *keyval = PyObject_CallFunctionObjArgs(key, item, NULL);
477+
if (keyval == NULL) {
478+
for (Py_ssize_t j = 0; j < i; j++)
479+
Py_DECREF(keys[j]);
480+
PyMem_Free(keys);
481+
return NULL;
482+
}
483+
keys[i] = keyval;
484+
}
485+
}
486+
487+
int ret;
488+
ret = quickselect_inplace(values, keys, 0, n - 1, target_index);
489+
if (ret == -2) {
490+
/* Exceeded iteration threshold; fall back to heapselect. */
491+
if (keys) {
492+
for (Py_ssize_t i = 0; i < n; i++)
493+
Py_DECREF(keys[i]);
494+
PyMem_Free(keys);
495+
}
496+
return selectlib_heapselect(self, args, kwargs);
497+
} else if (ret < 0) {
498+
if (keys) {
499+
for (Py_ssize_t i = 0; i < n; i++)
500+
Py_DECREF(keys[i]);
501+
PyMem_Free(keys);
502+
}
503+
return NULL;
504+
}
505+
506+
if (keys) {
507+
for (Py_ssize_t i = 0; i < n; i++)
508+
Py_DECREF(keys[i]);
509+
PyMem_Free(keys);
510+
}
511+
512+
Py_RETURN_NONE;
513+
}
514+
404515
/* ---------- Module method definitions ---------- */
405516
static PyMethodDef selectlib_methods[] = {
406517
{"quickselect", (PyCFunction)selectlib_quickselect,
@@ -411,13 +522,18 @@ static PyMethodDef selectlib_methods[] = {
411522
METH_VARARGS | METH_KEYWORDS,
412523
"heapselect(values: list[Any], index: int, key=None) -> None\n\n"
413524
"Partition the list in-place using a heap strategy so that the element at the given index is in its final sorted position."},
525+
{"nth_element", (PyCFunction)selectlib_nth_element,
526+
METH_VARARGS | METH_KEYWORDS,
527+
"nth_element(values: list[Any], index: int, key=None) -> None\n\n"
528+
"Partition the list in-place so that the element at the given index is in its final sorted position. "
529+
"Uses heapselect if the target index is less than (len(values) >> 3) or if quickselect exceeds its iteration limit."},
414530
{NULL, NULL, 0, NULL}
415531
};
416532

417533
static struct PyModuleDef selectlibmodule = {
418534
PyModuleDef_HEAD_INIT,
419535
"selectlib",
420-
"Module that implements the quickselect and heapselect algorithms.",
536+
"Module that implements the quickselect, heapselect, and nth_element algorithms.",
421537
-1,
422538
selectlib_methods,
423539
};

test_selectlib.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def setUp(self):
1616
self.algorithms = [
1717
("quickselect", selectlib.quickselect),
1818
("heapselect", selectlib.heapselect),
19+
("nth_element", selectlib.nth_element),
1920
]
2021

2122
def sorted_index_check(self, func, values, k, key=None):

0 commit comments

Comments
 (0)