Skip to content

Commit 472b9a9

Browse files
committed
Tune selectlib, update benchmark, and regen plot
1 parent 7647408 commit 472b9a9

File tree

3 files changed, with 32 additions (+32) and 26 deletions (-26).

benchmark.py

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22
"""
3-
Benchmark comparisons for four methods to obtain the K smallest items from a list,
3+
Benchmark comparisons for five methods to obtain the K smallest items from a list,
44
for various values of K with different list sizes N (varying from 1,000 to 1,000,000).
55
66
For each method and each chosen K (as a percentage of N), the test is run 5 times
@@ -9,8 +9,9 @@
99
Methods benchmarked:
1010
1. Using built‐in sort: sort the list and slice the first K elements.
1111
2. Using heapq.nsmallest: use the heap‐based algorithm.
12-
3. Using quickselect: partition the list with selectlib.quickselect and slice the first K elements.
13-
4. Using heapselect: partition the list with selectlib.heapselect and slice the first K elements.
12+
3. Using quickselect: partition the list with selectlib.quickselect and then sort the first K elements.
13+
4. Using heapselect: partition the list with selectlib.heapselect and then sort the first K elements.
14+
5. Using nth_element: partition the list with selectlib.nth_element and then sort the first K elements.
1415
1516
The benchmark results are then plotted as grouped bar charts (one per N value) in a vertical stack.
1617
Note: The percentages for K are now 0.2%, 1%, 10%, and 25% of N.
@@ -24,6 +25,7 @@
2425
import selectlib
2526

2627
# Define benchmark methods
28+
2729
def bench_sort(values, K):
2830
"""Sort a copy of the list and return the first K smallest items."""
2931
lst = values.copy()
@@ -41,7 +43,6 @@ def bench_quickselect(values, K):
4143
is in the correct sorted position; then sort and return the first K elements.
4244
"""
4345
lst = values.copy()
44-
# Partition in-place so that the element at index (K-1) is in the correct position
4546
selectlib.quickselect(lst, K - 1)
4647
result = lst[:K]
4748
result.sort()
@@ -53,24 +54,35 @@ def bench_heapselect(values, K):
5354
is in the correct sorted position; then sort and return the first K elements.
5455
"""
5556
lst = values.copy()
56-
# Partition in-place so that the element at index (K-1) is in the correct position.
5757
selectlib.heapselect(lst, K - 1)
5858
result = lst[:K]
5959
result.sort()
6060
return result
6161

62-
# List of methods to benchmark
62+
def bench_nth_element(values, K):
63+
"""
64+
Use selectlib.nth_element on a copy of the list to partition it so that the element at index K-1
65+
is in the correct sorted position; then sort and return the first K elements.
66+
"""
67+
lst = values.copy()
68+
selectlib.nth_element(lst, K - 1)
69+
result = lst[:K]
70+
result.sort()
71+
return result
72+
73+
# Dictionary of methods to benchmark.
6374
methods = {
6475
"sort": bench_sort,
6576
"heapq.nsmallest": bench_heapq,
6677
"quickselect": bench_quickselect,
6778
"heapselect": bench_heapselect,
79+
"nth_element": bench_nth_element,
6880
}
6981

7082
def run_benchmarks():
7183
"""
7284
Runs the benchmarks for different list sizes.
73-
For each N in N_VALUES, constructs a random list of integers and then, for each K (as a percentage of N),
85+
For each N in N_values, constructs a random list of integers and then, for each K (as a percentage of N),
7486
runs each method 5 times and records the median runtime.
7587
Returns a dictionary mapping each N to its benchmark results.
7688
"""
@@ -111,47 +123,42 @@ def plot_results(overall_results):
111123
For each subplot, the x-axis shows K along with its percentage of N,
112124
and the y-axis shows the median time in ms.
113125
"""
114-
# Determine the number of charts (one for each N)
115126
num_charts = len(overall_results)
116127
fig, axes = plt.subplots(nrows=num_charts, ncols=1, figsize=(10, 4*num_charts))
117128

118-
# If only one subplot, put it into a list for uniform processing.
119129
if num_charts == 1:
120130
axes = [axes]
121131

122-
# Define bar appearance
123-
bar_width = 0.2
132+
# Bar appearance settings
133+
bar_width = 0.15
124134
method_offsets = {
125-
"sort": -bar_width,
126-
"heapq.nsmallest": 0,
127-
"quickselect": bar_width,
128-
"heapselect": bar_width*2,
135+
"sort": -2*bar_width,
136+
"heapq.nsmallest": -bar_width,
137+
"quickselect": 0,
138+
"heapselect": bar_width,
139+
"nth_element": 2*bar_width,
129140
}
130141
method_colors = {
131142
"sort": '#1f77b4',
132143
"heapq.nsmallest": '#ff7f0e',
133144
"quickselect": '#2ca02c',
134-
"heapselect": '#d62728'
145+
"heapselect": '#d62728',
146+
"nth_element": '#9467bd',
135147
}
136148

137-
# Sort the overall_results by N for proper ordering (smallest to largest)
149+
# Process each chart (one per N value)
138150
for ax, (N, data) in zip(axes, sorted(overall_results.items(), key=lambda x: x[0])):
139151
K_VALUES = data["K_values"]
140152
results = data["results"]
141-
# Create x positions (one per K value)
142153
x_positions = list(range(len(K_VALUES)))
143-
# Create x-axis labels as "K (percentage)" with comma formatting for K
144154
x_labels = [f"{K:,} ({(K/N)*100:.1f}%)" for K in K_VALUES]
145155

146156
for method, timing_dict in results.items():
147-
# Extract times (convert seconds to milliseconds)
148157
times_ms = [timing_dict[K]*1000 for K in K_VALUES]
149-
# Compute adjusted positions for grouped bars
150158
positions = [x + method_offsets[method] for x in x_positions]
151159
bars = ax.bar(positions, times_ms, width=bar_width, label=method, color=method_colors.get(method))
152160
ax.bar_label(bars, fmt='%.2f', padding=1, fontsize=8)
153161

154-
# Use comma formatting for N in the title
155162
ax.set_title(f"N = {N:,}")
156163
ax.set_xlabel("K (percentage of N)")
157164
ax.set_ylabel("Median time (ms)")
@@ -160,7 +167,6 @@ def plot_results(overall_results):
160167
ax.legend(title="Method")
161168
ax.grid(True, linestyle='--', alpha=0.5)
162169

163-
# Improved overall title for the charts
164170
plt.suptitle("Performance Benchmark for N-Smallest Methods", fontsize=18)
165171
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
166172
plt.savefig('plot.png')

plot.png

12.7 KB
Loading

selectlib.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ selectlib_heapselect(PyObject *self, PyObject *args, PyObject *kwargs)
424424
nth_element(values: list[Any], index: int, key=None) -> None
425425
Partition the list in‐place so that the element at the given index is in its
426426
final sorted position. This interface adapts the selection algorithm as follows:
427-
• If index is less than (len(values) >> 3), the heapselect method is used.
427+
• If index is less than (len(values) >> 4), the heapselect method is used.
428428
• Otherwise, quickselect is attempted. If quickselect exceeds 4× the expected
429429
recursion depth (detected via iteration count), the routine falls back to heapselect.
430430
*/
@@ -451,7 +451,7 @@ selectlib_nth_element(PyObject *self, PyObject *args, PyObject *kwargs)
451451
}
452452

453453
/* If target_index is small compared to n, use heapselect directly */
454-
if (target_index < (n >> 3)) {
454+
if (target_index < (n >> 4)) {
455455
return selectlib_heapselect(self, args, kwargs);
456456
}
457457

@@ -526,7 +526,7 @@ static PyMethodDef selectlib_methods[] = {
526526
METH_VARARGS | METH_KEYWORDS,
527527
"nth_element(values: list[Any], index: int, key=None) -> None\n\n"
528528
"Partition the list in-place so that the element at the given index is in its final sorted position. "
529-
"Uses heapselect if the target index is less than (len(values) >> 3) or if quickselect exceeds its iteration limit."},
529+
"Uses heapselect if the target index is less than (len(values) >> 4) or if quickselect exceeds its iteration limit."},
530530
{NULL, NULL, 0, NULL}
531531
};
532532

0 commit comments

Comments
 (0)