1212 3. Using quickselect: partition the list with selectlib.quickselect and slice the first K elements.
1313
1414The benchmark results are then plotted as grouped bar charts (one per N value) in a vertical stack.
15+ Note: The percentages for K are now 0.2%, 1%, 10%, and 25% of N.
1516"""
1617
1718import random
@@ -61,15 +62,15 @@ def run_benchmarks():
6162 """
6263 # List sizes to test (varying by a factor of 10)
6364 N_values = [1000 , 10_000 , 100_000 , 1_000_000 ]
64- # Percentages for K (0.1 %, 1%, 10%, and 50 % of N)
65- percentages = [0.001 , 0.01 , 0.1 , 0.5 ]
65+ # Percentages for K (0.2 %, 1%, 10%, and 25 % of N)
66+ percentages = [0.002 , 0.01 , 0.1 , 0.25 ]
6667
6768 overall_results = {} # {N: {"K_values": [...], "results": {method: {K: time, ...}} } }
6869
6970 for N in N_values :
7071 # Compute K values (ensure at least 1)
7172 K_VALUES = [max (1 , int (N * p )) for p in percentages ]
72- print (f"\n Benchmarking for N = { N } (K values: { K_VALUES } )" )
73+ print (f"\n Benchmarking for N = { N :, } (K values: { [ f' { k :, } ' for k in K_VALUES ] } )" )
7374 # Generate a random list of integers
7475 original = [random .randint (0 , 1_000_000 ) for _ in range (N )]
7576
@@ -78,14 +79,14 @@ def run_benchmarks():
7879
7980 # For each K value, run each method 5 times and take the median time
8081 for K in K_VALUES :
81- print (f" K = { K } " )
82+ print (f" K = { K :, } " )
8283 for name , func in methods .items ():
8384 test_callable = lambda : func (original , K )
8485 times = timeit .repeat (stmt = test_callable , repeat = 5 , number = 1 )
8586 med = statistics .median (times )
8687 results [name ][K ] = med
87- times_ms = [f"{ t * 1000 :.3f} " for t in times ]
88- print (f" { name :15} : median = { med * 1000 :.3f} ms (runs: { times_ms } ms)" )
88+ times_ms = [f"{ t * 1000 :, .3f} " for t in times ]
89+ print (f" { name :15} : median = { med * 1000 :, .3f} ms (runs: { times_ms } ms)" )
8990 overall_results [N ] = {"K_values" : K_VALUES , "results" : results }
9091 return overall_results
9192
@@ -123,26 +124,28 @@ def plot_results(overall_results):
123124 results = data ["results" ]
124125 # Create x positions (one per K value)
125126 x_positions = list (range (len (K_VALUES )))
126- # Create x-axis labels as "K (percentage)"
127- x_labels = [f"{ K } ({ (K / N )* 100 :.1f} %)" for K in K_VALUES ]
127+ # Create x-axis labels as "K (percentage)" with comma formatting for K
128+ x_labels = [f"{ K :, } ({ (K / N )* 100 :.1f} %)" for K in K_VALUES ]
128129
129130 for method , timing_dict in results .items ():
130131 # Extract times (convert seconds to milliseconds)
131132 times_ms = [timing_dict [K ]* 1000 for K in K_VALUES ]
132133 # Compute adjusted positions for grouped bars
133134 positions = [x + method_offsets [method ] for x in x_positions ]
134135 bars = ax .bar (positions , times_ms , width = bar_width , label = method , color = method_colors .get (method ))
135- ax .bar_label (bars , fmt = '%.2f' , padding = 3 , fontsize = 8 )
136+ ax .bar_label (bars , fmt = '%.2f' , padding = 1 , fontsize = 8 )
136137
137- ax .set_title (f"N = { N } " )
138+ # Use comma formatting for N in the title
139+ ax .set_title (f"N = { N :,} " )
138140 ax .set_xlabel ("K (percentage of N)" )
139141 ax .set_ylabel ("Median time (ms)" )
140142 ax .set_xticks (x_positions )
141143 ax .set_xticklabels (x_labels )
142144 ax .legend (title = "Method" )
143145 ax .grid (True , linestyle = '--' , alpha = 0.5 )
144146
145- plt .suptitle ("Benchmark: Performance Comparison for Varying K and N" , fontsize = 16 )
147+ # Improved overall title for the charts
148+ plt .suptitle ("Performance Benchmark for N-Smallest Methods" , fontsize = 18 )
146149 plt .tight_layout (rect = [0 , 0.03 , 1 , 0.95 ])
147150 plt .savefig ('plot.png' )
148151 plt .show ()
0 commit comments