grantjenks
diff --git a/benchmark.py b/benchmark.py
Lines changed: 14 additions & 11 deletions
diff --git a/plot.png b/plot.png
153 KB
@@ -12,6 +12,7 @@
   3. Using quickselect: partition the list with selectlib.quickselect and slice the first K elements.
 
 The benchmark results are then plotted as grouped bar charts (one per N value) in a vertical stack.
+Note: The percentages for K are now 0.2%, 1%, 10%, and 25% of N.
 """
 
 import random
@@ -61,15 +62,15 @@ def run_benchmarks():
     """
     # List sizes to test (varying by a factor of 10)
     N_values = [1000, 10_000, 100_000, 1_000_000]
-    # Percentages for K (0.1%, 1%, 10%, and 50% of N)
-    percentages = [0.001, 0.01, 0.1, 0.5]
+    # Percentages for K (0.2%, 1%, 10%, and 25% of N)
+    percentages = [0.002, 0.01, 0.1, 0.25]
 
     overall_results = {}  # {N: {"K_values": [...], "results": {method: {K: time, ...}} } }
 
     for N in N_values:
         # Compute K values (ensure at least 1)
         K_VALUES = [max(1, int(N * p)) for p in percentages]
-        print(f"\nBenchmarking for N = {N} (K values: {K_VALUES})")
+        print(f"\nBenchmarking for N = {N:,} (K values: {[f'{k:,}' for k in K_VALUES]})")
         # Generate a random list of integers
         original = [random.randint(0, 1_000_000) for _ in range(N)]
 
@@ -78,14 +79,14 @@ def run_benchmarks():
 
         # For each K value, run each method 5 times and take the median time
         for K in K_VALUES:
-            print(f"  K = {K}")
+            print(f"  K = {K:,}")
             for name, func in methods.items():
                 test_callable = lambda: func(original, K)
                 times = timeit.repeat(stmt=test_callable, repeat=5, number=1)
                 med = statistics.median(times)
                 results[name][K] = med
-                times_ms = [f"{t*1000:.3f}" for t in times]
-                print(f"    {name:15}: median = {med*1000:.3f} ms  (runs: {times_ms} ms)")
+                times_ms = [f"{t*1000:,.3f}" for t in times]
+                print(f"    {name:15}: median = {med*1000:,.3f} ms  (runs: {times_ms} ms)")
         overall_results[N] = {"K_values": K_VALUES, "results": results}
     return overall_results
 
@@ -123,26 +124,28 @@ def plot_results(overall_results):
         results = data["results"]
         # Create x positions (one per K value)
         x_positions = list(range(len(K_VALUES)))
-        # Create x-axis labels as "K (percentage)"
-        x_labels = [f"{K} ({(K/N)*100:.1f}%)" for K in K_VALUES]
+        # Create x-axis labels as "K (percentage)" with comma formatting for K
+        x_labels = [f"{K:,} ({(K/N)*100:.1f}%)" for K in K_VALUES]
 
         for method, timing_dict in results.items():
             # Extract times (convert seconds to milliseconds)
             times_ms = [timing_dict[K]*1000 for K in K_VALUES]
             # Compute adjusted positions for grouped bars
             positions = [x + method_offsets[method] for x in x_positions]
             bars = ax.bar(positions, times_ms, width=bar_width, label=method, color=method_colors.get(method))
-            ax.bar_label(bars, fmt='%.2f', padding=3, fontsize=8)
+            ax.bar_label(bars, fmt='%.2f', padding=1, fontsize=8)
 
-        ax.set_title(f"N = {N}")
+        # Use comma formatting for N in the title
+        ax.set_title(f"N = {N:,}")
         ax.set_xlabel("K (percentage of N)")
         ax.set_ylabel("Median time (ms)")
         ax.set_xticks(x_positions)
         ax.set_xticklabels(x_labels)
         ax.legend(title="Method")
         ax.grid(True, linestyle='--', alpha=0.5)
 
-    plt.suptitle("Benchmark: Performance Comparison for Varying K and N", fontsize=16)
+    # Improved overall title for the charts
+    plt.suptitle("Performance Benchmark for N-Smallest Methods", fontsize=18)
     plt.tight_layout(rect=[0, 0.03, 1, 0.95])
     plt.savefig('plot.png')
     plt.show()