-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate_runtime.py
More file actions
190 lines (153 loc) · 6.99 KB
/
evaluate_runtime.py
File metadata and controls
190 lines (153 loc) · 6.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import time
import os
import matplotlib.pyplot as plt
import numpy as np
from pycosat_solver import PycosatSolver
from cpsat_solver import CpSatSolver
from backtrack_solver import BacktrackSolver
from generate_puzzle import convert_txt_line_to_board
def evaluate_solvers(solvers, puzzle_dir="puzzles", sizes=None):
"""
Benchmark solvers on puzzles of different sizes.
Args:
solvers: Dictionary of solvers to benchmark {"solver_name": solver_class}
puzzle_dir: Directory containing puzzle files
sizes: List of puzzle sizes to benchmark
num_samples: Number of puzzles to sample for each size
Returns:
Dictionary with benchmark results
"""
if sizes is None:
# Get all sizes from puzzle files
sizes = []
for filename in os.listdir(puzzle_dir):
if filename.startswith("size_") and filename.endswith(".txt"):
size = int(filename.split("_")[1].split(".")[0])
sizes.append(size)
sizes.sort()
results = {
"sizes": sizes,
}
# Initialize results for each solver
for solver_name in solvers:
results[solver_name] = {size: [] for size in sizes}
for size in sizes:
print(f"Evaluating runtime of puzzles of size {size}x{size}...")
# Load puzzles
puzzle_file = os.path.join(puzzle_dir, f"size_{size}.txt")
if not os.path.exists(puzzle_file):
print(f"No puzzles found for size {size}. Skipping.")
continue
with open(puzzle_file, "r") as f:
puzzle_lines = f.readlines()
# Run solver on puzzle
for i, line in enumerate(puzzle_lines):
board = convert_txt_line_to_board(line)
times = {}
solutions = []
# Compute solve time
for solver_name, solver_class in solvers.items():
solver = solver_class(board)
start_time = time.time()
solution = solver.solve()
solve_time = time.time() - start_time
results[solver_name][size].append(solve_time)
times[solver_name] = solve_time
solutions.append(solution)
# Check all pairs of solutions and make sure they are the same
for j, solution in enumerate(solutions):
for k, other_solution in enumerate(solutions):
if solution != other_solution:
print("\n--- MISMATCH DETECTED ---")
print(f"Puzzle index: {i+1}")
print("Board:")
for row in board:
print(row)
print(f"Solution from {list(solvers.keys())[j]}:")
for row in solution or []:
print(row)
print(f"Solution from {list(solvers.keys())[k]}:")
for row in other_solution or []:
print(row)
print(f"Times: {times}")
assert solution == other_solution
# Get times for each solver and print it out for each puzzle
time_strings = [f"{name}: {time:.4f}s" for name, time in times.items()]
print(f" Puzzle {i+1}/{len(puzzle_lines)}: {', '.join(time_strings)}")
return results
def plot_execution_time_vs_size(results, log_scale=False):
"""Plot average execution time vs puzzle size for all solvers"""
sizes = results["sizes"]
solver_names = [key for key in results.keys() if key != "sizes"]
plt.figure(figsize=(10, 6))
for solver_name in solver_names:
# Calculate avg and stdev of times
avg_times = [np.mean(results[solver_name][size]) for size in sizes]
std_times = [np.std(results[solver_name][size]) for size in sizes]
plt.errorbar(sizes, avg_times, yerr=std_times, fmt="o-", label=solver_name)
plt.xlabel("Puzzle Size (n × n)")
plt.ylabel("Average Execution Time (seconds)")
plt.title("Solver Performance vs Puzzle Size")
plt.legend()
plt.grid(True)
# Use log scale if max time >= 10x min time
all_times = [time for solver in solver_names for size in sizes for time in results[solver][size]]
if log_scale and max(all_times) / max(0.000001, min(all_times)) > 10:
plt.yscale("log")
plt.title("Solver Performance vs Puzzle Size (Log Scale)")
plt.tight_layout()
# Output directory
os.makedirs("figs", exist_ok=True)
plt.savefig("figs/execution_time_vs_size.png" if not log_scale else "figs/execution_time_vs_size_log.png")
plt.close()
def plot_execution_time_percentage_difference(results, solver1, solver2):
"""
Plot runtime percentage difference between two solvers
Args:
results: Dictionary with results from evaluate_solvers
solver1: First solver name
solver2: Second solver name
"""
sizes = results["sizes"]
if solver1 not in results or solver2 not in results:
print(f"Error: One solver missing from results")
return
plt.figure(figsize=(10, 6))
percentage_diffs = []
std_percentage_diffs = []
for size in sizes:
# Calculate percentage differences
perc_diffs = [(results[solver2][size][i] - results[solver1][size][i]) / max(results[solver1][size][i], 1e-6) * 100
for i in range(len(results[solver1][size]))]
percentage_diffs.append(np.mean(perc_diffs))
std_percentage_diffs.append(np.std(perc_diffs))
# Create bar chart
plt.bar(sizes, percentage_diffs, yerr=std_percentage_diffs, alpha=0.7)
# Baseline 0 horizontal line
plt.axhline(y=0, color="r", linestyle="-", alpha=0.3)
plt.xlabel("Puzzle Size (n × n)")
plt.ylabel(f"Time Difference: ({solver2} - {solver1}) / {solver1} (%)")
plt.title(f"Runtime Percentage Difference: {solver2} vs {solver1}")
plt.grid(True, axis="y", alpha=0.3)
plt.tight_layout()
# Output directory
os.makedirs("figs", exist_ok=True)
plt.savefig(f"figs/solver_difference_{solver1}_vs_{solver2}.png")
plt.close()
if __name__ == "__main__":
np.random.seed(42)
solvers = {
"PycosatSolver": PycosatSolver,
"PycosatSolver (with heuristics)": lambda board: PycosatSolver(board, use_heuristic=True),
"CpSatSolver": CpSatSolver,
"CpSatSolver (with heuristics)": lambda board: CpSatSolver(board, use_heuristic=True)
}
results = evaluate_solvers(solvers, sizes=list(range(5,16)))
# Create visualizations
plot_execution_time_vs_size(results, log_scale=False)
plot_execution_time_vs_size(results, log_scale=True)
solver_names = list(solvers.keys())
for i, solver1 in enumerate(solver_names):
for solver2 in solver_names[i+1:]:
plot_execution_time_percentage_difference(results, solver1, solver2)
print("Evaluation done.")