|
6 | 6 | import pathlib |
7 | 7 | import random |
8 | 8 | import sys |
| 9 | +import time |
9 | 10 | from typing import Optional |
10 | 11 |
|
11 | 12 | from .api import analyze, from_string, is_valid, solve, to_string |
12 | 13 | from .canonical import canonical_form |
13 | 14 | from .generate import generate |
14 | 15 | from .rating import rate |
| 16 | +from statistics import mean |
15 | 17 |
|
16 | 18 |
|
17 | 19 | def _read_grid_arg(ns: argparse.Namespace) -> str: |
@@ -148,6 +150,97 @@ def cmd_rate_file(ns: argparse.Namespace) -> int: |
148 | 150 | return 0 |
149 | 151 |
|
150 | 152 |
|
def _percentile(xs: list[float], p: float) -> float:
    """Return the *p*-quantile of *xs* using linear interpolation.

    The fractional rank ``(len(xs) - 1) * p`` is located in a sorted copy of
    the samples and the result is interpolated between the two neighbouring
    values.

    Args:
        xs: Samples in any order; the caller's list is not modified.
        p: Quantile expressed as a fraction in ``[0.0, 1.0]``
            (``0.5`` is the median).

    Returns:
        The interpolated value, or ``0.0`` when *xs* is empty.

    Raises:
        ValueError: If *p* is NaN or lies outside ``[0.0, 1.0]``.
    """
    # Written as a negated chained comparison so that NaN is rejected too.
    # Without this guard p > 1 indexes past the end of the list and p < 0
    # silently extrapolates below the smallest sample.
    if not 0.0 <= p <= 1.0:
        raise ValueError(f"p must be within [0.0, 1.0], got {p!r}")
    if not xs:
        return 0.0

    ordered = sorted(xs)
    last = len(ordered) - 1
    rank = last * p
    lower = int(rank)
    if lower >= last:
        # The rank landed on the final sample; nothing to interpolate with.
        return ordered[last]
    upper = lower + 1
    return ordered[lower] + (ordered[upper] - ordered[lower]) * (rank - lower)
| 163 | + |
| 164 | + |
def _difficulty_histogram(
    diffs: list[float], bins: int, lo: float = 0.0, hi: float = 10.0
) -> list[tuple[float, float, int]]:
    """Bucket *diffs* into *bins* equal-width bins spanning ``lo``..``hi``.

    Values below *lo* are counted in the first bin and values at or above
    *hi* in the last one, so every input lands in exactly one bin.

    Returns:
        One ``(bin_lower, bin_upper, count)`` tuple per bin, with the bounds
        rounded to three decimals.
    """
    width = (hi - lo) / bins
    counts = [0] * bins
    for diff in diffs:
        if diff < lo:
            idx = 0
        elif diff >= hi:
            idx = bins - 1
        else:
            # Defensive clamp: keeps the index in range even if float
            # rounding pushes the quotient up to ``bins`` at the top edge.
            idx = min(bins - 1, int((diff - lo) // width))
        counts[idx] += 1
    return [
        (round(lo + i * width, 3), round(lo + (i + 1) * width, 3), count)
        for i, count in enumerate(counts)
    ]


def cmd_stats_file(ns: argparse.Namespace) -> int:
    """Summarize a file of puzzles, one puzzle per line.

    Every non-blank line is stripped of whitespace, parsed with
    ``from_string`` and passed to ``analyze``. Aggregate statistics are
    printed to stdout as compact JSON, and optionally written as indented
    JSON (``ns.json_path``) and as a difficulty histogram CSV
    (``ns.csv_path``, ``ns.bins`` bins over the fixed range 0..10).

    Lines that ``from_string`` rejects are excluded from the statistics;
    their number is reported on stderr so dropped input is not invisible.

    Args:
        ns: Parsed arguments providing ``in_path``, ``json_path``,
            ``csv_path`` and ``bins``.

    Returns:
        ``0`` on success, ``2`` when no line could be parsed as a puzzle.
    """
    inp = pathlib.Path(ns.in_path)
    total = 0
    skipped = 0
    n_valid = n_solvable = n_unique = 0
    givens: list[int] = []
    diffs: list[float] = []
    ms_list: list[float] = []
    t0 = time.perf_counter()
    with inp.open("r", encoding="utf-8") as handle:
        for line in handle:
            # Drop all whitespace, including any inside the puzzle string.
            s = "".join(line.split())
            if not s:
                continue
            try:
                grid = from_string(s)
            except Exception:
                # The exception types raised by from_string are not visible
                # from here, so the handler stays broad; the line is counted
                # rather than silently discarded.
                skipped += 1
                continue
            data = analyze(grid)
            total += 1
            if data["valid"]:
                n_valid += 1
            if data["solvable"]:
                n_solvable += 1
            if data["unique"]:
                n_unique += 1
            givens.append(int(data["givens"]))
            diffs.append(float(data["difficulty"]))
            ms_list.append(float(data["stats"]["ms"]))

    if skipped:
        print(f"skipped {skipped} unparseable line(s)", file=sys.stderr)
    if total == 0:
        # Bail out before mean()/min()/max(), which fail on empty input.
        print("no puzzles read", file=sys.stderr)
        return 2

    elapsed = (time.perf_counter() - t0) * 1000.0
    report = {
        "count": total,
        "valid_pct": round(100.0 * n_valid / total, 2),
        "solvable_pct": round(100.0 * n_solvable / total, 2),
        "unique_pct": round(100.0 * n_unique / total, 2),
        "givens_mean": round(mean(givens), 2),
        "givens_min": min(givens),
        "givens_max": max(givens),
        "difficulty_mean": round(mean(diffs), 3),
        "difficulty_p50": round(_percentile(diffs, 0.50), 3),
        "difficulty_p90": round(_percentile(diffs, 0.90), 3),
        "difficulty_p99": round(_percentile(diffs, 0.99), 3),
        "solve_ms_mean": round(mean(ms_list), 2),
        "elapsed_ms": round(elapsed, 1),
    }
    print(json.dumps(report, separators=(",", ":"), sort_keys=True))

    if ns.json_path:
        pathlib.Path(ns.json_path).write_text(
            json.dumps(report, indent=2, sort_keys=True), encoding="utf-8"
        )

    if ns.csv_path:
        # A bin count below one is treated as a single bin.
        rows = _difficulty_histogram(diffs, max(1, ns.bins))
        with open(ns.csv_path, "w", newline="", encoding="utf-8") as csv_handle:
            writer = csv.writer(csv_handle)
            writer.writerow(["bin_lower", "bin_upper", "count"])
            writer.writerows(rows)
    return 0
| 242 | + |
| 243 | + |
151 | 244 | def cmd_dedupe(ns: argparse.Namespace) -> int: |
152 | 245 | inp = pathlib.Path(ns.in_path) |
153 | 246 | outp = pathlib.Path(ns.out_path) |
@@ -238,6 +331,19 @@ def main(argv: Optional[list[str]] = None) -> int: |
238 | 331 | ) |
239 | 332 | ratef_parser.set_defaults(func=cmd_rate_file) |
240 | 333 |
|
| 334 | + stats_parser = sub.add_parser("stats-file", help="summarize a file of puzzles") |
| 335 | + stats_parser.add_argument( |
| 336 | + "--in", dest="in_path", required=True, help="input text file (81-char per line)" |
| 337 | + ) |
| 338 | + stats_parser.add_argument("--json", dest="json_path", help="write JSON report to file") |
| 339 | + stats_parser.add_argument( |
| 340 | + "--csv", dest="csv_path", help="write difficulty histogram CSV" |
| 341 | + ) |
| 342 | + stats_parser.add_argument( |
| 343 | + "--bins", type=int, default=11, help="histogram bins (default 11 for 0..10)" |
| 344 | + ) |
| 345 | + stats_parser.set_defaults(func=cmd_stats_file) |
| 346 | + |
241 | 347 | gen_parser = sub.add_parser("gen", help="generate a puzzle") |
242 | 348 | gen_parser.add_argument("--seed", type=int, default=None) |
243 | 349 | gen_parser.add_argument("--givens", type=int, default=28, help="target number of clues (approx)") |
|
0 commit comments