Skip to content

Commit 04b4df5

Browse files
Add stats-file CLI command
1 parent 92357fd commit 04b4df5

File tree

3 files changed

+133
-0
lines changed

3 files changed

+133
-0
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,15 @@ sudoku-dlx gen --seed 123 --givens 30 --pretty
6060
# Analyze (valid/solvable/unique/difficulty/stats/canonical)
6161
sudoku-dlx check --grid "<81chars>"
6262
sudoku-dlx check --grid "<81chars>" --json > report.json
63+
64+
# Dataset stats
65+
sudoku-dlx stats-file --in puzzles.txt --json stats.json --csv diff_hist.csv
66+
# prints a compact JSON summary to stdout and writes optional files:
67+
# {
68+
# "count": 1000, "valid_pct": 100.0, "solvable_pct": 100.0, "unique_pct": 100.0,
69+
# "givens_mean": 29.4, "difficulty_mean": 4.2, "difficulty_p90": 6.8, ...
70+
# }
71+
6372
# Advanced generator flags:
6473
sudoku-dlx gen --seed 123 --givens 28 --minimal
6574
sudoku-dlx gen --seed 123 --givens 28 --symmetry rot180

src/sudoku_dlx/cli.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66
import pathlib
77
import random
88
import sys
9+
import time
910
from typing import Optional
1011

1112
from .api import analyze, from_string, is_valid, solve, to_string
1213
from .canonical import canonical_form
1314
from .generate import generate
1415
from .rating import rate
16+
from statistics import mean
1517

1618

1719
def _read_grid_arg(ns: argparse.Namespace) -> str:
@@ -148,6 +150,97 @@ def cmd_rate_file(ns: argparse.Namespace) -> int:
148150
return 0
149151

150152

153+
def _percentile(xs: list[float], p: float) -> float:
    """Return the ``p`` quantile of ``xs`` using linear interpolation.

    Args:
        xs: Sample values in any order. The caller's list is not modified.
        p: Quantile as a fraction, e.g. ``0.9`` for the 90th percentile.
            Values outside ``[0, 1]`` are clamped to that range.

    Returns:
        The interpolated quantile, or ``0.0`` when ``xs`` is empty.
    """
    if not xs:
        return 0.0
    ordered = sorted(xs)
    last = len(ordered) - 1
    # Clamp so an out-of-range p can neither index past the end of the list
    # nor extrapolate below the smallest sample.
    rank = last * min(max(p, 0.0), 1.0)
    lower = int(rank)
    upper = min(lower + 1, last)
    if lower == upper:
        return ordered[lower]
    # Interpolate between the two neighbouring order statistics.
    return ordered[lower] + (ordered[upper] - ordered[lower]) * (rank - lower)
163+
164+
165+
def _difficulty_histogram(
    diffs: list[float], bins: int, lo: float = 0.0, hi: float = 10.0
) -> list[tuple[float, float, int]]:
    """Bucket ``diffs`` into ``bins`` equal-width bins spanning ``lo``..``hi``.

    Values below ``lo`` are counted in the first bin and values at or above
    ``hi`` in the last one. ``bins`` is raised to at least 1.

    Returns:
        One ``(bin_lower, bin_upper, count)`` row per bin, with the edges
        rounded to three decimals.
    """
    bins = max(1, bins)
    width = (hi - lo) / bins
    counts = [0] * bins
    for diff in diffs:
        if diff < lo:
            idx = 0
        elif diff >= hi:
            idx = bins - 1
        else:
            # min() guards against float rounding pushing a value that is
            # just below ``hi`` into a bin that does not exist.
            idx = min(int((diff - lo) // width), bins - 1)
        counts[idx] += 1
    return [
        (round(lo + i * width, 3), round(lo + (i + 1) * width, 3), count)
        for i, count in enumerate(counts)
    ]


def cmd_stats_file(ns: argparse.Namespace) -> int:
    """Summarize a file of puzzles (one per line) for the ``stats-file`` command.

    Each non-blank line is stripped of whitespace, parsed with ``from_string``
    and passed to ``analyze``. Lines that fail to parse are skipped and their
    count is reported on stderr. A compact JSON summary is printed to stdout;
    the same report is written to ``ns.json_path`` and a difficulty histogram
    to ``ns.csv_path`` when those options are set.

    Args:
        ns: Parsed arguments providing ``in_path``, ``json_path``,
            ``csv_path`` and ``bins``.

    Returns:
        ``0`` on success, ``2`` when no puzzle could be read from the input.
    """
    inp = pathlib.Path(ns.in_path)
    total = skipped = 0
    n_valid = n_solvable = n_unique = 0
    givens: list[int] = []
    diffs: list[float] = []
    ms_list: list[float] = []
    t0 = time.perf_counter()
    with inp.open("r", encoding="utf-8") as handle:
        for line in handle:
            # Drop every whitespace character, including any inside the grid.
            s = "".join(line.split())
            if not s:
                continue
            try:
                grid = from_string(s)
            except Exception:
                # Best effort: a malformed line must not abort the whole run,
                # but it is counted so the user learns about it.
                skipped += 1
                continue
            data = analyze(grid)
            total += 1
            if data["valid"]:
                n_valid += 1
            if data["solvable"]:
                n_solvable += 1
            if data["unique"]:
                n_unique += 1
            givens.append(int(data["givens"]))
            diffs.append(float(data["difficulty"]))
            ms_list.append(float(data["stats"]["ms"]))
    if skipped:
        print(f"skipped {skipped} unparseable line(s)", file=sys.stderr)
    if total == 0:
        print("no puzzles read", file=sys.stderr)
        return 2
    elapsed = (time.perf_counter() - t0) * 1000.0
    report = {
        "count": total,
        "valid_pct": round(100.0 * n_valid / total, 2),
        "solvable_pct": round(100.0 * n_solvable / total, 2),
        "unique_pct": round(100.0 * n_unique / total, 2),
        "givens_mean": round(mean(givens), 2),
        "givens_min": min(givens),
        "givens_max": max(givens),
        "difficulty_mean": round(mean(diffs), 3),
        "difficulty_p50": round(_percentile(diffs, 0.50), 3),
        "difficulty_p90": round(_percentile(diffs, 0.90), 3),
        "difficulty_p99": round(_percentile(diffs, 0.99), 3),
        "solve_ms_mean": round(mean(ms_list), 2),
        "elapsed_ms": round(elapsed, 1),
    }
    print(json.dumps(report, separators=(",", ":"), sort_keys=True))
    if ns.json_path:
        pathlib.Path(ns.json_path).write_text(
            json.dumps(report, indent=2, sort_keys=True), encoding="utf-8"
        )
    if ns.csv_path:
        # NOTE(review): the histogram range is fixed at 0..10, so with the
        # default of 11 bins the bin width is 10/11 and the edges do not fall
        # on whole difficulty values - confirm this is intended.
        rows = _difficulty_histogram(diffs, ns.bins)
        with open(ns.csv_path, "w", newline="", encoding="utf-8") as csv_handle:
            writer = csv.writer(csv_handle)
            writer.writerow(["bin_lower", "bin_upper", "count"])
            writer.writerows(rows)
    return 0
242+
243+
151244
def cmd_dedupe(ns: argparse.Namespace) -> int:
152245
inp = pathlib.Path(ns.in_path)
153246
outp = pathlib.Path(ns.out_path)
@@ -238,6 +331,19 @@ def main(argv: Optional[list[str]] = None) -> int:
238331
)
239332
ratef_parser.set_defaults(func=cmd_rate_file)
240333

334+
stats_parser = sub.add_parser("stats-file", help="summarize a file of puzzles")
335+
stats_parser.add_argument(
336+
"--in", dest="in_path", required=True, help="input text file (81-char per line)"
337+
)
338+
stats_parser.add_argument("--json", dest="json_path", help="write JSON report to file")
339+
stats_parser.add_argument(
340+
"--csv", dest="csv_path", help="write difficulty histogram CSV"
341+
)
342+
stats_parser.add_argument(
343+
"--bins", type=int, default=11, help="histogram bins (default 11 for 0..10)"
344+
)
345+
stats_parser.set_defaults(func=cmd_stats_file)
346+
241347
gen_parser = sub.add_parser("gen", help="generate a puzzle")
242348
gen_parser.add_argument("--seed", type=int, default=None)
243349
gen_parser.add_argument("--givens", type=int, default=28, help="target number of clues (approx)")

tests/test_stats_file.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from sudoku_dlx import cli
2+
3+
4+
def test_stats_file_small(tmp_path, capsys):
    """Run ``stats-file`` on a two-puzzle file and check the printed summary."""
    import json  # local import: this test module only imports ``cli``

    # The same 81-character puzzle twice; spaces are only for readability.
    puzzle = (
        "53..7....6..195... .98....6.8...6...3 4..8.3..17...2...6 "
        ".6....28... .419..5....8..79"
    ).replace(" ", "")
    puzzles = tmp_path / "p.txt"
    puzzles.write_text("\n".join([puzzle, puzzle]) + "\n", encoding="utf-8")

    rc = cli.main(["stats-file", "--in", str(puzzles)])

    assert rc == 0
    # The command must print a JSON summary covering both puzzles.
    report = json.loads(capsys.readouterr().out)
    assert report["count"] == 2

0 commit comments

Comments
 (0)