Skip to content

Commit 576de11

Browse files
Add CLI dedupe command using canonical form
1 parent 7cd2d37 commit 576de11

File tree

3 files changed

+86
-10
lines changed

3 files changed

+86
-10
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ sudoku-dlx rate --grid "<81chars>"
4646
sudoku-dlx canon --grid "<81chars>" # D4 × bands/stacks × inner row/col × digit relabel
4747
# Produces a stable 81-char string for deduping datasets.
4848

49+
# Dedupe a file of puzzles (one 81-char grid per line) by canonical form (fast)
50+
sudoku-dlx dedupe --in puzzles.txt --out unique.txt
51+
4952
# Generate a unique puzzle (deterministic with seed)
5053
sudoku-dlx gen --seed 123 --givens 30 # ~target clue count (approx)
5154
sudoku-dlx gen --seed 123 --givens 30 --pretty

src/sudoku_dlx/cli.py

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import pathlib
23
import sys
34
from typing import Optional
45

@@ -69,6 +70,32 @@ def cmd_canon(ns: argparse.Namespace) -> int:
6970
return 0
7071

7172

73+
def cmd_dedupe(ns: argparse.Namespace) -> int:
    """Write the distinct canonical forms of the grids in one file to another.

    Reads ``ns.in_path`` line by line, removes all whitespace from each line,
    parses what is left with ``from_string`` and reduces it with
    ``canonical_form``.  Every canonical value is written exactly once to
    ``ns.out_path``, one per line, in order of first appearance.  Missing
    parent directories of the output path are created.

    Blank lines are ignored.  Lines that ``from_string`` rejects are skipped
    instead of aborting the run; when any were skipped, their count is
    reported on stderr next to the unique count.

    Args:
        ns: parsed CLI arguments providing ``in_path`` and ``out_path``.

    Returns:
        Always 0.
    """
    inp = pathlib.Path(ns.in_path)
    outp = pathlib.Path(ns.out_path)
    # A dict keeps insertion order, so it doubles as an ordered set and
    # replaces the separate "seen" set and "uniq" list.
    uniq: dict[str, None] = {}
    skipped = 0
    with inp.open("r", encoding="utf-8") as handle:
        for line in handle:
            # split() with no arguments drops leading, trailing and inner
            # whitespace in a single pass.
            s = "".join(line.split())
            if not s:
                continue
            try:
                grid = from_string(s)
            except Exception:
                # Best-effort by design: one malformed line must not abort a
                # large dedupe run, but it is counted so the user is told.
                skipped += 1
                continue
            uniq.setdefault(canonical_form(grid), None)
    outp.parent.mkdir(parents=True, exist_ok=True)
    with outp.open("w", encoding="utf-8") as handle:
        handle.writelines(value + "\n" for value in uniq)
    print(f"# unique: {len(uniq)}", file=sys.stderr)
    if skipped:
        print(f"# skipped (unparseable): {skipped}", file=sys.stderr)
    return 0
97+
98+
7299
def main(argv: Optional[list[str]] = None) -> int:
73100
parser = argparse.ArgumentParser(
74101
prog="sudoku-dlx",
@@ -88,6 +115,31 @@ def main(argv: Optional[list[str]] = None) -> int:
88115
rate_parser.add_argument("--file", help="path to a file with 9 lines of 9 chars")
89116
rate_parser.set_defaults(func=cmd_rate)
90117

118+
canon_parser = sub.add_parser(
119+
"canon",
120+
help=(
121+
"print canonical 81-char form (D4 × bands/stacks × inner row/col swaps × digit relabel)"
122+
),
123+
)
124+
canon_parser.add_argument("--grid", help="81-char string; 0/./- for blanks")
125+
canon_parser.add_argument("--file", help="path to a file with 9 lines of 9 chars")
126+
canon_parser.set_defaults(func=cmd_canon)
127+
128+
dedupe_parser = sub.add_parser(
129+
"dedupe",
130+
help="dedupe puzzles by canonical form (one 81-char grid per line)",
131+
)
132+
dedupe_parser.add_argument(
133+
"--in", dest="in_path", required=True, help="input text file with one grid per line"
134+
)
135+
dedupe_parser.add_argument(
136+
"--out",
137+
dest="out_path",
138+
required=True,
139+
help="output file path for unique canonical grids",
140+
)
141+
dedupe_parser.set_defaults(func=cmd_dedupe)
142+
91143
gen_parser = sub.add_parser("gen", help="generate a puzzle")
92144
gen_parser.add_argument("--seed", type=int, default=None)
93145
gen_parser.add_argument("--givens", type=int, default=28, help="target number of clues (approx)")
@@ -105,16 +157,6 @@ def main(argv: Optional[list[str]] = None) -> int:
105157
gen_parser.add_argument("--pretty", action="store_true")
106158
gen_parser.set_defaults(func=cmd_gen)
107159

108-
canon_parser = sub.add_parser(
109-
"canon",
110-
help=(
111-
"print canonical 81-char form (D4 × bands/stacks × inner row/col swaps × digit relabel)"
112-
),
113-
)
114-
canon_parser.add_argument("--grid", help="81-char string; 0/./- for blanks")
115-
canon_parser.add_argument("--file", help="path to a file with 9 lines of 9 chars")
116-
canon_parser.set_defaults(func=cmd_canon)
117-
118160
args = parser.parse_args(argv)
119161
if not hasattr(args, "func"):
120162
parser.print_help()

tests/test_dedupe.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os
2+
import tempfile
3+
4+
from sudoku_dlx import cli
5+
6+
# Sample puzzle used by the dedupe test: nine 9-character rows joined into a
# single 81-character string, with "." standing for an empty cell.
S = "".join(
    (
        "53..7....",
        "6..195...",
        ".98....6.",
        "8...6...3",
        "4..8.3..1",
        "7...2...6",
        ".6....28.",
        "...419..5",
        "....8..79",
    )
)
17+
18+
19+
def test_cli_dedupe_makes_unique_file():
    """A grid and its character-reversed copy must collapse to one output line."""
    # Reversing the 81-character string maps cell i to cell 80 - i.
    flipped = S[::-1]
    with tempfile.TemporaryDirectory() as workdir:
        src_path = os.path.join(workdir, "in.txt")
        dst_path = os.path.join(workdir, "out.txt")
        with open(src_path, "w", encoding="utf-8") as fh:
            fh.write(f"{S}\n{flipped}\n")
        exit_code = cli.main(["dedupe", "--in", src_path, "--out", dst_path])
        assert exit_code == 0
        with open(dst_path, "r", encoding="utf-8") as fh:
            kept = [text for text in map(str.strip, fh) if text]
        assert len(kept) == 1

0 commit comments

Comments
 (0)