Skip to content

Commit b649802

Browse files
committed
Add explanatory comments to shogi_compare.py
Documents the three coordinate systems (UCI/USI/KIF), their mappings, Fairy-Stockfish's FEN conventions, engine notation conventions, and KIF special notation (同/打/成).
1 parent b5715a3 commit b649802

1 file changed

Lines changed: 159 additions & 37 deletions

File tree

tests/shogi_compare.py

Lines changed: 159 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,75 @@
11
#!/usr/bin/env python3
2-
"""Compare our Japanese Shogi notation against lishogi.org .kif exports.
2+
"""Compare Fairy-Stockfish Japanese Shogi notation against lishogi.org .kif exports.
33
4-
Verifies that KIF moves parse to legal UCI moves and that our engine's
5-
Japanese notation matches KIF conventions.
4+
This script verifies our engine's Japanese notation output by:
5+
1. Parsing KIF game records into UCI moves
6+
2. Checking each move is legal in the current position
7+
3. Comparing our engine's notation output against the KIF destination
68
7-
Usage:
8-
python3 tests/shogi_compare.py <game.kif> # single game
9-
python3 tests/shogi_compare.py <games.kif> # multi-game
10-
python3 tests/shogi_compare.py <games.kif> --max 5 # first 5 games only
9+
== Coordinate systems ==
10+
11+
Three coordinate systems are involved:
12+
13+
UCI (Universal Chess Interface) — used by Fairy-Stockfish internally
14+
Files: a-i, left to right (a = leftmost)
15+
Ranks: 1-9, bottom to top (1 = bottom)
16+
Example: g7g6 = pawn from g7 to g6
17+
18+
USI (Universal Shogi Interface) — used by lishogi.org / shogiops
19+
Files: 1-9, right to left (1 = rightmost from sente's view)
20+
Ranks: a-i, top to bottom (a = sente's back rank, i = gote's back rank)
21+
Example: 7g7f = pawn from 7g to 7f
22+
Note: In our FEN, sente is at the TOP, so USI rank a maps to UCI rank 9.
23+
24+
KIF (KiFu) — the .kif game record format from lishogi.org
25+
Files: 1-9, left to right (1 = leftmost, same as UCI)
26+
Ranks: 一-九, bottom to top (一 = bottom, same as UCI)
27+
Origin squares in parentheses: (XY) where X=file, Y=rank
28+
Example: 7六歩(77) = pawn from (7,7) to file 7, rank 六
29+
30+
Fairy-Stockfish's FEN convention for shogi:
31+
Sente (black/lowercase) at the TOP of the FEN (ranks 7-9)
32+
Gote (white/uppercase) at the BOTTOM of the FEN (ranks 1-3)
33+
This is OPPOSITE to standard USI where sente is at the bottom.
34+
35+
== Engine notation conventions ==
36+
37+
File (横): The engine counts files from RIGHT (一 = rightmost).
38+
This matches how shogi players count files (from right in standard orientation).
39+
40+
Rank (段): The engine uses DIFFERENT conventions per side:
41+
Sente (Black): rank = UCI rank (counts from bottom of the FEN)
42+
Gote (White): rank = 10 - UCI rank (counts from top of the FEN)
43+
This is because the engine treats the FEN orientation literally:
44+
sente's "forward" is toward rank 1 (bottom), gote's is toward rank 9 (top).
45+
46+
KIF always counts ranks from the BOTTOM (absolute), regardless of side.
47+
Therefore: engine gote rank != KIF rank for gote moves.
48+
The comparison accounts for this with engine_rank_to_kif_rank().
49+
50+
== KIF special notation ==
51+
52+
同 (dou): "same square" — the move lands on the same destination as the
53+
previous move. Always implies a capture. The previous destination must
54+
be tracked via last_dest_sq.
55+
56+
打 (da): "drop" — placing a piece from hand onto the board.
57+
Format: destination + piece + 打, e.g., 6五歩打 = drop pawn at 6五
58+
59+
成 / 不成: "promotes" / "does not promote".
60+
CRITICAL: 成 at the END of piece text = promotion (e.g., 桂成 = knight promotes)
61+
成 at the START = already-promoted piece name (e.g., 成桂 = promoted knight)
62+
UCI marks promotion with '+' suffix: g7g6+ vs g7g6
63+
64+
Game-end tokens (not moves): 投了, 切れ負け, 詰み, 千日手, etc.
65+
These are game results and are skipped, not parsed as moves.
66+
67+
== KIF file mapping ==
68+
69+
KIF files count from LEFT (1=leftmost, 9=rightmost), same as UCI.
70+
This is different from USI where files count from the right.
71+
KIF rank numbers count from the bottom, same as UCI ranks.
72+
Therefore: KIF (file, rank) -> UCI is simply chr(ord('a')+(file-1)) + str(rank).
1173
"""
1274

1375
import re
@@ -17,50 +79,58 @@
1779
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
1880
import pyffish as sf
1981

82+
# Standard shogi starting position in Fairy-Stockfish's FEN convention.
83+
# Sente (lowercase) at top, gote (uppercase) at bottom.
84+
# "b" = sente (black) to move.
2085
SHOGI_FEN = "lnsgkgsnl/1r5b1/ppppppppp/9/9/9/PPPPPPPPP/1B5R1/LNSGKGSNL[-] b - - 0 1"
2186

22-
# KIF piece names -> UCI piece letters
87+
# KIF piece names -> UCI piece letters.
88+
# Includes base pieces and promoted forms (龍=promoted飛, 馬=promoted角, と=promoted歩).
2389
KIF_PIECE_MAP = {
2490
"歩": "P", "香": "L", "桂": "N", "銀": "S", "金": "G",
2591
"角": "B", "飛": "R", "玉": "K", "王": "K",
2692
"龍": "R", "馬": "B", "と": "P",
2793
}
2894

29-
# KIF rank kanji -> number
95+
# KIF rank kanji -> numeric value (一=1 through 九=9).
3096
KIF_RANK_MAP = {
3197
"一": 1, "二": 2, "三": 3, "四": 4, "五": 5,
3298
"六": 6, "七": 7, "八": 8, "九": 9,
3399
}
34100

35-
# Number -> kanji rank (1-indexed)
101+
# Number -> kanji rank for display (index 0 unused, 1="一" through 9="九").
36102
KANJI_RANK = ["", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
37103

38104

39105
def fullwidth_to_int(ch):
40-
"""Convert full-width digit '1'-'9' or half-width '1'-'9' to int."""
106+
"""Convert full-width digit (e.g. '１') or half-width digit ('1') to int."""
41107
if ord(ch) >= 0xFF11 and ord(ch) <= 0xFF19:
42108
return ord(ch) - 0xFF10
43109
return int(ch)
44110

45111

46112
def kif_file_to_uci(file_num):
47-
"""KIF file 1-9 (left-to-right, same as UCI) -> UCI file letter."""
113+
"""KIF file 1-9 (left-to-right, same as UCI) -> UCI file letter a-i."""
48114
return chr(ord("a") + (int(file_num) - 1))
49115

50116

51117
def kif_rank_to_uci(rank_num):
52-
"""KIF rank -> UCI rank (both count from bottom of the FEN)."""
118+
"""KIF rank 1-9 -> UCI rank number (both count from bottom)."""
53119
return int(rank_num)
54120

55121

56122
def kif_sq_to_uci(file_num, rank_num):
57-
"""KIF square (file, rank) -> UCI square string."""
123+
"""Convert a KIF square (file, rank) to a UCI square string like 'g7'."""
58124
return f"{kif_file_to_uci(file_num)}{kif_rank_to_uci(rank_num)}"
59125

60126

61127
def parse_kif_dest(dest_text):
62128
"""Parse KIF destination text like '5六歩' or '7二角成'.
63-
Returns (file_num, rank_num, piece_char, is_promo) or None.
129+
130+
Extracts the destination coordinates and identifies the piece and
131+
promotion status from the text AFTER the coordinates.
132+
133+
Returns (file_num, rank_num, piece_char, is_promo, is_not_promo) or None.
64134
"""
65135
m = re.match(r"([1-91-9])([一二三四五六七八九])", dest_text)
66136
if not m:
@@ -70,21 +140,30 @@ def parse_kif_dest(dest_text):
70140
if rank_num == 0:
71141
return None
72142

143+
# Everything after the coordinates is the piece name + promotion markers.
144+
# e.g. "歩" (pawn), "角成" (bishop promotes), "成桂" (promoted knight).
73145
rest = dest_text[m.end():]
74146
piece_char = None
75147
for ch in rest:
76148
if ch in KIF_PIECE_MAP:
77149
piece_char = ch
78150
break
79151

152+
# Promotion detection:
153+
# "桂成" = knight promotes -> is_promo = True (成 at END, not START)
154+
# "成桂" = promoted knight -> is_promo = False (成 at START = piece name)
155+
# "歩不成" = pawn does not promote -> is_promo = False
80156
is_promo = rest.endswith("成") and not rest.startswith("成") and "不成" not in rest
81157
is_not_promo = "不成" in rest
82158

83159
return file_num, rank_num, piece_char, is_promo, is_not_promo
84160

85161

86162
def parse_kif_origin(origin_text):
87-
"""Parse KIF origin '(77)' or '(77)'. Returns (file_num, rank_num) or None."""
163+
"""Parse KIF origin square '(77)' or '（77）'.
164+
165+
Returns (file_num, rank_num) or None.
166+
"""
88167
m = re.search(r"[(\uff08](\d)(\d)[)\uff09]", origin_text)
89168
if m:
90169
return int(m.group(1)), int(m.group(2))
@@ -95,16 +174,30 @@ def parse_kif_origin(origin_text):
95174

96175

97176
def kif_move_to_uci(move_text, last_dest_sq=None):
98-
"""Convert KIF move text to UCI move string.
177+
"""Convert a KIF move text to a UCI move string.
178+
179+
Handles three move types:
180+
1. 同 (same-square capture) — uses last_dest_sq as destination
181+
2. 打 (drop) — piece@square format, e.g. P@e5
182+
3. Normal move — from_sq + to_sq, e.g. g7g6, with optional '+' for promotion
99183
100-
Returns (uci_move, is_promo, is_drop) or None on failure.
101-
For 同 (same-square), last_dest_sq must be provided.
184+
Args:
185+
move_text: KIF move string, e.g. "7六歩(77)" or "同 銀(33)"
186+
last_dest_sq: UCI square of the previous move's destination, needed for 同
187+
188+
Returns:
189+
(uci_move, is_promo, is_drop) or (None, False, False) on parse failure
102190
"""
103-
# 同 (same-square capture)
191+
# --- 同 (dou): same-square capture ---
192+
# "同 銀(33)" means "capture on the same square as the previous move,
193+
# using the silver from origin (33)". The destination is last_dest_sq.
104194
if move_text.startswith("同"):
195+
# Strip "同" and whitespace, then strip the origin parentheses
105196
after_dou = re.sub(r"^同[\s ]*", "", move_text)
106197
after_dou = re.sub(r"[\((]\d+[\))]", "", after_dou).strip()
107198

199+
# Extract the base piece name (skip 成 which is a promotion marker or
200+
# part of promoted piece names like 成桂)
108201
piece_match = re.search(r"([歩香桂銀金角飛玉龍馬と])", after_dou)
109202
if not piece_match:
110203
return None, False, False
@@ -113,11 +206,13 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
113206
and "不成" not in after_dou)
114207
piece = KIF_PIECE_MAP.get(piece_char, "?")
115208

209+
# 同打 = same-square drop (rare but possible)
116210
if "打" in move_text:
117211
if last_dest_sq:
118212
return f"{piece}@{last_dest_sq}", False, True
119213
return None, False, False
120214

215+
# 同 with origin = normal capture on last_dest_sq
121216
if last_dest_sq:
122217
origin = parse_kif_origin(move_text)
123218
if origin:
@@ -128,7 +223,8 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
128223
return uci, is_promo, False
129224
return None, False, False
130225

131-
# Drop: 7七歩打, 5五金打, etc.
226+
# --- Drop: 7七歩打, 5五金打, etc. ---
227+
# Format: destination + piece + 打. E.g., "7七歩打" = drop pawn at 7七
132228
if "打" in move_text:
133229
drop_dest = re.match(r"([^\((]+?)打", move_text)
134230
piece_match = re.search(r"([歩香桂銀金角飛])打", move_text)
@@ -140,13 +236,15 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
140236
return f"{piece}@{uci_dest}", False, True
141237
return None, False, False
142238

143-
# Normal move: 5六歩(57) or 9三桂成(21)
239+
# --- Normal move: 5六歩(57) or 9三桂成(21) ---
240+
# Format: destination + piece + optional promotion + origin in parentheses
144241
origin = parse_kif_origin(move_text)
145242
if not origin:
146243
return None, False, False
147244

148245
from_sq = kif_sq_to_uci(origin[0], origin[1])
149246

247+
# Extract destination text (everything before the opening parenthesis)
150248
dest_text = re.match(r"(.+?)[\((]", move_text)
151249
if not dest_text:
152250
return None, False, False
@@ -166,8 +264,13 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
166264

167265

168266
def parse_kif_file(filepath):
169-
"""Parse .kif file (single or multi-game).
170-
Returns list of games, each a list of (move_num, move_text).
267+
"""Parse a .kif file (single or multi-game format).
268+
269+
lishogi.org exports multi-game files where games are separated by
270+
"開始日時" headers or blank lines. Each move line has format:
271+
" 1 7六歩(77) (00:00/00:00:00)"
272+
273+
Returns a list of games, each a list of (move_num, move_text) tuples.
171274
"""
172275
with open(filepath, "r", encoding="utf-8") as f:
173276
content = f.read()
@@ -193,9 +296,13 @@ def parse_kif_file(filepath):
193296

194297

195298
def extract_dest_from_san(san):
196-
"""Extract destination file number and rank number from engine Japanese SAN.
197-
Returns (file_num, rank_num) where file_num is 1-9 (right-to-left)
198-
and rank_num is the engine's rank number.
299+
"""Extract destination file and rank from engine Japanese SAN output.
300+
301+
The engine outputs notation like "三六歩" (file 三, rank 六, pawn).
302+
This extracts the two kanji characters representing destination coordinates.
303+
304+
Returns (file_num, rank_num) or (None, None) for 同 or unrecognized format.
305+
Note: file_num here is the ENGINE's file (counted from right), not KIF's.
199306
"""
200307
m = re.match(r"([一二三四五六七八九])([一二三四五六七八九])", san)
201308
if m:
@@ -211,10 +318,16 @@ def extract_dest_from_san(san):
211318

212319

213320
def engine_rank_to_kif_rank(engine_rank, is_sente):
214-
"""Convert engine rank number to KIF rank number.
321+
"""Convert engine rank number to KIF rank number for comparison.
322+
323+
The engine and KIF use different rank conventions for gote moves:
215324
216-
Engine convention: sente counts from bottom (same as UCI), gote counts
217-
from top (10 - UCI). KIF always counts from bottom (absolute).
325+
Engine: sente rank = UCI rank (from bottom), gote rank = 10 - UCI rank
326+
KIF: always counts from the bottom (absolute), both sides
327+
328+
Therefore:
329+
sente: engine_rank == KIF_rank (no conversion needed)
330+
gote: engine_rank == 10 - KIF_rank (need to flip)
218331
"""
219332
if is_sente:
220333
return engine_rank
@@ -259,21 +372,25 @@ def main():
259372
skip = 0
260373

261374
for move_num, kif_text in moves:
375+
# Strip origin suffix for display: "7六歩(77)" -> "7六歩"
262376
kif_notation = re.sub(r"[\((]\d+[\))]", "", kif_text).strip()
263377

378+
# Skip game-end tokens (not actual moves)
264379
if kif_text in ("投了", "切れ負け", "詰み", "千日手", "入玉宣言",
265380
"反則勝ち", "反則負け", "中断"):
266381
print(f"{move_num:4d} {kif_notation:20s} {'(end)':10s} SKIP (end)")
267382
skip += 1
268383
continue
269384

385+
# Parse KIF move text to UCI format
270386
uci, is_promo, is_drop = kif_move_to_uci(kif_text, last_dest_sq)
271387

272388
if not uci:
273389
print(f"{move_num:4d} {kif_notation:20s} {'?':10s} SKIP (parse)")
274390
skip += 1
275391
continue
276392

393+
# Verify move is legal in current position
277394
legal = sf.legal_moves("shogi", fen, [])
278395
if uci not in legal:
279396
print(
@@ -282,6 +399,7 @@ def main():
282399
fail += 1
283400
continue
284401

402+
# Get engine's Japanese notation for this move
285403
is_sente = move_num % 2 == 1
286404

287405
try:
@@ -291,6 +409,7 @@ def main():
291409
except Exception as e:
292410
our_san = f"ERROR({e})"
293411

412+
# Compare engine output with KIF destination
294413
engine_file, engine_rank = extract_dest_from_san(our_san)
295414

296415
dest_match = True
@@ -303,11 +422,15 @@ def main():
303422
kif_dest_file = kif_dest[0]
304423
kif_dest_rank = kif_dest[1]
305424

425+
# Engine files count from RIGHT, KIF files count from LEFT.
426+
# For a 9-file board: engine_file + kif_file = 10
427+
# (e.g. engine 三 = 3rd from right = KIF 七 = 7th from left)
306428
engine_as_kif_file = 10 - engine_file
307429
if engine_as_kif_file != kif_dest_file:
308430
file_match = False
309431
dest_match = False
310432

433+
# Engine gote ranks are inverted relative to KIF
311434
kif_expected_rank = engine_rank_to_kif_rank(
312435
engine_rank, is_sente
313436
)
@@ -327,22 +450,21 @@ def main():
327450
status = f"MISMATCH ({'+'.join(reason)})"
328451
fail += 1
329452

330-
origin_str = ""
331-
origin = parse_kif_origin(kif_text)
332-
if origin:
333-
origin_str = f"({origin[0]}{origin[1]})"
334-
335453
print(
336454
f"{move_num:4d} {kif_notation:20s} {uci:10s} "
337455
f"{our_san:20s} {status}"
338456
)
339457

458+
# Track last destination for 同 disambiguation.
459+
# Drops use "piece@square" format, so the square starts at index 2.
460+
# Normal moves have fixed-length format: from_sq(2) + to_sq(2).
340461
if len(uci) >= 4:
341462
if is_drop:
342-
last_dest_sq = uci[2:]
463+
last_dest_sq = uci[2:] # e.g. "P@e5" -> "e5"
343464
else:
344-
last_dest_sq = uci[2:4]
465+
last_dest_sq = uci[2:4] # e.g. "g7g6" -> "g6"
345466

467+
# Advance position for the next move
346468
try:
347469
fen = sf.get_fen("shogi", fen, [uci], False, False)
348470
except Exception:

0 commit comments

Comments
 (0)