11#!/usr/bin/env python3
2- """Compare our Japanese Shogi notation against lishogi.org .kif exports.
2+ """Compare Fairy-Stockfish Japanese Shogi notation against lishogi.org .kif exports.
33
4- Verifies that KIF moves parse to legal UCI moves and that our engine's
5- Japanese notation matches KIF conventions.
4+ This script verifies our engine's Japanese notation output by:
5+ 1. Parsing KIF game records into UCI moves
6+ 2. Checking each move is legal in the current position
7+ 3. Comparing our engine's notation output against the KIF destination
68
7- Usage:
8- python3 tests/shogi_compare.py <game.kif> # single game
9- python3 tests/shogi_compare.py <games.kif> # multi-game
10- python3 tests/shogi_compare.py <games.kif> --max 5 # first 5 games only
9+ == Coordinate systems ==
10+
11+ Three coordinate systems are involved:
12+
13+ UCI (Universal Chess Interface) — used by Fairy-Stockfish internally
14+ Files: a-i, left to right (a = leftmost)
15+ Ranks: 1-9, bottom to top (1 = bottom)
16+ Example: g7g6 = pawn from g7 to g6
17+
18+ USI (Universal Shogi Interface) — used by lishogi.org / shogiops
19+ Files: 1-9, right to left (1 = rightmost from sente's view)
20+ Ranks: a-i, top to bottom (a = sente's back rank, i = gote's back rank)
21+ Example: 7g7f = pawn from 7g to 7f
22+ Note: In our FEN, sente is at the TOP, so USI rank a maps to UCI rank 9.
23+
24+ KIF (KiFu) — the .kif game record format from lishogi.org
25+ Files: 1-9, left to right (1 = leftmost, same as UCI)
26+ Ranks: 一-九, bottom to top (一 = bottom, same as UCI)
27+ Origin squares in parentheses: (XY) where X=file, Y=rank
28+ Example: 7六歩(77) = pawn from (7,7) to file 7, rank 六
29+
30+ Fairy-Stockfish's FEN convention for shogi:
31+ Sente (black/lowercase) at the TOP of the FEN (ranks 7-9)
32+ Gote (white/uppercase) at the BOTTOM of the FEN (ranks 1-3)
33+ This is OPPOSITE to standard USI where sente is at the bottom.
34+
35+ == Engine notation conventions ==
36+
37+ File (横): The engine counts files from RIGHT (一 = rightmost).
38+ This matches how shogi players count files (from right in standard orientation).
39+
40+ Rank (段): The engine uses DIFFERENT conventions per side:
41+ Sente (Black): rank = UCI rank (counts from bottom of the FEN)
42+ Gote (White): rank = 10 - UCI rank (counts from top of the FEN)
43+ This is because the engine treats the FEN orientation literally:
44+ sente's "forward" is toward rank 1 (bottom), gote's is toward rank 9 (top).
45+
46+ KIF always counts ranks from the BOTTOM (absolute), regardless of side.
47+ Therefore: engine gote rank != KIF rank for gote moves.
48+ The comparison accounts for this with engine_rank_to_kif_rank().
49+
50+ == KIF special notation ==
51+
52+ 同 (dou): "same square" — the move lands on the same destination as the
53+ previous move. Always implies a capture. The previous destination must
54+ be tracked via last_dest_sq.
55+
56+ 打 (da): "drop" — placing a piece from hand onto the board.
57+ Format: destination + piece + 打, e.g., 6五歩打 = drop pawn at 6五
58+
59+ 成 / 不成: "promotes" / "does not promote".
60+ CRITICAL: 成 at the END of piece text = promotion (e.g., 桂成 = knight promotes)
61+ 成 at the START = already-promoted piece name (e.g., 成桂 = promoted knight)
62+ UCI marks promotion with '+' suffix: g7g6+ vs g7g6
63+
64+ Game-end tokens (not moves): 投了, 切れ負け, 詰み, 千日手, etc.
65+ These are game results and are skipped, not parsed as moves.
66+
67+ == KIF file mapping ==
68+
69+ KIF files count from LEFT (1=leftmost, 9=rightmost), same as UCI.
70+ This is different from USI where files count from the right.
71+ KIF rank numbers count from the bottom, same as UCI ranks.
72+ Therefore: KIF (file, rank) -> UCI is simply chr(ord('a')+(file-1)) + str(rank).
1173"""
1274
1375import re
1779sys .path .insert (0 , os .path .join (os .path .dirname (__file__ ), ".." ))
1880import pyffish as sf
1981
# Standard shogi starting position in Fairy-Stockfish's FEN convention.
# Sente (lowercase) at top, gote (uppercase) at bottom.
# "b" = sente (black) to move.
SHOGI_FEN = "lnsgkgsnl/1r5b1/ppppppppp/9/9/9/PPPPPPPPP/1B5R1/LNSGKGSNL[-] b - - 0 1"

# KIF piece names -> UCI piece letters.
# Includes base pieces and promoted forms (龍 = promoted 飛, 馬 = promoted 角,
# と = promoted 歩); a promoted form maps to the same letter as its base piece.
KIF_PIECE_MAP = {
    "歩": "P", "香": "L", "桂": "N", "銀": "S", "金": "G",
    "角": "B", "飛": "R", "玉": "K", "王": "K",
    "龍": "R", "馬": "B", "と": "P",
}

# KIF rank kanji -> numeric value (一 = 1 through 九 = 9).
KIF_RANK_MAP = {
    "一": 1, "二": 2, "三": 3, "四": 4, "五": 5,
    "六": 6, "七": 7, "八": 8, "九": 9,
}

# Number -> kanji rank for display (index 0 unused, 1 = "一" through 9 = "九").
KANJI_RANK = ["", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
37103
38104
def fullwidth_to_int(ch):
    """Convert a single digit character to int.

    Accepts a full-width digit (U+FF11-U+FF19, as used for KIF file
    numbers) or an ordinary half-width digit.

    Args:
        ch: One-character string holding the digit.

    Returns:
        The integer value of the digit.

    Raises:
        ValueError: If ``ch`` is not something ``int()`` can parse.
        TypeError: If ``ch`` is not a single character (raised by ``ord``).
    """
    code_point = ord(ch)
    # Full-width 1-9 sit right after U+FF10 (full-width zero), so the
    # offset from U+FF10 is the digit's value.
    if 0xFF11 <= code_point <= 0xFF19:
        return code_point - 0xFF10
    # Everything else is delegated to int().
    return int(ch)
44110
45111
def kif_file_to_uci(file_num):
    """KIF file 1-9 (left-to-right, same as UCI) -> UCI file letter a-i.

    Args:
        file_num: File number as an int or a numeric string.

    Returns:
        Single-character file letter, where 1 maps to 'a' and 9 maps to 'i'.
    """
    # Files are 1-based; shift to a 0-based offset from 'a'.
    offset = int(file_num) - 1
    return chr(ord("a") + offset)
49115
50116
def kif_rank_to_uci(rank_num):
    """KIF rank 1-9 -> UCI rank number (both count from bottom).

    The mapping is the identity; the function only normalises the input
    (int or numeric string) to an int.

    Args:
        rank_num: Rank number as an int or a numeric string.

    Returns:
        The rank as an int.
    """
    return int(rank_num)
54120
55121
def kif_sq_to_uci(file_num, rank_num):
    """Convert a KIF square (file, rank) to a UCI square string like 'g7'.

    Args:
        file_num: KIF file number 1-9 (int or numeric string).
        rank_num: KIF rank number 1-9 (int or numeric string).

    Returns:
        Two-character UCI square: file letter followed by rank digit.
    """
    file_letter = kif_file_to_uci(file_num)
    rank_digit = kif_rank_to_uci(rank_num)
    # No separator: the pyffish move strings built from this are e.g. "g7g6".
    return f"{file_letter}{rank_digit}"
59125
60126
61127def parse_kif_dest (dest_text ):
62128 """Parse KIF destination text like '5六歩' or '7二角成'.
63- Returns (file_num, rank_num, piece_char, is_promo) or None.
129+
130+ Extracts the destination coordinates and identifies the piece and
131+ promotion status from the text AFTER the coordinates.
132+
133+ Returns (file_num, rank_num, piece_char, is_promo, is_not_promo) or None.
64134 """
65135 m = re .match (r"([1-91-9])([一二三四五六七八九])" , dest_text )
66136 if not m :
@@ -70,21 +140,30 @@ def parse_kif_dest(dest_text):
70140 if rank_num == 0 :
71141 return None
72142
143+ # Everything after the coordinates is the piece name + promotion markers.
144+ # e.g. "歩" (pawn), "角成" (bishop promotes), "成桂" (promoted knight).
73145 rest = dest_text [m .end ():]
74146 piece_char = None
75147 for ch in rest :
76148 if ch in KIF_PIECE_MAP :
77149 piece_char = ch
78150 break
79151
152+ # Promotion detection:
153+ # "桂成" = knight promotes -> is_promo = True (成 at END, not START)
154+ # "成桂" = promoted knight -> is_promo = False (成 at START = piece name)
155+ # "歩不成" = pawn does not promote -> is_promo = False
80156 is_promo = rest .endswith ("成" ) and not rest .startswith ("成" ) and "不成" not in rest
81157 is_not_promo = "不成" in rest
82158
83159 return file_num , rank_num , piece_char , is_promo , is_not_promo
84160
85161
86162def parse_kif_origin (origin_text ):
87- """Parse KIF origin '(77)' or '(77)'. Returns (file_num, rank_num) or None."""
163+ """Parse KIF origin square '(77)' or '(77)'.
164+
165+ Returns (file_num, rank_num) or None.
166+ """
88167 m = re .search (r"[(\uff08](\d)(\d)[)\uff09]" , origin_text )
89168 if m :
90169 return int (m .group (1 )), int (m .group (2 ))
@@ -95,16 +174,30 @@ def parse_kif_origin(origin_text):
95174
96175
97176def kif_move_to_uci (move_text , last_dest_sq = None ):
98- """Convert KIF move text to UCI move string.
177+ """Convert a KIF move text to a UCI move string.
178+
179+ Handles three move types:
180+ 1. 同 (same-square capture) — uses last_dest_sq as destination
181+ 2. 打 (drop) — piece@square format, e.g. P@e5
182+ 3. Normal move — from_sq + to_sq, e.g. g7g6, with optional '+' for promotion
99183
100- Returns (uci_move, is_promo, is_drop) or None on failure.
101- For 同 (same-square), last_dest_sq must be provided.
184+ Args:
185+ move_text: KIF move string, e.g. "7六歩(77)" or "同 銀(33)"
186+ last_dest_sq: UCI square of the previous move's destination, needed for 同
187+
188+ Returns:
189+ (uci_move, is_promo, is_drop) or (None, False, False) on parse failure
102190 """
103- # 同 (same-square capture)
191+ # --- 同 (dou): same-square capture ---
192+ # "同 銀(33)" means "capture on the same square as the previous move,
193+ # using the silver from origin (33)". The destination is last_dest_sq.
104194 if move_text .startswith ("同" ):
195+ # Strip "同" and whitespace, then strip the origin parentheses
105196 after_dou = re .sub (r"^同[\s ]*" , "" , move_text )
106197 after_dou = re .sub (r"[\((]\d+[\))]" , "" , after_dou ).strip ()
107198
199+ # Extract the base piece name (skip 成 which is a promotion marker or
200+ # part of promoted piece names like 成桂)
108201 piece_match = re .search (r"([歩香桂銀金角飛玉龍馬と])" , after_dou )
109202 if not piece_match :
110203 return None , False , False
@@ -113,11 +206,13 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
113206 and "不成" not in after_dou )
114207 piece = KIF_PIECE_MAP .get (piece_char , "?" )
115208
209+ # 同打 = same-square drop (rare but possible)
116210 if "打" in move_text :
117211 if last_dest_sq :
118212 return f"{ piece } @{ last_dest_sq } " , False , True
119213 return None , False , False
120214
215+ # 同 with origin = normal capture on last_dest_sq
121216 if last_dest_sq :
122217 origin = parse_kif_origin (move_text )
123218 if origin :
@@ -128,7 +223,8 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
128223 return uci , is_promo , False
129224 return None , False , False
130225
131- # Drop: 7七歩打, 5五金打, etc.
226+ # --- Drop: 7七歩打, 5五金打, etc. ---
227+ # Format: destination + piece + 打. E.g., "7七歩打" = drop pawn at 7七
132228 if "打" in move_text :
133229 drop_dest = re .match (r"([^\((]+?)打" , move_text )
134230 piece_match = re .search (r"([歩香桂銀金角飛])打" , move_text )
@@ -140,13 +236,15 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
140236 return f"{ piece } @{ uci_dest } " , False , True
141237 return None , False , False
142238
143- # Normal move: 5六歩(57) or 9三桂成(21)
239+ # --- Normal move: 5六歩(57) or 9三桂成(21) ---
240+ # Format: destination + piece + optional promotion + origin in parentheses
144241 origin = parse_kif_origin (move_text )
145242 if not origin :
146243 return None , False , False
147244
148245 from_sq = kif_sq_to_uci (origin [0 ], origin [1 ])
149246
247+ # Extract destination text (everything before the opening parenthesis)
150248 dest_text = re .match (r"(.+?)[\((]" , move_text )
151249 if not dest_text :
152250 return None , False , False
@@ -166,8 +264,13 @@ def kif_move_to_uci(move_text, last_dest_sq=None):
166264
167265
168266def parse_kif_file (filepath ):
169- """Parse .kif file (single or multi-game).
170- Returns list of games, each a list of (move_num, move_text).
267+ """Parse a .kif file (single or multi-game format).
268+
269+ lishogi.org exports multi-game files where games are separated by
270+ "開始日時" headers or blank lines. Each move line has format:
271+ " 1 7六歩(77) (00:00/00:00:00)"
272+
273+ Returns a list of games, each a list of (move_num, move_text) tuples.
171274 """
172275 with open (filepath , "r" , encoding = "utf-8" ) as f :
173276 content = f .read ()
@@ -193,9 +296,13 @@ def parse_kif_file(filepath):
193296
194297
195298def extract_dest_from_san (san ):
196- """Extract destination file number and rank number from engine Japanese SAN.
197- Returns (file_num, rank_num) where file_num is 1-9 (right-to-left)
198- and rank_num is the engine's rank number.
299+ """Extract destination file and rank from engine Japanese SAN output.
300+
301+ The engine outputs notation like "3六歩" (file 三 rank 六 pawn).
302+ This extracts the two kanji characters representing destination coordinates.
303+
304+ Returns (file_num, rank_num) or (None, None) for 同 or unrecognized format.
305+ Note: file_num here is the ENGINE's file (counted from right), not KIF's.
199306 """
200307 m = re .match (r"([一二三四五六七八九])([一二三四五六七八九])" , san )
201308 if m :
@@ -211,10 +318,16 @@ def extract_dest_from_san(san):
211318
212319
213320def engine_rank_to_kif_rank (engine_rank , is_sente ):
214- """Convert engine rank number to KIF rank number.
321+ """Convert engine rank number to KIF rank number for comparison.
322+
323+ The engine and KIF use different rank conventions for gote moves:
215324
216- Engine convention: sente counts from bottom (same as UCI), gote counts
217- from top (10 - UCI). KIF always counts from bottom (absolute).
325+ Engine: sente rank = UCI rank (from bottom), gote rank = 10 - UCI rank
326+ KIF: always counts from the bottom (absolute), both sides
327+
328+ Therefore:
329+ sente: engine_rank == KIF_rank (no conversion needed)
330+ gote: engine_rank == 10 - KIF_rank (need to flip)
218331 """
219332 if is_sente :
220333 return engine_rank
@@ -259,21 +372,25 @@ def main():
259372 skip = 0
260373
261374 for move_num , kif_text in moves :
375+ # Strip origin suffix for display: "七六歩(77)" -> "七六歩"
262376 kif_notation = re .sub (r"[\((]\d+[\))]" , "" , kif_text ).strip ()
263377
378+ # Skip game-end tokens (not actual moves)
264379 if kif_text in ("投了" , "切れ負け" , "詰み" , "千日手" , "入玉宣言" ,
265380 "反則勝ち" , "反則負け" , "中断" ):
266381 print (f"{ move_num :4d} { kif_notation :20s} { '(end)' :10s} SKIP (end)" )
267382 skip += 1
268383 continue
269384
385+ # Parse KIF move text to UCI format
270386 uci , is_promo , is_drop = kif_move_to_uci (kif_text , last_dest_sq )
271387
272388 if not uci :
273389 print (f"{ move_num :4d} { kif_notation :20s} { '?' :10s} SKIP (parse)" )
274390 skip += 1
275391 continue
276392
393+ # Verify move is legal in current position
277394 legal = sf .legal_moves ("shogi" , fen , [])
278395 if uci not in legal :
279396 print (
@@ -282,6 +399,7 @@ def main():
282399 fail += 1
283400 continue
284401
402+ # Get engine's Japanese notation for this move
285403 is_sente = move_num % 2 == 1
286404
287405 try :
@@ -291,6 +409,7 @@ def main():
291409 except Exception as e :
292410 our_san = f"ERROR({ e } )"
293411
412+ # Compare engine output with KIF destination
294413 engine_file , engine_rank = extract_dest_from_san (our_san )
295414
296415 dest_match = True
@@ -303,11 +422,15 @@ def main():
303422 kif_dest_file = kif_dest [0 ]
304423 kif_dest_rank = kif_dest [1 ]
305424
425+ # Engine files count from RIGHT, KIF files count from LEFT.
426+ # For a 9-file board: engine_file + kif_file = 10
427+ # (e.g. engine 三 = 3rd from right = KIF 七 = 7th from left)
306428 engine_as_kif_file = 10 - engine_file
307429 if engine_as_kif_file != kif_dest_file :
308430 file_match = False
309431 dest_match = False
310432
433+ # Engine gote ranks are inverted relative to KIF
311434 kif_expected_rank = engine_rank_to_kif_rank (
312435 engine_rank , is_sente
313436 )
@@ -327,22 +450,21 @@ def main():
327450 status = f"MISMATCH ({ '+' .join (reason )} )"
328451 fail += 1
329452
330- origin_str = ""
331- origin = parse_kif_origin (kif_text )
332- if origin :
333- origin_str = f"({ origin [0 ]} { origin [1 ]} )"
334-
335453 print (
336454 f"{ move_num :4d} { kif_notation :20s} { uci :10s} "
337455 f"{ our_san :20s} { status } "
338456 )
339457
458+ # Track last destination for 同 disambiguation.
459+ # Drops use "piece@square" format, so the square starts at index 2.
460+ # Normal moves have fixed-length format: from_sq(2) + to_sq(2).
340461 if len (uci ) >= 4 :
341462 if is_drop :
342- last_dest_sq = uci [2 :]
463+ last_dest_sq = uci [2 :] # e.g. "P@e5" -> "e5"
343464 else :
344- last_dest_sq = uci [2 :4 ]
465+ last_dest_sq = uci [2 :4 ] # e.g. "g7g6" -> "g6"
345466
467+ # Advance position for the next move
346468 try :
347469 fen = sf .get_fen ("shogi" , fen , [uci ], False , False )
348470 except Exception :
0 commit comments