77
88use ndarray:: Array3 ;
99
10- use crate :: actions:: { action_index_to_action, compute_full_action_mask, policy_size} ;
10+ use crate :: actions:: {
11+ action_index_to_action, compute_full_action_mask, policy_size,
12+ ACTION_MOVE , ACTION_WALL_HORIZONTAL , ACTION_WALL_VERTICAL ,
13+ } ;
1114use crate :: agents:: ActionSelector ;
1215use crate :: game_state:: { apply_action, check_win, create_initial_state} ;
16+ use crate :: grid:: CELL_WALL ;
1317use crate :: grid_helpers:: grid_game_state_to_resnet_input;
1418use crate :: rotation:: {
1519 rotate_action_coords, rotate_goal_rows,
1620 rotate_grid_180, rotate_player_positions,
1721} ;
1822
/// Describe an action as a short human-readable sentence.
///
/// * `_board_size` - accepted for signature compatibility; not read here.
/// * `row`, `col` - coordinates echoed verbatim into the message.
/// * `action_type` - compared against `ACTION_MOVE`,
///   `ACTION_WALL_HORIZONTAL` and `ACTION_WALL_VERTICAL`.
///
/// Returns the phrase for the matching action kind followed by
/// `(row, col)`, or `"Unknown action type N"` when `action_type` matches
/// none of the three constants.
fn format_action(_board_size: i32, row: i32, col: i32, action_type: i32) -> String {
    // Only the leading phrase differs between the known action kinds, so
    // select it first and append the shared coordinate suffix once.
    let phrase = match action_type {
        ACTION_MOVE => "Move to",
        ACTION_WALL_HORIZONTAL => "Place horizontal wall at",
        ACTION_WALL_VERTICAL => "Place vertical wall at",
        // Unrecognised kinds report the raw value and omit the coordinates.
        _ => return format!("Unknown action type {}", action_type),
    };
    format!("{} ({}, {})", phrase, row, col)
}
32+
33+ /// Render the board state as a human-readable string.
34+ ///
35+ /// Shows player positions as `1` and `2`, walls as `|` (vertical) and `-`
36+ /// (horizontal), and empty cells as `.`.
37+ ///
38+ /// The board is always shown in the original (un-rotated) orientation.
39+ pub fn display_board (
40+ grid : & ndarray:: ArrayView2 < i8 > ,
41+ player_positions : & ndarray:: ArrayView2 < i32 > ,
42+ walls_remaining : & ndarray:: ArrayView1 < i32 > ,
43+ board_size : i32 ,
44+ ) -> String {
45+ let mut out = String :: new ( ) ;
46+ let bs = board_size as usize ;
47+
48+ // Column header
49+ out. push_str ( " " ) ;
50+ for c in 0 ..bs {
51+ out. push_str ( & format ! ( " {} " , c) ) ;
52+ }
53+ out. push ( '\n' ) ;
54+
55+ let p0_row = player_positions[ [ 0 , 0 ] ] as usize ;
56+ let p0_col = player_positions[ [ 0 , 1 ] ] as usize ;
57+ let p1_row = player_positions[ [ 1 , 0 ] ] as usize ;
58+ let p1_col = player_positions[ [ 1 , 1 ] ] as usize ;
59+
60+ for row in 0 ..bs {
61+ // --- cell row ---
62+ out. push_str ( & format ! ( "{:>3} " , row) ) ;
63+ for col in 0 ..bs {
64+ // cell content
65+ if row == p0_row && col == p0_col {
66+ out. push ( '1' ) ;
67+ } else if row == p1_row && col == p1_col {
68+ out. push ( '2' ) ;
69+ } else {
70+ out. push ( '.' ) ;
71+ }
72+
73+ // vertical wall to the right
74+ if col < bs - 1 {
75+ // Grid coord of the gap between (row,col) and (row,col+1)
76+ let gr = ( row * 2 + 2 ) as usize ;
77+ let gc = ( col * 2 + 3 ) as usize ;
78+ if grid[ [ gr, gc] ] == CELL_WALL {
79+ out. push_str ( " | " ) ;
80+ } else {
81+ out. push_str ( " " ) ;
82+ }
83+ }
84+ }
85+ // Metadata on the right of first two rows
86+ match row {
87+ 0 => out. push_str ( & format ! ( " P1 walls: {}" , walls_remaining[ 0 ] ) ) ,
88+ 1 => out. push_str ( & format ! ( " P2 walls: {}" , walls_remaining[ 1 ] ) ) ,
89+ _ => { }
90+ }
91+ out. push ( '\n' ) ;
92+
93+ // --- horizontal wall row between this row and the next ---
94+ if row < bs - 1 {
95+ out. push_str ( " " ) ;
96+ for col in 0 ..bs {
97+ // Grid coord of the gap between (row,col) and (row+1,col)
98+ let gr = ( row * 2 + 3 ) as usize ;
99+ let gc = ( col * 2 + 2 ) as usize ;
100+ if grid[ [ gr, gc] ] == CELL_WALL {
101+ out. push ( '-' ) ;
102+ } else {
103+ out. push ( ' ' ) ;
104+ }
105+ if col < bs - 1 {
106+ out. push_str ( " " ) ;
107+ }
108+ }
109+ out. push ( '\n' ) ;
110+ }
111+ }
112+ out
113+ }
114+
19115/// One turn's training data, stored in "current-player-faces-downward" coords.
20116pub struct ReplayBufferItem {
21117 /// ResNet input tensor (5, M, M) — the batch dimension is squeezed out.
@@ -46,12 +142,17 @@ pub struct GameResult {
46142/// Player 0 moves first. When Player 1 is the current player, the board is
47143/// rotated 180° before being passed to `agent_p2` so the network always sees
48144/// "current player moving downward".
145+ ///
146+ /// When `trace` is `true`, each step prints whose turn it is, the action
147+ /// chosen, and the resulting board state in the original (un-rotated)
148+ /// orientation.
49149pub fn play_game (
50150 agent_p1 : & mut dyn ActionSelector ,
51151 agent_p2 : & mut dyn ActionSelector ,
52152 board_size : i32 ,
53153 max_walls : i32 ,
54154 max_steps : i32 ,
155+ trace : bool ,
55156) -> anyhow:: Result < GameResult > {
56157 let ( mut grid, mut player_positions, mut walls_remaining, goal_rows) =
57158 create_initial_state ( board_size, max_walls) ;
@@ -147,6 +248,19 @@ pub fn play_game(
147248 & action_arr. view ( ) ,
148249 ) ;
149250
251+ if trace {
252+ let player_label = if current_player == 0 { "P1" } else { "P2" } ;
253+ println ! (
254+ "--- Step {} | {} ---\n {}" ,
255+ step + 1 ,
256+ player_label,
257+ format_action( board_size, a_row, a_col, a_type) ,
258+ ) ;
259+ print ! ( "{}\n " ,
260+ display_board( & grid. view( ) , & player_positions. view( ) , & walls_remaining. view( ) , board_size) ,
261+ ) ;
262+ }
263+
150264 // Check win
151265 if check_win ( & player_positions. view ( ) , & goal_rows. view ( ) , current_player) {
152266 winner = Some ( current_player) ;
@@ -209,7 +323,7 @@ mod tests {
209323 fn test_play_game_completes ( ) {
210324 let mut p1 = FirstValidAgent ;
211325 let mut p2 = FirstValidAgent ;
212- let result = play_game ( & mut p1, & mut p2, 5 , 3 , 200 ) . unwrap ( ) ;
326+ let result = play_game ( & mut p1, & mut p2, 5 , 3 , 200 , false ) . unwrap ( ) ;
213327
214328 // Game should complete within 200 steps on a 5×5 board
215329 assert ! ( result. num_turns > 0 ) ;
@@ -220,7 +334,7 @@ mod tests {
220334 fn test_play_game_alternating_players ( ) {
221335 let mut p1 = FirstValidAgent ;
222336 let mut p2 = FirstValidAgent ;
223- let result = play_game ( & mut p1, & mut p2, 5 , 0 , 200 ) . unwrap ( ) ;
337+ let result = play_game ( & mut p1, & mut p2, 5 , 0 , 200 , false ) . unwrap ( ) ;
224338
225339 // With 0 walls the game should end quickly via moves only
226340 // Players should alternate
@@ -233,7 +347,7 @@ mod tests {
233347 fn test_play_game_winner_values ( ) {
234348 let mut p1 = FirstValidAgent ;
235349 let mut p2 = FirstValidAgent ;
236- let result = play_game ( & mut p1, & mut p2, 5 , 0 , 200 ) . unwrap ( ) ;
350+ let result = play_game ( & mut p1, & mut p2, 5 , 0 , 200 , false ) . unwrap ( ) ;
237351
238352 if let Some ( w) = result. winner {
239353 for item in & result. replay_items {
@@ -251,7 +365,7 @@ mod tests {
251365 let mut p1 = FirstValidAgent ;
252366 let mut p2 = FirstValidAgent ;
253367 // Very short max_steps to force truncation
254- let result = play_game ( & mut p1, & mut p2, 5 , 3 , 2 ) . unwrap ( ) ;
368+ let result = play_game ( & mut p1, & mut p2, 5 , 3 , 2 , false ) . unwrap ( ) ;
255369
256370 if result. winner . is_none ( ) {
257371 for item in & result. replay_items {
@@ -264,7 +378,7 @@ mod tests {
264378 fn test_replay_items_have_correct_shapes ( ) {
265379 let mut p1 = FirstValidAgent ;
266380 let mut p2 = FirstValidAgent ;
267- let result = play_game ( & mut p1, & mut p2, 5 , 3 , 200 ) . unwrap ( ) ;
381+ let result = play_game ( & mut p1, & mut p2, 5 , 3 , 200 , false ) . unwrap ( ) ;
268382
269383 let grid_size = 5 * 2 + 3 ; // 13
270384 let total_actions = policy_size ( 5 ) ;
0 commit comments