Skip to content

Commit 0939355

Browse files
author
Julian Cerruti
committed
feat: add --trace flag for step-by-step game tracing
When enabled, prints each step's player, action taken, and the resulting board state in the original (un-rotated) orientation with row/col headers, player positions (1/2), vertical walls (|), horizontal walls (-), and walls remaining.
1 parent 9b71a9d commit 0939355

File tree

2 files changed

+125
-6
lines changed

2 files changed

+125
-6
lines changed

deep_quoridor/rust/src/bin/selfplay.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ struct Cli {
5151
/// Use "random" for a random agent.
5252
#[arg(long)]
5353
p2: Option<String>,
54+
55+
/// Print a step-by-step trace of each game (whose turn, action, board).
56+
#[arg(long, default_value = "false")]
57+
trace: bool,
5458
}
5559

5660
fn main() -> Result<()> {
@@ -106,6 +110,7 @@ fn main() -> Result<()> {
106110
q.board_size,
107111
q.max_walls,
108112
q.max_steps as i32,
113+
cli.trace,
109114
)?;
110115

111116
// Update stats

deep_quoridor/rust/src/game_runner.rs

Lines changed: 120 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,111 @@
77
88
use ndarray::Array3;
99

10-
use crate::actions::{action_index_to_action, compute_full_action_mask, policy_size};
10+
use crate::actions::{
11+
action_index_to_action, compute_full_action_mask, policy_size,
12+
ACTION_MOVE, ACTION_WALL_HORIZONTAL, ACTION_WALL_VERTICAL,
13+
};
1114
use crate::agents::ActionSelector;
1215
use crate::game_state::{apply_action, check_win, create_initial_state};
16+
use crate::grid::CELL_WALL;
1317
use crate::grid_helpers::grid_game_state_to_resnet_input;
1418
use crate::rotation::{
1519
rotate_action_coords, rotate_goal_rows,
1620
rotate_grid_180, rotate_player_positions,
1721
};
1822

23+
/// Format an action triple as a human-readable string for trace output.
///
/// `action_type` is compared against `ACTION_MOVE`, `ACTION_WALL_HORIZONTAL`
/// and `ACTION_WALL_VERTICAL`; any other value produces an
/// `"Unknown action type N"` message instead of panicking.
///
/// `_board_size` is accepted for signature compatibility but is not used.
fn format_action(_board_size: i32, row: i32, col: i32, action_type: i32) -> String {
    // Pick the verb phrase first so the coordinate formatting lives in one place.
    let verb = match action_type {
        ACTION_MOVE => "Move to",
        ACTION_WALL_HORIZONTAL => "Place horizontal wall at",
        ACTION_WALL_VERTICAL => "Place vertical wall at",
        other => return format!("Unknown action type {}", other),
    };
    format!("{} ({}, {})", verb, row, col)
}
32+
33+
/// Render the board state as a human-readable string.
34+
///
35+
/// Shows player positions as `1` and `2`, walls as `|` (vertical) and `-`
36+
/// (horizontal), and empty cells as `.`.
37+
///
38+
/// The board is always shown in the original (un-rotated) orientation.
39+
pub fn display_board(
40+
grid: &ndarray::ArrayView2<i8>,
41+
player_positions: &ndarray::ArrayView2<i32>,
42+
walls_remaining: &ndarray::ArrayView1<i32>,
43+
board_size: i32,
44+
) -> String {
45+
let mut out = String::new();
46+
let bs = board_size as usize;
47+
48+
// Column header
49+
out.push_str(" ");
50+
for c in 0..bs {
51+
out.push_str(&format!(" {} ", c));
52+
}
53+
out.push('\n');
54+
55+
let p0_row = player_positions[[0, 0]] as usize;
56+
let p0_col = player_positions[[0, 1]] as usize;
57+
let p1_row = player_positions[[1, 0]] as usize;
58+
let p1_col = player_positions[[1, 1]] as usize;
59+
60+
for row in 0..bs {
61+
// --- cell row ---
62+
out.push_str(&format!("{:>3} ", row));
63+
for col in 0..bs {
64+
// cell content
65+
if row == p0_row && col == p0_col {
66+
out.push('1');
67+
} else if row == p1_row && col == p1_col {
68+
out.push('2');
69+
} else {
70+
out.push('.');
71+
}
72+
73+
// vertical wall to the right
74+
if col < bs - 1 {
75+
// Grid coord of the gap between (row,col) and (row,col+1)
76+
let gr = (row * 2 + 2) as usize;
77+
let gc = (col * 2 + 3) as usize;
78+
if grid[[gr, gc]] == CELL_WALL {
79+
out.push_str(" | ");
80+
} else {
81+
out.push_str(" ");
82+
}
83+
}
84+
}
85+
// Metadata on the right of first two rows
86+
match row {
87+
0 => out.push_str(&format!(" P1 walls: {}", walls_remaining[0])),
88+
1 => out.push_str(&format!(" P2 walls: {}", walls_remaining[1])),
89+
_ => {}
90+
}
91+
out.push('\n');
92+
93+
// --- horizontal wall row between this row and the next ---
94+
if row < bs - 1 {
95+
out.push_str(" ");
96+
for col in 0..bs {
97+
// Grid coord of the gap between (row,col) and (row+1,col)
98+
let gr = (row * 2 + 3) as usize;
99+
let gc = (col * 2 + 2) as usize;
100+
if grid[[gr, gc]] == CELL_WALL {
101+
out.push('-');
102+
} else {
103+
out.push(' ');
104+
}
105+
if col < bs - 1 {
106+
out.push_str(" ");
107+
}
108+
}
109+
out.push('\n');
110+
}
111+
}
112+
out
113+
}
114+
19115
/// One turn's training data, stored in "current-player-faces-downward" coords.
20116
pub struct ReplayBufferItem {
21117
/// ResNet input tensor (5, M, M) — the batch dimension is squeezed out.
@@ -46,12 +142,17 @@ pub struct GameResult {
46142
/// Player 0 moves first. When Player 1 is the current player, the board is
47143
/// rotated 180° before being passed to `agent_p2` so the network always sees
48144
/// "current player moving downward".
145+
///
146+
/// When `trace` is `true`, each step prints whose turn it is, the action
147+
/// chosen, and the resulting board state in the original (un-rotated)
148+
/// orientation.
49149
pub fn play_game(
50150
agent_p1: &mut dyn ActionSelector,
51151
agent_p2: &mut dyn ActionSelector,
52152
board_size: i32,
53153
max_walls: i32,
54154
max_steps: i32,
155+
trace: bool,
55156
) -> anyhow::Result<GameResult> {
56157
let (mut grid, mut player_positions, mut walls_remaining, goal_rows) =
57158
create_initial_state(board_size, max_walls);
@@ -147,6 +248,19 @@ pub fn play_game(
147248
&action_arr.view(),
148249
);
149250

251+
if trace {
252+
let player_label = if current_player == 0 { "P1" } else { "P2" };
253+
println!(
254+
"--- Step {} | {} ---\n{}",
255+
step + 1,
256+
player_label,
257+
format_action(board_size, a_row, a_col, a_type),
258+
);
259+
print!("{}\n",
260+
display_board(&grid.view(), &player_positions.view(), &walls_remaining.view(), board_size),
261+
);
262+
}
263+
150264
// Check win
151265
if check_win(&player_positions.view(), &goal_rows.view(), current_player) {
152266
winner = Some(current_player);
@@ -209,7 +323,7 @@ mod tests {
209323
fn test_play_game_completes() {
210324
let mut p1 = FirstValidAgent;
211325
let mut p2 = FirstValidAgent;
212-
let result = play_game(&mut p1, &mut p2, 5, 3, 200).unwrap();
326+
let result = play_game(&mut p1, &mut p2, 5, 3, 200, false).unwrap();
213327

214328
// Game should complete within 200 steps on a 5×5 board
215329
assert!(result.num_turns > 0);
@@ -220,7 +334,7 @@ mod tests {
220334
fn test_play_game_alternating_players() {
221335
let mut p1 = FirstValidAgent;
222336
let mut p2 = FirstValidAgent;
223-
let result = play_game(&mut p1, &mut p2, 5, 0, 200).unwrap();
337+
let result = play_game(&mut p1, &mut p2, 5, 0, 200, false).unwrap();
224338

225339
// With 0 walls the game should end quickly via moves only
226340
// Players should alternate
@@ -233,7 +347,7 @@ mod tests {
233347
fn test_play_game_winner_values() {
234348
let mut p1 = FirstValidAgent;
235349
let mut p2 = FirstValidAgent;
236-
let result = play_game(&mut p1, &mut p2, 5, 0, 200).unwrap();
350+
let result = play_game(&mut p1, &mut p2, 5, 0, 200, false).unwrap();
237351

238352
if let Some(w) = result.winner {
239353
for item in &result.replay_items {
@@ -251,7 +365,7 @@ mod tests {
251365
let mut p1 = FirstValidAgent;
252366
let mut p2 = FirstValidAgent;
253367
// Very short max_steps to force truncation
254-
let result = play_game(&mut p1, &mut p2, 5, 3, 2).unwrap();
368+
let result = play_game(&mut p1, &mut p2, 5, 3, 2, false).unwrap();
255369

256370
if result.winner.is_none() {
257371
for item in &result.replay_items {
@@ -264,7 +378,7 @@ mod tests {
264378
fn test_replay_items_have_correct_shapes() {
265379
let mut p1 = FirstValidAgent;
266380
let mut p2 = FirstValidAgent;
267-
let result = play_game(&mut p1, &mut p2, 5, 3, 200).unwrap();
381+
let result = play_game(&mut p1, &mut p2, 5, 3, 200, false).unwrap();
268382

269383
let grid_size = 5 * 2 + 3; // 13
270384
let total_actions = policy_size(5);

0 commit comments

Comments
 (0)