Merge pull request #334 from jonbinney/jdb/compact-simple

jonbinney · web-flow · commit 8320dbbf04f3 · 2026-02-10T11:38:31.000-05:00
Make compact minimax an option for simple agent
diff --git a/deep_quoridor/rust/src/compact/q_bit_repr.rs b/deep_quoridor/rust/src/compact/q_bit_repr.rs
@@ -31,7 +31,7 @@ pub struct QBitRepr {
     num_player_positions: usize,
     position_bits: usize,
     walls_remaining_bits: usize,
-    steps_bits: usize,
+    completed_steps_bits: usize,
     total_bits: usize,
     total_bytes: usize,
 
@@ -40,7 +40,7 @@ pub struct QBitRepr {
     player_pos_offsets: [usize; 2], // Offset for each player's position
     walls_remaining_offsets: [usize; 2],
     current_player_offset: usize,
-    steps_offset: usize,
+    completed_steps_offset: usize,
 }
 
 impl QBitRepr {
@@ -50,7 +50,7 @@ impl QBitRepr {
         let num_player_positions = board_size * board_size;
         let position_bits = bits_needed(num_player_positions - 1);
         let walls_remaining_bits = bits_needed(max_walls);
-        let steps_bits = bits_needed(max_steps);
+        let completed_steps_bits = bits_needed(max_steps);
 
         let walls_offset = 0;
         let p1_pos_offset = walls_offset + num_wall_positions;
@@ -60,9 +60,9 @@ impl QBitRepr {
         let p2_walls_remaining_offset = p1_walls_remaining_offset + walls_remaining_bits;
         let walls_remaining_offsets = [p1_walls_remaining_offset, p2_walls_remaining_offset];
         let current_player_offset = p2_walls_remaining_offset + walls_remaining_bits;
-        let steps_offset = current_player_offset + 1;
+        let completed_steps_offset = current_player_offset + 1;
 
-        let total_bits = steps_offset + steps_bits;
+        let total_bits = completed_steps_offset + completed_steps_bits;
         let total_bytes = (total_bits + 7) / 8;
 
         Self {
@@ -73,14 +73,14 @@ impl QBitRepr {
             num_player_positions,
             position_bits,
             walls_remaining_bits,
-            steps_bits,
+            completed_steps_bits,
             total_bits,
             total_bytes,
             walls_offset,
             player_pos_offsets,
             walls_remaining_offsets,
             current_player_offset,
-            steps_offset,
+            completed_steps_offset,
         }
     }
 
@@ -231,13 +231,18 @@ impl QBitRepr {
 
     /// Get the number of completed steps
     pub fn get_completed_steps(&self, data: &[u8]) -> usize {
-        self.get_bits(data, self.steps_offset, self.steps_bits)
+        self.get_bits(data, self.completed_steps_offset, self.completed_steps_bits)
     }
 
     /// Set the number of completed steps
     pub fn set_completed_steps(&self, data: &mut [u8], steps: usize) {
         debug_assert!(steps <= self.max_steps);
-        self.set_bits(data, self.steps_offset, self.steps_bits, steps);
+        self.set_bits(
+            data,
+            self.completed_steps_offset,
+            self.completed_steps_bits,
+            steps,
+        );
     }
 
     /// Convert a (row, col) position to a flat index
@@ -322,6 +327,7 @@ impl QBitRepr {
             format!("Current: P{}", current_player + 1),
             format!("P1 walls: {}", p0_walls),
             format!("P2 walls: {}", p1_walls),
+            format!("Max steps: {}", self.max_steps),
         ];
 
         let mut line_idx = 0;
diff --git a/deep_quoridor/rust/src/compact/q_game_mechanics.rs b/deep_quoridor/rust/src/compact/q_game_mechanics.rs
@@ -329,53 +329,6 @@ impl QGameMechanics {
         true
     }
 
-    /// Check if a move from current position to destination is valid
-    pub fn is_move_valid(
-        &self,
-        data: &[u8],
-        player: usize,
-        dest_row: usize,
-        dest_col: usize,
-    ) -> bool {
-        let board_size = self.repr.board_size();
-
-        // Check bounds
-        if dest_row >= board_size || dest_col >= board_size {
-            return false;
-        }
-
-        // Get current position
-        let (curr_row, curr_col) = self.repr.get_player_position(data, player);
-
-        // Get opponent position
-        let opponent = 1 - player;
-        let (opp_row, opp_col) = self.repr.get_player_position(data, opponent);
-
-        // Check if destination is occupied by opponent
-        if dest_row == opp_row && dest_col == opp_col {
-            return false;
-        }
-
-        // For now, use simple adjacency check
-        // A more complete implementation would check walls and jumps
-        let row_diff = (dest_row as i32 - curr_row as i32).abs();
-        let col_diff = (dest_col as i32 - curr_col as i32).abs();
-
-        // Must move exactly 1 space (or 2 for jump)
-        if row_diff + col_diff > 2 {
-            return false;
-        }
-
-        // For complete validation, would need to check:
-        // 1. No walls blocking the path
-        // 2. Valid jump over opponent
-        // 3. Diagonal moves only when jumping
-
-        // TODO: Implement full move validation with wall checking
-
-        true
-    }
-
     /// Execute a move action
     pub fn execute_move(&self, data: &mut [u8], player: usize, dest_row: usize, dest_col: usize) {
         self.repr
@@ -582,8 +535,13 @@ impl QGameMechanics {
                 }
 
                 // Check no wall between opponent and diagonal destination
-                if self.is_wall_between(data, opp_row, opp_col, diag_row as usize, diag_col as usize)
-                {
+                if self.is_wall_between(
+                    data,
+                    opp_row,
+                    opp_col,
+                    diag_row as usize,
+                    diag_col as usize,
+                ) {
                     continue;
                 }
 
diff --git a/deep_quoridor/rust/src/compact/q_minimax.rs b/deep_quoridor/rust/src/compact/q_minimax.rs
@@ -133,7 +133,7 @@ fn sample_actions(
     // Get valid moves (type 0)
     let moves = mechanics.get_valid_moves(data);
     let mut actions: Vec<(usize, usize, usize)> =
-        moves.into_iter().map(|(row, col)| (row, col, 0)).collect();
+        moves.into_iter().map(|(row, col)| (row, col, 2)).collect();
 
     if actions.len() >= branching_factor {
         actions.shuffle(&mut rng);
@@ -150,7 +150,7 @@ fn sample_actions(
     // Add walls until we reach branching factor
     let num_walls_needed = branching_factor - actions.len();
     for (row, col, orientation) in walls.into_iter().take(num_walls_needed) {
-        actions.push((row, col, orientation + 1)); // Map 0->1, 1->2
+        actions.push((row, col, orientation));
     }
 
     actions
@@ -163,8 +163,8 @@ fn minimax(
     data: &[u8],
     current_player: usize,
     agent_player: usize,
-    completed_steps: usize,
-    max_depth: usize,
+    search_depth: usize,
+    max_search_depth: usize,
     branching_factor: usize,
     discount_factor: f32,
     heuristic: i32,
@@ -180,19 +180,19 @@ fn minimax(
 
     let opponent = 1 - current_player;
 
-    if completed_steps >= mechanics.repr().max_steps() {
+    if mechanics.repr().get_completed_steps(data) >= mechanics.repr().max_steps() {
         return 0.0; // Tie
     }
 
-    if completed_steps >= max_depth {
+    if search_depth >= max_search_depth {
         return compute_heuristic(mechanics, data, agent_player, heuristic);
     }
 
     let actions = sample_actions(mechanics, data, branching_factor);
     if actions.is_empty() {
         mechanics.print(data);
+        assert!(false, "No valid actions - should never happen");
     }
-    assert!(!actions.is_empty());
 
     let is_maximizing = current_player == agent_player;
     let mut best_value = if is_maximizing {
@@ -208,12 +208,12 @@ fn minimax(
         let mut new_data = data.to_vec();
 
         // Apply action
-        if *action_type == 0 {
+        if *action_type == 2 {
             // Move action
             mechanics.execute_move(&mut new_data, current_player, *row, *col);
         } else {
             // Wall action (type 1 or 2 indicates orientation)
-            let orientation = *action_type - 1;
+            let orientation = *action_type;
             mechanics.execute_wall_placement(
                 &mut new_data,
                 current_player,
@@ -232,8 +232,8 @@ fn minimax(
             &new_data,
             opponent,
             agent_player,
-            completed_steps + 1,
-            max_depth,
+            search_depth + 1,
+            max_search_depth,
             branching_factor,
             discount_factor,
             heuristic,
@@ -273,7 +273,7 @@ fn minimax(
 pub fn evaluate_actions(
     mechanics: &QGameMechanics,
     data: &[u8],
-    max_depth: usize,
+    max_search_depth: usize,
     branching_factor: usize,
     discount_factor: f32,
     heuristic: i32,
@@ -306,10 +306,10 @@ pub fn evaluate_actions(
             let mut new_data = data.to_vec();
 
             // Apply action
-            if *action_type == 0 {
+            if *action_type == 2 {
                 mechanics.execute_move(&mut new_data, current_player, *row, *col);
             } else {
-                let orientation = *action_type - 1;
+                let orientation = *action_type;
                 mechanics.execute_wall_placement(
                     &mut new_data,
                     current_player,
@@ -328,8 +328,8 @@ pub fn evaluate_actions(
                 &new_data,
                 1 - current_player,
                 current_player,
-                1,
-                max_depth,
+                1, // search_depth
+                max_search_depth,
                 branching_factor,
                 discount_factor,
                 heuristic,
diff --git a/deep_quoridor/rust/src/lib.rs b/deep_quoridor/rust/src/lib.rs
@@ -328,37 +328,36 @@ fn q_evaluate_actions<'py>(
     walls_remaining: PyReadonlyArray1<i32>,
     _goal_rows: PyReadonlyArray1<i32>,
     current_player: i32,
-    max_steps: i32,
+    completed_steps: i32,
+    max_search_depth: usize,
     branching_factor: usize,
     _wall_sigma: f32,
     discount_factor: f32,
     heuristic: i32,
     board_size: usize,
     max_walls: usize,
+    max_steps: usize,
 ) -> PyResult<(Bound<'py, PyArray2<i32>>, Bound<'py, numpy::PyArray1<f32>>)> {
-    use compact::q_bit_repr::QBitRepr;
     use compact::q_game_mechanics::QGameMechanics;
 
-    // Create QBitRepr and QGameMechanics
-    let repr = QBitRepr::new(board_size, max_walls, max_steps as usize);
-    let mechanics = QGameMechanics::new(board_size, max_walls, max_steps as usize);
+    let mechanics = QGameMechanics::new(board_size, max_walls, max_steps);
 
     // Convert game state to QBitRepr format
-    let mut data = repr.create_data();
-    repr.from_game_state(
+    let mut data = mechanics.repr().create_data();
+    mechanics.repr().from_game_state(
         &mut data,
         &grid.as_array(),
         &player_positions.as_array(),
         &walls_remaining.as_array(),
         current_player,
-        0, // completed_steps - always 0 when evaluating actions
+        completed_steps,
     );
 
     // Evaluate actions using QBitRepr minimax
     let (actions, values, _logs) = compact::q_minimax::evaluate_actions(
         &mechanics,
         &data,
-        max_steps as usize,
+        max_search_depth,
         branching_factor,
         discount_factor,
         heuristic,
diff --git a/deep_quoridor/src/agents/simple.py b/deep_quoridor/src/agents/simple.py
diff --git a/deep_quoridor/src/quoridor.py b/deep_quoridor/src/quoridor.py