Skip to content

Commit 8320dbb

Browse files
authored
Merge pull request #334 from jonbinney/jdb/compact-simple
Make compact minimax an option for simple agent
2 parents bb7547e + b66a46d commit 8320dbb

File tree

6 files changed

+85
-94
lines changed

6 files changed

+85
-94
lines changed

deep_quoridor/rust/src/compact/q_bit_repr.rs

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ pub struct QBitRepr {
3131
num_player_positions: usize,
3232
position_bits: usize,
3333
walls_remaining_bits: usize,
34-
steps_bits: usize,
34+
completed_steps_bits: usize,
3535
total_bits: usize,
3636
total_bytes: usize,
3737

@@ -40,7 +40,7 @@ pub struct QBitRepr {
4040
player_pos_offsets: [usize; 2], // Offset for each player's position
4141
walls_remaining_offsets: [usize; 2],
4242
current_player_offset: usize,
43-
steps_offset: usize,
43+
completed_steps_offset: usize,
4444
}
4545

4646
impl QBitRepr {
@@ -50,7 +50,7 @@ impl QBitRepr {
5050
let num_player_positions = board_size * board_size;
5151
let position_bits = bits_needed(num_player_positions - 1);
5252
let walls_remaining_bits = bits_needed(max_walls);
53-
let steps_bits = bits_needed(max_steps);
53+
let completed_steps_bits = bits_needed(max_steps);
5454

5555
let walls_offset = 0;
5656
let p1_pos_offset = walls_offset + num_wall_positions;
@@ -60,9 +60,9 @@ impl QBitRepr {
6060
let p2_walls_remaining_offset = p1_walls_remaining_offset + walls_remaining_bits;
6161
let walls_remaining_offsets = [p1_walls_remaining_offset, p2_walls_remaining_offset];
6262
let current_player_offset = p2_walls_remaining_offset + walls_remaining_bits;
63-
let steps_offset = current_player_offset + 1;
63+
let completed_steps_offset = current_player_offset + 1;
6464

65-
let total_bits = steps_offset + steps_bits;
65+
let total_bits = completed_steps_offset + completed_steps_bits;
6666
let total_bytes = (total_bits + 7) / 8;
6767

6868
Self {
@@ -73,14 +73,14 @@ impl QBitRepr {
7373
num_player_positions,
7474
position_bits,
7575
walls_remaining_bits,
76-
steps_bits,
76+
completed_steps_bits,
7777
total_bits,
7878
total_bytes,
7979
walls_offset,
8080
player_pos_offsets,
8181
walls_remaining_offsets,
8282
current_player_offset,
83-
steps_offset,
83+
completed_steps_offset,
8484
}
8585
}
8686

@@ -231,13 +231,18 @@ impl QBitRepr {
231231

232232
/// Get the number of completed steps
233233
pub fn get_completed_steps(&self, data: &[u8]) -> usize {
234-
self.get_bits(data, self.steps_offset, self.steps_bits)
234+
self.get_bits(data, self.completed_steps_offset, self.completed_steps_bits)
235235
}
236236

237237
/// Set the number of completed steps
238238
pub fn set_completed_steps(&self, data: &mut [u8], steps: usize) {
239239
debug_assert!(steps <= self.max_steps);
240-
self.set_bits(data, self.steps_offset, self.steps_bits, steps);
240+
self.set_bits(
241+
data,
242+
self.completed_steps_offset,
243+
self.completed_steps_bits,
244+
steps,
245+
);
241246
}
242247

243248
/// Convert a (row, col) position to a flat index
@@ -322,6 +327,7 @@ impl QBitRepr {
322327
format!("Current: P{}", current_player + 1),
323328
format!("P1 walls: {}", p0_walls),
324329
format!("P2 walls: {}", p1_walls),
330+
format!("Max steps: {}", self.max_steps),
325331
];
326332

327333
let mut line_idx = 0;

deep_quoridor/rust/src/compact/q_game_mechanics.rs

Lines changed: 7 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -329,53 +329,6 @@ impl QGameMechanics {
329329
true
330330
}
331331

332-
/// Check if a move from current position to destination is valid
333-
pub fn is_move_valid(
334-
&self,
335-
data: &[u8],
336-
player: usize,
337-
dest_row: usize,
338-
dest_col: usize,
339-
) -> bool {
340-
let board_size = self.repr.board_size();
341-
342-
// Check bounds
343-
if dest_row >= board_size || dest_col >= board_size {
344-
return false;
345-
}
346-
347-
// Get current position
348-
let (curr_row, curr_col) = self.repr.get_player_position(data, player);
349-
350-
// Get opponent position
351-
let opponent = 1 - player;
352-
let (opp_row, opp_col) = self.repr.get_player_position(data, opponent);
353-
354-
// Check if destination is occupied by opponent
355-
if dest_row == opp_row && dest_col == opp_col {
356-
return false;
357-
}
358-
359-
// For now, use simple adjacency check
360-
// A more complete implementation would check walls and jumps
361-
let row_diff = (dest_row as i32 - curr_row as i32).abs();
362-
let col_diff = (dest_col as i32 - curr_col as i32).abs();
363-
364-
// Must move exactly 1 space (or 2 for jump)
365-
if row_diff + col_diff > 2 {
366-
return false;
367-
}
368-
369-
// For complete validation, would need to check:
370-
// 1. No walls blocking the path
371-
// 2. Valid jump over opponent
372-
// 3. Diagonal moves only when jumping
373-
374-
// TODO: Implement full move validation with wall checking
375-
376-
true
377-
}
378-
379332
/// Execute a move action
380333
pub fn execute_move(&self, data: &mut [u8], player: usize, dest_row: usize, dest_col: usize) {
381334
self.repr
@@ -582,8 +535,13 @@ impl QGameMechanics {
582535
}
583536

584537
// Check no wall between opponent and diagonal destination
585-
if self.is_wall_between(data, opp_row, opp_col, diag_row as usize, diag_col as usize)
586-
{
538+
if self.is_wall_between(
539+
data,
540+
opp_row,
541+
opp_col,
542+
diag_row as usize,
543+
diag_col as usize,
544+
) {
587545
continue;
588546
}
589547

deep_quoridor/rust/src/compact/q_minimax.rs

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ fn sample_actions(
133133
// Get valid moves (type 0)
134134
let moves = mechanics.get_valid_moves(data);
135135
let mut actions: Vec<(usize, usize, usize)> =
136-
moves.into_iter().map(|(row, col)| (row, col, 0)).collect();
136+
moves.into_iter().map(|(row, col)| (row, col, 2)).collect();
137137

138138
if actions.len() >= branching_factor {
139139
actions.shuffle(&mut rng);
@@ -150,7 +150,7 @@ fn sample_actions(
150150
// Add walls until we reach branching factor
151151
let num_walls_needed = branching_factor - actions.len();
152152
for (row, col, orientation) in walls.into_iter().take(num_walls_needed) {
153-
actions.push((row, col, orientation + 1)); // Map 0->1, 1->2
153+
actions.push((row, col, orientation));
154154
}
155155

156156
actions
@@ -163,8 +163,8 @@ fn minimax(
163163
data: &[u8],
164164
current_player: usize,
165165
agent_player: usize,
166-
completed_steps: usize,
167-
max_depth: usize,
166+
search_depth: usize,
167+
max_search_depth: usize,
168168
branching_factor: usize,
169169
discount_factor: f32,
170170
heuristic: i32,
@@ -180,19 +180,19 @@ fn minimax(
180180

181181
let opponent = 1 - current_player;
182182

183-
if completed_steps >= mechanics.repr().max_steps() {
183+
if mechanics.repr().get_completed_steps(data) >= mechanics.repr().max_steps() {
184184
return 0.0; // Tie
185185
}
186186

187-
if completed_steps >= max_depth {
187+
if search_depth >= max_search_depth {
188188
return compute_heuristic(mechanics, data, agent_player, heuristic);
189189
}
190190

191191
let actions = sample_actions(mechanics, data, branching_factor);
192192
if actions.is_empty() {
193193
mechanics.print(data);
194+
assert!(false, "No valid actions - should never happen");
194195
}
195-
assert!(!actions.is_empty());
196196

197197
let is_maximizing = current_player == agent_player;
198198
let mut best_value = if is_maximizing {
@@ -208,12 +208,12 @@ fn minimax(
208208
let mut new_data = data.to_vec();
209209

210210
// Apply action
211-
if *action_type == 0 {
211+
if *action_type == 2 {
212212
// Move action
213213
mechanics.execute_move(&mut new_data, current_player, *row, *col);
214214
} else {
215215
// Wall action (type 1 or 2 indicates orientation)
216-
let orientation = *action_type - 1;
216+
let orientation = *action_type;
217217
mechanics.execute_wall_placement(
218218
&mut new_data,
219219
current_player,
@@ -232,8 +232,8 @@ fn minimax(
232232
&new_data,
233233
opponent,
234234
agent_player,
235-
completed_steps + 1,
236-
max_depth,
235+
search_depth + 1,
236+
max_search_depth,
237237
branching_factor,
238238
discount_factor,
239239
heuristic,
@@ -273,7 +273,7 @@ fn minimax(
273273
pub fn evaluate_actions(
274274
mechanics: &QGameMechanics,
275275
data: &[u8],
276-
max_depth: usize,
276+
max_search_depth: usize,
277277
branching_factor: usize,
278278
discount_factor: f32,
279279
heuristic: i32,
@@ -306,10 +306,10 @@ pub fn evaluate_actions(
306306
let mut new_data = data.to_vec();
307307

308308
// Apply action
309-
if *action_type == 0 {
309+
if *action_type == 2 {
310310
mechanics.execute_move(&mut new_data, current_player, *row, *col);
311311
} else {
312-
let orientation = *action_type - 1;
312+
let orientation = *action_type;
313313
mechanics.execute_wall_placement(
314314
&mut new_data,
315315
current_player,
@@ -328,8 +328,8 @@ pub fn evaluate_actions(
328328
&new_data,
329329
1 - current_player,
330330
current_player,
331-
1,
332-
max_depth,
331+
1, // search_depth
332+
max_search_depth,
333333
branching_factor,
334334
discount_factor,
335335
heuristic,

deep_quoridor/rust/src/lib.rs

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -328,37 +328,36 @@ fn q_evaluate_actions<'py>(
328328
walls_remaining: PyReadonlyArray1<i32>,
329329
_goal_rows: PyReadonlyArray1<i32>,
330330
current_player: i32,
331-
max_steps: i32,
331+
completed_steps: i32,
332+
max_search_depth: usize,
332333
branching_factor: usize,
333334
_wall_sigma: f32,
334335
discount_factor: f32,
335336
heuristic: i32,
336337
board_size: usize,
337338
max_walls: usize,
339+
max_steps: usize,
338340
) -> PyResult<(Bound<'py, PyArray2<i32>>, Bound<'py, numpy::PyArray1<f32>>)> {
339-
use compact::q_bit_repr::QBitRepr;
340341
use compact::q_game_mechanics::QGameMechanics;
341342

342-
// Create QBitRepr and QGameMechanics
343-
let repr = QBitRepr::new(board_size, max_walls, max_steps as usize);
344-
let mechanics = QGameMechanics::new(board_size, max_walls, max_steps as usize);
343+
let mechanics = QGameMechanics::new(board_size, max_walls, max_steps);
345344

346345
// Convert game state to QBitRepr format
347-
let mut data = repr.create_data();
348-
repr.from_game_state(
346+
let mut data = mechanics.repr().create_data();
347+
mechanics.repr().from_game_state(
349348
&mut data,
350349
&grid.as_array(),
351350
&player_positions.as_array(),
352351
&walls_remaining.as_array(),
353352
current_player,
354-
0, // completed_steps - always 0 when evaluating actions
353+
completed_steps,
355354
);
356355

357356
// Evaluate actions using QBitRepr minimax
358357
let (actions, values, _logs) = compact::q_minimax::evaluate_actions(
359358
&mechanics,
360359
&data,
361-
max_steps as usize,
360+
max_search_depth,
362361
branching_factor,
363362
discount_factor,
364363
heuristic,

0 commit comments

Comments
 (0)