
Commit 74edd3b

fix(cfr): dedupe ActionPicker weights by quantised action index (#265)
`ActionPicker::pick_action` iterated `possible_actions` and read `weights[idx]` for each action independently. When two actions (e.g., two nearby bet sizes) quantise to the same 52-slot index via `ActionIndexMapper`, both contributed the same weight to the cumulative distribution, biasing the sampler toward the collided index in proportion to the collision multiplicity. `explore_all_actions` already dedupes by index before training, so the picker's view was inconsistent with how the regret matcher was updated.

Dedupe by index in both `pick_action` and `pick_best_action`, keeping the first action per index and preserving input order (see the first sketch below). Add a regression test with two colliding bet sizes that verifies the distribution is split 50/50 with dedupe vs. ~67/33 without.

On the CFR hot path, avoid heap allocations:

- Introduce a stack-allocated `DedupedActions` buffer (`[(u8, &AgentAction); 16]`, initialised with a `static` fallback sentinel) that replaces the `Vec<(usize, &AgentAction)>` previously built on every call. No `unsafe`, no `MaybeUninit`.
- `pick_action` uses the buffer for the weighted path so the two passes (total weight, then cumulative sample) don't re-call `action_to_idx`, which makes `ln()` calls for `Bet` variants (second sketch below).
- The no-matcher `pick_action` uses inline reservoir sampling: one pass over `possible_actions`, no buffer (third sketch below).
- `pick_best_action` walks inline with an `ActionBitSet` and tracks the max-weight action; no buffer.

Add `benches/action_picker.rs` micro-benchmarks to measure the four picker paths directly (the full CFR bench has ±15% run-to-run variance that obscures this signal). Measured on this machine:

    pick_action_typical      77.3 ns -> 68.0 ns  (-12.0%)
    pick_action_collisions   87.5 ns -> 79.5 ns  ( -9.1%)
    pick_best_action         64.1 ns -> 60.7 ns  ( -5.3%)
    pick_action_uniform      33.7 ns -> 30.9 ns  ( -8.3%)
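A minimal sketch of the first-wins dedupe described above, under stated assumptions: the commit's actual `DedupedActions` stores `(u8, &AgentAction)` pairs with a `static` fallback sentinel rather than `Option`, and the generic helper name here is illustrative, not the crate's API.

```rust
/// Illustrative first-wins dedupe by quantised index (not the crate's
/// actual `DedupedActions`): keeps the first action per index,
/// preserves input order, and uses fixed-size stack arrays so the hot
/// path never allocates. `Option` stands in for the static sentinel.
const IDX_SLOTS: usize = 52; // mirrors the 52-slot ActionIndexMapper
const MAX_ACTIONS: usize = 16;

fn dedupe_by_idx<'a, A>(
    actions: &'a [A],
    action_to_idx: impl Fn(&A) -> usize,
) -> ([Option<(usize, &'a A)>; MAX_ACTIONS], usize) {
    let mut seen = [false; IDX_SLOTS];
    let mut out = [None; MAX_ACTIONS];
    let mut len = 0;
    for action in actions {
        let idx = action_to_idx(action);
        // First action wins; later collisions are dropped, matching
        // how explore_all_actions dedupes before training.
        if !seen[idx] && len < MAX_ACTIONS {
            seen[idx] = true;
            out[len] = Some((idx, action));
            len += 1;
        }
    }
    (out, len)
}
```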
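Second, a sketch of the two-pass weighted draw over that deduped buffer; the function name and signature are hypothetical, and `gen_range` follows the rand 0.8-style API. Caching the `(idx, action)` pairs during dedupe is what lets both passes skip the `ln()`-bearing `action_to_idx` calls:

```rust
use rand::Rng;

/// Hypothetical two-pass weighted draw: pass 1 totals the weights,
/// pass 2 walks the cumulative distribution until it crosses a
/// uniform sample. The cached indices mean no re-quantisation.
fn weighted_pick<'a, A>(
    deduped: &[(usize, &'a A)],
    weights: &[f32],
    rng: &mut impl Rng,
) -> Option<&'a A> {
    let total: f32 = deduped.iter().map(|&(idx, _)| weights[idx]).sum();
    if total <= 0.0 {
        return None; // caller falls back to the uniform path
    }
    let target = rng.gen_range(0.0..total);
    let mut cumulative = 0.0;
    for &(idx, action) in deduped {
        cumulative += weights[idx];
        if target < cumulative {
            return Some(action);
        }
    }
    deduped.last().map(|&(_, action)| action) // float-rounding guard
}
```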
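Third, the uniform no-matcher path's one-pass draw can be sketched as k = 1 reservoir sampling: element i (0-based) replaces the running choice with probability 1/(i + 1), which yields a uniform pick over n elements with no buffer. Again an illustrative stand-in, not the crate's code:

```rust
use rand::Rng;

/// k = 1 reservoir sampling: a single pass, no intermediate buffer,
/// and each element ends up chosen with probability 1/n.
fn pick_uniform<'a, T>(items: &'a [T], rng: &mut impl Rng) -> Option<&'a T> {
    let mut chosen = None;
    for (i, item) in items.iter().enumerate() {
        // Replace the running choice with probability 1/(i + 1).
        if rng.gen_range(0..=i) == 0 {
            chosen = Some(item);
        }
    }
    chosen
}
```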

3 files changed: 307 additions & 57 deletions


Cargo.toml

Lines changed: 5 additions & 0 deletions
```diff
@@ -135,6 +135,11 @@ required-features = ["arena"]
 name = "sample_one"
 harness = false
 
+[[bench]]
+name = "action_picker"
+harness = false
+required-features = ["arena"]
+
 [[bench]]
 name = "omaha"
 harness = false
```

benches/action_picker.rs

Lines changed: 98 additions & 0 deletions
```rust
use criterion::{Criterion, criterion_group, criterion_main};
use little_sorry::{PcfrPlusRegretMatcher, RegretMinimizer};
use rand::SeedableRng;
use rand::rngs::StdRng;
use rs_poker::arena::GameStateBuilder;
use rs_poker::arena::action::AgentAction;
use rs_poker::arena::cfr::{ActionIndexMapper, ActionIndexMapperConfig, ActionPicker};

fn make_state_and_mapper() -> (rs_poker::arena::GameState, ActionIndexMapper) {
    let gs = GameStateBuilder::new()
        .num_players_with_stack(2, 10_000.0)
        .blinds(100.0, 50.0)
        .build()
        .unwrap();
    let mapper = ActionIndexMapper::new(ActionIndexMapperConfig::new(100.0, 10_000.0));
    (gs, mapper)
}

fn make_trained_matcher(
    mapper: &ActionIndexMapper,
    gs: &rs_poker::arena::GameState,
) -> PcfrPlusRegretMatcher {
    let mut m = PcfrPlusRegretMatcher::new(52);
    let mut rewards = vec![0.0f32; 52];
    rewards[0] = 10.0;
    rewards[1] = 30.0;
    rewards[mapper.action_to_idx(&AgentAction::Bet(300.0), gs)] = 20.0;
    rewards[mapper.action_to_idx(&AgentAction::Bet(600.0), gs)] = 15.0;
    rewards[mapper.action_to_idx(&AgentAction::Bet(1200.0), gs)] = 5.0;
    rewards[51] = 2.0;
    for _ in 0..16 {
        m.update_regret(&rewards);
    }
    m
}

fn bench_pick_action(c: &mut Criterion) {
    let (gs, mapper) = make_state_and_mapper();
    let matcher = make_trained_matcher(&mapper, &gs);

    // Typical CFR action set: fold, call, several bets, all-in
    let actions = vec![
        AgentAction::Fold,
        AgentAction::Bet(100.0), // call
        AgentAction::Bet(300.0),
        AgentAction::Bet(600.0),
        AgentAction::Bet(1200.0),
        AgentAction::AllIn,
    ];

    c.bench_function("pick_action_typical", |b| {
        let mut rng = StdRng::seed_from_u64(42);
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &actions, Some(&matcher), &gs);
            std::hint::black_box(picker.pick_action(&mut rng))
        })
    });

    // Collision-heavy set: many bet sizes that will quantise together
    let collision_actions = vec![
        AgentAction::Fold,
        AgentAction::Bet(100.0),
        AgentAction::Bet(200.0),
        AgentAction::Bet(205.0),
        AgentAction::Bet(210.0),
        AgentAction::Bet(500.0),
        AgentAction::Bet(510.0),
        AgentAction::Bet(520.0),
        AgentAction::AllIn,
    ];

    c.bench_function("pick_action_collisions", |b| {
        let mut rng = StdRng::seed_from_u64(42);
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &collision_actions, Some(&matcher), &gs);
            std::hint::black_box(picker.pick_action(&mut rng))
        })
    });

    c.bench_function("pick_best_action_typical", |b| {
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &actions, Some(&matcher), &gs);
            std::hint::black_box(picker.pick_best_action())
        })
    });

    // No regret matcher (uniform random path)
    c.bench_function("pick_action_uniform", |b| {
        let mut rng = StdRng::seed_from_u64(42);
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &actions, None, &gs);
            std::hint::black_box(picker.pick_action(&mut rng))
        })
    });
}

criterion_group!(benches, bench_pick_action);
criterion_main!(benches);
```
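To reproduce the numbers above locally, the bench should be runnable with `cargo bench --bench action_picker --features arena`, the feature flag following the `required-features = ["arena"]` stanza added to Cargo.toml above.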
