
Commit 74edd3b

fix(cfr): dedupe ActionPicker weights by quantised action index (#265)
`ActionPicker::pick_action` iterated `possible_actions` and read `weights[idx]` for each action independently. When two actions (e.g., two nearby bet sizes) quantise to the same 52-slot index via `ActionIndexMapper`, both contributed the same weight to the cumulative distribution, biasing the sampler toward the collided index in proportion to the collision multiplicity. `explore_all_actions` already dedupes by index before training, so the picker's view was inconsistent with how the regret matcher was updated.

Dedupe by index in both `pick_action` and `pick_best_action`, keeping the first action per index and preserving input order (see the first sketch below). Add a regression test with two colliding bet sizes that verifies the distribution is split 50/50 with dedupe vs. ~67/33 without.

On the CFR hot path, avoid heap allocations:

- Introduce a stack-allocated `DedupedActions` buffer (`[(u8, &AgentAction); 16]`, initialised with a `static` fallback sentinel) that replaces the `Vec<(usize, &AgentAction)>` previously built on every call. No `unsafe`, no `MaybeUninit`.
- `pick_action` uses the buffer for the weighted path so the two passes (total weight, then cumulative sample) don't re-call `action_to_idx`, which makes `ln()` calls for `Bet` variants (second sketch below).
- The no-matcher `pick_action` uses inline reservoir sampling: one pass over `possible_actions`, no buffer (third sketch below).
- `pick_best_action` walks inline with an `ActionBitSet` and tracks the max-weight action; no buffer.

Add `benches/action_picker.rs` micro-benchmarks to measure the four picker paths directly (the full CFR bench has ±15% run-to-run variance that obscures this signal). Measured on this machine:

    pick_action_typical      77.3 ns -> 68.0 ns  (-12.0%)
    pick_action_collisions   87.5 ns -> 79.5 ns  ( -9.1%)
    pick_best_action         64.1 ns -> 60.7 ns  ( -5.3%)
    pick_action_uniform      33.7 ns -> 30.9 ns  ( -8.3%)
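A minimal sketch of the first-wins dedupe described above, under stated assumptions: the commit's actual `DedupedActions` stores `(u8, &AgentAction)` pairs with a `static` fallback sentinel rather than `Option`, and the generic helper name here is illustrative, not the crate's API.

```rust
/// Illustrative first-wins dedupe by quantised index (not the crate's
/// actual `DedupedActions`): keeps the first action per index,
/// preserves input order, and uses fixed-size stack arrays so the hot
/// path never allocates. `Option` stands in for the static sentinel.
const IDX_SLOTS: usize = 52; // mirrors the 52-slot ActionIndexMapper
const MAX_ACTIONS: usize = 16;

fn dedupe_by_idx<'a, A>(
    actions: &'a [A],
    action_to_idx: impl Fn(&A) -> usize,
) -> ([Option<(usize, &'a A)>; MAX_ACTIONS], usize) {
    let mut seen = [false; IDX_SLOTS];
    let mut out = [None; MAX_ACTIONS];
    let mut len = 0;
    for action in actions {
        let idx = action_to_idx(action);
        // First action wins; later collisions are dropped, matching
        // how explore_all_actions dedupes before training.
        if !seen[idx] && len < MAX_ACTIONS {
            seen[idx] = true;
            out[len] = Some((idx, action));
            len += 1;
        }
    }
    (out, len)
}
```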
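Second, a sketch of the two-pass weighted draw over that deduped buffer; the function name and signature are hypothetical, and `gen_range` follows the rand 0.8-style API. Caching the `(idx, action)` pairs during dedupe is what lets both passes skip the `ln()`-bearing `action_to_idx` calls:

```rust
use rand::Rng;

/// Hypothetical two-pass weighted draw: pass 1 totals the weights,
/// pass 2 walks the cumulative distribution until it crosses a
/// uniform sample. The cached indices mean no re-quantisation.
fn weighted_pick<'a, A>(
    deduped: &[(usize, &'a A)],
    weights: &[f32],
    rng: &mut impl Rng,
) -> Option<&'a A> {
    let total: f32 = deduped.iter().map(|&(idx, _)| weights[idx]).sum();
    if total <= 0.0 {
        return None; // caller falls back to the uniform path
    }
    let target = rng.gen_range(0.0..total);
    let mut cumulative = 0.0;
    for &(idx, action) in deduped {
        cumulative += weights[idx];
        if target < cumulative {
            return Some(action);
        }
    }
    deduped.last().map(|&(_, action)| action) // float-rounding guard
}
```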
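Third, the uniform no-matcher path's one-pass draw can be sketched as k = 1 reservoir sampling: element i (0-based) replaces the running choice with probability 1/(i + 1), which yields a uniform pick over n elements with no buffer. Again an illustrative stand-in, not the crate's code:

```rust
use rand::Rng;

/// k = 1 reservoir sampling: a single pass, no intermediate buffer,
/// and each element ends up chosen with probability 1/n.
fn pick_uniform<'a, T>(items: &'a [T], rng: &mut impl Rng) -> Option<&'a T> {
    let mut chosen = None;
    for (i, item) in items.iter().enumerate() {
        // Replace the running choice with probability 1/(i + 1).
        if rng.gen_range(0..=i) == 0 {
            chosen = Some(item);
        }
    }
    chosen
}
```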

3 files changed: 307 additions & 57 deletions


Cargo.toml

Lines changed: 5 additions & 0 deletions
```diff
@@ -135,6 +135,11 @@ required-features = ["arena"]
 name = "sample_one"
 harness = false
 
+[[bench]]
+name = "action_picker"
+harness = false
+required-features = ["arena"]
+
 [[bench]]
 name = "omaha"
 harness = false
```

benches/action_picker.rs

Lines changed: 98 additions & 0 deletions
```rust
use criterion::{Criterion, criterion_group, criterion_main};
use little_sorry::{PcfrPlusRegretMatcher, RegretMinimizer};
use rand::SeedableRng;
use rand::rngs::StdRng;
use rs_poker::arena::GameStateBuilder;
use rs_poker::arena::action::AgentAction;
use rs_poker::arena::cfr::{ActionIndexMapper, ActionIndexMapperConfig, ActionPicker};

fn make_state_and_mapper() -> (rs_poker::arena::GameState, ActionIndexMapper) {
    let gs = GameStateBuilder::new()
        .num_players_with_stack(2, 10_000.0)
        .blinds(100.0, 50.0)
        .build()
        .unwrap();
    let mapper = ActionIndexMapper::new(ActionIndexMapperConfig::new(100.0, 10_000.0));
    (gs, mapper)
}

fn make_trained_matcher(
    mapper: &ActionIndexMapper,
    gs: &rs_poker::arena::GameState,
) -> PcfrPlusRegretMatcher {
    let mut m = PcfrPlusRegretMatcher::new(52);
    let mut rewards = vec![0.0f32; 52];
    rewards[0] = 10.0;
    rewards[1] = 30.0;
    rewards[mapper.action_to_idx(&AgentAction::Bet(300.0), gs)] = 20.0;
    rewards[mapper.action_to_idx(&AgentAction::Bet(600.0), gs)] = 15.0;
    rewards[mapper.action_to_idx(&AgentAction::Bet(1200.0), gs)] = 5.0;
    rewards[51] = 2.0;
    for _ in 0..16 {
        m.update_regret(&rewards);
    }
    m
}

fn bench_pick_action(c: &mut Criterion) {
    let (gs, mapper) = make_state_and_mapper();
    let matcher = make_trained_matcher(&mapper, &gs);

    // Typical CFR action set: fold, call, several bets, all-in
    let actions = vec![
        AgentAction::Fold,
        AgentAction::Bet(100.0), // call
        AgentAction::Bet(300.0),
        AgentAction::Bet(600.0),
        AgentAction::Bet(1200.0),
        AgentAction::AllIn,
    ];

    c.bench_function("pick_action_typical", |b| {
        let mut rng = StdRng::seed_from_u64(42);
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &actions, Some(&matcher), &gs);
            std::hint::black_box(picker.pick_action(&mut rng))
        })
    });

    // Collision-heavy set: many bet sizes that will quantise together
    let collision_actions = vec![
        AgentAction::Fold,
        AgentAction::Bet(100.0),
        AgentAction::Bet(200.0),
        AgentAction::Bet(205.0),
        AgentAction::Bet(210.0),
        AgentAction::Bet(500.0),
        AgentAction::Bet(510.0),
        AgentAction::Bet(520.0),
        AgentAction::AllIn,
    ];

    c.bench_function("pick_action_collisions", |b| {
        let mut rng = StdRng::seed_from_u64(42);
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &collision_actions, Some(&matcher), &gs);
            std::hint::black_box(picker.pick_action(&mut rng))
        })
    });

    c.bench_function("pick_best_action_typical", |b| {
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &actions, Some(&matcher), &gs);
            std::hint::black_box(picker.pick_best_action())
        })
    });

    // No regret matcher (uniform random path)
    c.bench_function("pick_action_uniform", |b| {
        let mut rng = StdRng::seed_from_u64(42);
        b.iter(|| {
            let picker = ActionPicker::new(&mapper, &actions, None, &gs);
            std::hint::black_box(picker.pick_action(&mut rng))
        })
    });
}

criterion_group!(benches, bench_pick_action);
criterion_main!(benches);
```
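To reproduce the numbers above locally, the bench should be runnable with `cargo bench --bench action_picker --features arena`, the feature flag following the `required-features = ["arena"]` stanza added to Cargo.toml above.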
