Merge pull request #204 from shmercer:PR-34/spec-gaps

shmercer · web-flow · commit 931a7f8687c4 · 2026-02-17T18:05:30.000-08:00
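fix gaps: prefer BTL theta for rolling-anchor refresh, implement the all-spokes-stopped check, and log link_probe rows without linking d-optimal utility fields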
diff --git a/R/adaptive_round_candidates.R b/R/adaptive_round_candidates.R
@@ -2,11 +2,31 @@
 # Adaptive stage candidates: anchors, strata, and per-stage generation.
 # -------------------------------------------------------------------------
 
-.adaptive_rank_proxy <- function(state) {
+.adaptive_rank_proxy <- function(state, prefer_btl = FALSE) {
   ids <- as.character(state$trueskill_state$items$item_id)
   mu <- as.double(state$trueskill_state$items$mu)
   names(mu) <- ids
   refit_id <- as.integer(state$refit_meta$last_refit_round_id %||% 0L)
+  if (isTRUE(prefer_btl)) {
+    theta_mean <- tryCatch(
+      .adaptive_btl_fit_theta_mean(state$btl_fit %||% list()),
+      error = function(e) NULL
+    )
+
+    if (is.numeric(theta_mean) && length(theta_mean) > 0L && !is.null(names(theta_mean))) {
+      theta <- as.double(theta_mean)
+      names(theta) <- as.character(names(theta_mean))
+      theta_scores <- as.double(theta[ids])
+      names(theta_scores) <- ids
+      if (length(theta_scores) == length(ids) && all(is.finite(theta_scores))) {
+        return(list(
+          scores = theta_scores,
+          source = "btl_theta_eap",
+          refit_id = refit_id
+        ))
+      }
+    }
+  }
 
   list(
     scores = mu[ids],
@@ -147,7 +167,7 @@
     return(out)
   }
 
-  proxy <- .adaptive_rank_proxy(out)
+  proxy <- .adaptive_rank_proxy(out, prefer_btl = TRUE)
   anchors <- .adaptive_select_rolling_anchors(proxy$scores, defaults)
   out$round$anchor_ids <- as.character(anchors)
   out$round$anchor_refresh_source <- as.character(proxy$source)
diff --git a/R/adaptive_run.R b/R/adaptive_run.R
@@ -281,7 +281,31 @@
 #' @keywords internal
 #' @noRd
 .adaptive_link_all_spokes_stopped <- function(state) {
-  FALSE
+  controller <- .adaptive_controller_resolve(state) # returns TRUE only if every guard below passes
+  if (!.adaptive_link_mode_active(controller)) {
+    return(FALSE) # linking mode is not active
+  }
+  phase_ctx <- .adaptive_link_phase_context(state, controller = controller)
+  if (!identical(as.character(phase_ctx$phase %||% "phase_a"), "phase_b")) {
+    return(FALSE) # a missing phase is treated as "phase_a"; only "phase_b" can qualify
+  }
+  spoke_ids <- as.integer(phase_ctx$active_spokes %||% phase_ctx$ready_spokes %||% integer())
+  spoke_ids <- sort(unique(spoke_ids[!is.na(spoke_ids)]))
+  if (length(spoke_ids) < 1L) {
+    return(FALSE) # no spokes to evaluate
+  }
+  stopped_map <- controller$link_stopped_by_spoke %||% list() # looked up by spoke id as character
+  all_stopped <- all(vapply(as.character(spoke_ids), function(key) isTRUE(stopped_map[[key]]), logical(1L)))
+  if (!isTRUE(all_stopped)) {
+    return(FALSE) # at least one spoke is missing from the map or not flagged TRUE
+  }
+
+  probe_cap <- as.integer(controller$probe_pairs_per_refit_per_spoke %||% 2L)
+  probe_cap <- if (anyNA(probe_cap)) 2L else max(0L, probe_cap) # NA would make the `if` below error; use the 2L default
+  if (probe_cap > 0L) {
+    return(FALSE) # a positive probe cap keeps the result FALSE even when all spokes are stopped
+  }
+  TRUE
 }
 
 #' @keywords internal
diff --git a/R/adaptive_select.R b/R/adaptive_select.R
@@ -1057,6 +1057,7 @@ select_next_pair <- function(state, step_id = NULL, candidates = NULL) {
   link_phase_b <- .adaptive_link_mode_active(controller) && identical(phase_ctx$phase, "phase_b")
   link_phase_b_concurrent <- isTRUE(link_phase_b) &&
     identical(as.character(controller$multi_spoke_mode %||% "independent"), "concurrent")
+  frozen_map <- link_controller$link_transform_frozen_by_spoke %||% list()
   active_link_spoke <- as.integer(NA_integer_)
   ranked_link_spokes <- integer()
   link_progress <- NULL
@@ -1177,6 +1178,7 @@ select_next_pair <- function(state, step_id = NULL, candidates = NULL) {
   local_priority_mode <- NA_character_
   is_explore_step <- FALSE
   selected_link_spoke_attempt <- as.integer(NA_integer_)
+  selected_is_probe_ordering <- FALSE
   selected_round_stage <- as.character(round_stage)
   selected_stage_quota <- as.integer(stage_quota)
   selected_stage_committed_so_far <- as.integer(stage_committed_so_far)
@@ -1258,18 +1260,24 @@ select_next_pair <- function(state, step_id = NULL, candidates = NULL) {
       if (is.null(cand) || nrow(cand) == 0L) {
         next
       }
+      spoke_for_utility <- as.integer(NA_integer_)
+      stage_is_probe_ordering <- FALSE
       if (isTRUE(is_link_mode) && isTRUE(link_phase_b)) {
         spoke_for_utility <- if ("link_spoke_id" %in% names(cand)) {
           as.integer(unique(stats::na.omit(as.integer(cand$link_spoke_id)))[1L] %||% NA_integer_)
         } else {
           as.integer(spoke_attempt %||% NA_integer_)
         }
-        cand <- .adaptive_link_attach_predictive_utility(
-          candidates = cand,
-          state = state,
-          controller = link_controller,
-          spoke_id = as.integer(spoke_for_utility)
-        )
+        stage_is_probe_ordering <- !is.na(spoke_for_utility) &&
+          isTRUE(frozen_map[[as.character(spoke_for_utility)]])
+        if (!isTRUE(stage_is_probe_ordering)) {
+          cand <- .adaptive_link_attach_predictive_utility(
+            candidates = cand,
+            state = state,
+            controller = link_controller,
+            spoke_id = as.integer(spoke_for_utility)
+          )
+        }
       }
 
       explore_rate <- defaults$explore_rate
@@ -1379,9 +1387,9 @@ select_next_pair <- function(state, step_id = NULL, candidates = NULL) {
             na.rm = TRUE
           )
         selected_utility_mode <- .adaptive_selection_utility_mode(
-          run_mode = controller$run_mode,
+          run_mode = if (isTRUE(stage_is_probe_ordering)) "within_set" else controller$run_mode,
           has_regularization = isTRUE(has_regularized_utility),
-          is_cross_set = isTRUE(is_link_mode) && isTRUE(link_phase_b)
+          is_cross_set = isTRUE(is_link_mode) && isTRUE(link_phase_b) && !isTRUE(stage_is_probe_ordering)
         )
         if (isTRUE(is_link_mode) && isTRUE(link_phase_b)) {
           # Linking mode keeps canonical candidate generation/filtering via
@@ -1420,6 +1428,7 @@ select_next_pair <- function(state, step_id = NULL, candidates = NULL) {
       local_priority_mode <- stage_local_priority_mode
       selected_stage <- stage
       selected_link_spoke_attempt <- as.integer(spoke_attempt %||% NA_integer_)
+      selected_is_probe_ordering <- isTRUE(stage_is_probe_ordering)
       selected_round_stage <- as.character(attempt_round_stage)
       selected_stage_quota <- as.integer(attempt_stage_quota)
       selected_stage_committed_so_far <- as.integer(attempt_stage_committed_so_far)
@@ -1545,16 +1554,20 @@ select_next_pair <- function(state, step_id = NULL, candidates = NULL) {
   B_id <- as.character(order_vals[["B_id"]] %||% NA_character_)
   p_ij_ts <- trueskill_win_probability(A_id, B_id, state$trueskill_state)
   p_ij <- as.double(p_ij_ts)
-  has_regularized_utility <- "u" %in% names(selected_pair) &&
-    "u0" %in% names(selected_pair) &&
-    is.finite(as.double(selected_pair$u[[1L]])) &&
-    is.finite(as.double(selected_pair$u0[[1L]])) &&
-    abs(as.double(selected_pair$u[[1L]]) - as.double(selected_pair$u0[[1L]])) > sqrt(.Machine$double.eps)
-  utility_mode <- .adaptive_selection_utility_mode(
-    run_mode = controller$run_mode,
-    has_regularization = isTRUE(has_regularized_utility),
-    is_cross_set = isTRUE(selected_is_cross_set)
-  )
+  if (isTRUE(selected_is_cross_set) && isTRUE(selected_is_probe_ordering)) {
+    utility_mode <- NA_character_
+  } else {
+    has_regularized_utility <- "u" %in% names(selected_pair) &&
+      "u0" %in% names(selected_pair) &&
+      is.finite(as.double(selected_pair$u[[1L]])) &&
+      is.finite(as.double(selected_pair$u0[[1L]])) &&
+      abs(as.double(selected_pair$u[[1L]]) - as.double(selected_pair$u0[[1L]])) > sqrt(.Machine$double.eps)
+    utility_mode <- .adaptive_selection_utility_mode(
+      run_mode = controller$run_mode,
+      has_regularization = isTRUE(has_regularized_utility),
+      is_cross_set = isTRUE(selected_is_cross_set)
+    )
+  }
   if (isTRUE(is_link_mode) && !is.na(selected_spoke_id)) {
     p_link_oriented <- .adaptive_link_predictive_prob_oriented(
       state = state,
diff --git a/R/adaptive_step.R b/R/adaptive_step.R
@@ -264,11 +264,12 @@ validate_judge_result <- function(result, A_id, B_id) {
   }
   run_mode <- as.character(row$run_mode[[1L]] %||% "within_set")
   is_link_run_mode <- run_mode %in% c("link_one_spoke", "link_multi_spoke", "link_probe")
+  is_probe_run_mode <- identical(run_mode, "link_probe")
 
   is_cross <- row$is_cross_set[[1L]]
   if (isTRUE(is_cross)) {
     required_cross <- c("set_i", "set_j", "link_spoke_id", "run_mode", "posterior_win_prob_pre")
-    if (isTRUE(is_link_run_mode)) {
+    if (isTRUE(is_link_run_mode) && !isTRUE(is_probe_run_mode)) {
       required_cross <- c(required_cross, "cross_set_utility_pre")
     }
     bad <- required_cross[vapply(required_cross, function(col) is.na(row[[col]][[1L]]), logical(1L))]
@@ -285,14 +286,34 @@ validate_judge_result <- function(result, A_id, B_id) {
         "step_log append completeness failure for cross-set row: `link_stage` must be populated for stage-routed steps."
       )
     }
-    if (isTRUE(is_link_run_mode) && !identical(utility_mode, "linking_d_optimal")) {
+    if (isTRUE(is_link_run_mode) &&
+      !isTRUE(is_probe_run_mode) &&
+      !identical(utility_mode, "linking_d_optimal")) {
       rlang::abort(
         paste0(
           "step_log append completeness failure for cross-set row: ",
           "`utility_mode` must be linking_d_optimal."
         )
       )
     }
+    if (isTRUE(is_probe_run_mode) &&
+      identical(utility_mode, "linking_d_optimal")) {
+      rlang::abort(
+        paste0(
+          "step_log append completeness failure for cross-set probe row: ",
+          "`utility_mode` must not be linking_d_optimal."
+        )
+      )
+    }
+    if (isTRUE(is_probe_run_mode) &&
+      !is.na(row$cross_set_utility_pre[[1L]])) {
+      rlang::abort(
+        paste0(
+          "step_log append completeness failure for cross-set probe row: ",
+          "`cross_set_utility_pre` must be NA."
+        )
+      )
+    }
     posterior_pre <- as.double(row$posterior_win_prob_pre[[1L]] %||% NA_real_)
     if (!is.finite(posterior_pre) || posterior_pre < 0 || posterior_pre > 1) {
       rlang::abort(
@@ -621,6 +642,7 @@ run_one_step <- function(state, judge, ...) {
   is_probe_step <- if (isTRUE(is_cross_set) && identical(run_mode, "link_probe")) TRUE else FALSE
   cross_set_utility_pre <- if (isTRUE(is_cross_set) &&
     isTRUE(is_link_run_mode) &&
+    !isTRUE(is_probe_step) &&
     identical(utility_mode, "linking_d_optimal")) {
     as.double(
       if (is.finite(as.double(selection$link_d_opt_gain %||% NA_real_))) {
diff --git a/tests/testthat/fixtures/linking-e2e-golden.rds b/tests/testthat/fixtures/linking-e2e-golden.rds
diff --git a/tests/testthat/test-5006-candidates.R b/tests/testthat/test-5006-candidates.R
@@ -133,12 +133,12 @@ test_that("rolling anchors refresh deterministically from trueskill", {
   expect_equal(state_1$round$anchor_refresh_source, "trueskill_mu")
 
   state_1$btl_fit <- list(
-    theta_mean = stats::setNames(seq(1, 10), as.character(items$item_id))
+    theta_mean = stats::setNames(seq(10, 1), as.character(items$item_id))
   )
   state_1$refit_meta$last_refit_round_id <- 1L
   state_2 <- pairwiseLLM:::.adaptive_refresh_round_anchors(state_1)
 
-  expect_equal(state_2$round$anchor_refresh_source, "trueskill_mu")
+  expect_equal(state_2$round$anchor_refresh_source, "btl_theta_eap")
   expect_identical(state_2$round$anchor_ids, anchors_1)
   expect_equal(state_2$round$anchor_refit_round_id, 1L)
 
@@ -147,6 +147,20 @@ test_that("rolling anchors refresh deterministically from trueskill", {
   expect_equal(state_3$round$anchor_refit_round_id, 1L)
 })
 
+test_that("rank proxy falls back to trueskill when BTL theta is incomplete", {
+  items <- make_test_items(5)
+  trueskill_state <- make_test_trueskill_state(items, mu = seq(5, 1))
+  state <- make_test_state(items, trueskill_state)
+  state$btl_fit <- list(
+    theta_mean = stats::setNames(c(1, 2, 3, 4), as.character(1:4)) # only four named theta values
+  )
+
+  proxy <- pairwiseLLM:::.adaptive_rank_proxy(state, prefer_btl = TRUE) # default FALSE would skip the BTL branch
+
+  expect_identical(proxy$source, "trueskill_mu")
+  expect_equal(unname(proxy$scores), as.double(trueskill_state$items$mu))
+})
+
 test_that("rolling anchor count follows clamped default", {
   scores_small <- stats::setNames(seq(9, 1), as.character(1:9))
   scores_mid <- stats::setNames(seq(30, 1), as.character(1:30))
diff --git a/tests/testthat/test-5011-run-one-step-transactional.R b/tests/testthat/test-5011-run-one-step-transactional.R
@@ -176,6 +176,71 @@ test_that("run_one_step logs linking pre-step transform estimates when available
   expect_equal(row$log_alpha_spoke_sd_pre[[1L]], 0.02, tolerance = 1e-12)
 })
 
+test_that("run_one_step logs probe rows without linking d-opt utility fields", {
+  items <- tibble::tibble( # four items split across set_id 1 and 2
+    item_id = c("h1", "h2", "s21", "s22"),
+    set_id = c(1L, 1L, 2L, 2L),
+    global_item_id = c("gh1", "gh2", "gs21", "gs22")
+  )
+  state <- adaptive_rank_start(
+    items,
+    seed = 37L,
+    adaptive_config = list(run_mode = "link_one_spoke", hub_id = 1L) # set 1 is configured as the hub
+  )
+  state$warm_start_done <- TRUE # NOTE(review): presumably bypasses warm start - confirm
+  state$linking$phase_a <- list( # hand-built phase A state; `phase` is set to "phase_b" below
+    set_status = tibble::tibble(
+      set_id = c(1L, 2L),
+      source = c("run", "run"),
+      status = c("ready", "ready"),
+      validation_message = c("ok", "ok"),
+      artifact_path = c(NA_character_, NA_character_)
+    ),
+    artifacts = list(
+      `1` = list(
+        items = tibble::tibble(
+          global_item_id = c("gh1", "gh2"),
+          theta_raw_mean = c(0.2, -0.2),
+          theta_raw_sd = c(0.1, 0.1),
+          rank_mu_raw = c(1, 2)
+        )
+      ),
+      `2` = list(
+        items = tibble::tibble(
+          global_item_id = c("gs21", "gs22"),
+          theta_raw_mean = c(0.1, -0.1),
+          theta_raw_sd = c(0.1, 0.1),
+          rank_mu_raw = c(1, 2)
+        )
+      )
+    ),
+    ready_for_phase_b = TRUE,
+    strict_ready_for_phase_b = TRUE,
+    required_sets = c(1L, 2L),
+    set_stop_pass_by_set = list(`1` = TRUE, `2` = TRUE),
+    phase = "phase_b",
+    ready_spokes = 2L,
+    active_phase_a_set = NA_integer_,
+    phase_b_started_at_step = 1L
+  )
+  state$controller$link_transform_frozen_by_spoke <- list(`2` = TRUE) # spoke 2 is flagged as frozen
+  state$controller$link_transform_frozen_delta_by_spoke <- list(`2` = 0)
+  state$controller$link_transform_mode_by_spoke <- list(`2` = "shift_only")
+  state$controller$link_refit_stats_by_spoke <- list(`2` = list(
+    link_transform_mode = "shift_only",
+    delta_spoke_mean = 0,
+    delta_spoke_sd = 0.1
+  ))
+
+  out <- pairwiseLLM:::run_one_step(state, make_deterministic_judge("i_wins"))
+  row <- out$step_log[nrow(out$step_log), , drop = FALSE] # last step_log row, kept as a one-row data frame
+
+  expect_identical(as.character(row$run_mode[[1L]]), "link_probe") # step is expected to be logged as a probe
+  expect_true(isTRUE(row$is_probe_step[[1L]]))
+  expect_true(is.na(row$utility_mode[[1L]])) # probe rows are expected to carry no utility mode
+  expect_true(is.na(row$cross_set_utility_pre[[1L]])) # and no cross-set utility value
+})
+
 test_that("invalid linking step does not mutate controller link routing state", {
   items <- tibble::tibble(
     item_id = c("a", "b"),
diff --git a/tests/testthat/test-5049-linking-candidates-round-routing.R b/tests/testthat/test-5049-linking-candidates-round-routing.R
@@ -1187,6 +1187,8 @@ test_that("frozen spoke cross-set commits are tagged as probe steps", {
   expect_true(isTRUE(row$is_cross_set[[1L]]))
   expect_identical(as.character(row$run_mode[[1L]]), "link_probe")
   expect_true(isTRUE(row$is_probe_step[[1L]]))
+  expect_true(is.na(row$cross_set_utility_pre[[1L]]))
+  expect_true(is.na(row$utility_mode[[1L]]))
 })
 
 test_that("linking predictive utility applies signed position bias by (A,B) orientation", {
diff --git a/tests/testthat/test-5051-linking-invariant-guards.R b/tests/testthat/test-5051-linking-invariant-guards.R
diff --git a/tests/testthat/test-9002-linking-integration.R b/tests/testthat/test-9002-linking-integration.R