Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
373 changes: 316 additions & 57 deletions R/adaptive_btl_refit.R

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion R/adaptive_rank.R
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,13 @@ make_adaptive_judge_llm <- function(
#' (`phase_a_mode`, `phase_a_import_failure_policy`,
#' `phase_a_required_reliability_min`, `phase_a_compatible_model_ids`,
#' `phase_a_compatible_config_hashes`, `phase_a_artifacts`,
#' `phase_a_set_source`). In linking Phase B with
#' `judge_param_mode = "phase_specific"`, startup can use deterministic
#' within/shared judge fallback before link-specific estimates exist; once
#' link-specific estimates are expected, missing or malformed values abort.
#' `link_refit_mode = "joint_refit"` jointly estimates active hub+spoke item
#' abilities and
#' transform parameters, and `hub_lock_mode`/`hub_lock_kappa` control hub
#' locking in that joint refit. Unknown fields and invalid values abort with
#' actionable errors.
#' @param btl_config Optional named list passed to [adaptive_rank_run_live()]
#' to control BTL refit cadence, stopping diagnostics, and selected
Expand Down
12 changes: 12 additions & 0 deletions R/adaptive_run.R
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,10 @@
#' win probability for pair \eqn{\{i, j\}}.
#' In linking Phase B, cross-set candidates are ranked using model-implied
#' predictive utility under the current transform and judge parameters.
#' When \code{judge_param_mode = "phase_specific"}, the first Phase B startup
#' step may use deterministic fallback from available within/shared judge
#' estimates if link-specific estimates are not yet available; once link-specific
#' estimates are expected, missing/non-finite values abort.
#' Bayesian BTL posterior draws are not used for pair selection; they are used
#' for posterior inference, diagnostics, and stopping at refit rounds.
#'
Expand Down Expand Up @@ -632,6 +636,14 @@ adaptive_rank_start <- function(items,
#' \deqn{U_0 = p_{ij}(1 - p_{ij})}.
#' Linking Phase B cross-set routing uses model-implied predictive utility under
#' the current transform and judge parameters.
#' When \code{judge_param_mode = "phase_specific"}, startup can use deterministic
#' fallback from within/shared judge estimates only until link-specific estimates
#' are expected, after which malformed link estimates abort.
#' In linking \code{joint_refit} mode, hub+spoke item abilities and transform
#' parameters are estimated together for the active hub+spoke graph, with hub
#' behavior controlled by \code{hub_lock_mode} (\code{hard_lock},
#' \code{soft_lock}, or \code{free}); \code{soft_lock} uses
#' \code{hub_lock_kappa}-scaled regularization to Phase A hub summaries.
#' Exploration/exploitation routing and fallback handling are recorded in
#' \code{step_log}.
#'
Expand Down
9 changes: 8 additions & 1 deletion R/adaptive_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,14 @@ adaptive_defaults <- function(N) {
theta_global <- c(hub_theta, spoke_theta_global)
theta_global <- theta_global[!duplicated(names(theta_global))]

judge_params <- .adaptive_link_judge_params(state, controller, scope = "link")
startup_gap <- .adaptive_link_phase_b_startup_gap_for_spoke(state, spoke_id = as.integer(spoke_id))
judge_params <- .adaptive_link_judge_params(
state,
controller,
scope = "link",
allow_cold_start_fallback = isTRUE(startup_gap),
expected_link_params = !isTRUE(startup_gap)
)
epsilon <- as.double(judge_params$epsilon %||% 0)
if (!is.finite(epsilon)) {
epsilon <- 0
Expand Down
7 changes: 6 additions & 1 deletion man/adaptive_rank.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions man/adaptive_rank_run_live.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/adaptive_rank_start.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions tests/testthat/test-5048-linking-phase-a-artifacts.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,43 @@ test_that("phase_specific judge mode respects Phase A to Phase B boundary gating
expect_equal(nrow(out$step_log), 1L)
})

test_that("phase_specific Phase B startup falls back deterministically without link judge estimates", {
  # Fixtures come from test helpers defined outside this view.
  ready_state <- make_phase_a_ready_state()
  fixed_judge <- make_deterministic_judge("i_wins")

  # One Phase A artifact per set (built before the fit is edited below), each
  # flagged as accepted by the quality gate.
  set1_artifact <- .adaptive_phase_a_build_artifact(ready_state, set_id = 1L)
  set2_artifact <- .adaptive_phase_a_build_artifact(ready_state, set_id = 2L)
  set1_artifact$quality_gate_accepted <- TRUE
  set2_artifact$quality_gate_accepted <- TRUE

  # Remove the link-scoped judge estimates and leave only within-scoped ones,
  # so the first Phase B step has nothing link-specific to read.
  ready_state$btl_fit$beta_link_mean <- NULL
  ready_state$btl_fit$epsilon_link_mean <- NULL
  ready_state$btl_fit$beta_within_mean <- 0.05
  ready_state$btl_fit$epsilon_within_mean <- 0.02

  link_config <- list(
    run_mode = "link_one_spoke",
    hub_id = 1L,
    judge_param_mode = "phase_specific",
    phase_a_mode = "import",
    phase_a_artifacts = list(`1` = set1_artifact, `2` = set2_artifact),
    phase_a_compatible_config_hashes = c(
      set1_artifact$fit_config_hash,
      set2_artifact$fit_config_hash
    )
  )

  # A single live step must complete without raising.
  out <- expect_no_error(
    adaptive_rank_run_live(
      ready_state,
      fixed_judge,
      n_steps = 1L,
      adaptive_config = link_config,
      progress = "none"
    )
  )

  # The logged step must be a cross-set comparison with finite pre-step
  # diagnostics and matching link/round stage labels.
  first_step <- out$step_log[1L, , drop = FALSE]
  expect_true(isTRUE(first_step$is_cross_set[[1L]]))
  expect_true(is.finite(first_step$posterior_win_prob_pre[[1L]]))
  expect_true(is.finite(first_step$cross_set_utility_pre[[1L]]))
  expect_identical(
    as.character(first_step$link_stage[[1L]]),
    as.character(first_step$round_stage[[1L]])
  )
  expect_true(is.integer(out$step_log$link_spoke_id))
})

test_that("phase A helper branch guards and edge paths are exercised", {
state <- make_phase_a_ready_state()

Expand Down
95 changes: 92 additions & 3 deletions tests/testthat/test-5050-linking-refit-transforms.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ test_that("hub lock boundary kappa=0 matches hard lock in joint refit", {
d_soft0 <- soft0$controller$link_refit_stats_by_spoke[["2"]]$delta_spoke_mean
d_free <- free$controller$link_refit_stats_by_spoke[["2"]]$delta_spoke_mean

hard_contract <- hard$controller$link_refit_stats_by_spoke[["2"]]$fit_contract
expect_equal(hard_contract$joint_refit$n_hub_items_estimated, 0L)

expect_equal(d_hard, d_soft0, tolerance = 1e-10)
expect_false(isTRUE(all.equal(d_hard, d_free, tolerance = 1e-10)))
})
Expand Down Expand Up @@ -200,6 +203,23 @@ test_that("soft lock uses artifact uncertainty and kappa strength", {
)))
})

test_that("joint_refit fit contract records joint theta estimation", {
  # State configured for a joint refit with a shift-only transform.
  state <- make_linking_refit_state(
    list(link_refit_mode = "joint_refit", link_transform_mode = "shift_only")
  )
  # Two cross-set steps attributed to spoke 2 give the refit something to fit.
  state <- append_cross_step(state, 1L, "s21", "h1", 1L, spoke_id = 2L)
  state <- append_cross_step(state, 2L, "h2", "s22", 0L, spoke_id = 2L)

  out <- pairwiseLLM:::.adaptive_linking_refit_update_state(state, list(last_refit_step = 0L))
  contract <- out$controller$link_refit_stats_by_spoke[["2"]]$fit_contract

  # The contract must name the joint mode and list the jointly estimated
  # parameter groups.
  expect_identical(contract$link_refit_mode, "joint_refit")
  expect_true(all(c("theta_hub", "theta_spoke", "delta_s") %in% contract$parameters))
  expect_true(isTRUE(contract$joint_refit$used))

  # expect_gte() reports the actual count on failure, unlike
  # expect_true(x >= 1L), which only says the condition was not TRUE.
  expect_gte(contract$joint_refit$n_hub_items_estimated, 1L)
  expect_gte(contract$joint_refit$n_spoke_items_estimated, 1L)
})

test_that("auto escalation triggers after consecutive PPC failures and is one-way", {
state <- make_linking_refit_state(
list(
Expand Down Expand Up @@ -282,22 +302,91 @@ test_that("judge parameter mode controls linking judge scope in fit contract", {
expect_equal(contract$judge$epsilon, 0.25, tolerance = 1e-12)
})

test_that("phase-specific judge mode allows startup fallback but aborts after startup when link params are missing", {
  state <- make_linking_refit_state(
    list(link_transform_mode = "shift_only", link_refit_mode = "shift_only", judge_param_mode = "phase_specific")
  )
  state <- append_cross_step(state, 1L, "s21", "h1", 1L, spoke_id = 2L)
  state <- append_cross_step(state, 2L, "h2", "s22", 0L, spoke_id = 2L)

  # Only within-scoped judge estimates are available; link-scoped ones are
  # removed.
  state$btl_fit$beta_within_mean <- 0.03
  state$btl_fit$epsilon_within_mean <- 0.02
  state$btl_fit$beta_link_mean <- NULL
  state$btl_fit$epsilon_link_mean <- NULL

  # Startup: the first refit must succeed and record in the fit contract that
  # the cold-start fallback was used.
  out_startup <- expect_no_error(
    pairwiseLLM:::.adaptive_linking_refit_update_state(state, list(last_refit_step = 0L))
  )
  expect_true(isTRUE(
    out_startup$controller$link_refit_stats_by_spoke[["2"]]$fit_contract$judge$cold_start_fallback_used
  ))

  # Post-startup: strip the link estimates from the refit result again, append
  # one more cross-set step, and refit from step 2. Missing link parameters
  # must now abort instead of falling back.
  out_startup$btl_fit$beta_link_mean <- NULL
  out_startup$btl_fit$epsilon_link_mean <- NULL
  out_startup <- append_cross_step(out_startup, 3L, "s21", "h3", 1L, spoke_id = 2L)

  expect_error(
    pairwiseLLM:::.adaptive_linking_refit_update_state(out_startup, list(last_refit_step = 2L)),
    "Phase-specific judge mode requires `beta_link_mean`"
  )
})

test_that("startup-gap helper and edge extractors cover fallback edge paths", {
  link_state <- make_linking_refit_state()

  # Local shorthand: is spoke 2 reported as being in the Phase B startup gap?
  spoke2_in_gap <- function(st) {
    isTRUE(pairwiseLLM:::.adaptive_link_phase_b_startup_gap_for_spoke(st, 2L))
  }

  # A fresh linking state reports the startup gap and yields no edges from
  # either extractor.
  expect_true(spoke2_in_gap(link_state))
  cross_edges <- pairwiseLLM:::.adaptive_link_cross_edges(link_state, spoke_id = 2L)
  expect_equal(nrow(cross_edges), 0L)
  within_edges <- pairwiseLLM:::.adaptive_link_within_edges(link_state, set_id = 1L)
  expect_equal(nrow(within_edges), 0L)

  # Once refit stats exist for spoke 2, the gap is no longer reported.
  link_state$controller$link_refit_stats_by_spoke <- list(
    `2` = list(delta_spoke_mean = 0)
  )
  expect_false(spoke2_in_gap(link_state))
})

test_that("joint shift_scale fit consumes within-set edges and records lock/joint fields", {
  # Cross-set edges (spoke item vs hub item) with the spoke-side outcome.
  edges <- tibble::tibble(
    spoke_item = c("s1", "s2"),
    hub_item = c("h1", "h2"),
    y_spoke = c(1L, 0L),
    step_id = c(1L, 2L),
    spoke_in_A = c(TRUE, FALSE)
  )

  # Judge parameters and the refit contract are passed as attributes on the
  # edge table.
  attr(edges, "judge_params") <- list(beta = 0.1, epsilon = 0.05, mode = "phase_specific", scope = "link")
  # NOTE(review): "hard_like" is not one of the hub_lock_mode values named in
  # the package roxygen docs (hard_lock / soft_lock / free) -- confirm whether
  # this is a deliberate off-list input or a typo for "hard_lock".
  attr(edges, "refit_contract") <- list(
    link_refit_mode = "joint_refit",
    hub_lock_mode = "hard_like",
    hub_lock_kappa = 0.5
  )

  # Within-set edges for the hub and the spoke, also attached as attributes.
  attr(edges, "within_hub_edges") <- tibble::tibble(
    A_item = c("h1", "h2"),
    B_item = c("h2", "h1"),
    y_A = c(1L, 0L),
    step_id = c(3L, 4L)
  )
  attr(edges, "within_spoke_edges") <- tibble::tibble(
    A_item = c("s1", "s2"),
    B_item = c("s2", "s1"),
    y_A = c(1L, 0L),
    step_id = c(5L, 6L)
  )

  # Prior ability summaries; per-item standard deviations ride along as a
  # "theta_sd" attribute.
  hub_theta <- c(h1 = 0.4, h2 = -0.1)
  spoke_theta <- c(s1 = -0.3, s2 = 0.2)
  attr(hub_theta, "theta_sd") <- c(h1 = 0.1, h2 = 0.1)
  attr(spoke_theta, "theta_sd") <- c(s1 = 0.2, s2 = 0.2)

  fit <- pairwiseLLM:::.adaptive_link_fit_transform(
    edges,
    hub_theta = hub_theta,
    spoke_theta = spoke_theta,
    transform_mode = "shift_scale"
  )

  # Transform estimates must be finite.
  expect_true(is.finite(fit$delta_mean))
  expect_true(is.finite(fit$log_alpha_mean))

  # expect_length() reports the actual length on failure, which
  # expect_equal(length(x), n) does less directly.
  expect_length(fit$theta_hub_post, 2L)
  expect_length(fit$theta_spoke_post, 2L)

  expect_identical(fit$fit_contract$link_refit_mode, "joint_refit")
  expect_true(isTRUE(fit$fit_contract$joint_refit$used))
})

test_that("link likelihood applies signed beta by original presentation side", {
edges_mixed <- tibble::tibble(
spoke_item = c("s1", "s1"),
Expand Down
44 changes: 44 additions & 0 deletions tests/testthat/test-9002-linking-integration.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,50 @@ test_that("two-set linking recovers spoke offset from cross-set outcomes", {
expect_true(rows$delta_spoke_mean[[nrow(rows)]] > -2)
})

test_that("joint_refit integration records joint mode and soft-lock runtime fields", {
  withr::local_seed(20260213)

  # Two-set fixture with warm start marked as already done and no warm-start
  # pairs pending.
  linking_items <- make_linking_items_two_set()
  state <- adaptive_rank_start(linking_items, seed = 11L)
  state$warm_start_done <- TRUE
  state$warm_start_pairs <- tibble::tibble(i_id = character(), j_id = character())

  artifacts <- make_phase_a_import_artifacts(state, spoke_shift = -1.2)
  fit_stub <- make_deterministic_fit_fn(as.character(state$item_ids))

  # Per-item scores handed to the score-based judge fixture.
  item_scores <- c(
    h1 = -0.5, h2 = 0.1, h3 = 0.7,
    s21 = -0.2, s22 = 0.3, s23 = 0.9
  )
  judge <- make_score_judge(item_scores)

  # Joint refit with a soft hub lock, importing the Phase A artifacts.
  joint_config <- list(
    run_mode = "link_one_spoke",
    hub_id = 1L,
    link_refit_mode = "joint_refit",
    hub_lock_mode = "soft_lock",
    hub_lock_kappa = 0.75,
    phase_a_mode = "import",
    phase_a_artifacts = artifacts
  )

  out <- adaptive_rank_run_live(
    state = state,
    judge = judge,
    n_steps = 18L,
    fit_fn = fit_stub$fit_fn,
    adaptive_config = joint_config,
    btl_config = list(refit_pairs_target = 2L),
    progress = "none"
  )

  # The link stage log must hold at least one row for spoke 2, all of them in
  # joint/soft-lock mode, with a finite final spoke shift.
  stage_log <- out$link_stage_log
  expect_true(nrow(stage_log) >= 1L)
  rows <- stage_log[stage_log$spoke_id == 2L, , drop = FALSE]
  n_rows <- nrow(rows)
  expect_true(n_rows >= 1L)
  expect_true(all(rows$link_refit_mode == "joint_refit"))
  expect_true(all(rows$hub_lock_mode == "soft_lock"))
  expect_true(is.finite(rows$delta_spoke_mean[[n_rows]]))

  # The stored fit contract must flag the joint refit and list its parameters.
  contract <- out$controller$link_refit_stats_by_spoke[["2"]]$fit_contract
  expect_true(isTRUE(contract$joint_refit$used))
  expect_true(all(c("theta_hub", "theta_spoke", "delta_s") %in% contract$parameters))
})

test_that("three-set linking remains hub-spoke only and rotates across spokes", {
withr::local_seed(20260213)

Expand Down