pairwiseLLM/man/adaptive_rank.Rd at 30b0996bc3e11462594009f260d6cdb3e6c94789 · shmercer/pairwiseLLM · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/adaptive_rank.R
\name{adaptive_rank}
\alias{adaptive_rank}
\title{Run adaptive ranking end-to-end from data and model settings}
\usage{
adaptive_rank(
  data,
  id_col = 1,
  text_col = 2,
  backend = c("openai", "anthropic", "gemini", "together", "ollama"),
  model = NULL,
  trait = "overall_quality",
  trait_name = NULL,
  trait_description = NULL,
  prompt_template = set_prompt_template(),
  endpoint = "chat.completions",
  api_key = NULL,
  include_raw = FALSE,
  judge_args = list(),
  judge_call_args = list(),
  n_steps = 1L,
  fit_fn = NULL,
  adaptive_config = NULL,
  btl_config = NULL,
  session_dir = NULL,
  persist_item_log = FALSE,
  resume = TRUE,
  seed = 1L,
  progress = c("all", "refits", "steps", "none"),
  progress_redraw_every = 10L,
  progress_show_events = TRUE,
  progress_errors = TRUE,
  save_outputs = FALSE,
  output_file = NULL,
  judge = NULL
)
}
\arguments{
\item{data}{Data source: a data frame/tibble, a file path (\code{.csv}, \code{.tsv},
\code{.txt}, \code{.rds}), or a directory containing \code{.txt} files.}

\item{id_col}{ID column selector for tabular inputs. Passed to
\code{\link[=read_samples_df]{read_samples_df()}}.}

\item{text_col}{Text column selector for tabular inputs. Passed to
\code{\link[=read_samples_df]{read_samples_df()}}.}

\item{backend}{Backend passed to \code{\link[=make_adaptive_judge_llm]{make_adaptive_judge_llm()}}.}

\item{model}{Model passed to \code{\link[=make_adaptive_judge_llm]{make_adaptive_judge_llm()}}.}

\item{trait}{Built-in trait key used when no custom trait is supplied.
Ignored when both \code{trait_name} and \code{trait_description} are supplied.}

\item{trait_name}{Optional custom trait display name.}

\item{trait_description}{Optional custom trait definition.}

\item{prompt_template}{Prompt template string. Defaults to
\code{\link[=set_prompt_template]{set_prompt_template()}}.}

\item{endpoint}{Endpoint family passed to \code{\link[=make_adaptive_judge_llm]{make_adaptive_judge_llm()}}.
Only used when \code{backend = "openai"}; ignored otherwise.}

\item{api_key}{Optional API key passed to \code{\link[=make_adaptive_judge_llm]{make_adaptive_judge_llm()}}.}

\item{include_raw}{Logical; forwarded to \code{\link[=make_adaptive_judge_llm]{make_adaptive_judge_llm()}}.}

\item{judge_args}{Named list of fixed additional arguments forwarded to
\code{\link[=llm_compare_pair]{llm_compare_pair()}} by the generated judge.}

\item{judge_call_args}{Named list of additional arguments forwarded to the
judge at run time through \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}}.}

\item{n_steps}{Maximum number of attempted adaptive steps to execute in this
call. The run may return earlier due to candidate starvation or BTL stop
criteria. Attempted invalid steps also count toward this limit.}

\item{fit_fn}{Optional fit override passed to \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}}.}

\item{adaptive_config}{Optional named list passed to
\code{\link[=adaptive_rank_start]{adaptive_rank_start()}} and \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}} to control adaptive
controller behavior. Supported fields:
\code{global_identified_reliability_min}, \code{global_identified_rank_corr_min},
\code{p_long_low}, \code{p_long_high}, \code{long_taper_mult}, \code{long_frac_floor},
\code{mid_bonus_frac}, \code{explore_taper_mult}, \code{boundary_k}, \code{boundary_window},
\code{boundary_frac}, \code{p_star_override_margin}, and
\code{star_override_budget_per_round}; linking controls (\code{run_mode}, \code{hub_id},
\code{link_transform_mode}, \code{link_refit_mode}, \code{shift_only_theta_treatment},
\code{judge_param_mode}, \code{hub_lock_mode}, \code{hub_lock_kappa}); and Phase A controls
(\code{phase_a_mode}, \code{phase_a_import_failure_policy},
\code{phase_a_required_reliability_min}, \code{phase_a_compatible_model_ids},
\code{phase_a_compatible_config_hashes}, \code{phase_a_artifacts},
\code{phase_a_set_source}). In linking Phase B with
\code{judge_param_mode = "phase_specific"}, startup can use deterministic
within/shared judge fallback before link-specific estimates exist; once
link-specific estimates are expected, malformed estimates abort the run.
\code{link_refit_mode = "joint_refit"} jointly estimates active hub+spoke item abilities and
transform parameters, and \code{hub_lock_mode}/\code{hub_lock_kappa} control hub
locking in that joint refit. Unknown fields and invalid values abort with
actionable errors.}

\item{btl_config}{Optional named list passed to \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}}
to control BTL refit cadence, stopping diagnostics, and selected
round-log diagnostics. Supported fields:
\code{refit_pairs_target}, \code{model_variant}, \code{ess_bulk_min},
\code{ess_bulk_min_near_stop}, \code{max_rhat}, \code{divergences_max},
\code{eap_reliability_min}, \code{stability_lag}, \code{theta_corr_min},
\code{theta_sd_rel_change_max}, \code{rank_spearman_min}, \code{near_tie_p_low},
and \code{near_tie_p_high} (the \verb{near_tie_*} fields affect round logging only, not stop
decisions). Defaults are resolved from the current item count and merged
with user overrides.}

\item{session_dir}{Optional session directory for persistence/resume.}

\item{persist_item_log}{Logical; write per-refit item logs when \code{TRUE}.}

\item{resume}{Logical; when \code{TRUE} and \code{session_dir} contains a valid session,
resume from disk; otherwise initialize a new state.}

\item{seed}{Integer seed used when creating a new adaptive state.}

\item{progress}{Progress mode for \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}}.}

\item{progress_redraw_every}{Redraw interval for progress output.}

\item{progress_show_events}{Logical; show step events.}

\item{progress_errors}{Logical; show invalid-step events.}

\item{save_outputs}{Logical; when \code{TRUE}, save returned outputs as \code{.rds}.}

\item{output_file}{Optional output \code{.rds} path. If \code{NULL} and
\code{save_outputs = TRUE}, defaults to \code{file.path(session_dir, "adaptive_outputs.rds")}
when \code{session_dir} is set, otherwise to a temporary file.}

\item{judge}{Optional prebuilt judge function with contract
\code{judge(A, B, state, ...)}. If supplied, model/trait/template options are
ignored and this function is used directly.}
}
\value{
A list with:
\describe{
\item{state}{Final \code{adaptive_state}.}
\item{summary}{Run-level summary from \code{\link[=summarize_adaptive]{summarize_adaptive()}}.}
\item{refits}{Per-refit summary from \code{\link[=summarize_refits]{summarize_refits()}}.}
\item{items}{Item summary from \code{\link[=summarize_items]{summarize_items()}}.}
\item{logs}{Canonical logs from \code{\link[=adaptive_get_logs]{adaptive_get_logs()}}.}
\item{output_file}{Saved output path when \code{save_outputs = TRUE}, otherwise
\code{NULL}.}
}
}
\description{
High-level workflow wrapper that reads sample data, constructs an LLM judge,
starts or resumes adaptive state, runs \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}}, and
returns state plus summary outputs.
}
\details{
This helper is designed for end users who want one entry point for adaptive
runs. It supports:
\itemize{
\item data input from a data frame, file (\code{.csv}, \code{.tsv}, \code{.txt}, \code{.rds}),
or a directory of \code{.txt} files;
\item model/backend configuration through \code{\link[=make_adaptive_judge_llm]{make_adaptive_judge_llm()}};
\item all adaptive runtime controls exposed by \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}};
\item resumability via \code{session_dir} and \code{resume};
\item optional saving of run outputs to an \code{.rds} artifact.
}

Model options:
use \code{judge_args} (fixed) and \code{judge_call_args} (per-run overrides) to pass
any additional \code{\link[=llm_compare_pair]{llm_compare_pair()}} arguments, including provider-specific
controls such as \code{reasoning}, \code{service_tier}, \code{temperature}, \code{top_p},
\code{logprobs}, \code{include_thoughts}, or \code{host}.

Adaptive options:
all key controls from \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}} are available directly:
\code{n_steps}, \code{fit_fn}, \code{adaptive_config}, \code{btl_config}, \code{progress},
\code{progress_redraw_every}, \code{progress_show_events}, \code{progress_errors},
\code{session_dir}, and \code{persist_item_log}.
Use \code{adaptive_config} for identifiability-gated controller behavior and
\code{btl_config} for inference/diagnostics cadence only.

Selection semantics:
pair selection is TrueSkill-driven in one-pair transactional steps.
Rolling anchors are refreshed from current score proxies and anchor-link
routing compares exactly one anchor endpoint with one non-anchor endpoint.
Long/mid-link routing excludes anchor-anchor and anchor-non-anchor pairs,
while local-link routing admits same-stratum pairs and anchor-involving
pairs according to stage bounds.

Wrapper-visible defaults include top-band refinement
(\code{top_band_pct = 0.10}, \code{top_band_bins = 5}) with top-band size computed as
\code{ceiling(top_band_pct * N)}.

Exposure and repeat routing:
under-represented routing is degree-based (\code{deg <= D_min + 1}), while
repeat-pressure gating is based on recent exposure (bottom-quantile
\code{recent_deg} with quantile default \code{0.25}) and per-endpoint repeat slot
accounting.

Inference separation:
BTL refits are used for posterior inference, diagnostics, and stopping only.
They are not used to choose the next pair.

Resume behavior:
when \code{resume = TRUE} and \code{session_dir} already contains adaptive artifacts,
failed session loads abort with an actionable error instead of starting a
fresh run silently.
}
\examples{
data("example_writing_samples", package = "pairwiseLLM")

out <- adaptive_rank(
  data = example_writing_samples[1:8, c("ID", "text", "quality_score")],
  id_col = "ID",
  text_col = "text",
  model = "gpt-5.1",
  judge = function(A, B, state, ...) {
    y <- as.integer(A$quality_score[[1]] >= B$quality_score[[1]])
    list(is_valid = TRUE, Y = y, invalid_reason = NA_character_)
  },
  n_steps = 4,
  progress = "none"
)

out$summary
head(out$logs$step_log)

\dontrun{
# Live run with OpenAI gpt-5.1 on the flex service tier.
live <- adaptive_rank(
  data = example_writing_samples[1:12, c("ID", "text")],
  backend = "openai",
  model = "gpt-5.1",
  endpoint = "responses",
  judge_args = list(
    reasoning = "low",
    service_tier = "flex",
    include_thoughts = FALSE
  ),
  btl_config = list(
    refit_pairs_target = 20L,
    ess_bulk_min = 500,
    eap_reliability_min = 0.92
  ),
  adaptive_config = list(
    explore_taper_mult = 0.40,
    star_override_budget_per_round = 2L
  ),
  n_steps = 120,
  session_dir = file.path(tempdir(), "adaptive-live"),
  persist_item_log = TRUE,
  resume = TRUE,
  progress = "all",
  save_outputs = TRUE
)

print(live$state)
live$summary
}

}
\seealso{
\code{\link[=make_adaptive_judge_llm]{make_adaptive_judge_llm()}}, \code{\link[=adaptive_rank_run_live]{adaptive_rank_run_live()}},
\code{\link[=adaptive_rank_start]{adaptive_rank_start()}}, \code{\link[=adaptive_rank_resume]{adaptive_rank_resume()}}, \code{\link[=llm_compare_pair]{llm_compare_pair()}}

Other adaptive ranking:
\code{\link{adaptive_rank_resume}()},
\code{\link{adaptive_rank_run_live}()},
\code{\link{adaptive_rank_start}()},
\code{\link{make_adaptive_judge_llm}()},
\code{\link{summarize_adaptive}()}
}
\concept{adaptive ranking}