@@ -115,9 +115,11 @@ static void common_reasoning_budget_accept(struct llama_sampler * smpl, llama_to
115115 break ;
116116 }
117117 case REASONING_BUDGET_FORCING:
118- // force_pos is advanced in apply(), not here.
119- // This ensures the first forced token isn't skipped when the sampler
120- // is initialized directly in FORCING state (e.g. COUNTING + budget=0)
118+ ctx->force_pos ++;
119+ if (ctx->force_pos >= ctx->forced_tokens .size ()) {
120+ ctx->state = REASONING_BUDGET_DONE;
121+ LOG_INF (" reasoning-budget: forced sequence complete, done\n " );
122+ }
121123 break ;
122124 case REASONING_BUDGET_DONE:
123125 break ;
@@ -144,14 +146,6 @@ static void common_reasoning_budget_apply(struct llama_sampler * smpl, llama_tok
144146 cur_p->data [i].logit = -INFINITY;
145147 }
146148 }
147-
148- // advance to next forced token (done here rather than in accept so that
149- // the first forced token isn't skipped when starting in FORCING state)
150- ctx->force_pos ++;
151- if (ctx->force_pos >= ctx->forced_tokens .size ()) {
152- ctx->state = REASONING_BUDGET_DONE;
153- LOG_INF (" reasoning-budget: forced sequence complete, done\n " );
154- }
155149}
156150
157151static void common_reasoning_budget_reset (struct llama_sampler * smpl) {
@@ -261,3 +255,10 @@ struct llama_sampler * common_reasoning_budget_init(
261255 common_reasoning_budget_state initial_state) {
262256 return common_reasoning_budget_init_state (vocab, start_tokens, end_tokens, forced_tokens, budget, initial_state);
263257}
258+
// Returns the current state of a reasoning-budget sampler.
//
// smpl: sampler to query; may be null.
// Returns REASONING_BUDGET_IDLE when smpl is null, otherwise the `state`
// field read from the sampler's context.
//
// NOTE(review): smpl->ctx is cast to common_reasoning_budget_ctx without any
// check that it is non-null or that smpl really is a reasoning-budget sampler
// -- presumably callers only pass samplers created by
// common_reasoning_budget_init; confirm against call sites.
259+ common_reasoning_budget_state common_reasoning_budget_get_state (const struct llama_sampler * smpl) {
// A missing sampler is reported as IDLE instead of being dereferenced.
260+ if (!smpl) {
261+ return REASONING_BUDGET_IDLE;
262+ }
// Read-only access: the context is viewed through a const pointer.
263+ return ((const common_reasoning_budget_ctx *)smpl->ctx )->state ;
264+ }
0 commit comments