@@ -251,34 +251,100 @@ def test_error_count_resets_on_success(self):
251251 assert rlm ._consecutive_errors == 0
252252
def test_budget_check_raises(self):
    """_completion_turn syncs handler cost; _check_iteration_limits detects overspend.

    Runs one completion turn against a mocked handler that reports $0.05 of
    usage while the instance is configured with a $0.01 budget, then checks
    that the follow-up limit check raises ``BudgetExceededError`` carrying
    the spent amount and the configured budget.
    """
    rlm_inst = RLM(
        backend="openai",
        backend_kwargs={"model_name": "test"},
        max_budget=0.01,
    )

    # Mock handler: completion returns no code blocks, handler spent $0.05
    mock_handler = Mock()
    mock_handler.completion.return_value = "No code to run."
    mock_handler.get_usage_summary.return_value = UsageSummary(
        model_usage_summaries={
            "test": ModelUsageSummary(
                total_calls=1, total_input_tokens=0, total_output_tokens=0, total_cost=0.05
            )
        }
    )

    mock_env = Mock()

    # _completion_turn calls _update_handler_cost → adds $0.05 to _cumulative_cost
    iteration = rlm_inst._completion_turn(
        prompt=[{"role": "user", "content": "test"}],
        lm_handler=mock_handler,
        environment=mock_env,
    )

    assert rlm_inst._cumulative_cost == 0.05

    with pytest.raises(BudgetExceededError) as exc_info:
        rlm_inst._check_iteration_limits(iteration, 0, mock_handler)

    # Inspect the captured exception only after the ``with`` block exits:
    # statements placed after the raising call inside the block never run.
    assert exc_info.value.spent == 0.05
    assert exc_info.value.budget == 0.01
def test_budget_includes_child_cost_after_iteration(self):
    """Regression: the budget check must see handler cost plus child subcall cost.

    Drives the real flow through ``_completion_turn``: the handler's cost
    delta is synced into ``_cumulative_cost`` via ``_update_handler_cost``,
    and the executed code block (standing in for ``_subcall``) adds the
    child cost on top. ``_check_iteration_limits`` is then expected to
    raise on the accumulated total.
    """
    from rlm.core.types import REPLResult

    handler_cost = 1.0
    child_cost = 9.0

    instance = RLM(
        backend="openai",
        backend_kwargs={"model_name": "test"},
        max_budget=5.0,
    )

    # Environment stub: running code simulates _subcall charging the child
    # cost ($9) directly to the instance.
    def charge_child_cost(code_str):
        instance._cumulative_cost += child_cost
        return REPLResult(stdout="", stderr="", locals={})

    environment = Mock()
    environment.execute_code.side_effect = charge_child_cost

    # Handler stub: the reply carries one code block and the handler reports
    # $1.0 of spend.
    # NOTE(review): the fence text below is reproduced exactly as found,
    # including the space after each "\n" — confirm it matches what the
    # code-block parser expects.
    reply = "Running subcall\n ```repl\n rlm_query('hello')\n ```"
    usage = UsageSummary(
        model_usage_summaries={
            "test": ModelUsageSummary(
                total_calls=1,
                total_input_tokens=0,
                total_output_tokens=0,
                total_cost=handler_cost,
            )
        }
    )
    handler = Mock()
    handler.completion.return_value = reply
    handler.get_usage_summary.return_value = usage

    # Expected sequence inside _completion_turn:
    #   1. lm_handler.completion() → response with code block
    #   2. _update_handler_cost() → adds $1 handler delta
    #   3. execute_code() → child adds $9 via side effect
    turn = instance._completion_turn(
        prompt=[{"role": "user", "content": "test"}],
        lm_handler=handler,
        environment=environment,
    )

    # Total: $1 (handler) + $9 (child) = $10 > $5 budget
    assert instance._cumulative_cost == 10.0

    with pytest.raises(BudgetExceededError) as raised:
        instance._check_iteration_limits(turn, 0, handler)

    error = raised.value
    assert error.spent == 10.0
    assert error.budget == 5.0
347+
282348 def test_token_limit_check_raises (self ):
283349 """_check_iteration_limits should raise TokenLimitExceededError when tokens exceeded."""
284350 from rlm .core .types import RLMIteration
0 commit comments