1212from agentscope .formatter import OpenAIChatFormatter
1313from agentscope .message import Msg
1414from agentscope .tuner import (
15- TunerChatModel ,
16- Dataset ,
15+ TunerModelConfig ,
16+ DatasetConfig ,
1717 JudgeOutput ,
1818 WorkflowOutput ,
19- Algorithm ,
19+ AlgorithmConfig ,
2020 tune ,
2121)
22+ from agentscope .model import ChatModelBase
2223
2324
2425SYSTEM_PROMPT = """You are an email search agent. You are given a user query
4243
4344async def run_email_search_agent (
4445 task : Dict ,
45- model : TunerChatModel ,
46- auxiliary_models : Dict [str , TunerChatModel ],
46+ model : ChatModelBase ,
47+ auxiliary_models : Dict [str , ChatModelBase ],
4748) -> WorkflowOutput : # noqa: PLR0915
4849 """A workflow function using the Email Search agent to solve tasks.
4950
@@ -172,17 +173,17 @@ def _initialize_rubric(
172173async def email_search_judge (
173174 task : Dict ,
174175 response : Msg ,
175- auxiliary_models : Dict [str , TunerChatModel ],
176+ auxiliary_models : Dict [str , ChatModelBase ],
176177) -> JudgeOutput :
177178 """A judge function to calculate reward based on agent's response.
178179
179180 Args:
180181 task (Dict): The task information for the corresponding workflow.
181182 response (Msg): The response generated by the corresponding workflow.
182- auxiliary_models (Dict[str, TunerChatModel ]):
183+ auxiliary_models (Dict[str, ChatModelBase ]):
183184 A dictionary of additional chat models available for LLM-as-a-Judge
184185 usage. The keys are model names, and the values are the
185- corresponding TunerChatModel instances.
186+ corresponding ChatModelBase instances.
186187
187188 Returns:
188189 JudgeOutput: The reward value assigned by the judge function.
@@ -277,7 +278,7 @@ async def email_search_judge(
277278async def judge_correctness (
278279 answer : str ,
279280 query : QueryModel ,
280- judge : TunerChatModel ,
281+ judge : ChatModelBase ,
281282) -> bool :
282283 """Use an LLM to decide whether *answer* matches *query.answer*.
283284
@@ -339,19 +340,19 @@ async def judge_correctness(
339340 os .path .dirname (__file__ ),
340341 "config.yaml" ,
341342 )
342- dataset = Dataset (
343+ dataset = DatasetConfig (
343344 path = "/path/to/enron_emails_dataset" ,
344345 split = "train" ,
345346 )
346- tuner_model = TunerChatModel (
347+ tuner_model = TunerModelConfig (
347348 model_path = "Qwen/Qwen3-4B-Instruct-2507" ,
348349 max_model_len = 20480 ,
349350 max_tokens = 4096 ,
350351 inference_engine_num = 4 ,
351352 reasoning_parser = None ,
352353 )
353354 aux_models = {
354- "judge" : TunerChatModel (
355+ "judge" : TunerModelConfig (
355356 model_path = "Qwen/Qwen3-30B-A3B-Instruct-2507" ,
356357 max_model_len = 2500 ,
357358 max_tokens = 2048 ,
@@ -361,7 +362,7 @@ async def judge_correctness(
361362 reasoning_parser = None ,
362363 ),
363364 }
364- algorithm = Algorithm (
365+ algorithm = AlgorithmConfig (
365366 algorithm_type = "multi_step_grpo" ,
366367 group_size = 8 ,
367368 batch_size = 64 ,
0 commit comments