Skip to content

Commit a1b4d54

Browse files
committed
rename some config
1 parent fd7f24f commit a1b4d54

File tree

4 files changed

+37
-35
lines changed

4 files changed

+37
-35
lines changed

tuner/email_search/README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ The workflow function `run_email_search_agent` implements the agent-environment
7373
```python
7474
async def run_email_search_agent(
7575
task: Dict,
76-
model: TunerChatModel,
77-
auxiliary_models: Dict[str, TunerChatModel],
76+
model: ChatModelBase,
77+
auxiliary_models: Dict[str, ChatModelBase],
7878
) -> WorkflowOutput:
7979
# Parse task and create agent
8080
agent = EmailSearchAgent(
@@ -103,7 +103,7 @@ The judge function `email_search_judge` implements reward calculation using LLM-
103103
async def email_search_judge(
104104
task: Dict,
105105
response: Msg,
106-
auxiliary_models: Dict[str, TunerChatModel],
106+
auxiliary_models: Dict[str, ChatModelBase],
107107
) -> JudgeOutput:
108108
# Extract answer and sources from response
109109
answer = answer_and_sources.get("answer")
@@ -152,9 +152,9 @@ See [`main.py`](./main.py) and [`email_search_agent.py`](./email_search_agent.py
152152

153153
Adjust the configuration file ([`config.yaml`](./config.yaml)) based on your hardware. Key configuration sections include:
154154

155-
- **TunerChatModel**: Set `model_path` to your model checkpoint path
156-
- **Algorithm**: Configure RL algorithm parameters (e.g., `multi_step_grpo`, learning rate, policy loss function)
157-
- **Dataset**: The dataset path is specified in `main.py` when creating the `Dataset` object
155+
- **TunerModelConfig**: Set `model_path` to your model checkpoint path
156+
- **AlgorithmConfig**: Configure RL algorithm parameters (e.g., `multi_step_grpo`, learning rate, policy loss function)
157+
- **DatasetConfig**: The dataset path is specified in `main.py` when creating the `DatasetConfig` object
158158
- **Auxiliary Models**: Configure judge model settings for LLM-as-a-Judge
159159

160160
For full configuration details, see [Trinity-RFT Configuration Guide](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/trinity_configs.html).

tuner/email_search/main.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
from agentscope.formatter import OpenAIChatFormatter
1313
from agentscope.message import Msg
1414
from agentscope.tuner import (
15-
TunerChatModel,
16-
Dataset,
15+
TunerModelConfig,
16+
DatasetConfig,
1717
JudgeOutput,
1818
WorkflowOutput,
19-
Algorithm,
19+
AlgorithmConfig,
2020
tune,
2121
)
22+
from agentscope.model import ChatModelBase
2223

2324

2425
SYSTEM_PROMPT = """You are an email search agent. You are given a user query
@@ -42,8 +43,8 @@
4243

4344
async def run_email_search_agent(
4445
task: Dict,
45-
model: TunerChatModel,
46-
auxiliary_models: Dict[str, TunerChatModel],
46+
model: ChatModelBase,
47+
auxiliary_models: Dict[str, ChatModelBase],
4748
) -> WorkflowOutput: # noqa: PLR0915
4849
"""A workflow function using the Email Search agent to solve tasks.
4950
@@ -172,17 +173,17 @@ def _initialize_rubric(
172173
async def email_search_judge(
173174
task: Dict,
174175
response: Msg,
175-
auxiliary_models: Dict[str, TunerChatModel],
176+
auxiliary_models: Dict[str, ChatModelBase],
176177
) -> JudgeOutput:
177178
"""A judge function to calculate reward based on agent's response.
178179
179180
Args:
180181
task (Dict): The task information for the corresponding workflow.
181182
response (Msg): The response generated by the corresponding workflow.
182-
auxiliary_models (Dict[str, TunerChatModel]):
183+
auxiliary_models (Dict[str, ChatModelBase]):
183184
A dictionary of additional chat models available for LLM-as-a-Judge
184185
usage. The keys are model names, and the values are the
185-
corresponding TunerChatModel instances.
186+
corresponding ChatModelBase instances.
186187
187188
Returns:
188189
JudgeOutput: The reward value assigned by the judge function.
@@ -277,7 +278,7 @@ async def email_search_judge(
277278
async def judge_correctness(
278279
answer: str,
279280
query: QueryModel,
280-
judge: TunerChatModel,
281+
judge: ChatModelBase,
281282
) -> bool:
282283
"""Use an LLM to decide whether *answer* matches *query.answer*.
283284
@@ -339,19 +340,19 @@ async def judge_correctness(
339340
os.path.dirname(__file__),
340341
"config.yaml",
341342
)
342-
dataset = Dataset(
343+
dataset = DatasetConfig(
343344
path="/path/to/enron_emails_dataset",
344345
split="train",
345346
)
346-
tuner_model = TunerChatModel(
347+
tuner_model = TunerModelConfig(
347348
model_path="Qwen/Qwen3-4B-Instruct-2507",
348349
max_model_len=20480,
349350
max_tokens=4096,
350351
inference_engine_num=4,
351352
reasoning_parser=None,
352353
)
353354
aux_models = {
354-
"judge": TunerChatModel(
355+
"judge": TunerModelConfig(
355356
model_path="Qwen/Qwen3-30B-A3B-Instruct-2507",
356357
max_model_len=2500,
357358
max_tokens=2048,
@@ -361,7 +362,7 @@ async def judge_correctness(
361362
reasoning_parser=None,
362363
),
363364
}
364-
algorithm = Algorithm(
365+
algorithm = AlgorithmConfig(
365366
algorithm_type="multi_step_grpo",
366367
group_size=8,
367368
batch_size=64,

tuner/frozen_lake/README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ The workflow function `run_frozen_lake` implements the agent-environment interac
8989
```python
9090
async def run_frozen_lake(
9191
task: Dict,
92-
model: TrinityChatModel,
93-
auxiliary_models: Dict[str, TrinityChatModel],
92+
model: ChatModelBase,
93+
auxiliary_models: Dict[str, ChatModelBase],
9494
) -> WorkflowOutput:
9595
# ...
9696

@@ -159,14 +159,14 @@ See [frozenlake_env.py](./frozenlake_env.py) and [frozenlake_agent.py](./frozenl
159159
### Step 4: Use `tune` to train the workflow
160160

161161
```python
162-
from agentscope.tuner import tune, Dataset
162+
from agentscope.tuner import tune, DatasetConfig
163163

164164
if __name__ == "__main__":
165165
config_path = os.path.join(
166166
os.path.dirname(__file__),
167167
"config.yaml",
168168
)
169-
dataset = Dataset(
169+
dataset = DatasetConfig(
170170
path="/path/to/frozenlake_dataset",
171171
name="default",
172172
split="train",
@@ -212,19 +212,19 @@ Update the dataset path in `main.py` to point to your generated dataset director
212212

213213
Key configuration can be identified in the code, including:
214214

215-
**Algorithm Configuration** (`algorithm`):
215+
**Algorithm Configuration** (`AlgorithmConfig`):
216216
- `algorithm_type`: `multi_step_grpo` (Group Relative Policy Optimization for multi-step tasks)
217217
- `group_size`: Number of rollouts sampled per task to form each group for group-relative advantage estimation (default: 16)
218218
- `batch_size`: Batch size for training (default: 32)
219219
- `learning_rate`: Learning rate (default: 1e-6)
220220

221-
**Model Configuration** (`model`):
221+
**Model Configuration** (`TunerModelConfig`):
222222
- `model_path`: Path to the base model (e.g., `Qwen/Qwen2.5-3B-Instruct`)
223223
- `max_model_len`: Maximum model context length (default: 25600)
224224
- `max_tokens`: Maximum tokens for response generation (default: 2048)
225225
- `inference_engine_num`: Number of inference engines (default: 6)
226226

227-
**Dataset Configuration** (`dataset`):
227+
**Dataset Configuration** (`DatasetConfig`):
228228
- `path`: Path to the dataset (default: `/path/to/frozenlake`)
229229
- `split`: Split of the dataset (default: `train`)
230230

tuner/frozen_lake/main.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,24 @@
88
from agentscope.tuner import (
99
tune,
1010
WorkflowOutput,
11-
Dataset,
12-
TunerChatModel,
13-
Algorithm,
11+
DatasetConfig,
12+
TunerModelConfig,
13+
AlgorithmConfig,
1414
)
15+
from agentscope.model import ChatModelBase
1516

1617

1718
async def run_frozen_lake(
1819
task: Dict,
19-
model: TunerChatModel,
20-
auxiliary_models: Dict[str, TunerChatModel],
20+
model: ChatModelBase,
21+
auxiliary_models: Dict[str, ChatModelBase],
2122
) -> WorkflowOutput:
2223
"""A workflow function using the FrozenLake agent to solve tasks.
2324
2425
Args:
2526
task (Dict): The task to be solved, containing environment parameters
2627
like size, p, seed, is_slippery, etc.
27-
model (TunerChatModel): The language model to use.
28+
model (ChatModelBase): The language model to use.
2829
2930
Returns:
3031
WorkflowOutput: The workflow output containing the reward, response and
@@ -120,18 +121,18 @@ async def run_frozen_lake(
120121

121122

122123
if __name__ == "__main__":
123-
dataset = Dataset(
124+
dataset = DatasetConfig(
124125
path="/path/to/frozenlake",
125126
split="train",
126127
)
127-
tuner_model = TunerChatModel(
128+
tuner_model = TunerModelConfig(
128129
model_path="Qwen/Qwen2.5-3B-Instruct",
129130
max_model_len=25600,
130131
max_tokens=2048,
131132
inference_engine_num=6,
132133
reasoning_parser=None,
133134
)
134-
algorithm = Algorithm(
135+
algorithm = AlgorithmConfig(
135136
algorithm_type="multi_step_grpo",
136137
group_size=16,
137138
batch_size=32,

0 commit comments

Comments
 (0)