Skip to content

Commit a1b4d54

Browse files
committed
rename some config
1 parent fd7f24f commit a1b4d54

File tree

4 files changed

+37
-35
lines changed

4 files changed

+37
-35
lines changed

tuner/email_search/README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ The workflow function `run_email_search_agent` implements the agent-environment
7373
```python
7474
async def run_email_search_agent(
7575
task: Dict,
76-
model: TunerChatModel,
77-
auxiliary_models: Dict[str, TunerChatModel],
76+
model: ChatModelBase,
77+
auxiliary_models: Dict[str, ChatModelBase],
7878
) -> WorkflowOutput:
7979
# Parse task and create agent
8080
agent = EmailSearchAgent(
@@ -103,7 +103,7 @@ The judge function `email_search_judge` implements reward calculation using LLM-
103103
async def email_search_judge(
104104
task: Dict,
105105
response: Msg,
106-
auxiliary_models: Dict[str, TunerChatModel],
106+
auxiliary_models: Dict[str, ChatModelBase],
107107
) -> JudgeOutput:
108108
# Extract answer and sources from response
109109
answer = answer_and_sources.get("answer")
@@ -152,9 +152,9 @@ See [`main.py`](./main.py) and [`email_search_agent.py`](./email_search_agent.py
152152

153153
Adjust the configuration file ([`config.yaml`](./config.yaml)) based on your hardware. Key configuration sections include:
154154

155-
- **TunerChatModel**: Set `model_path` to your model checkpoint path
156-
- **Algorithm**: Configure RL algorithm parameters (e.g., `multi_step_grpo`, learning rate, policy loss function)
157-
- **Dataset**: The dataset path is specified in `main.py` when creating the `Dataset` object
155+
- **TunerModelConfig**: Set `model_path` to your model checkpoint path
156+
- **AlgorithmConfig**: Configure RL algorithm parameters (e.g., `multi_step_grpo`, learning rate, policy loss function)
157+
- **DatasetConfig**: The dataset path is specified in `main.py` when creating the `DatasetConfig` object
158158
- **Auxiliary Models**: Configure judge model settings for LLM-as-a-Judge
159159

160160
For full configuration details, see [Trinity-RFT Configuration Guide](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/trinity_configs.html).

tuner/email_search/main.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
from agentscope.formatter import OpenAIChatFormatter
1313
from agentscope.message import Msg
1414
from agentscope.tuner import (
15-
TunerChatModel,
16-
Dataset,
15+
TunerModelConfig,
16+
DatasetConfig,
1717
JudgeOutput,
1818
WorkflowOutput,
19-
Algorithm,
19+
AlgorithmConfig,
2020
tune,
2121
)
22+
from agentscope.model import ChatModelBase
2223

2324

2425
SYSTEM_PROMPT = """You are an email search agent. You are given a user query
@@ -42,8 +43,8 @@
4243

4344
async def run_email_search_agent(
4445
task: Dict,
45-
model: TunerChatModel,
46-
auxiliary_models: Dict[str, TunerChatModel],
46+
model: ChatModelBase,
47+
auxiliary_models: Dict[str, ChatModelBase],
4748
) -> WorkflowOutput: # noqa: PLR0915
4849
"""A workflow function using the Email Search agent to solve tasks.
4950
@@ -172,17 +173,17 @@ def _initialize_rubric(
172173
async def email_search_judge(
173174
task: Dict,
174175
response: Msg,
175-
auxiliary_models: Dict[str, TunerChatModel],
176+
auxiliary_models: Dict[str, ChatModelBase],
176177
) -> JudgeOutput:
177178
"""A judge function to calculate reward based on agent's response.
178179
179180
Args:
180181
task (Dict): The task information for the corresponding workflow.
181182
response (Msg): The response generated by the corresponding workflow.
182-
auxiliary_models (Dict[str, TunerChatModel]):
183+
auxiliary_models (Dict[str, ChatModelBase]):
183184
A dictionary of additional chat models available for LLM-as-a-Judge
184185
usage. The keys are model names, and the values are the
185-
corresponding TunerChatModel instances.
186+
corresponding ChatModelBase instances.
186187
187188
Returns:
188189
JudgeOutput: The reward value assigned by the judge function.
@@ -277,7 +278,7 @@ async def email_search_judge(
277278
async def judge_correctness(
278279
answer: str,
279280
query: QueryModel,
280-
judge: TunerChatModel,
281+
judge: ChatModelBase,
281282
) -> bool:
282283
"""Use an LLM to decide whether *answer* matches *query.answer*.
283284
@@ -339,19 +340,19 @@ async def judge_correctness(
339340
os.path.dirname(__file__),
340341
"config.yaml",
341342
)
342-
dataset = Dataset(
343+
dataset = DatasetConfig(
343344
path="/path/to/enron_emails_dataset",
344345
split="train",
345346
)
346-
tuner_model = TunerChatModel(
347+
tuner_model = TunerModelConfig(
347348
model_path="Qwen/Qwen3-4B-Instruct-2507",
348349
max_model_len=20480,
349350
max_tokens=4096,
350351
inference_engine_num=4,
351352
reasoning_parser=None,
352353
)
353354
aux_models = {
354-
"judge": TunerChatModel(
355+
"judge": TunerModelConfig(
355356
model_path="Qwen/Qwen3-30B-A3B-Instruct-2507",
356357
max_model_len=2500,
357358
max_tokens=2048,
@@ -361,7 +362,7 @@ async def judge_correctness(
361362
reasoning_parser=None,
362363
),
363364
}
364-
algorithm = Algorithm(
365+
algorithm = AlgorithmConfig(
365366
algorithm_type="multi_step_grpo",
366367
group_size=8,
367368
batch_size=64,

tuner/frozen_lake/README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ The workflow function `run_frozen_lake` implements the agent-environment interac
8989
```python
9090
async def run_frozen_lake(
9191
task: Dict,
92-
model: TrinityChatModel,
93-
auxiliary_models: Dict[str, TrinityChatModel],
92+
model: ChatModelBase,
93+
auxiliary_models: Dict[str, ChatModelBase],
9494
) -> WorkflowOutput:
9595
# ...
9696

@@ -159,14 +159,14 @@ See [frozenlake_env.py](./frozenlake_env.py) and [frozenlake_agent.py](./frozenl
159159
### Step 4: Use `tune` to train the workflow
160160

161161
```python
162-
from agentscope.tuner import tune, Dataset
162+
from agentscope.tuner import tune, DatasetConfig
163163

164164
if __name__ == "__main__":
165165
config_path = os.path.join(
166166
os.path.dirname(__file__),
167167
"config.yaml",
168168
)
169-
dataset = Dataset(
169+
dataset = DatasetConfig(
170170
path="/path/to/frozenlake_dataset",
171171
name="default",
172172
split="train",
@@ -212,19 +212,19 @@ Update the dataset path in `main.py` to point to your generated dataset director
212212

213213
Key configuration can be identified in the code, including:
214214

215-
**Algorithm Configuration** (`algorithm`):
215+
**Algorithm Configuration** (`AlgorithmConfig`):
216216
- `algorithm_type`: `multi_step_grpo` (Group Relative Policy Optimization for multi-step tasks)
217217
- `group_size`: Number of rollouts sampled per task to form each group for group-relative advantage estimation (default: 16)
218218
- `batch_size`: Batch size for training (default: 32)
219219
- `learning_rate`: Learning rate (default: 1e-6)
220220

221-
**Model Configuration** (`model`):
221+
**Model Configuration** (`TunerModelConfig`):
222222
- `model_path`: Path to the base model (e.g., `Qwen/Qwen2.5-3B-Instruct`)
223223
- `max_model_len`: Maximum model context length (default: 25600)
224224
- `max_tokens`: Maximum tokens for response generation (default: 2048)
225225
- `inference_engine_num`: Number of inference engines (default: 6)
226226

227-
**Dataset Configuration** (`dataset`):
227+
**Dataset Configuration** (`DatasetConfig`):
228228
- `path`: Path to the dataset (default: `/path/to/frozenlake`)
229229
- `split`: Split of the dataset (default: `train`)
230230

tuner/frozen_lake/main.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,24 @@
88
from agentscope.tuner import (
99
tune,
1010
WorkflowOutput,
11-
Dataset,
12-
TunerChatModel,
13-
Algorithm,
11+
DatasetConfig,
12+
TunerModelConfig,
13+
AlgorithmConfig,
1414
)
15+
from agentscope.model import ChatModelBase
1516

1617

1718
async def run_frozen_lake(
1819
task: Dict,
19-
model: TunerChatModel,
20-
auxiliary_models: Dict[str, TunerChatModel],
20+
model: ChatModelBase,
21+
auxiliary_models: Dict[str, ChatModelBase],
2122
) -> WorkflowOutput:
2223
"""A workflow function using the FrozenLake agent to solve tasks.
2324
2425
Args:
2526
task (Dict): The task to be solved, containing environment parameters
2627
like size, p, seed, is_slippery, etc.
27-
model (TunerChatModel): The language model to use.
28+
model (ChatModelBase): The language model to use.
2829
2930
Returns:
3031
WorkflowOutput: The workflow output containing the reward, response and
@@ -120,18 +121,18 @@ async def run_frozen_lake(
120121

121122

122123
if __name__ == "__main__":
123-
dataset = Dataset(
124+
dataset = DatasetConfig(
124125
path="/path/to/frozenlake",
125126
split="train",
126127
)
127-
tuner_model = TunerChatModel(
128+
tuner_model = TunerModelConfig(
128129
model_path="Qwen/Qwen2.5-3B-Instruct",
129130
max_model_len=25600,
130131
max_tokens=2048,
131132
inference_engine_num=6,
132133
reasoning_parser=None,
133134
)
134-
algorithm = Algorithm(
135+
algorithm = AlgorithmConfig(
135136
algorithm_type="multi_step_grpo",
136137
group_size=16,
137138
batch_size=32,

0 commit comments

Comments
 (0)