Skip to content

Commit f842150

Browse files
committed
Merge branch 'main' into release/3.12
2 parents 5b8710b + 2c19674 commit f842150

File tree

13 files changed

+273
-51
lines changed

13 files changed

+273
-51
lines changed

docs/source/Instruction/Supported-models-and-datasets.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,8 @@
657657
|[IQuestLab/IQuest-Coder-V1-40B-Base-Stage1](https://modelscope.cn/models/IQuestLab/IQuest-Coder-V1-40B-Base-Stage1)|iquestcoder|iquestcoder|transformers==4.52.4|✘|-|[IQuestLab/IQuest-Coder-V1-40B-Base-Stage1](https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Base-Stage1)|
658658
|[IQuestLab/IQuest-Coder-V1-40B-Base](https://modelscope.cn/models/IQuestLab/IQuest-Coder-V1-40B-Base)|iquestcoder|iquestcoder|transformers==4.52.4|✘|-|[IQuestLab/IQuest-Coder-V1-40B-Base](https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Base)|
659659
|[IQuestLab/IQuest-Coder-V1-40B-Instruct](https://modelscope.cn/models/IQuestLab/IQuest-Coder-V1-40B-Instruct)|iquestcoder|iquestcoder|transformers==4.52.4|✘|-|[IQuestLab/IQuest-Coder-V1-40B-Instruct](https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Instruct)|
660+
|[Tencent-YouTu-Research/Youtu-LLM-2B](https://modelscope.cn/models/Tencent-YouTu-Research/Youtu-LLM-2B)|youtu_llm|youtu_llm|transformers>=4.56|✘|-|[tencent/Youtu-LLM-2B](https://huggingface.co/tencent/Youtu-LLM-2B)|
661+
|[Tencent-YouTu-Research/Youtu-LLM-2B-Base](https://modelscope.cn/models/Tencent-YouTu-Research/Youtu-LLM-2B-Base)|youtu_llm|youtu_llm|transformers>=4.56|✘|-|[tencent/Youtu-LLM-2B-Base](https://huggingface.co/tencent/Youtu-LLM-2B-Base)|
660662
|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
661663
|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)|
662664
|[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|✘|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)|

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,8 @@ The table below introduces the models integrated with ms-swift:
658658
|[IQuestLab/IQuest-Coder-V1-40B-Base-Stage1](https://modelscope.cn/models/IQuestLab/IQuest-Coder-V1-40B-Base-Stage1)|iquestcoder|iquestcoder|transformers==4.52.4|✘|-|[IQuestLab/IQuest-Coder-V1-40B-Base-Stage1](https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Base-Stage1)|
659659
|[IQuestLab/IQuest-Coder-V1-40B-Base](https://modelscope.cn/models/IQuestLab/IQuest-Coder-V1-40B-Base)|iquestcoder|iquestcoder|transformers==4.52.4|✘|-|[IQuestLab/IQuest-Coder-V1-40B-Base](https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Base)|
660660
|[IQuestLab/IQuest-Coder-V1-40B-Instruct](https://modelscope.cn/models/IQuestLab/IQuest-Coder-V1-40B-Instruct)|iquestcoder|iquestcoder|transformers==4.52.4|✘|-|[IQuestLab/IQuest-Coder-V1-40B-Instruct](https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Instruct)|
661+
|[Tencent-YouTu-Research/Youtu-LLM-2B](https://modelscope.cn/models/Tencent-YouTu-Research/Youtu-LLM-2B)|youtu_llm|youtu_llm|transformers>=4.56|✘|-|[tencent/Youtu-LLM-2B](https://huggingface.co/tencent/Youtu-LLM-2B)|
662+
|[Tencent-YouTu-Research/Youtu-LLM-2B-Base](https://modelscope.cn/models/Tencent-YouTu-Research/Youtu-LLM-2B-Base)|youtu_llm|youtu_llm|transformers>=4.56|✘|-|[tencent/Youtu-LLM-2B-Base](https://huggingface.co/tencent/Youtu-LLM-2B-Base)|
661663
|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
662664
|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)|
663665
|[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|✘|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)|

swift/llm/infer/rollout.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,13 @@ def get_infer_engine(args: RolloutArguments, template=None, **kwargs):
394394
engine_kwargs = kwargs.get('engine_kwargs', {})
395395
# for RL rollout model weight sync
396396
engine_kwargs.update({'worker_extension_cls': 'swift.llm.infer.rollout.WeightSyncWorkerExtension'})
397-
# Use load_format from engine_kwargs if provided, otherwise default to 'dummy'
398-
if 'load_format' not in engine_kwargs:
399-
engine_kwargs['load_format'] = 'dummy'
397+
398+
# For RL rollout, we use 'dummy' load_format to prevent vLLM from loading weights from disk,
399+
# as they will be synced from the trainer process.
400+
# This will accelerate the rollout speed.
401+
load_format = engine_kwargs.pop('load_format', 'dummy')
402+
kwargs['load_format'] = load_format
403+
400404
if args.vllm_use_async_engine and args.vllm_data_parallel_size > 1:
401405
engine_kwargs['data_parallel_size'] = args.vllm_data_parallel_size
402406
kwargs['engine_kwargs'] = engine_kwargs

swift/llm/model/constant.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ class LLMModelType:
144144
longchat = 'longchat'
145145
minimind = 'minimind'
146146
iquestcoder = 'iquestcoder'
147+
youtu_llm = 'youtu_llm'
147148

148149

149150
class BertModelType:

swift/llm/model/model/llm.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,3 +413,18 @@ def get_model_tokenizer_yuan(model_dir: str,
413413
requires=['transformers==4.52.4'],
414414
architectures=['IQuestCoderForCausalLM'],
415415
))
416+
417+
# Register the Tencent Youtu-LLM 2B family (instruct + base checkpoints).
# ModelScope ids map to their Hugging Face mirrors; weights load through the
# generic flash-attn tokenizer/model loader and the dedicated youtu_llm template.
register_model(
    ModelMeta(
        LLMModelType.youtu_llm,
        [
            ModelGroup([
                # (ModelScope id, Hugging Face id)
                Model('Tencent-YouTu-Research/Youtu-LLM-2B', 'tencent/Youtu-LLM-2B'),
                Model('Tencent-YouTu-Research/Youtu-LLM-2B-Base', 'tencent/Youtu-LLM-2B-Base'),
            ])
        ],
        TemplateType.youtu_llm,
        get_model_tokenizer_with_flash_attn,
        architectures=['YoutuForCausalLM'],
        # YoutuForCausalLM requires a recent transformers release.
        requires=['transformers>=4.56'],
    ))

swift/llm/template/constant.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class LLMTemplateType:
113113
bert = 'bert'
114114
minimind = 'minimind'
115115
iquestcoder = 'iquestcoder'
116+
youtu_llm = 'youtu_llm'
116117

117118

118119
class RMTemplateType:

swift/llm/template/template/llm.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,3 +430,56 @@ class GptOssTemplateMeta(TemplateMeta):
430430
LLMTemplateType.iquestcoder,
431431
default_system='You are LoopCoder, a helpful assistant developed by IQuest.',
432432
))
433+
434+
435+
class YoutuLLMTemplate(Template):
    """Chat template for Youtu-LLM models with <think> reasoning spans."""

    def _remove_thinking_content(self, content: str) -> str:
        """Drop a completed reasoning span and re-prefix the visible reply.

        Everything up to (and including) the final '</think>' tag is removed,
        then the history thinking prefix is prepended to the stripped text.
        """
        marker = '</think>'
        if marker in content:
            content = content.rsplit(marker, 1)[-1].lstrip('\n')
        return self.template_meta.history_thinking_prefix + content.strip()

    def _add_non_thinking_prefix(self, inputs) -> None:
        """Prepend the non-thinking prefix to the most recent assistant turn.

        The prefix is added only when that turn carries no '<think>' or
        '</think>' tag; the backwards scan stops at the first assistant
        message found either way.
        """
        messages = inputs.messages
        prefix = self.template_meta.non_thinking_prefix
        if not (prefix and messages):
            return
        for msg in reversed(messages):
            if msg['role'] != 'assistant' or not isinstance(msg['content'], str):
                continue
            text = msg['content']
            if '<think>' not in text and '</think>' not in text:
                msg['content'] = prefix + text
            break

    def _remove_history_thinking(self, inputs) -> None:
        """Strip reasoning content from historical assistant messages.

        During training this only runs under the 'last_round' loss-scale
        strategy (earlier rounds then carry no loss, so their thinking spans
        are safe to drop). The final message, and assistant turns at or after
        the turn just before the first tool interaction, keep their reasoning.
        """
        if self.is_training and self.loss_scale.base_strategy != 'last_round':
            return
        messages = inputs.messages

        def _is_tool_turn(msg) -> bool:
            # Either a dedicated 'tool' role or a user turn wrapping a tool result.
            if msg['role'] == 'tool':
                return True
            text = msg.get('content')
            return (msg['role'] == 'user' and isinstance(text, str)
                    and text.startswith('<tool_response>') and text.endswith('</tool_response>'))

        first_tool_index = next(
            (idx for idx, msg in enumerate(messages) if _is_tool_turn(msg)), len(messages))
        last_index = len(messages) - 1
        for idx, msg in enumerate(messages):
            if msg['role'] != 'assistant' or not isinstance(msg['content'], str):
                continue
            # Only scrub assistant turns strictly before first_tool_index - 1,
            # never the final message.
            if idx != last_index and idx < first_tool_index - 1:
                msg['content'] = self._remove_thinking_content(msg['content'])
472+
473+
register_template(
474+
TemplateMeta(
475+
LLMTemplateType.youtu_llm,
476+
template_cls=YoutuLLMTemplate,
477+
prefix=[['bos_token_id']],
478+
system_prefix=[['bos_token_id'], '{{SYSTEM}}'],
479+
prompt=['<|User|>{{QUERY}}<|Assistant|>'],
480+
chat_sep=['<|end_of_text|>'],
481+
suffix=['<|end_of_text|>'],
482+
is_thinking=True,
483+
non_thinking_prefix='<think>\n\n</think>\n\n',
484+
agent_template='youtu',
485+
))

swift/megatron/model/gpt_bridge.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,10 @@ def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mcore: bool):
282282
hf_state_dict[k] = v
283283
elif hf_state_dict is None:
284284
return {}
285+
else:
286+
if self._target_device is not None:
287+
for k, v in hf_state_dict.items():
288+
hf_state_dict[k] = v.to(self._target_device)
285289
return self._add_prefix(hf_state_dict, hf_prefix)
286290

287291
def _all_gather_tp(self, tensor, tp_dim, is_expert):
@@ -1431,6 +1435,7 @@ def export_weights(self,
14311435

14321436
def save_weights(self, mg_models, output_dir: str, is_peft_format: bool = False) -> None:
14331437
"""Save the mg_model checkpoint in HF format"""
1438+
torch.cuda.empty_cache()
14341439
saver = StreamingSafetensorSaver(
14351440
save_dir=output_dir, max_shard_size=self.args.max_shard_size, is_peft_format=is_peft_format)
14361441
for k, v in self.export_weights(

swift/plugin/agent_template/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .react import ReactEnAgentTemplate, ReactZnAgentTemplate
1212
from .seed_oss import SeedAgentTemplate
1313
from .toolbench import ToolBenchAgentTemplate
14+
from .youtu import YoutuAgentTemplate
1415

1516
agent_templates = {
1617
# ref: https://qwen.readthedocs.io/zh-cn/latest/framework/function_call.html#function-calling-templates
@@ -36,5 +37,6 @@
3637
'seed_oss': SeedAgentTemplate,
3738
# extra
3839
'react_grpo': ReactGRPOAgentTemplate,
39-
'mistral': MistralAgentTemplate
40+
'mistral': MistralAgentTemplate,
41+
'youtu': YoutuAgentTemplate,
4042
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright (c) Alibaba, Inc. and its affiliates.
2+
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
3+
4+
import json
5+
6+
from .hermes import HermesAgentTemplate
7+
8+
if TYPE_CHECKING:
9+
from swift.llm.template import Prompt
10+
11+
12+
class YoutuAgentTemplate(HermesAgentTemplate):
    """Agent template for Youtu-LLM models.

    Tool calling format:
    - Tool call: <tool_call>{"name": "function-name", "arguments": {...}}</tool_call>
    - Tool response: <tool_response>...</tool_response>
    """

    def _get_tool_responses(self, tool_messages):
        """Wrap each tool message's content in <tool_response> tags, newline-joined."""
        return '\n'.join(
            f"<tool_response>{msg['content']}</tool_response>" for msg in tool_messages)

    def _format_tool_responses(
        self,
        assistant_content: str,
        tool_messages,
    ) -> Tuple[str, 'Prompt']:
        """Return the assistant text plus the prompt contexts carrying tool output."""
        kw = self.keyword
        if kw.action in assistant_content and kw.action_input in assistant_content:
            # ReAct-style action markers present: defer to the Hermes base class.
            return super()._format_tool_responses(assistant_content, tool_messages)
        # For Youtu-LLM, tool responses are placed in a regular user message.
        if hasattr(self, 'template_meta'):
            prompt = self.template_meta.prompt
            chat_sep = self.template_meta.chat_sep
        else:
            # Fallbacks mirroring the registered youtu_llm template.
            prompt = ['<|User|>{{QUERY}}<|Assistant|>']
            chat_sep = ['<|end_of_text|>']
        joined_responses = self._get_tool_responses(tool_messages)
        contexts = list(chat_sep)
        contexts.extend(
            ctx.replace('{{QUERY}}', joined_responses) if isinstance(ctx, str) else ctx
            for ctx in prompt)
        return assistant_content, contexts

    def _format_tools(self, tools: List[Union[str, dict]], system: Optional[str] = None, user_message=None) -> str:
        """Render the tool-description system block from the given tool schemas."""
        descs = [json.dumps(self.wrap_tool(tool), ensure_ascii=False) for tool in tools]
        prefix = f'{system}\n\n' if system else ''
        head = (prefix + '<|begin_of_tool_description|>Tool calling capabilities.\n'
                'You may call one or more functions to assist with the user query. '
                'You have the following functions available:\n')
        body = '\n'.join(f'```json\n{desc}\n```' for desc in descs)
        tail = ('\nFor tool call returns, you MUST use the following format:\n'
                '<tool_call>{"name": "function-name", "arguments": '
                '{"param1": "value1", "param2": "value2"}}</tool_call>\n'
                '<|end_of_tool_description|>')
        return head + body + tail

    def _format_tool_calls(self, tool_call_messages):
        """Serialize parsed tool calls into concatenated <tool_call> tags."""
        parsed_calls = (self._parse_tool_call(msg['content']) for msg in tool_call_messages)
        return ''.join(
            f'<tool_call>{json.dumps(call, ensure_ascii=False)}</tool_call>' for call in parsed_calls)

0 commit comments

Comments
 (0)