File renamed without changes.
2 changes: 1 addition & 1 deletion docs/zh/processors.md → docs/zh/processors_zh.md
@@ -14,7 +14,7 @@
 
 ### 💻 Usage Example
 
-Below is an example showing how to load a `Processor` and process image/video data ([Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct).
+Below is an example showing how to load a `Processor` and process image/video data (using the [Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) model as an example).
 
 - Example 1: Process images and text directly:
 
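To make the documented flow concrete, here is a minimal sketch of the loading step described above; it assumes the HF-style `from_pretrained` API that PaddleFormers processors follow, with the checkpoint ID taken from the link in the diff:

```python
from paddleformers.transformers import Qwen2_5_VLProcessor

# Load the processor for the checkpoint referenced above (assumed API surface).
processor = Qwen2_5_VLProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")

# Image/text inputs would then go through the processor's __call__,
# e.g. processor(text=[...], images=[...]) as in the doc's Example 1.
```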
File renamed without changes.
56 changes: 39 additions & 17 deletions paddleformers/generation/utils.py
@@ -16,7 +16,7 @@
 
 import copy
 import inspect
-from typing import Optional, Union
+from typing import Optional, Tuple, Union
 
 import paddle
 import paddle.distributed as dist
@@ -641,20 +641,16 @@ def get_decoder_start_token_id(self, decoder_start_token_id=None, bos_token_id=N
 
     def prepare_inputs_for_generation(
         self,
-        input_ids,
-        use_cache=True,
-        past_key_values=None,
-        inputs_embeds=None,
+        input_ids: paddle.Tensor,
+        past_key_values: Optional[Tuple[paddle.Tensor]] = None,
+        inputs_embeds: Optional[paddle.Tensor] = None,
         **kwargs,
     ):
         """Prepares model inputs for generation in PaddlePaddle models.
 
         Args:
             input_ids (paddle.Tensor):
                 The input token IDs with shape [batch_size, sequence_length].
-            use_cache (bool, optional):
-                Whether to use cached key-value states for faster generation.
-                Defaults to False.
             past_key_values (Optional[Tuple[paddle.Tensor]]):
                 Cached past key-value states from previous generation steps.
                 If provided, the input_ids will be truncated to only keep the last token.
@@ -675,26 +671,52 @@ def prepare_inputs_for_generation(
             - "return_dict": Always set to True for consistent output format
 
         """
+        model_inputs = {}
+        model_inputs["past_key_values"] = past_key_values
+        model_inputs["cache_position"] = kwargs.get("cache_position", None)
 
         if past_key_values:
             input_ids = input_ids[:, -1:]
 
-        attention_mask = kwargs.get("attention_mask", None)
+        use_cache = kwargs.get("use_cache", None)
+        if use_cache is None:
+            use_cache = getattr(self.config, "use_cache", False)
 
         # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
         if inputs_embeds is not None and past_key_values is None:
             model_inputs = {"inputs_embeds": inputs_embeds}
         else:
             model_inputs = {"input_ids": input_ids}
 
-        model_inputs.update(
-            {
-                "past_key_values": past_key_values,
-                "use_cache": use_cache,
-                "attention_mask": attention_mask,
-                "return_dict": True,
-            }
-        )
+        attention_mask = kwargs.get("attention_mask", None)
+        if (
+            attention_mask is not None
+            and kwargs.get("position_ids") is None
+            and "position_ids" in set(inspect.signature(self.forward).parameters.keys())
+        ):
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids.masked_fill_(attention_mask == 0, 1)
+            kwargs["position_ids"] = position_ids  # placed in kwargs for further processing (see below)
+
+        model_input = kwargs.get("position_ids")
+        if model_input is not None:
+            if past_key_values is not None or use_cache:
+                current_input_length = (
+                    model_inputs["inputs_embeds"].shape[1]
+                    if model_inputs.get("inputs_embeds") is not None
+                    else model_inputs["input_ids"].shape[1]
+                )
+                model_input = model_input[:, -current_input_length:]
+            model_inputs["position_ids"] = model_input
+
+        model_inputs["return_dict"] = kwargs.get("return_dict", True)
+
+        for key, value in kwargs.items():
+            if key not in model_inputs:
+                model_inputs[key] = value
+
+        # Remove unexpected `generate` inputs
+        model_inputs.pop("labels", None)
         return model_inputs

     def adjust_logits_during_generation(self, logits):
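The least obvious addition above is the derivation of `position_ids` from `attention_mask`. A minimal NumPy stand-in (illustration only, not the PR's Paddle code) shows what the `cumsum`/`masked_fill_` pair computes for a left-padded batch:

```python
import numpy as np

# One left-padded sequence (0 = padding) and one full sequence.
attention_mask = np.array([[0, 0, 1, 1, 1],
                           [1, 1, 1, 1, 1]])

# Real tokens get consecutive positions starting at 0 ...
position_ids = attention_mask.cumsum(-1) - 1
# ... and padded slots are then overwritten with the dummy value 1,
# mirroring masked_fill_(attention_mask == 0, 1).
position_ids[attention_mask == 0] = 1

print(position_ids)
# [[1 1 0 1 2]
#  [0 1 2 3 4]]
```

When a cache is in use, the resulting tensor is then sliced to its last `current_input_length` columns, matching the one-token `input_ids` kept once `past_key_values` is present.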
2 changes: 1 addition & 1 deletion paddleformers/nn/attention/flashmask_attention.py
@@ -44,7 +44,7 @@ def flashmask_attention_forward(
             key,
             value,
             startend_row_indices=attn_mask_startend_row_indices,
-            causal=True,
+            causal=is_causal if is_causal is not None else True,
         )
     else:
         out = sink_attention_forward(
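The behavioral contract of this one-line change is worth spelling out: `None` now means "fall back to the old default" rather than disabling causality. A tiny sketch of the resolution rule:

```python
def resolve_causal(is_causal):
    # None preserves the previous hard-coded behavior (causal=True);
    # an explicit True/False from the caller is now honored.
    return is_causal if is_causal is not None else True

assert resolve_causal(None) is True
assert resolve_causal(True) is True
assert resolve_causal(False) is False
```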
8 changes: 8 additions & 0 deletions paddleformers/transformers/__init__.py
@@ -301,6 +301,13 @@
     ],
     "qwen2.tokenizer": ["Qwen2Tokenizer"],
     "qwen2.tokenizer_fast": ["Qwen2TokenizerFast"],
+    "qwen2_5_vl.configuration": ["Qwen2_5_VLConfig", "Qwen2_5_VLTextConfig"],
+    "qwen2_5_vl.modeling": [
+        "Qwen2_5_VLForConditionalGeneration",
+        "Qwen2_5_VLModel",
+        "Qwen2_5_VLPretrainedModel",
+        "Qwen2_5_VLTextModel",
+    ],
     "qwen2_5_vl.processor": ["Qwen2_5_VLProcessor"],
     "qwen2_moe.configuration": ["Qwen2MoeConfig"],
     "qwen2_moe.modeling": [
@@ -343,6 +350,7 @@
     "ernie4_5": ["Ernie4_5DecoderLayer", "Ernie4_5Model", "Ernie4_5_ForCausalLM"],
     "ernie4_5_moe": ["Ernie4_5_MoeDecoderLayer", "Ernie4_5_MoeModel", "Ernie4_5_MoeForCausalLM"],
     "ernie4_5_moe_vl": [],
+    "qwen2_5_vl": [],
     "qwen2_moe": [],
     "qwen2_vl": [],
     "qwen3_moe": [],
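Assuming the package's lazy-import machinery resolves entries in this structure the same way it does for the neighboring `qwen2` modules, the new registrations should make top-level imports like these work:

```python
# Names taken verbatim from the import-structure entries added above.
from paddleformers.transformers import (
    Qwen2_5_VLConfig,
    Qwen2_5_VLForConditionalGeneration,
    Qwen2_5_VLProcessor,
)
```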
15 changes: 15 additions & 0 deletions paddleformers/transformers/auto/configuration.py
@@ -42,6 +42,8 @@
         ("llama", "LlamaConfig"),
         ("qwen", "QWenConfig"),
         ("qwen2", "Qwen2Config"),
+        ("qwen2_5_vl", "Qwen2_5_VLConfig"),
+        ("qwen2_5_vl_text", "Qwen2_5_VLTextConfig"),
         ("qwen2_moe", "Qwen2MoeConfig"),
         ("qwen3", "Qwen3Config"),
         ("qwen3_moe", "Qwen3MoeConfig"),
@@ -63,6 +65,8 @@
         ("llama", "Llama"),
         ("qwen", "QWen"),
         ("qwen2", "Qwen2"),
+        ("qwen2_5_vl", "Qwen2_5_VL"),
+        ("qwen2_5_vl_text", "Qwen2_5_VL"),
         ("qwen2_moe", "Qwen2Moe"),
         ("qwen3", "Qwen3"),
         ("qwen3_moe", "Qwen3Moe"),
@@ -74,6 +78,12 @@
     []
 )
 
+SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict(
+    [
+        ("qwen2_5_vl_text", "qwen2_5_vl"),
+    ]
+)
+
 
 def config_class_to_model_type(config):
     """Converts a config class name to the corresponding model type"""
@@ -182,6 +192,11 @@ def get_configurations() -> Dict[str, List[Type[PretrainedConfig]]]:
 
 def model_type_to_module_name(key):
     """Converts a config key to the corresponding module."""
+    # Special treatment
+    if key in SPECIAL_MODEL_TYPE_TO_MODULE_NAME:
+        key = SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
+        return key
+
     key = key.replace("-", "_")
     return key
 
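A self-contained re-creation of the new lookup (a sketch using the names from the diff, not the module itself) shows why the special-case table exists: `qwen2_5_vl_text` has no module of its own and must resolve to the shared `qwen2_5_vl` module:

```python
from collections import OrderedDict

SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict([("qwen2_5_vl_text", "qwen2_5_vl")])

def model_type_to_module_name(key):
    # Special model types map onto a shared module; everything else
    # keeps the usual dash-to-underscore normalization.
    if key in SPECIAL_MODEL_TYPE_TO_MODULE_NAME:
        return SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
    return key.replace("-", "_")

assert model_type_to_module_name("qwen2_5_vl_text") == "qwen2_5_vl"
assert model_type_to_module_name("qwen2_5_vl") == "qwen2_5_vl"
assert model_type_to_module_name("some-model") == "some_model"
```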
3 changes: 2 additions & 1 deletion paddleformers/transformers/auto/modeling.py
@@ -61,8 +61,9 @@
     ("Llama", "llama"),
     ("QWen", "qwen"),
     ("Qwen2", "qwen2"),
-    ("Qwen3", "qwen3"),
+    ("Qwen2_5_VL", "qwen2_5_vl"),
     ("Qwen2Moe", "qwen2_moe"),
+    ("Qwen3", "qwen3"),
     ("Qwen3Moe", "qwen3_moe"),
     ("Glm4Moe", "glm4_moe"),
     ("GptOss", "gpt_oss"),
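Taken together with the configuration registry above, these entries let the Auto classes resolve the new architecture. A hedged usage sketch, assuming the usual `from_pretrained` entry point:

```python
from paddleformers.transformers import AutoConfig  # assumed public entry point

# model_type "qwen2_5_vl" now resolves through the mappings added in this PR.
config = AutoConfig.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
print(type(config).__name__)  # expected: Qwen2_5_VLConfig
```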