Commit bb23dd7

Merge branch 'main' into sihao_issue_fix
2 parents: 0030eea + 4429d93

File tree: 16 files changed (+832, -7 lines)


docker/Dockerfile.xpu

Lines changed: 3 additions & 0 deletions
@@ -76,6 +76,9 @@ RUN python3 -m pip install -e tests/vllm_test_utils
 ENV NIXL_VERSION=0.7.0
 RUN python3 /workspace/vllm/tools/install_nixl_from_source_ubuntu.py
 
+# PyJWT-2.7.0 will influence some wheel behaviors, remove its dist-info to avoid conflicts
+RUN rm /usr/lib/python3/dist-packages/PyJWT-2.7.0.dist-info/ -rf
+
 # remove torch bundled oneccl to avoid conflicts
 RUN --mount=type=cache,target=/root/.cache/pip \
     pip uninstall oneccl oneccl-devel -y

docs/models/supported_models.md

Lines changed: 1 addition & 0 deletions
@@ -661,6 +661,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `AriaForConditionalGeneration` | Aria | T + I<sup>+</sup> | `rhymes-ai/Aria` | | |
 | `AudioFlamingo3ForConditionalGeneration` | AudioFlamingo3 | T + A<sup>+</sup> | `nvidia/audio-flamingo-3-hf`, `nvidia/music-flamingo-hf` | ✅︎ | ✅︎ |
 | `AyaVisionForConditionalGeneration` | Aya Vision | T + I<sup>+</sup> | `CohereLabs/aya-vision-8b`, `CohereLabs/aya-vision-32b`, etc. | | ✅︎ |
+| `BagelForConditionalGeneration` | BAGEL | T + I<sup>+</sup> | `ByteDance-Seed/BAGEL-7B-MoT` | ✅︎ | ✅︎ |
 | `BeeForConditionalGeneration` | Bee-8B | T + I<sup>E+</sup> | `Open-Bee/Bee-8B-RL`, `Open-Bee/Bee-8B-SFT` | | ✅︎ |
 | `Blip2ForConditionalGeneration` | BLIP-2 | T + I<sup>E</sup> | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc. | | ✅︎ |
 | `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ |

examples/offline_inference/vision_language.py

Lines changed: 27 additions & 0 deletions
@@ -118,6 +118,32 @@ def run_bee(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
+def run_bagel(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+    model_name = "ByteDance-Seed/BAGEL-7B-MoT"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=8192,
+        max_num_seqs=2,
+        limit_mm_per_prompt={modality: 1},
+    )
+
+    prompts = [
+        (
+            f"<|im_start|>user\n<|image_pad|>\n{question}<|im_end|>\n"
+            f"<|im_start|>assistant\n"
+        )
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 # BLIP-2
 def run_blip2(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
@@ -1832,6 +1858,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
 model_example_map = {
     "aria": run_aria,
     "aya_vision": run_aya_vision,
+    "bagel": run_bagel,
     "bee": run_bee,
     "blip-2": run_blip2,
     "chameleon": run_chameleon,

tests/models/language/pooling/test_token_classification.py

Lines changed: 31 additions & 0 deletions
@@ -68,3 +68,34 @@ def test_modernbert_models(
         hf_output = torch.tensor(hf_output).cpu().float()
         vllm_output = torch.tensor(vllm_output).cpu().float()
         assert torch.allclose(hf_output, vllm_output, atol=1e-2)
+
+
+@pytest.mark.parametrize("model", ["bd2lcco/Qwen3-0.6B-finetuned"])
+@pytest.mark.parametrize("dtype", ["float"])
+@torch.inference_mode
+def test_auto_conversion(
+    hf_runner,
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+) -> None:
+    with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
+        vllm_outputs = vllm_model.token_classify(example_prompts)
+
+    with hf_runner(
+        model, dtype=dtype, auto_cls=AutoModelForTokenClassification
+    ) as hf_model:
+        tokenizer = hf_model.tokenizer
+        hf_outputs = []
+        for prompt in example_prompts:
+            inputs = tokenizer([prompt], return_tensors="pt")
+            inputs = hf_model.wrap_device(inputs)
+            output = hf_model.model(**inputs)
+            hf_outputs.append(softmax(output.logits[0]))
+
+    # check logits difference
+    for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
+        hf_output = torch.tensor(hf_output).cpu().float()
+        vllm_output = torch.tensor(vllm_output).cpu().float()
+        assert torch.allclose(hf_output, vllm_output, atol=1e-2)
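
Note: the test compares vLLM's per-token class probabilities against HuggingFace's `AutoModelForTokenClassification` with a softmax over the logits. It relies on the automatic `*ForTokenClassification` suffix conversion added in `vllm/config/model.py` below. A rough user-facing sketch, assuming the pooling runner is resolved automatically from the architecture name (not an exact API reference):

# A minimal sketch, assuming vLLM auto-detects the pooling/classify runner
# from the *ForTokenClassification suffix; the model is the one from the test.
from vllm import LLM

llm = LLM(model="bd2lcco/Qwen3-0.6B-finetuned", max_model_len=1024)
outputs = llm.encode(["vLLM makes serving easy"])
# outputs[0].outputs.data holds one row of class scores per input token
print(outputs[0].outputs.data.shape)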

tests/models/registry.py

Lines changed: 2 additions & 0 deletions
@@ -573,6 +573,7 @@ def check_available_online(
     "Qwen3ForSequenceClassification": _HfExamplesInfo(
         "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
     ),
+    "Qwen3ForTokenClassification": _HfExamplesInfo("bd2lcco/Qwen3-0.6B-finetuned"),
 }
 
 _MULTIMODAL_EXAMPLE_MODELS = {
@@ -582,6 +583,7 @@ def check_available_online(
         "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0.dev"
     ),
     "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"),
+    "BagelForConditionalGeneration": _HfExamplesInfo("ByteDance-Seed/BAGEL-7B-MoT"),
     "BeeForConditionalGeneration": _HfExamplesInfo(
         "Open-Bee/Bee-8B-RL",
         trust_remote_code=True,

vllm/config/model.py

Lines changed: 1 addition & 0 deletions
@@ -1796,6 +1796,7 @@ def get_served_model_name(model: str, served_model_name: str | list[str] | None)
     ("ForTextEncoding", ("pooling", "embed")),
     ("EmbeddingModel", ("pooling", "embed")),
     ("ForSequenceClassification", ("pooling", "classify")),
+    ("ForTokenClassification", ("pooling", "classify")),
     ("ForAudioClassification", ("pooling", "classify")),
     ("ForImageClassification", ("pooling", "classify")),
     ("ForVideoClassification", ("pooling", "classify")),

vllm/entrypoints/context.py

Lines changed: 7 additions & 7 deletions
@@ -74,24 +74,24 @@ class TurnMetrics
 
     def __init__(
         self,
-        input_tokens=0,
-        output_tokens=0,
-        cached_input_tokens=0,
-        tool_output_tokens=0,
-    ):
+        input_tokens: int = 0,
+        output_tokens: int = 0,
+        cached_input_tokens: int = 0,
+        tool_output_tokens: int = 0,
+    ) -> None:
         self.input_tokens = input_tokens
         self.output_tokens = output_tokens
         self.cached_input_tokens = cached_input_tokens
         self.tool_output_tokens = tool_output_tokens
 
-    def reset(self):
+    def reset(self) -> None:
         """Reset counters for a new turn."""
         self.input_tokens = 0
         self.output_tokens = 0
         self.cached_input_tokens = 0
         self.tool_output_tokens = 0
 
-    def copy(self):
+    def copy(self) -> "TurnMetrics":
         """Create a copy of this turn's token counts."""
         return TurnMetrics(
             self.input_tokens,
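
Note: the change only adds type annotations, but they make the intended lifecycle explicit: accumulate counts while a turn runs, snapshot with `copy()`, then `reset()` for the next turn. A short usage sketch (the token counts are placeholders):

# Usage sketch of the TurnMetrics lifecycle annotated above; counts are
# placeholders, not real measurements.
metrics = TurnMetrics()
per_turn: list[TurnMetrics] = []
for _ in range(2):
    metrics.input_tokens += 128      # accumulated while the turn runs
    metrics.output_tokens += 42
    per_turn.append(metrics.copy())  # snapshot this turn's totals
    metrics.reset()                  # zero the counters for the next turn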

vllm/model_executor/models/adapters.py

Lines changed: 12 additions & 0 deletions
@@ -337,6 +337,18 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
         tokens = getattr(text_config, "classifier_from_token", None)
         method = getattr(text_config, "method", None)
 
+        def auto_set_score_bias(weights):
+            for name, weight in weights:
+                if name == "score.bias":
+                    device = self.score.weight.device
+                    dtype = self.score.weight.dtype
+                    bias = weight.to(device).to(dtype)
+                    self.score.bias = torch.nn.Parameter(bias)
+                    self.score.skip_bias_add = False
+                else:
+                    yield name, weight
+
+        weights = auto_set_score_bias(weights)
         if tokens is None and method is None:
             return super().load_weights(weights)
         else:
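
Note: `auto_set_score_bias` is a generator that filters the incoming weight stream: it consumes `score.bias` as a side effect (attaching it to the score head) and yields every other tensor through unchanged, so the downstream loader never sees the bias. Because generators are lazy, the bias is only attached when the loader actually iterates the stream. A self-contained sketch of the same filter pattern, with hypothetical names:

# Self-contained sketch of the generator-filter pattern used above: one named
# tensor is captured as a side effect, the rest pass through. All names here
# are hypothetical, not vLLM APIs.
import torch

captured: dict[str, torch.Tensor] = {}

def filter_stream(weights, target: str = "score.bias"):
    for name, tensor in weights:
        if name == target:
            captured[name] = tensor  # consumed here, never forwarded
        else:
            yield name, tensor

stream = [("score.weight", torch.zeros(2, 4)), ("score.bias", torch.zeros(2))]
remaining = list(filter_stream(stream))  # iteration triggers the side effect
assert [name for name, _ in remaining] == ["score.weight"]
assert "score.bias" in captured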
