Skip to content

Commit bc4a350

Browse files
committed
update
1 parent 3081459 commit bc4a350

3 files changed

Lines changed: 114 additions & 5 deletions

File tree

.github/workflows/docker_nightly.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ jobs:
7070
# Install the lite/test/offline requirements, then pin NCCL. Every command is chained with `&& \` so it stays inside this single RUN, and the NCCL specifier is quoted so the shell does not parse `>2.29` as an output redirection.
RUN python3 -m pip install --no-cache-dir -r /tmp/requirements/lite.txt && \
7171
python3 -m pip install --no-cache-dir -r /tmp/requirements/test.txt && \
7272
pip install --no-cache-dir ${{ env.OFFLINE_REQUIREMENTS }} && \
73+
pip install --no-cache-dir "nvidia-nccl-cu12>2.29"
7374
EOF
7475
docker build . -f docker/Dockerfile.nightly-extended \
7576
--build-arg BASE_IMAGE=${{ env.TAG }} \

autotest/interface/pipeline/test_pipeline_longtext_func.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import numpy as np
55
import pytest
6+
from transformers import AutoTokenizer
67
from utils.config_utils import set_device_env_variable, unset_device_env_variable
78

89
from lmdeploy import GenerationConfig, PytorchEngineConfig, TurbomindEngineConfig, pipeline
@@ -189,12 +190,14 @@ def passkey_retrival_worker(config, model, backend, log_name, tp_num, session_le
189190
pipe = pipeline(model_path, backend_config=backend_config)
190191

191192
gen_config = GenerationConfig(top_k=40)
193+
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
194+
192195
# inference
193-
pass_key1, prompt = get_passkey_prompt(pipe, session_len)
196+
pass_key1, prompt = get_passkey_prompt(pipe, session_len, tokenizer)
194197
response1 = pipe(prompt, gen_config=gen_config)
195198

196199
# inference
197-
pass_key2, prompt = get_passkey_prompt(pipe, session_len)
200+
pass_key2, prompt = get_passkey_prompt(pipe, session_len, tokenizer)
198201
response2 = pipe([prompt] * 2, gen_config=gen_config)
199202

200203
pipe.close()
@@ -213,13 +216,12 @@ def passkey_retrival_worker(config, model, backend, log_name, tp_num, session_le
213216
assert str(pass_key2) in response2[0].text and str(pass_key2) in response2[1].text, str(response2)
214217

215218

216-
def get_passkey_prompt(pipe, session_len):
219+
def get_passkey_prompt(pipe, session_len, tokenizer):
217220
# create long context input
218-
tok = pipe.tokenizer
219221
task_description = 'There is an important info hidden inside a lot of irrelevant text. Find it and memorize them. I will quiz you about the important information there.' # noqa: E501
220222
garbage = 'The grass is green. The sky is blue. The sun is yellow. Here we go. There and back again.' # noqa: E501
221223

222-
n_times = (session_len - 1000) // len(tok.encode(garbage))
224+
n_times = (session_len - 1000) // len(tokenizer.encode(garbage))
223225
n_garbage_prefix = np.random.randint(0, n_times)
224226
n_garbage_suffix = n_times - n_garbage_prefix
225227
garbage_prefix = ' '.join([garbage] * n_garbage_prefix)
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Ordered rules that set per-model / per-environment / per-func-type extra_params in get_func_config_list.
2+
# Rules are applied in order; when several matching rules set the same extra_params key, the later rule wins.
3+
4+
rules:
5+
- name: qwen3-235b-thinking-2507
6+
match:
7+
model_contains: Qwen3-235B-A22B-Thinking-2507
8+
extra_params:
9+
cache-max-entry-count: 0.9
10+
max-batch-size: 1024
11+
parallel_rules:
12+
- match:
13+
dp: 8
14+
ep: 8
15+
extra_params:
16+
max-batch-size: 256
17+
18+
- name: glm-5-fp8
19+
match:
20+
model_contains: GLM-5-FP8
21+
extra_params:
22+
cache-max-entry-count: 0.9
23+
max-batch-size: 128
24+
25+
- name: evaluate-default-session-len
26+
match:
27+
func_type: evaluate
28+
extra_missing_keys: [session_len, session-len]
29+
model_not_contains: Qwen3.5
30+
extra_params:
31+
session_len: 65536
32+
33+
- name: cogvlm-chat-hf-session-len
34+
match:
35+
model_contains: THUDM/cogvlm-chat-hf
36+
extra_params:
37+
session-len: 32568
38+
39+
- name: env-3090-5080-cache
40+
match:
41+
env_tag_in: ["3090", "5080"]
42+
extra_params:
43+
cache-max-entry-count: 0.5
44+
45+
- name: env-a100-large-models-cache
46+
match:
47+
env_tag_in: ["a100"]
48+
model_any:
49+
- model_contains: Qwen3-235B-A22B
50+
- model_equals: internlm/Intern-S1
51+
extra_params:
52+
cache-max-entry-count: 0.6
53+
54+
- name: sdar-dllm
55+
match:
56+
model_contains_ignore_case: sdar
57+
extra_params:
58+
dllm-block-length: 4
59+
dllm-denoising-steps: 4
60+
dllm-confidence-threshold: 0.9
61+
62+
- name: kimi-dpep16
63+
match:
64+
model_contains_ignore_case: kimi
65+
parallel_rules:
66+
- match:
67+
dp: 16
68+
ep: 16
69+
extra_params:
70+
max-batch-size: 256
71+
72+
- name: intern-s1-pro
73+
match:
74+
model_any:
75+
- model_contains: Intern-S1-Pro-FP8
76+
- model_contains: Intern-S1-Pro-BF16
77+
model_rules:
78+
- match:
79+
model_contains: Intern-S1-Pro-FP8
80+
extra_params:
81+
model-format: fp8
82+
parallel_rules:
83+
- match:
84+
dp: 16
85+
ep: 16
86+
extra_params:
87+
max-prefill-token-num: 1024
88+
max-batch-size: 128
89+
90+
- name: gpt-oss-turbomind-benchmark
91+
match:
92+
model_contains: openai/gpt-oss
93+
backend: turbomind
94+
func_type_in: [benchmark, longtext_benchmark]
95+
extra_params:
96+
model-format: mxfp4
97+
98+
- name: qwen35-mtp-evaluate
99+
match:
100+
func_type: mtp_evaluate
101+
model_contains: Qwen3.5
102+
extra_params:
103+
reasoning-parser: qwen-qwq
104+
speculative-algorithm: qwen3_5_mtp
105+
speculative-num-draft-tokens: 4
106+
max-batch-size: 256

0 commit comments

Comments
 (0)