diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 867a81716..13d0e6146 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -215,11 +215,13 @@ qwen3.5-fp4-mi355x-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 4, conc-start: 4, conc-end: 256 } + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 4 } - isl: 8192 osl: 1024 search-space: - - { tp: 4, conc-start: 4, conc-end: 256 } + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 32 } qwen3.5-fp8-mi300x-sglang: image: lmsysorg/sglang:v0.5.9-rocm720-mi30x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index a9250cbd5..edfa24efd 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1336,3 +1336,9 @@ description: - "Bump GLM-5 FP8 B200 SGLang concurrency from 128 to 256" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1012 + +- config-keys: + - qwen3.5-fp4-mi355x-sglang + description: + - "TP2/TP4 search space exploration for Qwen3.5 fp4 on SGL" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1022