# Required string input; forwarded unchanged to the setup-job action's
# `build-artifact-name` parameter.
build-artifact-name:
  type: string
  required: true
# Required string input; handed to prepare_test_matrix.py as its second
# argument when the test matrix is built.
enabled-skus:
  type: string
  required: true
# Optional model selector. The value "all" keeps the whole test matrix; any
# other value keeps only the matrix entries whose `model` field equals it.
# The default must be exactly "all" (no surrounding whitespace), because the
# filter step compares the input against that literal string.
model:
  required: false
  type: string
  default: "all"
19- extra-tag :
20- required : false
21- type : string
22- default : " in-service"
23- topology :
24- required : false
25- type : string
26- default : " topology-6u"
# Optional boolean input, true unless the caller overrides it.
# NOTE(review): the place where this flag is consumed is not visible in this
# part of the file; the description below is the only statement of its effect.
mlperf-read-only:
  required: false
  type: boolean
  default: true
  description: "Set to false to allow write access to MLPerf mount"
3227
3328jobs :
34- generate -matrix :
29+ load-test -matrix :
3530 runs-on : ubuntu-latest
3631 outputs :
37- matrix : ${{ steps.generate.outputs.matrix }}
32+ matrix : ${{ steps.apply-model-filter.outputs.matrix }}
33+ env :
34+ TESTS_YAML_PATH : ./tests/pipeline_reorg/galaxy_demo_tests.yaml
3835 steps :
39- - name : Generate test matrix
40- id : generate
41- run : |
42- # Define test matrix with owner comments
43- # Djordje Ivanovic - llama3
44- # Miguel Tairum - llama3_long_context, llama3_evals
45- # Radoica Draskic - llama3_8b_dp, llama3_70b_dp
46- # Salar Hosseini - falcon7b (For Falcon7b, please see issue #31939)
47- # Ashai Reddy - sentence_bert
48- # Aniruddha Tupe - whisper
49- # Colman Glagovich - sd35, wan22
50- # Samuel Adesoye - flux1, motif, qwenimage
51- # Harry Andrews - gpt-oss
52- # Stephen Osborne - mochi
53- # Rico Zhu - qwen3_32b, qwen3_32b_long_context
54- # Yousef Al Rawwash - deepseek
55-
56- disabled_tests='[
57- { "name": "Galaxy Llama3 long context demo tests", "arch": "wormhole_b0", "model": "llama3_long_context", "timeout": 60, "owner_id": "U03PUAKE719" },
58- { "name": "Galaxy Llama3 evals tests", "arch": "wormhole_b0", "model": "llama3_evals", "timeout": 45, "owner_id": "U03PUAKE719" },
59- ]'
60-
61- all_tests='[
62- { "name": "Galaxy Llama3 demo tests", "arch": "wormhole_b0", "model": "llama3", "timeout": 20, "owner_id": "U053W15B6JF" },
63- { "name": "Galaxy Llama3 8B data-parallel demo tests", "arch": "wormhole_b0", "model": "llama3_8b_dp", "timeout": 20, "owner_id": "U08BH66EXAL" },
64- { "name": "Galaxy Llama3 70B data-parallel demo tests", "arch": "wormhole_b0", "model": "llama3_70b_dp", "timeout": 20, "owner_id": "U08BH66EXAL" },
65- { "name": "Galaxy Falcon7b demo tests", "arch": "wormhole_b0", "model": "falcon7b", "timeout": 10, "owner_id": "U05RWH3QUPM" },
66- { "name": "Galaxy SentenceBert demo tests", "arch": "wormhole_b0", "model": "sentence_bert", "timeout": 5, "owner_id": "U088413NP0Q" },
67- { "name": "Galaxy Whisper demo tests", "arch": "wormhole_b0", "model": "whisper", "timeout": 5, "owner_id": "U08HL8X1ECD" },
68- { "name": "Galaxy Stable Diffusion 3.5 Large demo tests", "arch": "wormhole_b0", "model": "sd35", "timeout": 10, "owner_id": "U03FJB5TM5Y" },
69- { "name": "Galaxy Flux.1-dev demo tests", "arch": "wormhole_b0", "model": "flux1", "timeout": 15, "owner_id": "U08TED0JM9D" },
70- { "name": "Galaxy GPT-OSS demo tests", "arch": "wormhole_b0", "model": "gpt-oss", "timeout": 45, "owner_id": "U08TJ70UFRT" },
71- { "name": "Galaxy Motif-Image-6B-Preview demo tests", "arch": "wormhole_b0", "model": "motif", "timeout": 15, "owner_id": "U08TED0JM9D" },
72- { "name": "Galaxy Wan2.2 demo tests", "arch": "wormhole_b0", "model": "wan22", "timeout": 20, "owner_id": "U03FJB5TM5Y" },
73- { "name": "Galaxy Mochi demo tests", "arch": "wormhole_b0", "model": "mochi", "timeout": 15, "owner_id": "U09ELB03XRU" },
74- { "name": "Galaxy DeepSeek v3 demo tests", "arch": "wormhole_b0", "model": "deepseek_v3", "timeout": 10, "owner_id": "U08H32XUS9W" },
75- { "name": "Galaxy QwenImage demo tests", "arch": "wormhole_b0", "model": "qwenimage", "timeout": 20, "owner_id": "U08TED0JM9D" }
76- ]'
77-
78- # Filter matrix based on model selection
# Fetch the repository, including nested submodules, so the helper scripts and
# YAML files referenced by the following steps are present on the runner.
- uses: actions/checkout@v4
  name: Checkout the repository
  with:
    submodules: recursive
# PyYAML is installed before the Python helper scripts in the next steps run.
- name: Install dependencies
  run: pip3 install PyYAML
# Runs verify_time_budget.py with the tests YAML, the time-budget file and the
# literal "demo" as arguments; a non-zero exit fails the job.
# TESTS_YAML_PATH comes from the job-level `env` block.
- name: Verify test timeouts against budget
  run: |
    set -e
    echo "Verifying that timeouts defined in tests.yaml are within the allowed limits..."
    python3 .github/scripts/utils/verify_time_budget.py "${TESTS_YAML_PATH}" ./.github/time_budget.yaml "demo"
# Builds the test matrix by calling prepare_test_matrix.py with the tests YAML,
# the caller-supplied SKU selection and the SKU configuration file.
# NOTE(review): the script is presumably what writes the `matrix` step output
# that the next step reads - confirm in prepare_test_matrix.py.
- name: Build unified test matrix based on enabled SKUs
  id: build-matrix
  env:
    # The workflow input is passed as an environment variable instead of being
    # interpolated into the script text, so quotes or shell metacharacters in
    # the value cannot change the command that is executed.
    ENABLED_SKUS: ${{ inputs.enabled-skus }}
  run: |
    python3 .github/scripts/utils/prepare_test_matrix.py \
      "$TESTS_YAML_PATH" \
      "$ENABLED_SKUS" \
      ./.github/sku_config.yaml
# Narrows the matrix produced by the `build-matrix` step to a single model and
# publishes the result as this step's `matrix` output.
#   - model == "all": the matrix is forwarded unchanged.
#   - otherwise:      only entries whose `model` field equals the input are
#                     kept; an empty result fails the job with an error
#                     annotation.
- name: Apply model filter
  id: apply-model-filter
  env:
    # Expression values are handed over as environment variables rather than
    # pasted into the script: a single quote inside the matrix JSON, or shell
    # metacharacters in the model name, would otherwise break or alter the
    # script.
    MATRIX: ${{ steps.build-matrix.outputs.matrix }}
    MODEL: ${{ inputs.model }}
  run: |
    if [ "$MODEL" = "all" ]; then
      SELECTED="$MATRIX"
    else
      # --arg passes the model name to jq as data, never as jq program text.
      SELECTED=$(echo "$MATRIX" | jq -c --arg m "$MODEL" '[.[] | select(.model == $m)]')
      if [ "$SELECTED" = "[]" ]; then
        echo "::error::No tests found for model '${MODEL}'. Ensure the model is defined in the test matrix (TESTS_YAML_PATH=${TESTS_YAML_PATH})."
        exit 1
      fi
    fi

    # Delimiter form of the output command, so a multi-line matrix is kept intact.
    {
      echo "matrix<<EOF"
      echo "$SELECTED"
      echo "EOF"
    } >> "$GITHUB_OUTPUT"
8476
85- echo "matrix=$(echo "$matrix" | jq -c .)" >> $GITHUB_OUTPUT
86-
8777 galaxy-demo-tests :
88- needs : generate -matrix
78+ needs : load-test -matrix
8979 strategy :
9080 fail-fast : false
9181 matrix :
92- test-group : ${{ fromJson(needs.generate -matrix.outputs.matrix) }}
82+ test-group : ${{ fromJson(needs.load-test -matrix.outputs.matrix) }}
9383 name : ${{ matrix.test-group.name }}
94- runs-on :
95- - arch-wormhole_b0
96- - ${{ inputs.topology }}
97- - ${{ inputs.extra-tag }}
84+ runs-on : ${{ matrix.test-group.runs_on }}
9885 container :
99- image : ${{ inputs.docker-image }}
86+ image : ${{ inputs.docker-image || 'docker-image-unresolved!' }}
10087 env :
10188 TT_METAL_HOME : /work
10289 PYTHONPATH : /work
10390 LD_LIBRARY_PATH : /work/build/lib
10491 LOGURU_LEVEL : INFO
105- ARCH_NAME : ${{ matrix.test-group.arch }}
10692 HF_HUB_CACHE : /mnt/MLPerf/huggingface/hub
10793 volumes :
10894 - ${{ github.workspace }}/docker-job:/work # Subdir to workaround https://github.com/actions/runner/issues/691
@@ -119,211 +105,20 @@ jobs:
119105 with :
120106 submodules : recursive
121107 path : docker-job
# Registers the workspace path in git's global `safe.directory` list.
# The variable is quoted so a path containing whitespace stays one argument.
# NOTE(review): the preceding checkout uses `path: docker-job`; confirm that
# $GITHUB_WORKSPACE (rather than its docker-job subdirectory) is the path git
# actually needs to trust here.
- name: Mark repository as safe directory
  run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
# Invokes the repository-local setup-job action from the docker-job checkout,
# forwarding the build and wheel artifact names; limited to 10 minutes.
- name: ⬇️ Setup Job
  timeout-minutes: 10
  uses: ./docker-job/.github/actions/setup-job
  with:
    wheel-artifact-name: ${{ inputs.wheel-artifact-name }}
    build-artifact-name: ${{ inputs.build-artifact-name }}
128- - name : Run mochi demo tests (direct pytest)
129- if : ${{ matrix.test-group.model == 'mochi' }}
130- timeout-minutes : ${{ matrix.test-group.timeout }}
131- env :
132- TT_DIT_CACHE_DIR : /tmp/TT_DIT_CACHE
133- NO_PROMPT : 1
134- run : |
135- pytest \
136- models/tt_dit/tests/models/mochi/test_pipeline_mochi.py \
137- -k "4x8sp1tp0" \
138- --timeout=1500
139- - name : Run GPT-OSS demo tests (direct pytest)
140- if : ${{ matrix.test-group.model == 'gpt-oss' }}
141- timeout-minutes : ${{ matrix.test-group.timeout }}
142- env :
143- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
144- run : |
145- # Install requirements
146- uv pip install -r models/demos/gpt_oss/requirements.txt
147-
148- # Test GPT-OSS 20B model
149- TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-20b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-20b/ \
150- pytest models/demos/gpt_oss/demo/text_demo.py -k "4x8"\
151- --timeout 1000
152-
153- TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-20b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-20b/ \
154- pytest models/demos/gpt_oss/tests/accuracy/test_model.py -k "4x8"\
155- --timeout 900
156-
157- # Test GPT-OSS 120B model
158- TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-120b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-120b/ \
159- pytest models/demos/gpt_oss/demo/text_demo.py -k "4x8"\
160- --timeout 1000
161-
162- TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-120b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-120b/ \
163- pytest models/demos/gpt_oss/tests/accuracy/test_model.py -k "4x8"\
164- --timeout 900
165-
166- - name : Run Llama3 demo tests (direct pytest)
167- if : ${{ matrix.test-group.model == 'llama3' }}
168- timeout-minutes : ${{ matrix.test-group.timeout }}
169- env :
170- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
171- LLAMA_DIR : /mnt/MLPerf/tt_dnn-models/llama/Llama3.3-70B-Instruct/
172- FAKE_DEVICE : TG
173- run : |
174- pytest models/demos/llama3_70b_galaxy/demo/demo_decode.py -k "full" --timeout 1000
175- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "repeat" --timeout 1000
176- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "pcc-80L" --timeout 1000
177- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "batch-32-non-uniform-sampling" --timeout 500
178- - name : Run Llama3 long context demo tests (direct pytest)
179- if : ${{ matrix.test-group.model == 'llama3_long_context' }}
180- timeout-minutes : ${{ matrix.test-group.timeout }}
181- env :
182- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
183- LLAMA_DIR : /mnt/MLPerf/tt_dnn-models/llama/Llama3.3-70B-Instruct/
184- FAKE_DEVICE : TG
185- run : |
186- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-4k-b1" --timeout 1000
187- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-8k-b1" --timeout 1000
188- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-16k-b1" --timeout 1000
189- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-32k-b1" --timeout 1000
190- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-64k-b1" --timeout 1000
191- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-128k-b1" --timeout 1000
192- - name : Run Llama3 evals tests (direct pytest)
193- if : ${{ matrix.test-group.model == 'llama3_evals' }}
194- timeout-minutes : ${{ matrix.test-group.timeout }}
195- env :
196- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
197- LLAMA_DIR : /mnt/MLPerf/tt_dnn-models/llama/Llama3.3-70B-Instruct/
198- FAKE_DEVICE : TG
199- run : |
200- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "evals-1" --timeout 1000
201- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "evals-32" --timeout 1000
202- pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "evals-long-prompts" --timeout 1000
203- - name : Run Llama3 8B data-parallel demo tests (direct pytest)
204- if : ${{ matrix.test-group.model == 'llama3_8b_dp' }}
205- timeout-minutes : ${{ matrix.test-group.timeout }}
206- env :
207- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
208- HF_MODEL : meta-llama/Llama-3.1-8B-Instruct
209- TT_CACHE_PATH : /mnt/MLPerf/huggingface/tt_cache/meta-llama/Llama-3.1-8B-Instruct
210- MESH_DEVICE : TG
211- run : |
212- pytest models/tt_transformers/demo/simple_text_demo.py --timeout 1000
213- - name : Run Llama3 70B data-parallel demo tests (direct pytest)
214- if : ${{ matrix.test-group.model == 'llama3_70b_dp' }}
215- timeout-minutes : ${{ matrix.test-group.timeout }}
216- env :
217- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
218- HF_MODEL : meta-llama/Llama-3.3-70B-Instruct
219- TT_CACHE_PATH : /mnt/MLPerf/huggingface/tt_cache/meta-llama/Llama-3.3-70B-Instruct
220- MESH_DEVICE : TG
221- run : |
222- pytest models/tt_transformers/demo/simple_text_demo.py --timeout 1000
223- - name : Run Falcon7b demo tests (direct pytest)
224- if : ${{ matrix.test-group.model == 'falcon7b' }}
225- timeout-minutes : ${{ matrix.test-group.timeout }}
226- run : |
227- pytest --disable-warnings -q -s \
228- --input-method=json \
229- --input-path='models/demos/tg/falcon7b/input_data_tg.json' \
230- models/demos/tg/falcon7b/demo_tg.py \
231- --timeout=1500
232- - name : Run SentenceBert demo tests (direct pytest)
233- if : ${{ matrix.test-group.model == 'sentence_bert' }}
234- timeout-minutes : ${{ matrix.test-group.timeout }}
235- run : |
236- pytest models/demos/tg/sentence_bert/tests/test_sentence_bert_e2e_performant.py --timeout=1500
237- - name : Run Whisper demo tests (direct pytest)
238- if : ${{ matrix.test-group.model == 'whisper' }}
239- timeout-minutes : ${{ matrix.test-group.timeout }}
240- run : |
241- pytest models/demos/audio/whisper/demo/demo.py::test_demo_for_conditional_generation --timeout 1500
242- - name : Run Stable Diffusion 3.5 demo tests (direct pytest)
243- if : ${{ matrix.test-group.model == 'sd35' }}
244- timeout-minutes : ${{ matrix.test-group.timeout }}
245- env :
246- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
247- NO_PROMPT : 1
248- TT_MM_THROTTLE_PERF : 5
249- run : |
250- pytest models/tt_dit/tests/models/sd35/test_pipeline_sd35.py \
251- -k "4x8cfg1sp0tp1" \
252- --timeout 1200
253- - name : Run Flux.1-dev demo tests (direct pytest)
254- if : ${{ matrix.test-group.model == 'flux1' }}
255- timeout-minutes : ${{ matrix.test-group.timeout }}
256- env :
257- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
258- NO_PROMPT : 1
259- TT_MM_THROTTLE_PERF : 5
260- run : |
261- pytest models/tt_dit/tests/models/flux1/test_pipeline_flux1.py \
262- -k "4x8sp0tp1-dev" \
263- --timeout 1200
264- - name : Run Motif-Image-6B-Preview demo tests (direct pytest)
265- if : ${{ matrix.test-group.model == 'motif' }}
266- timeout-minutes : ${{ matrix.test-group.timeout }}
267- env :
268- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
269- NO_PROMPT : 1
270- run : |
271- pytest models/tt_dit/tests/models/motif/test_pipeline_motif.py -k "4x8cfg1sp0tp1" --timeout 1200
272- - name : Run Wan2.2 demo tests (direct pytest)
273- if : ${{ matrix.test-group.model == 'wan22' }}
274- timeout-minutes : ${{ matrix.test-group.timeout }}
275- env :
276- TT_DIT_CACHE_DIR : /tmp/TT_DIT_CACHE
277- NO_PROMPT : 1
278- run : |
279- pytest \
280- models/tt_dit/tests/models/wan2_2/test_pipeline_wan.py \
281- -k "wh_4x8sp1tp0 and resolution_720p" \
282- --timeout 1500
283- - name : Run Qwen3-32B demo tests (direct pytest)
284- if : ${{ matrix.test-group.model == 'qwen3_32b' && (inputs.model == 'all' || inputs.model == 'qwen3_32b') }}
285- timeout-minutes : ${{ matrix.test-group.timeout }}
286- env :
287- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
288- HF_MODEL : Qwen/Qwen3-32B
289- TT_CACHE_PATH : /mnt/MLPerf/huggingface/tt_cache/Qwen/Qwen3-32B
290- MESH_DEVICE : TG
291- run : |
292- pytest models/demos/llama3_70b_galaxy/demo/demo_qwen_decode.py -k "full" --timeout 1000
293- pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "batch-32" --timeout 1000
294- pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "repeat2" --timeout 1000
295- - name : Run Qwen3-32B long context demo tests (direct pytest)
296- if : ${{ matrix.test-group.model == 'qwen3_32b_long_context' && (inputs.model == 'all' || inputs.model == 'qwen3_32b_long_context') }}
297- timeout-minutes : ${{ matrix.test-group.timeout }}
298- env :
299- TT_CACHE_HOME : /mnt/MLPerf/huggingface/tt_cache
300- HF_MODEL : Qwen/Qwen3-32B
301- TT_CACHE_PATH : /mnt/MLPerf/huggingface/tt_cache/Qwen/Qwen3-32B
302- MESH_DEVICE : TG
303- run : |
304- pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "long-8k-b1" --timeout 1000
305- pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "long-32k-b1" --timeout 1000
306- pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "long-128k-b1" --timeout 1000
307- - name : Run DeepSeek v3 demo tests (direct pytest)
308- if : ${{ matrix.test-group.model == 'deepseek_v3' && (inputs.model == 'all' || inputs.model == 'deepseek_v3') }}
309- timeout-minutes : ${{ matrix.test-group.timeout }}
310- env :
311- DEEPSEEK_V3_HF_MODEL : /mnt/MLPerf/tt_dnn-models/deepseek-ai/DeepSeek-R1-0528
312- DEEPSEEK_V3_CACHE : /mnt/MLPerf/tt_dnn-models/deepseek-ai/DeepSeek-R1-0528-Cache/CI
313- MESH_DEVICE : TG
314- run : |
315- pytest models/demos/deepseek_v3/demo/test_demo.py --timeout 600
316- - name : Run QwenImage demo tests (direct pytest)
317- if : ${{ matrix.test-group.model == 'qwenimage' }}
# Runs the command defined for the current matrix entry, bounded by that
# entry's timeout. The command is logged first, then executed.
- name: ${{ matrix.test-group.name }}
  timeout-minutes: ${{ matrix.test-group.timeout }}
  env:
    # The command is passed as an environment variable instead of being pasted
    # into the script. Interpolating it inside `echo "..."` would let the shell
    # strip its double quotes and expand any $(...) or $VAR during logging.
    TEST_CMD: ${{ matrix.test-group.cmd }}
  run: |
    # Log the command exactly as written in the matrix entry.
    printf '%s\n' "$TEST_CMD"
    # eval runs the text in the current shell, so multi-line commands,
    # VAR=value prefixes and the shell's errexit setting behave as they would
    # for an inline script.
    eval "$TEST_CMD"
327122 - uses : tenstorrent/tt-metal/.github/actions/slack-report@main
328123 if : ${{ failure() }}
329124 with :
@@ -355,3 +150,4 @@ jobs:
355150 with :
356151 prefix : " test_reports_"
# Run the cleanup action regardless of how the earlier steps ended.
- if: always()
  uses: tenstorrent/tt-metal/.github/actions/cleanup@main
0 commit comments