tenstorrent
diff --git a/‎.github/time_budget.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.github/time_budget.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/galaxy-demo-tests-impl.yaml‎
Lines changed: 55 additions & 259 deletions b/‎.github/workflows/galaxy-demo-tests-impl.yaml‎
Lines changed: 55 additions & 259 deletions
diff --git a/‎.github/workflows/galaxy-demo-tests.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/galaxy-demo-tests.yaml‎
Lines changed: 1 addition & 1 deletion
@@ -107,3 +107,4 @@ models:
     wh_galaxy: 260
   demo:
     wh_llmbox: 1090
+    wh_galaxy: 270
@@ -12,97 +12,83 @@ on:
       build-artifact-name:
         required: true
         type: string
+      enabled-skus:  # SKU selection forwarded to .github/scripts/utils/prepare_test_matrix.py when the test matrix is built; exact value format is not visible here
+        required: true
+        type: string
       model:
         required: false
         type: string
         default: "all"
-      extra-tag:
-        required: false
-        type: string
-        default: "in-service"
-      topology:
-        required: false
-        type: string
-        default: "topology-6u"
       mlperf-read-only:
         required: false
         type: boolean
         default: true
         description: "Set to false to allow write access to MLPerf mount"
 
 jobs:
-  generate-matrix:
+  load-test-matrix:  # produces the JSON matrix that galaxy-demo-tests fans out over
     runs-on: ubuntu-latest
     outputs:
-      matrix: ${{ steps.generate.outputs.matrix }}
+      matrix: ${{ steps.apply-model-filter.outputs.matrix }}
+    env:
+      TESTS_YAML_PATH: ./tests/pipeline_reorg/galaxy_demo_tests.yaml
     steps:
-      - name: Generate test matrix
-        id: generate
-        run: |
-          # Define test matrix with owner comments
-          # Djordje Ivanovic - llama3
-          # Miguel Tairum - llama3_long_context, llama3_evals
-          # Radoica Draskic - llama3_8b_dp, llama3_70b_dp
-          # Salar Hosseini - falcon7b (For Falcon7b, please see issue #31939)
-          # Ashai Reddy - sentence_bert
-          # Aniruddha Tupe - whisper
-          # Colman Glagovich - sd35, wan22
-          # Samuel Adesoye - flux1, motif, qwenimage
-          # Harry Andrews - gpt-oss
-          # Stephen Osborne - mochi
-          # Rico Zhu - qwen3_32b, qwen3_32b_long_context
-          # Yousef Al Rawwash - deepseek
-
-          disabled_tests='[
-            { "name": "Galaxy Llama3 long context demo tests", "arch": "wormhole_b0", "model": "llama3_long_context", "timeout": 60, "owner_id": "U03PUAKE719" },
-            { "name": "Galaxy Llama3 evals tests", "arch": "wormhole_b0", "model": "llama3_evals", "timeout": 45, "owner_id": "U03PUAKE719" },
-          ]'
-
-          all_tests='[
-            { "name": "Galaxy Llama3 demo tests", "arch": "wormhole_b0", "model": "llama3", "timeout": 20, "owner_id": "U053W15B6JF" },
-            { "name": "Galaxy Llama3 8B data-parallel demo tests", "arch": "wormhole_b0", "model": "llama3_8b_dp", "timeout": 20, "owner_id": "U08BH66EXAL" },
-            { "name": "Galaxy Llama3 70B data-parallel demo tests", "arch": "wormhole_b0", "model": "llama3_70b_dp", "timeout": 20, "owner_id": "U08BH66EXAL" },
-            { "name": "Galaxy Falcon7b demo tests", "arch": "wormhole_b0", "model": "falcon7b", "timeout": 10, "owner_id": "U05RWH3QUPM" },
-            { "name": "Galaxy SentenceBert demo tests", "arch": "wormhole_b0", "model": "sentence_bert", "timeout": 5, "owner_id": "U088413NP0Q" },
-            { "name": "Galaxy Whisper demo tests", "arch": "wormhole_b0", "model": "whisper", "timeout": 5, "owner_id": "U08HL8X1ECD" },
-            { "name": "Galaxy Stable Diffusion 3.5 Large demo tests", "arch": "wormhole_b0", "model": "sd35", "timeout": 10, "owner_id": "U03FJB5TM5Y" },
-            { "name": "Galaxy Flux.1-dev demo tests", "arch": "wormhole_b0", "model": "flux1", "timeout": 15, "owner_id": "U08TED0JM9D" },
-            { "name": "Galaxy GPT-OSS demo tests", "arch": "wormhole_b0", "model": "gpt-oss", "timeout": 45, "owner_id": "U08TJ70UFRT" },
-            { "name": "Galaxy Motif-Image-6B-Preview demo tests", "arch": "wormhole_b0", "model": "motif", "timeout": 15, "owner_id": "U08TED0JM9D" },
-            { "name": "Galaxy Wan2.2 demo tests", "arch": "wormhole_b0", "model": "wan22", "timeout": 20, "owner_id": "U03FJB5TM5Y" },
-            { "name": "Galaxy Mochi demo tests", "arch": "wormhole_b0", "model": "mochi", "timeout": 15, "owner_id": "U09ELB03XRU" },
-            { "name": "Galaxy DeepSeek v3 demo tests", "arch": "wormhole_b0", "model": "deepseek_v3", "timeout": 10, "owner_id": "U08H32XUS9W" },
-            { "name": "Galaxy QwenImage demo tests", "arch": "wormhole_b0", "model": "qwenimage", "timeout": 20, "owner_id": "U08TED0JM9D" }
-          ]'
-
-          # Filter matrix based on model selection
+      - name: Checkout the repository  # later steps run scripts and read YAML files from this checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive  # NOTE(review): confirm submodules are actually needed just to build the matrix
+      - name: Install dependencies  # presumably PyYAML is what the Python helper scripts in the following steps import; verify
+        run: |
+          pip3 install PyYAML
+      # Runs verify_time_budget.py on the tests YAML and the time-budget file
+      # for the "demo" category; a non-zero exit fails the job before any
+      # matrix is produced.
+      - name: Verify test timeouts against budget
+        run: |
+          set -e
+          echo "Verifying that timeouts defined in tests.yaml are within the allowed limits..."
+          # TESTS_YAML_PATH is exported to the shell by the job-level env block.
+          python3 .github/scripts/utils/verify_time_budget.py "$TESTS_YAML_PATH" ./.github/time_budget.yaml "demo"
+      # Runs prepare_test_matrix.py with the tests YAML, the enabled SKUs and
+      # the SKU config; the next step reads this step's `matrix` output.
+      - name: Build unified test matrix based on enabled SKUs
+        id: build-matrix
+        env:
+          # Passed through the environment instead of being spliced into the
+          # script text, so quotes or shell metacharacters in the input cannot
+          # alter the command that runs.
+          ENABLED_SKUS: ${{ inputs.enabled-skus }}
+        run: |
+          # TESTS_YAML_PATH is exported to the shell by the job-level env block.
+          python3 .github/scripts/utils/prepare_test_matrix.py \
+            "$TESTS_YAML_PATH" \
+            "$ENABLED_SKUS" \
+            ./.github/sku_config.yaml
+      # Publishes this step's `matrix` output: the build-matrix result as-is
+      # when inputs.model is "all", otherwise only the entries whose `model`
+      # field equals inputs.model. Fails if the filter matches nothing.
+      - name: Apply model filter
+        id: apply-model-filter
+        env:
+          # Expression values reach the shell as environment variables rather
+          # than being spliced into the script text, so a quote or shell
+          # metacharacter inside them cannot break or alter the script.
+          MATRIX: ${{ steps.build-matrix.outputs.matrix }}
+          MODEL: ${{ inputs.model }}
+        run: |
+          # An empty matrix would otherwise only fail later, inside fromJson().
+          if [ -z "$MATRIX" ]; then
+            echo "::error::The build-matrix step produced an empty test matrix (TESTS_YAML_PATH=${TESTS_YAML_PATH})."
+            exit 1
+          fi
-          if [ "${{ inputs.model }}" = "all" ]; then
-            matrix="$all_tests"
+          if [ "$MODEL" = "all" ]; then
+            echo "matrix<<EOF" >> "$GITHUB_OUTPUT"
+            echo "$MATRIX" >> "$GITHUB_OUTPUT"
+            echo "EOF" >> "$GITHUB_OUTPUT"
           else
-            matrix=$(echo "$all_tests" | jq -c "[.[] | select(.model == \"${{ inputs.model }}\")]")
+            FILTERED=$(echo "$MATRIX" | jq -c --arg m "$MODEL" '[.[] | select(.model == $m)]')
+            if [ "$FILTERED" = "[]" ]; then
+              echo "::error::No tests found for model '${MODEL}'. Ensure the model is defined in the test matrix (TESTS_YAML_PATH=${TESTS_YAML_PATH})."
+              exit 1
+            fi
+            echo "matrix<<EOF" >> "$GITHUB_OUTPUT"
+            echo "$FILTERED" >> "$GITHUB_OUTPUT"
+            echo "EOF" >> "$GITHUB_OUTPUT"
           fi
 
-          echo "matrix=$(echo "$matrix" | jq -c .)" >> $GITHUB_OUTPUT
-
   galaxy-demo-tests:
-    needs: generate-matrix
+    needs: load-test-matrix
     strategy:
       fail-fast: false
       matrix:
-        test-group: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
+        test-group: ${{ fromJson(needs.load-test-matrix.outputs.matrix) }}
     name: ${{ matrix.test-group.name }}
-    runs-on:
-      - arch-wormhole_b0
-      - ${{ inputs.topology }}
-      - ${{ inputs.extra-tag }}
+    runs-on: ${{ matrix.test-group.runs_on }}  # runner labels are supplied per matrix entry
     container:
-      image: ${{ inputs.docker-image }}
+      image: ${{ inputs.docker-image || 'docker-image-unresolved!' }}  # placeholder is used when the docker-image input is empty; NOTE(review): presumably so an unset input fails visibly - confirm
       env:
         TT_METAL_HOME: /work
         PYTHONPATH: /work
         LD_LIBRARY_PATH: /work/build/lib
         LOGURU_LEVEL: INFO
-        ARCH_NAME: ${{ matrix.test-group.arch }}
         HF_HUB_CACHE: /mnt/MLPerf/huggingface/hub
       volumes:
         - ${{ github.workspace }}/docker-job:/work # Subdir to workaround https://github.com/actions/runner/issues/691
@@ -119,211 +105,20 @@ jobs:
         with:
           submodules: recursive
           path: docker-job
+      # Adds the workspace to git's global safe.directory list.
+      # NOTE(review): the checkout above uses `path: docker-job`; confirm that
+      # $GITHUB_WORKSPACE itself, rather than the docker-job subdirectory, is
+      # the path git needs to trust.
+      - name: Mark repository as safe directory
+        run: |
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
       - name: ⬇️  Setup Job
         uses: ./docker-job/.github/actions/setup-job
         timeout-minutes: 10
         with:
           build-artifact-name: ${{ inputs.build-artifact-name }}
           wheel-artifact-name: ${{ inputs.wheel-artifact-name }}
-      - name: Run mochi demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'mochi' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_DIT_CACHE_DIR: /tmp/TT_DIT_CACHE
-          NO_PROMPT: 1
-        run: |
-          pytest \
-            models/tt_dit/tests/models/mochi/test_pipeline_mochi.py \
-            -k "4x8sp1tp0" \
-            --timeout=1500
-      - name: Run GPT-OSS demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'gpt-oss' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-        run: |
-          # Install requirements
-          uv pip install -r models/demos/gpt_oss/requirements.txt
-
-          # Test GPT-OSS 20B model
-          TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-20b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-20b/ \
-            pytest models/demos/gpt_oss/demo/text_demo.py -k "4x8"\
-            --timeout 1000
-
-          TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-20b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-20b/ \
-            pytest models/demos/gpt_oss/tests/accuracy/test_model.py -k "4x8"\
-            --timeout 900
-
-          # Test GPT-OSS 120B model
-          TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-120b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-120b/ \
-            pytest models/demos/gpt_oss/demo/text_demo.py -k "4x8"\
-            --timeout 1000
-
-          TT_CACHE_PATH=/mnt/MLPerf/huggingface/tt_cache/openai--gpt-oss-120b/ HF_MODEL=/mnt/MLPerf/tt_dnn-models/openai/gpt-oss-120b/ \
-            pytest models/demos/gpt_oss/tests/accuracy/test_model.py -k "4x8"\
-            --timeout 900
-
-      - name: Run Llama3 demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'llama3' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          LLAMA_DIR: /mnt/MLPerf/tt_dnn-models/llama/Llama3.3-70B-Instruct/
-          FAKE_DEVICE: TG
-        run: |
-          pytest models/demos/llama3_70b_galaxy/demo/demo_decode.py -k "full" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "repeat" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "pcc-80L" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "batch-32-non-uniform-sampling" --timeout 500
-      - name: Run Llama3 long context demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'llama3_long_context' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          LLAMA_DIR: /mnt/MLPerf/tt_dnn-models/llama/Llama3.3-70B-Instruct/
-          FAKE_DEVICE: TG
-        run: |
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-4k-b1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-8k-b1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-16k-b1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-32k-b1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-64k-b1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "long-128k-b1" --timeout 1000
-      - name: Run Llama3 evals tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'llama3_evals' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          LLAMA_DIR: /mnt/MLPerf/tt_dnn-models/llama/Llama3.3-70B-Instruct/
-          FAKE_DEVICE: TG
-        run: |
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "evals-1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "evals-32" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_demo.py -k "evals-long-prompts" --timeout 1000
-      - name: Run Llama3 8B data-parallel demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'llama3_8b_dp' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          HF_MODEL: meta-llama/Llama-3.1-8B-Instruct
-          TT_CACHE_PATH: /mnt/MLPerf/huggingface/tt_cache/meta-llama/Llama-3.1-8B-Instruct
-          MESH_DEVICE: TG
-        run: |
-          pytest models/tt_transformers/demo/simple_text_demo.py --timeout 1000
-      - name: Run Llama3 70B data-parallel demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'llama3_70b_dp' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          HF_MODEL: meta-llama/Llama-3.3-70B-Instruct
-          TT_CACHE_PATH: /mnt/MLPerf/huggingface/tt_cache/meta-llama/Llama-3.3-70B-Instruct
-          MESH_DEVICE: TG
-        run: |
-          pytest models/tt_transformers/demo/simple_text_demo.py --timeout 1000
-      - name: Run Falcon7b demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'falcon7b' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        run: |
-          pytest --disable-warnings -q -s \
-            --input-method=json \
-            --input-path='models/demos/tg/falcon7b/input_data_tg.json' \
-            models/demos/tg/falcon7b/demo_tg.py \
-            --timeout=1500
-      - name: Run SentenceBert demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'sentence_bert' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        run: |
-          pytest models/demos/tg/sentence_bert/tests/test_sentence_bert_e2e_performant.py --timeout=1500
-      - name: Run Whisper demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'whisper' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        run: |
-          pytest models/demos/audio/whisper/demo/demo.py::test_demo_for_conditional_generation --timeout 1500
-      - name: Run Stable Diffusion 3.5 demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'sd35' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          NO_PROMPT: 1
-          TT_MM_THROTTLE_PERF: 5
-        run: |
-          pytest models/tt_dit/tests/models/sd35/test_pipeline_sd35.py \
-            -k "4x8cfg1sp0tp1" \
-            --timeout 1200
-      - name: Run Flux.1-dev demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'flux1' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          NO_PROMPT: 1
-          TT_MM_THROTTLE_PERF: 5
-        run: |
-          pytest models/tt_dit/tests/models/flux1/test_pipeline_flux1.py \
-            -k "4x8sp0tp1-dev" \
-            --timeout 1200
-      - name: Run Motif-Image-6B-Preview demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'motif' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          NO_PROMPT: 1
-        run: |
-          pytest models/tt_dit/tests/models/motif/test_pipeline_motif.py -k "4x8cfg1sp0tp1" --timeout 1200
-      - name: Run Wan2.2 demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'wan22' }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_DIT_CACHE_DIR: /tmp/TT_DIT_CACHE
-          NO_PROMPT: 1
-        run: |
-          pytest \
-            models/tt_dit/tests/models/wan2_2/test_pipeline_wan.py \
-            -k "wh_4x8sp1tp0 and resolution_720p" \
-            --timeout 1500
-      - name: Run Qwen3-32B demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'qwen3_32b' && (inputs.model == 'all' || inputs.model == 'qwen3_32b') }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          HF_MODEL: Qwen/Qwen3-32B
-          TT_CACHE_PATH: /mnt/MLPerf/huggingface/tt_cache/Qwen/Qwen3-32B
-          MESH_DEVICE: TG
-        run: |
-          pytest models/demos/llama3_70b_galaxy/demo/demo_qwen_decode.py -k "full" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "batch-32" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "repeat2" --timeout 1000
-      - name: Run Qwen3-32B long context demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'qwen3_32b_long_context' && (inputs.model == 'all' || inputs.model == 'qwen3_32b_long_context') }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          TT_CACHE_HOME: /mnt/MLPerf/huggingface/tt_cache
-          HF_MODEL: Qwen/Qwen3-32B
-          TT_CACHE_PATH: /mnt/MLPerf/huggingface/tt_cache/Qwen/Qwen3-32B
-          MESH_DEVICE: TG
-        run: |
-          pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "long-8k-b1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "long-32k-b1" --timeout 1000
-          pytest models/demos/llama3_70b_galaxy/demo/text_qwen_demo.py -k "long-128k-b1" --timeout 1000
-      - name: Run DeepSeek v3 demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'deepseek_v3' && (inputs.model == 'all' || inputs.model == 'deepseek_v3') }}
-        timeout-minutes: ${{ matrix.test-group.timeout }}
-        env:
-          DEEPSEEK_V3_HF_MODEL: /mnt/MLPerf/tt_dnn-models/deepseek-ai/DeepSeek-R1-0528
-          DEEPSEEK_V3_CACHE: /mnt/MLPerf/tt_dnn-models/deepseek-ai/DeepSeek-R1-0528-Cache/CI
-          MESH_DEVICE: TG
-        run: |
-          pytest models/demos/deepseek_v3/demo/test_demo.py --timeout 600
-      - name: Run QwenImage demo tests (direct pytest)
-        if: ${{ matrix.test-group.model == 'qwenimage' }}
+      # Logs and then runs the command carried in the matrix entry's `cmd` field.
+      - name: ${{ matrix.test-group.name }}
         timeout-minutes: ${{ matrix.test-group.timeout }}
         env:
-          TT_DIT_CACHE_DIR: /tmp/TT_DIT_CACHE
-          NO_PROMPT: 1
+          # Used only for the log line below. Printing from a variable shows
+          # the command verbatim; splicing it into a double-quoted string
+          # would drop embedded quotes and expand any $VAR or $(...) in it.
+          TEST_CMD: ${{ matrix.test-group.cmd }}
         run: |
-          pytest \
-            models/tt_dit/tests/models/qwenimage/test_pipeline_qwenimage.py \
-            -k "4x8" \
-            --timeout 1200
+          printf '%s\n' "$TEST_CMD"
+          ${{ matrix.test-group.cmd }}
       - uses: tenstorrent/tt-metal/.github/actions/slack-report@main
         if: ${{ failure() }}
         with:
@@ -355,3 +150,4 @@ jobs:
         with:
           prefix: "test_reports_"
       - uses: tenstorrent/tt-metal/.github/actions/cleanup@main
+        if: always()  # run cleanup even when an earlier step failed or the job was cancelled
@@ -79,5 +79,5 @@ jobs:
       wheel-artifact-name: ${{ needs.build-artifact.outputs.wheel-artifact-name }}
       build-artifact-name: ${{ needs.build-artifact.outputs.build-artifact-name }}
       model: ${{ inputs.model || 'all' }}
-      topology: topology-6u
+      enabled-skus: wh_galaxy
       mlperf-read-only: ${{ inputs.mlperf-read-only != false }}