tenstorrent
diff --git a/‎.github/model-analysis-config.sh‎
Lines changed: 2 additions & 2 deletions b/‎.github/model-analysis-config.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/build-image.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/build-image.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/build.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/model-analysis.yml‎
Lines changed: 52 additions & 8 deletions b/‎.github/workflows/model-analysis.yml‎
Lines changed: 52 additions & 8 deletions
diff --git a/‎.github/workflows/test-model-analysis-sub.yml‎
Lines changed: 46 additions & 17 deletions b/‎.github/workflows/test-model-analysis-sub.yml‎
Lines changed: 46 additions & 17 deletions
diff --git a/‎.github/workflows/test-sub.yml‎
Lines changed: 16 additions & 1 deletion b/‎.github/workflows/test-sub.yml‎
Lines changed: 16 additions & 1 deletion
@@ -8,17 +8,17 @@ declare -A env_vars
 
 # Model ops test generation
 # 1) PR config
-env_vars["BRANCH_NAME"]="generate_models_ops_test"
+env_vars["BRANCH_NAME"]="generate_models_ops_tests"
 env_vars["COMMIT_MESSAGE"]="Generate and update models ops tests"
 env_vars["TITLE"]="Generate and update models ops tests"
 env_vars["BODY"]="This PR will generate models ops tests by extracting the unique ops configurations across all the models present inside the forge/test/models directory path."
-env_vars["OUTPUT_PATH"]="forge/test/models_ops/"
 
 # 2) Script config
 env_vars["UNIQUE_OPS_OUTPUT_DIR_PATH"]="models_unique_ops_output/"
 env_vars["MODELS_OPS_TEST_OUTPUT_DIR_PATH"]="forge/test"
 env_vars["MODELS_OPS_TEST_PACKAGE_NAME"]="models_ops"
 env_vars["SCRIPT_OUTPUT_LOG"]="generate_models_ops_test.log"
+env_vars["GENERATED_MODELS_OPS_TESTS_PATH"]="forge/test/models_ops/"
 
 
 for key in "${!env_vars[@]}"; do
 
@@ -26,7 +26,7 @@ permissions:
 jobs:
 
   build-image:
-    runs-on: tt-beta-ubuntu-2204-large
+    runs-on: tt-ubuntu-2204-large-stable
     outputs:
       docker-image: ${{ steps.build.outputs.docker-image }}
     steps:
 
@@ -50,7 +50,7 @@ jobs:
       fail-fast: false
       matrix:
         build: [
-          { runs-on: tt-beta-ubuntu-2204-large, build-type: Release },
+          { runs-on: tt-ubuntu-2204-large-stable, build-type: Release },
         ]
 
     runs-on:
@@ -141,7 +141,7 @@ jobs:
       fail-fast: false
       matrix:
         build: [
-          { runs-on: tt-beta-ubuntu-2204-large, build-type: Debug },
+          { runs-on: tt-ubuntu-2204-large-stable, build-type: Debug },
         ]
 
     runs-on:
 
@@ -18,7 +18,7 @@ on:
       runs-on:
         description: 'Runs on'
         required: false
-        default: runner
+        default: n150
         type: choice
         options:
           - runner
@@ -39,13 +39,20 @@ on:
         required: false
         type: boolean
         default: false
+      create_pull_request:
+        description: 'Automatically create a Pull Request containing the generated model ops tests.'
+        required: false
+        type: boolean
+        default: false
+  # schedule:
+  #   - cron: '0 0 * * *'  # Runs at 00:00 UTC (midnight) every day
 
 
 permissions:
   packages: write
   checks: write
 
-run-name: "Model Analysis Ops Test Generation (${{inputs.runs-on}}-${{inputs.test_group_cnt}})"
+run-name: "Model Analysis (${{inputs.runs-on}}-${{inputs.test_group_cnt}})"
 
 jobs:
 
@@ -61,13 +68,23 @@ jobs:
       test_group_cnt: ${{ steps.set-inputs.outputs.test_group_cnt }}
       test_group_ids: ${{ steps.set-inputs.outputs.test_group_ids }}
       runs-on: ${{ steps.set-inputs.outputs.runs-on }}
+      runner: ${{ steps.set-inputs.outputs.runner}}
+      create_pr: ${{ steps.set-inputs.outputs.create_pr }}
     steps:
       - name: Inputs Management
         id: set-inputs
         run: |
-          echo "test_group_cnt=${{ inputs.test_group_cnt }}" >> $GITHUB_OUTPUT
-          echo "test_group_ids=[$(seq -s ',' 1 ${{ inputs.test_group_cnt }})]" >> $GITHUB_OUTPUT
-          echo "runs-on=[{\"runs-on\": \"${{ inputs.runs-on }}\"}]" >> $GITHUB_OUTPUT
+          default_test_group_cnt=10
+          default_runs_on=n150
+          default_create_pr=false
+          tgc=$(if [ -z "${{ inputs.test_group_cnt }}" ]; then echo $default_test_group_cnt; else echo ${{ inputs.test_group_cnt }}; fi)
+          runs_on=$(if [ -z "${{ inputs.runs-on }}" ]; then echo $default_runs_on; else echo ${{ inputs.runs-on}}; fi)
+          default_create_pr=$(if [ -z "${{ inputs.create_pull_request }}" ]; then echo $default_create_pr; else echo ${{ inputs.create_pull_request }}; fi)
+          echo "test_group_cnt=$tgc" >> $GITHUB_OUTPUT
+          echo "test_group_ids=[$(seq -s ',' 1 $tgc)]" >> $GITHUB_OUTPUT
+          echo "runs-on=[{\"runs-on\": \"$runs_on\"}]" >> $GITHUB_OUTPUT
+          echo "runner=$runs_on" >> $GITHUB_OUTPUT
+          echo "create_pr=$default_create_pr" >> $GITHUB_OUTPUT
 
   build:
     needs:
@@ -104,7 +121,7 @@ jobs:
       - build
       - extract-unique-ops-configuration
 
-    runs-on: ["in-service", "${{ inputs.runs-on }}"]
+    runs-on: ["in-service", "${{ needs.set-inputs.outputs.runner }}" ]
 
     container:
       image: ${{ needs.docker-build.outputs.docker-image }}
@@ -216,8 +233,16 @@ jobs:
           name: models-unique-ops-output
           path: ${{ env.UNIQUE_OPS_OUTPUT_DIR_PATH }}
 
+      - name: Upload Generated Models Ops Tests
+        uses: actions/upload-artifact@v4
+        if: ${{ needs.set-inputs.outputs.create_pr == 'false' }}
+        with:
+          name: generated-models-ops-tests
+          path: ${{ env.GENERATED_MODELS_OPS_TESTS_PATH }}
+
       - name: Create Pull Request
         uses: peter-evans/create-pull-request@v7
+        if: ${{ needs.set-inputs.outputs.create_pr == 'true' }}
         with:
           branch: ${{ env.BRANCH_NAME }}
           committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
@@ -227,7 +252,26 @@ jobs:
           title: ${{ env.TITLE }}
           body: ${{ env.BODY }}
           delete-branch: true
-          draft: true
           token: ${{ secrets.GH_TOKEN }}
           add-paths: |
-              ${{ env.OUTPUT_PATH }}
+              ${{ env.GENERATED_MODELS_OPS_TESTS_PATH }}
+
+  run-models-ops-tests:
+    if: ${{ needs.set-inputs.outputs.create_pr == 'false' }}
+    needs:
+      - docker-build
+      - set-inputs
+      - build
+      - extract-unique-ops-configuration
+      - generate-models-ops-tests
+    uses: ./.github/workflows/test-sub.yml
+    secrets: inherit
+    with:
+      docker-image: ${{ needs.docker-build.outputs.docker-image }}
+      test_mark: 'nightly_models_ops'
+      test_group_cnt: ${{ needs.set-inputs.outputs.test_group_cnt }}
+      test_group_ids: ${{ needs.set-inputs.outputs.test_group_ids }}
+      runs-on: '[{"runs-on": "n150"}]'
+      sh-runner: true
+      run_id: ${{ needs.build.outputs.run_id }}
+      run_models_ops_tests: true
@@ -40,6 +40,10 @@ on:
         required: false
         default: false
         type: boolean
+      sh-runner:
+        description: 'Run tests using shared runners'
+        required: false
+        type: boolean
 
 jobs:
   run-tests:
@@ -50,14 +54,12 @@ jobs:
         build: ${{ fromJson(inputs.runs-on) }}
         test_group_id: ${{ fromJSON(inputs.test_group_ids) }}
 
-    runs-on:
-      - in-service
-      - ${{ matrix.build.runs-on }}
+    runs-on: ${{ inputs.sh-runner && format('tt-beta-ubuntu-2204-{0}-large-stable', matrix.build.runs-on) || fromJson(format('["{0}", "in-service"]', matrix.build.runs-on)) }}
 
     continue-on-error: ${{ inputs.allow-fail }}
 
     container:
-      image: ${{ inputs.docker-image }}
+      image: ${{ inputs.sh-runner && format('harbor.ci.tenstorrent.net/{0}', inputs.docker-image) || inputs.docker-image }}
       options: --device /dev/tenstorrent/0
       volumes:
         - /dev/hugepages:/dev/hugepages
@@ -70,21 +72,22 @@ jobs:
     env:
       HF_TOKEN: ${{ secrets.HF_TOKEN }}
       HF_HOME: /mnt/dockercache/huggingface
+      IRD_LF_CACHE: ${{ vars.IRD_LF_CACHE }}
       FORGE_MODELS_CACHE: /mnt/dockercache/forge_models_cache
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       FORGE_DISABLE_REPORTIFY_DUMP: 1
       FORGE_EXTRACT_TVM_UNIQUE_OPS_CONFIG: 1
       FORGE_EXPORT_TVM_UNIQUE_OPS_CONFIG_DETAILS: 1
 
-    name: "run-tests ${{ inputs.test_mark }} (${{ matrix.build.runs-on }}, ${{ matrix.test_group_id }})"
+    name: "run-tests ${{ inputs.test_mark }} (${{ inputs.sh-runner && format('{0}-shared', matrix.build.runs-on) || (matrix.build.runs-on) }}, ${{ matrix.test_group_id }})"
 
     steps:
 
       - name: Fetch job id
         id: fetch-job-id
         uses: tenstorrent/tt-github-actions/.github/actions/job_id@main
         with:
-          job_name: "run-tests ${{ inputs.test_mark }} (${{ matrix.build.runs-on }}, ${{ matrix.test_group_id }})"
+          job_name: "run-tests ${{ inputs.test_mark }} (${{ inputs.sh-runner && format('{0}-shared', matrix.build.runs-on) || (matrix.build.runs-on) }}, ${{ matrix.test_group_id }})"
 
       - name: Set reusable strings
         id: strings
@@ -101,12 +104,18 @@ jobs:
       - uses: actions/checkout@v4
         with:
           sparse-checkout: |
+            .github/workflows/
             env/
             forge/test
             pytest.ini
             conftest.py
             .test_durations
 
+      - name: Setup Forge Models repo
+        shell: bash
+        run: |
+          git submodule update --init --recursive -f third_party/tt_forge_models
+
         # Clean everything from submodules (needed to avoid issues
         # with cmake generated files leftover from previous builds)
       - name: Cleanup submodules
@@ -146,18 +155,19 @@ jobs:
 
       - name: Extract and Export Unique Ops Configuration
         shell: bash
+        timeout-minutes: 1440  # Raise the execution time limit to 1 day (default: 6 hours)
         run: |
+          set -o pipefail
           source env/activate
-
+          echo "Collecting tests for group ${{ matrix.test_group_id }} with mark '${{ inputs.test_mark }}'..."
+          set +e
           pytest_args=(
             "--splits" "${{ inputs.test_group_cnt }}"
             "--group" "${{ matrix.test_group_id }}"
             "--splitting-algorithm" "least_duration"
             "-m" "${{ inputs.test_mark }}"
-            "--log-memory-usage"
-            "--runxfail"
-            "--no-skips"
-            "-vss"
+            "--collect-only"
+            "-q"
           )
           if [ -n "${{ inputs.tests_to_filter }}" ]; then
             # Split on commas and trim whitespace
@@ -167,15 +177,34 @@ jobs:
               pytest_args+=("$(echo "$tf" | xargs)")
             done
           fi
-
-          pytest forge/test/models/ "${pytest_args[@]}" 2>&1 | tee pytest.log
-
-      - name: Upload Test Log
+          pytest forge/test/models/ "${pytest_args[@]}" \
+                | sed -n '/^Collected tests /,/^collected /p' | sed '/^[Cc]ollected /d' >.pytest_tests_to_run
+
+          if [ $? -ne 0 ]; then
+            echo "Failed to collect tests. Doing dry run..."
+            set -e
+            pytest forge/test/models/ "${pytest_args[@]}"
+            exit 1
+          fi
+          echo "Collected tests."
+          cat .pytest_tests_to_run
+
+          python .github/workflows/test_runner.py  \
+               --continue-after-crash \
+               --log-memory-usage \
+               --runxfail \
+               --no-skips \
+               -vss \
+            2>&1 | tee extract-and-export-unique-ops-configs.log
+          exit_code=${PIPESTATUS[0]}
+          exit $exit_code
+
+      - name: Upload Extract And Export Unique Ops Configs Log
         uses: actions/upload-artifact@v4
         if: success() || failure()
         with:
-          name: test-log-${{ matrix.build.runs-on }}-${{ matrix.test_group_id }}-${{ inputs.test_mark }}-${{ steps.fetch-job-id.outputs.job_id }}
-          path: pytest.log
+           name: extract-and-export-unique-ops-configs-log-${{ matrix.build.runs-on }}-${{ matrix.test_group_id }}-${{ inputs.test_mark }}-${{ steps.fetch-job-id.outputs.job_id }}
+           path: extract-and-export-unique-ops-configs.log
 
       - name: Upload Memory Usage Log
         uses: actions/upload-artifact@v4
 
@@ -52,6 +52,11 @@ on:
         description: 'Run tests using shared runners'
         required: false
         type: boolean
+      run_models_ops_tests:
+        description: 'Run the generated models ops tests'
+        required: false
+        default: false
+        type: boolean
 
 jobs:
   run-tests:
@@ -62,7 +67,7 @@ jobs:
         build: ${{ fromJson(inputs.runs-on) }}
         test_group_id: ${{ fromJSON(inputs.test_group_ids) }}
 
-    runs-on: ${{ inputs.sh-runner && format('tt-beta-ubuntu-2204-{0}-large-stable', matrix.build.runs-on) || fromJson(format('["{0}", "in-service"]', matrix.build.runs-on)) }}
+    runs-on: ${{ inputs.sh-runner && format('tt-ubuntu-2204-{0}-stable', matrix.build.runs-on) || fromJson(format('["{0}", "in-service"]', matrix.build.runs-on)) }}
 
     continue-on-error: ${{ inputs.allow-fail }}
 
@@ -151,6 +156,16 @@ jobs:
           pip install tt_tvm*.whl --upgrade
           pip install tt_forge_fe*.whl --upgrade
 
+      - name: Download Generated Models Ops Tests
+        if: ${{ inputs.run_models_ops_tests }}
+        continue-on-error: true
+        uses: tenstorrent/tt-forge/.github/actions/download-artifact@main
+        with:
+          name: generated-models-ops-tests
+          run_id: ${{ inputs.run_id }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          path: forge/test/models_ops
+
       - name: Run Test
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}