Implement pipeline to run accuracy tests (#864)

dgomezTT · web-flow · commit 22264e92b63a · 2025-04-03T18:53:46.000Z
* Fix pipeline for accuracy-tests

* removing that hardcoded array

* Fixed a bunch of problems with the pipeline and created an action for test file calculation

* fixed the refactor in the common_model_test

* Removed the ugly hardcoded array and added a schedule to run accuracy tests

* Fixed a small typo in the config of before_merge.yaml

* Addressed the logic for before_merge

* Improved how we detect the accuracy tests

* Fixed merge issue

* fix docker for accuracy tests

* Fixed how we pass the accuracy test between steps

* Added mock test to speed up the process

* Added mock test to speed up the process

* Added mock test to speed up the process

* Added mock test to speed up the process

* fixed some docker configuration in the accuracy test

* Improved the way to select what tests to run

* fix filter group

* fix rebase error

* added the option to configure the docker image and fixed a cache issue

* fixed download artifacts

* fixed download artifacts

* fixed download artifacts
diff --git a/.github/actions/common_model_tests/action.yaml b/.github/actions/common_model_tests/action.yaml
@@ -1,5 +1,16 @@
 name: 'Run Model tests'
 description: 'Run Model tests'
+inputs:
+    splits:
+      description: 'Number of splits for test distribution'
+      required: true
+    commit_report:
+      description: 'Commit report input (None, Docs, All)'
+      required: false
+      default: 'None'
+    matrix_group:
+      description: 'Matrix group index for splitting tests'
+      required: true
 runs:
   using: "composite"
   steps:
@@ -12,7 +23,7 @@ runs:
         else
           num_iterations=5
         fi
-        python3 -m pytest --github-report tests/models/ --report_nth_iteration=$num_iterations --gen_op_accuracy_tests --splits 40 --group ${{ matrix.group }} -s
+        python3 -m pytest --github-report tests/models/ --report_nth_iteration=$num_iterations --gen_op_accuracy_tests --splits ${{ inputs.splits }} --group ${{ inputs.matrix_group || matrix.group }} -s  # prefer the declared input; fall back to the job's matrix context
         exit_code=$?  # Capture the exit code
         if [ $exit_code -eq 5 ]; then
           if [ ${{ matrix.group }} -eq 0 ]; then
diff --git a/.github/actions/count_test_files/action.yaml b/.github/actions/count_test_files/action.yaml
@@ -0,0 +1,63 @@
+name: 'Count Test Files'
+description: 'Counts the number of valid test files in a directory'
+
+inputs:
+  test_directory:
+    description: 'Directory containing the test files'
+    required: true
+    default: 'tests/models/'
+
+outputs:
+  num_files:
+    description: 'The number of valid test files'
+    value: ${{ steps.count-files.outputs.num_files }}
+
+  matrix:
+    description: 'A JSON array of test groups'
+    value: ${{ steps.generate-matrix.outputs.matrix }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Count Test Files
+      id: count-files
+      shell: bash
+      env:
+        # Hand the input over via the environment so it is never spliced into the script text.
+        TEST_DIRECTORY: ${{ inputs.test_directory }}
+      run: |
+        # Count files named test_*.py or *_test.py anywhere below the directory.
+        num_tests=$(find "$TEST_DIRECTORY" -type f \( -name "test_*.py" -o -name "*_test.py" \) | wc -l)
+        if [ "$num_tests" -eq 0 ]; then
+          echo "Error: No test files found in the directory '$TEST_DIRECTORY'."
+          exit 1
+        fi
+        echo "num_files=$num_tests" >> "$GITHUB_OUTPUT"
+
+    - name: Generate Matrix
+      id: generate-matrix
+      shell: bash
+      env:
+        NUM_FILES: ${{ steps.count-files.outputs.num_files }}
+      run: |
+        if [ -z "$MOCK_RUN" ]; then
+          # One matrix entry per test file: [1,2,...,N]
+          matrix_tmp=$(seq 1 "$NUM_FILES" | jq -c --slurp '.')
+        else
+          # MOCK_RUN comes from the environment (this action has no input for it); it replaces the matrix,
+          # so reject anything that is not a non-empty JSON array and compact it to a single line.
+          if ! matrix_tmp=$(jq -ce 'select(type == "array") | select(length > 0)' <<< "$MOCK_RUN"); then
+            echo "Error: MOCK_RUN must be a non-empty JSON array, got: $MOCK_RUN"
+            exit 1
+          fi
+          echo "Mock mode enabled. Using MOCK_RUN as matrix: $matrix_tmp"
+        fi
+        echo "matrix=$matrix_tmp" >> "$GITHUB_OUTPUT"
+
+    - name: Debug Outputs
+      shell: bash
+      env:
+        NUM_FILES: ${{ steps.count-files.outputs.num_files }}
+        MATRIX: ${{ steps.generate-matrix.outputs.matrix }}
+      run: |
+        echo "Number of files: $NUM_FILES"
+        echo "Matrix: $MATRIX"
diff --git a/.github/workflows/before_merge.yaml b/.github/workflows/before_merge.yaml
@@ -11,7 +11,6 @@ jobs:
       docker_tag: 'ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest'
       commit_report: 'None'
 
-
   validate-pr:
     if: ${{ always() }}
     runs-on: ubuntu-latest    
diff --git a/.github/workflows/run-accuracy-tests.yaml b/.github/workflows/run-accuracy-tests.yaml
@@ -2,7 +2,18 @@ name: "Accuracy Tests"
 
 on:
   workflow_dispatch:
-
+    inputs:
+      MockRun:
+        description: "Array of test groups to run. If empty, all tests will be run."
+        required: false
+        default: ""
+      docker_tag:
+        description: 'Docker container tag to use'
+        required: false
+        type: string
+        default: 'ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest'
+  schedule:
+    - cron: "0 21 * * 6" # Run every Saturday at 9:00 PM UTC
 permissions:
   actions: read
   contents: write
@@ -11,30 +22,66 @@ permissions:
   pull-requests: read
 
 jobs:
-  tools-tests:
+  count-test-files:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.count-files.outputs.matrix }}
+      num_files: ${{ steps.count-files.outputs.num_files }}
     env:
-      pytest_verbosity: 2    
-      pytest_report_title: "⭐️ Tools Tests"
-    runs-on: ["in-service"]
+      MOCK_RUN: ${{ github.event.inputs.MockRun }}
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - name: Count Test Files
+        id: count-files
+        uses: ./.github/actions/count_test_files
+        with:
+          test_directory: 'tests/models/'
+  model-tests:
+    needs: [count-test-files]
+    runs-on: ["in-service", "nfs"]
     container: 
-      image: ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest
+      image: ${{ inputs.docker_tag || 'ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest' }}  # scheduled runs carry no inputs, so fall back to the default tag
       credentials:
         username: ${{ github.actor }}
         password: ${{ secrets.GH_TOKEN }}
       options: >-
         --rm -v /dev/hugepages-1G:/dev/hugepages-1G --device /dev/tenstorrent
         -v ${{ github.workspace }}:${{ github.workspace }} -w ${{ github.workspace }}
-    steps:      
+        -v /mnt/tt-metal-pytorch-cache/.cache:/root/.cache
+    env:      
+      pytest_verbosity: 0
+      TORCH_HOME: /mnt/tt-metal-pytorch-cache/.cache/torch
+      HF_HOME: /mnt/tt-metal-pytorch-cache/.cache/huggingface
+    strategy:
+      matrix:
+        group: ${{ fromJson(needs.count-test-files.outputs.matrix) }}
+    steps:
       - uses: actions/checkout@v4
-      - uses: ./.github/actions/common_repo_setup        
-      - name: Run Tools Tests 
+        with:
+          lfs: true
+          fetch-depth: 0    
+      - name: docker-cleanup
         run: |
-          python3 -m pytest --github-report tests/tools/ -s
+          docker system prune -a -f --volumes
+          df -h  # Debug space
+      - uses: ./.github/actions/common_model_tests
+        with:
+          splits: ${{ needs.count-test-files.outputs.num_files }}
+          matrix_group: ${{ matrix.group }}
+          commit_report: ${{ github.event.inputs.commit_report || 'None' }}  # no commit_report input is defined here; use the action's documented default
+      - name: Upload Accuracy Tests Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: autogen-accuracy-tests-group-${{ matrix.group }}
+          path: tests/autogen_accuracy_tests/
 
   gen-model-accuracy:
+    needs: [model-tests]
     runs-on: ["in-service"]
     container: 
-      image: ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest
+      image: ${{ inputs.docker_tag || 'ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest' }}  # scheduled runs carry no inputs, so fall back to the default tag
       credentials:
         username: ${{ github.actor }}
         password: ${{ secrets.GH_TOKEN }}
@@ -48,73 +95,68 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/common_repo_setup        
-
+      
       - name: Download All Accuracy Tests and Inputs Artifacts
         uses: actions/download-artifact@v4
         with:
-          pattern: model-accuracy-tests-group-*
+          pattern: autogen-accuracy-tests-group-*
           merge-multiple: true
           path: tests/autogen_accuracy_tests/
 
       - name: Calculate Number of Groups
         id: calculate-groups
         run: |
           # Count the number of test files
-          num_files=$(find tests/autogen_accuracy_tests -type f -name "*.py" | wc -l)
-
-          # Get the number of available CPUs (or set a default if not available)
-          num_cpus=$(nproc || echo 4)
-
-          # Calculate the number of groups (e.g., 1 group per CPU, or adjust as needed)
-          num_groups=$((num_files < num_cpus ? num_files : num_cpus))
-
+          num_groups=$(find tests/autogen_accuracy_tests -type f -name "*.py" | wc -l)
           # Ensure at least 1 group
           num_groups=$((num_groups > 0 ? num_groups : 1))
-
           # Generate the list of groups as JSON
-          groups=$(seq 1 $num_groups | jq -c '.')
+          groups=$(seq 1 $num_groups | jq -c --slurp '.')
+          echo "Found: $groups"
           echo "groups=$groups" >> $GITHUB_OUTPUT
-        outputs:
-          groups: ${{ steps.calculate-groups.outputs.groups }}
-
-      - name: Upload Accuracy Tests Artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: autogen-accuracy-tests
-          path: tests/autogen_accuracy_tests/
 
   test-model-accuracy:
     needs: [gen-model-accuracy]
-    runs-on: ["in-service"]
+    runs-on: ["in-service", "nfs"]
     container: 
-      image: ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest
+      image: ${{ inputs.docker_tag || 'ghcr.io/tenstorrent/pytorch2.0_ttnn/ubuntu-22.04-amd64:latest' }}  # scheduled runs carry no inputs, so fall back to the default tag
       credentials:
         username: ${{ github.actor }}
         password: ${{ secrets.GH_TOKEN }}
       options: >-
         --rm -v /dev/hugepages-1G:/dev/hugepages-1G --device /dev/tenstorrent
         -v ${{ github.workspace }}:${{ github.workspace }} -w ${{ github.workspace }}
+        -v /mnt/tt-metal-pytorch-cache/.cache:/root/.cache
     env:
       PYTHONPATH: ${{ github.workspace }}
+      TORCH_HOME: /mnt/tt-metal-pytorch-cache/.cache/torch
+      HF_HOME: /mnt/tt-metal-pytorch-cache/.cache/huggingface
     strategy:
       matrix:
         group: ${{ fromJson(needs.gen-model-accuracy.outputs.groups) }}
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/common_repo_setup        
-
+      - name: docker-cleanup
+        run: |
+          docker system prune -a -f --volumes
+          df -h  # Debug space
       - name: Download Accuracy Tests Artifact
         uses: actions/download-artifact@v4
         with:
-          name: autogen-accuracy-tests
+          pattern: autogen-accuracy-tests-group-*
+          merge-multiple: true
           path: tests/autogen_accuracy_tests/
 
       - name: Run Accuracy Tests
         run: |
           cd tests/autogen_accuracy_tests
-          set +e
-          test_file=$(find . -type f -name "*.py" | sed -n "${{ matrix.group }}p")
-          python3 -m pytest $test_file -s
-          exit 0;
+          # Sort so that index N names the same file on every runner.
+          test_file=$(find . -type f -name "*.py" | sort | sed -n "${{ matrix.group }}p")
+          if [ -z "$test_file" ]; then
+            echo "No accuracy test file found for group ${{ matrix.group }}"
+            exit 1
+          fi
+          python3 -m pytest "$test_file" -s
         shell: bash
 
diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
@@ -1,5 +1,4 @@
 name: "Run Tests"
-
 on:
   workflow_call:
     inputs:
@@ -79,8 +78,23 @@ jobs:
       - uses: ./.github/actions/common_repo_setup
       - uses: ./.github/actions/common_lowering_tests
 
+  count-test-files:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.count-files.outputs.matrix }}
+      num_files: ${{ steps.count-files.outputs.num_files }}
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - name: Count Test Files
+        id: count-files
+        uses: ./.github/actions/count_test_files
+        with:
+          test_directory: 'tests/models/'
+
   model-tests:
-    needs: lowering-tests
+    needs: [count-test-files, lowering-tests]
     runs-on: ["in-service", "nfs"]
     container: 
       image: ${{ inputs.docker_tag }}
@@ -97,34 +111,29 @@ jobs:
       TORCH_HOME: /mnt/tt-metal-pytorch-cache/.cache/torch
       HF_HOME: /mnt/tt-metal-pytorch-cache/.cache/huggingface
     strategy:
-      matrix: # Need to find a way to replace this with a generator
-        group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
+      matrix: 
+        group: ${{ fromJson(needs.count-test-files.outputs.matrix) }}
     steps:
       - uses: actions/checkout@v4
         with:
           lfs: true
           fetch-depth: 0    
-      - uses: ./.github/actions/common_repo_setup
       - name: docker-cleanup
         run: |
           docker system prune -a -f --volumes
           df -h  # Debug space
       - uses: ./.github/actions/common_model_tests
-
+        with:
+          splits: ${{ needs.count-test-files.outputs.num_files }}
+          matrix_group: ${{ matrix.group }}
+          commit_report: ${{ inputs.commit_report }}  # same source the job-level if: conditions in this workflow read
       - name: Upload Metrics Artifact
         if: success()  # Only run if tests passed
         uses: actions/upload-artifact@v4
         with:
           name: model-tests-metrics-group-${{ matrix.group }}
           path: metrics/
 
-      - name: Upload All Accuracy Tests and Inputs Artifacts
-        if: success()  # Only run if tests passed
-        uses: actions/upload-artifact@v4
-        with:
-          name: model-accuracy-tests-group-${{ matrix.group }}
-          path: tests/autogen_accuracy_tests/
-
   push-autogen-op-tests:
     needs: [model-tests]
     if: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_report != 'None'}}
@@ -176,7 +185,7 @@ jobs:
           fi
 
   model-autogen-op-tests:
-    needs: [push-autogen-op-tests]
+    needs: [push-autogen-op-tests, count-test-files]
     if: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_report != 'None'}}
     runs-on: ["in-service"]
     container: 
@@ -188,9 +197,8 @@ jobs:
         --rm -v /dev/hugepages-1G:/dev/hugepages-1G --device /dev/tenstorrent
         -v ${{ github.workspace }}:${{ github.workspace }} -w ${{ github.workspace }}
     strategy:
-      matrix: # Need to find a way to replace this with a generator
-        group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
-
+      matrix: 
+        group: ${{ fromJson(needs.count-test-files.outputs.matrix) }}
     env:
       pytest_verbosity: 0
       pytest_report_title: "⭐️ Model Input Variations Tests - Group ${{ matrix.group }}"