tenstorrent · chandrasekaranpradeep · Nov 12, 2025
@@ -0,0 +1,185 @@
+name: Extract crashed tests
+
+# Reusable workflow: it is only triggered through `workflow_call`. It downloads
+# the artifacts of one workflow run, scans the contained *.log files for
+# `path::name` style tokens and reports them as outputs.
+on:
+  workflow_call:
+    # The four inputs are forwarded, in the order declared here, as positional
+    # arguments to .github/download-artifacts.sh.
+    inputs:
+      repo:
+        description: 'Repository to fetch artifacts from (owner/repo)'
+        required: true
+        type: string
+      run_id:
+        description: 'Workflow run id to fetch artifacts for'
+        required: true
+        type: string
+      # Besides being the download target, this directory is the one searched
+      # recursively for *.log files by the extraction step.
+      output_dir:
+        description: 'Directory to place downloaded artifacts'
+        required: true
+        type: string
+      artifact_prefix:
+        description: 'Artifact name prefix to match'
+        required: true
+        type: string
+    outputs:
+      crashed-tests:
+        description: "Comma-separated list of all crashed test names detected from the artifacts."
+        value: ${{ jobs.extract.outputs.crashed-tests }}
+      # Emitted as the literal text `true` or `false`; callers compare it
+      # against the string 'true'.
+      contains-crashed-tests:
+        description: "Boolean flag indicating whether any crashed tests were found."
+        value: ${{ jobs.extract.outputs.contains-crashed-tests }}
+      # This is the number of groups (crashed tests are grouped four at a
+      # time), not the number of individual crashed tests.
+      crashed-test-cnt:
+        description: "Total number of crashed test groups identified during extraction."
+        value: ${{ jobs.extract.outputs.crashed-test-cnt }}
+      # Rendered as `[1,2,...]` with 1-based group numbers, or `[]` when no
+      # crashed test was found.
+      crashed-test-ids:
+        description: "Array of job indices corresponding to each crashed test group."
+        value: ${{ jobs.extract.outputs.crashed-test-ids }}
+
+# Limits the automatically provided GITHUB_TOKEN to read-only repository
+# contents. The artifact download step authenticates with secrets.GH_TOKEN
+# rather than with GITHUB_TOKEN.
+# NOTE(review): if the download is ever switched to GITHUB_TOKEN, an
+# `actions: read` grant would presumably be required as well - confirm.
+permissions:
+  contents: read
+
+jobs:
+  # Single job: download the artifacts, then derive the crashed-test outputs.
+  extract:
+    runs-on: ubuntu-latest
+    # Job-level outputs simply re-export what the step with id
+    # `extract-crashed-tests` wrote to GITHUB_OUTPUT; the workflow-level
+    # outputs in `on.workflow_call.outputs` read from these.
+    outputs:
+      crashed-tests: ${{ steps.extract-crashed-tests.outputs.crashed-tests }}
+      contains-crashed-tests: ${{ steps.extract-crashed-tests.outputs.contains-crashed-tests }}
+      crashed-test-cnt: ${{ steps.extract-crashed-tests.outputs.crashed-test-cnt }}
+      crashed-test-ids: ${{ steps.extract-crashed-tests.outputs.crashed-test-ids }}
+    steps:
+      # Records the directory the step runs in as the `work-dir` step output so
+      # that later steps can refer to it via steps.strings.outputs.work-dir.
+      - name: Set reusable strings
+        id: strings
+        shell: bash
+        run: |
+          printf 'work-dir=%s\n' "$PWD" >> "$GITHUB_OUTPUT"
+
+      # Registers the working directory recorded by the `strings` step as a git
+      # safe.directory. The path is handed over through an environment variable
+      # and quoted, so a path containing spaces or shell metacharacters is
+      # passed to git as one literal argument instead of being re-parsed by the
+      # shell.
+      - name: Git safe dir
+        env:
+          WORK_DIR: ${{ steps.strings.outputs.work-dir }}
+        run: git config --global --add safe.directory "$WORK_DIR"
+
+      # Sparse checkout limited to the artifact download helper, which is the
+      # only repository file the following steps invoke.
+      - uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            .github/download-artifacts.sh
+
+      # Best-effort download of the matching artifacts into the output
+      # directory. `continue-on-error` keeps the job going when the download
+      # fails; the extraction step then finds no logs and reports "no crashed
+      # tests".
+      #
+      # The caller-supplied inputs are passed through environment variables
+      # rather than being expanded into the script text, so their content can
+      # never be interpreted as shell syntax.
+      #
+      # NOTE(review): the inline job this workflow replaced authenticated with
+      # secrets.GITHUB_TOKEN. Confirm that a GH_TOKEN secret is available to
+      # every caller - combined with continue-on-error, a missing or invalid
+      # token would fail silently and look like "nothing crashed".
+      - name: Download Unique Ops Config Crashed Logs
+        shell: bash
+        continue-on-error: true
+        env:
+          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+          ARTIFACT_REPO: ${{ inputs.repo }}
+          ARTIFACT_RUN_ID: ${{ inputs.run_id }}
+          ARTIFACT_OUTPUT_DIR: ${{ inputs.output_dir }}
+          ARTIFACT_NAME_PREFIX: ${{ inputs.artifact_prefix }}
+        run: |
+          bash .github/download-artifacts.sh "$ARTIFACT_REPO" "$ARTIFACT_RUN_ID" "$ARTIFACT_OUTPUT_DIR" "$ARTIFACT_NAME_PREFIX"
+
+      # Scans every *.log file below the download directory for `path::name`
+      # tokens, filters and de-duplicates them, and publishes four outputs:
+      #   crashed-tests           comma-separated token list (empty when none)
+      #   contains-crashed-tests  true | false
+      #   crashed-test-cnt        number of groups, ceil(token count / 4)
+      #   crashed-test-ids        1-based group ids, e.g. [1,2,3] ([] when none)
+      - name: Extract Crashed Cases
+        id: extract-crashed-tests
+        shell: bash
+        env:
+          # Passed through the environment instead of being expanded into the
+          # script text, so the value cannot be interpreted as shell code.
+          LOGS_DIR: ${{ inputs.output_dir }}
+        run: |
+          set -euo pipefail
+
+          logs_dir="$LOGS_DIR"
+          # Group size used to derive the number of re-run groups.
+          number_crashed_tests_per_job=4
+
+          # Writes all four step outputs, so that every exit path publishes the
+          # same set of keys.
+          #   $1  comma-separated crashed tests (may be empty)
+          #   $2  contains-crashed-tests flag (true|false)
+          #   $3  number of groups
+          #   $4  group id list, e.g. [1,2,3]
+          emit_outputs() {
+            {
+              echo "crashed-tests<<EOF"
+              echo "$1"
+              echo "EOF"
+              echo "contains-crashed-tests=$2"
+              echo "crashed-test-cnt=$3"
+              echo "crashed-test-ids=$4"
+            } >> "$GITHUB_OUTPUT"
+          }
+
+          # Exit early with safe outputs if logs dir missing
+          if [ ! -d "$logs_dir" ]; then
+            emit_outputs "" false 0 "[]"
+            exit 0
+          fi
+
+          # Collect the .log files in version-sort order. NUL-delimited end to
+          # end, so unusual file names are neither split nor glob-expanded.
+          mapfile -d '' -t sorted_files < <(find "$logs_dir" -type f -name '*.log' -print0 | sort -zV)
+
+          if [ "${#sorted_files[@]}" -eq 0 ]; then
+            emit_outputs "" false 0 "[]"
+            exit 0
+          fi
+
+          tmpfile="$(mktemp)"
+          trap 'rm -f "$tmpfile"' EXIT
+
+          # Extract every whitespace-delimited token that contains `::`.
+          # Unreadable files are skipped on purpose (best effort).
+          : > "$tmpfile"
+          for file in "${sorted_files[@]}"; do
+            perl -nE 'while ( /([^\s]+::[^\s]+)/g ) { say $1 }' "$file" 2>/dev/null >> "$tmpfile" || true
+            printf '\n' >> "$tmpfile"
+          done
+
+          # Keep only tokens that look like test ids:
+          #   * CR/NUL bytes and surrounding whitespace are removed,
+          #   * tokens containing an error keyword (case-insensitive substring
+          #     match) are dropped,
+          #   * the remainder must be `<non-space>::<non-space>` with an optional
+          #     trailing [param] suffix (this also rules out separator lines
+          #     made only of `=` or `-`),
+          #   * duplicates are removed, the first occurrence wins.
+          # grep -a forces text mode so stray binary bytes in a log cannot turn
+          # the result into a "Binary file matches" notice.
+          # NOTE(review): because the keyword filter matches substrings, an id
+          # that merely contains e.g. "oom" (as in "bloom") is discarded too -
+          # confirm this is intended.
+          mapfile -t tokens < <(
+            tr -d '\r\0' < "$tmpfile" \
+              | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
+              | grep -aviE '(error|exception|traceback|killed|oom|failed|critical)' \
+              | grep -aE '^[^[:space:]]+::[^[:space:]]+(\[[^]]+\])?$' \
+              | awk 'NF && !seen[$0]++ { print }'
+          )
+
+          joined=""
+          if [ "${#tokens[@]}" -gt 0 ]; then
+            joined=$(printf '%s,' "${tokens[@]}")
+            joined=${joined%,}
+          fi
+
+          # Fix missing commas between concatenated forge/ entries
+          # NOTE(review): entries split apart here are not reflected in the
+          # group count below, which is based on the token count before the
+          # split - confirm that is acceptable.
+          if [ -n "$joined" ]; then
+            joined="$(perl -pe 's/\s+(?=forge\/)//g; s/([^,])(?=forge\/)/$1,/g' <<< "$joined")"
+          fi
+
+          # Count crashed tokens and group into jobs
+          contains_crashed_tests=false
+          crashed_job_count=0
+          crashed_job_ids_formatted="[]"
+          crashed_test_count=${#tokens[@]}
+
+          if [ "$crashed_test_count" -gt 0 ]; then
+            contains_crashed_tests=true
+            # Ceiling division: number of groups needed for all tokens.
+            crashed_job_count=$(( (crashed_test_count + number_crashed_tests_per_job - 1) / number_crashed_tests_per_job ))
+            crashed_job_ids_formatted="[$(seq -s ',' 1 "$crashed_job_count")]"
+          fi
+
+          emit_outputs "$joined" "$contains_crashed_tests" "$crashed_job_count" "$crashed_job_ids_formatted"
+
+      # Prints the extraction results to the job log for debugging. The values
+      # are handed over as environment variables instead of being expanded into
+      # the script text: the token list comes from log files and may contain
+      # quotes, `$` or backticks, which would otherwise be parsed by the shell.
+      - name: show outputs
+        env:
+          CRASHED_TESTS: ${{ steps.extract-crashed-tests.outputs.crashed-tests }}
+          CONTAINS_CRASHED_TESTS: ${{ steps.extract-crashed-tests.outputs.contains-crashed-tests }}
+          CRASHED_TEST_CNT: ${{ steps.extract-crashed-tests.outputs.crashed-test-cnt }}
+          CRASHED_TEST_IDS: ${{ steps.extract-crashed-tests.outputs.crashed-test-ids }}
+        run: |
+          echo "crashed-tests (raw): $CRASHED_TESTS"
+          echo "contains-crashed-tests: $CONTAINS_CRASHED_TESTS"
+          echo "crashed-test-cnt: $CRASHED_TEST_CNT"
+          echo "crashed-test-ids: $CRASHED_TEST_IDS"
@@ -244,136 +244,19 @@ jobs:
       split-by-count: true
 
   extract-crashed-tests-from-non-oom:
-    runs-on: ubuntu-latest
     needs:
       - docker-build
       - set-inputs
       - build
       - extract-unique-ops-configuration-non-oom
     if: always()
-    env:
-      CRASHED_TESTS_OUTPUT_DIR_PATH: crashed_tests_output_logs/
-      CRASHED_TESTS_ARTIFACT_PREFIX: unique-ops-configs-crashed-tests
-    outputs:
-      crashed-tests: ${{ steps.extract-crashed-tests.outputs.crashed-tests }}
-      contains-crashed-tests: ${{ steps.extract-crashed-tests.outputs.contains-crashed-tests }}
-    steps:
-      - name: Set reusable strings
-        id: strings
-        shell: bash
-        run: |
-          echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT"
-
-      - name: Git safe dir
-        run: git config --global --add safe.directory ${{ steps.strings.outputs.work-dir }}
-
-      - uses: actions/checkout@v4
-        with:
-          sparse-checkout: |
-            .github/download-artifacts.sh
-
-      - name: Download Unique Ops Config Crashed Logs
-        shell: bash
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          bash .github/download-artifacts.sh "${{ github.repository }}" "${{ github.run_id }}" "${{ env.CRASHED_TESTS_OUTPUT_DIR_PATH }}" "${{ env.CRASHED_TESTS_ARTIFACT_PREFIX }}"
-
-      - name: Extract Crashed Cases
-        id: extract-crashed-tests
-        shell: bash
-        run: |
-          set -euo pipefail
-
-          logs_dir="${{ env.CRASHED_TESTS_OUTPUT_DIR_PATH }}"
-          crashed_tests=""
-          contains_crashed_tests=false
-
-          # Exit early with safe outputs if logs dir missing
-          if [ ! -d "$logs_dir" ]; then
-            echo "crashed-tests<<EOF" >> "$GITHUB_OUTPUT"
-            echo "$crashed_tests" >> "$GITHUB_OUTPUT"
-            echo "EOF" >> "$GITHUB_OUTPUT"
-            echo "contains-crashed-tests=${contains_crashed_tests}" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          tmpfile="$(mktemp)"
-          trap 'rm -f "$tmpfile"' EXIT
-
-          # Collect .log files
-          files=()
-          while IFS= read -r -d '' f; do
-            files+=("$f")
-          done < <(find "$logs_dir" -type f -name '*.log' -print0)
-
-          if [ "${#files[@]}" -eq 0 ]; then
-            echo "crashed-tests<<EOF" >> "$GITHUB_OUTPUT"
-            echo "$crashed_tests" >> "$GITHUB_OUTPUT"
-            echo "EOF" >> "$GITHUB_OUTPUT"
-            echo "contains-crashed-tests=${contains_crashed_tests}" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          # Sort logs deterministically
-          IFS=$'\n' sorted_files=($(printf '%s\n' "${files[@]}" | sort -V))
-          unset IFS
-
-          # Extract crash test tokens
-          : > "$tmpfile"
-          for file in "${sorted_files[@]}"; do
-            perl -nE 'while ( /([^\s]+::[^\s]+)/g ) { say $1 }' "$file" 2>/dev/null >> "$tmpfile" || true
-            printf '\n' >> "$tmpfile"
-          done
-
-          # Filter unwanted lines (errors, tracebacks, etc.)
-          filtered_tmp="$(mktemp)"
-          trap 'rm -f "$filtered_tmp" "$tmpfile"' EXIT
-
-          while IFS= read -r line || [ -n "$line" ]; do
-            line="$(printf '%s' "$line" | tr -d '\r' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
-            [ -z "$line" ] && continue
-            low="$(printf '%s' "$line" | tr '[:upper:]' '[:lower:]')"
-            if printf '%s\n' "$low" | grep -qiE '(error|exception|traceback|killed|oom|failed|critical)'; then
-              continue
-            fi
-            if printf '%s\n' "$line" | grep -qE '^[=-]{2,}$'; then
-              continue
-            fi
-            if printf '%s\n' "$line" | grep -qE '^[^[:space:]]+::[^[:space:]]+(\[[^]]+\])?$'; then
-              printf '%s\n' "$line" >> "$filtered_tmp"
-            fi
-          done < "$tmpfile"
-
-          mapfile -t tokens < <(awk 'NF && !seen[$0]++ { print }' "$filtered_tmp")
-          rm -f "$filtered_tmp" || true
-
-          joined=""
-          if [ "${#tokens[@]}" -gt 0 ]; then
-            joined=$(printf '%s,' "${tokens[@]}")
-            joined=${joined%,}
-          fi
-
-          # Fix missing commas between concatenated forge/ entries
-          if [ -n "$joined" ]; then
-            joined="$(perl -pe 's/\s+(?=forge\/)//g' <<< "$joined")"
-            joined="$(perl -pe 's/([^,])(?=forge\/)/\1,/g' <<< "$joined")"
-          fi
-
-          if [ -n "$joined" ]; then
-            contains_crashed_tests=true
-            crashed_tests="$joined"
-          fi
-
-          echo "crashed-tests<<EOF" >> "$GITHUB_OUTPUT"
-          echo "$crashed_tests" >> "$GITHUB_OUTPUT"
-          echo "EOF" >> "$GITHUB_OUTPUT"
-          echo "contains-crashed-tests=${contains_crashed_tests}" >> "$GITHUB_OUTPUT"
-
-      - name: show outputs
-        run: |
-          echo "crashed-tests (raw): ${{ steps.extract-crashed-tests.outputs.crashed-tests }}"
-          echo "contains-crashed-tests: ${{ steps.extract-crashed-tests.outputs.contains-crashed-tests }}"
+    # Delegates the work to a reusable workflow instead of inline steps.
+    # `secrets: inherit` passes the caller's secrets on to the called workflow.
+    uses: ./.github/workflows/extract-crashed-tests.yml
+    secrets: inherit
+    with:
+      # Inspect the artifacts produced by this very repository and run.
+      repo: ${{ github.repository }}
+      run_id: ${{ github.run_id }}
+      output_dir: crashed_tests_output_logs/
+      artifact_prefix: unique-ops-configs-crashed-tests
 
   extract-unique-ops-configuration-from-crashed-tests:
     if: ${{ always() && needs.extract-crashed-tests-from-non-oom.outputs.contains-crashed-tests == 'true' }}

@@ -20,7 +20,7 @@ on:
           - "4"
           - "8"
       run_ops_sweeps:
-        description: 'Run models ops and sweeps tests'
+        description: 'Run sweeps tests'
         required: false
         default: 'No'
         type: choice
@@ -121,6 +121,43 @@ jobs:
       runs-on: '[{"runs-on": "n150"}]'
       tests_to_filter: ${{ needs.set-inputs.outputs.nightly_tests_paths }}
 
+  # Runs once both full-model test jobs have finished, whatever their result
+  # (`always()`), and calls the reusable extraction workflow for this run with
+  # the artifact prefix `test-crash-log`.
+  extract-crashed-tests-from-full-model-passing-and-failing:
+    needs:
+      - docker-build
+      - set-inputs
+      - build
+      - test_full_model_passing
+      - test_full_model_xfailing
+    if: always()
+    uses: ./.github/workflows/extract-crashed-tests.yml
+    secrets: inherit
+    with:
+      # Inspect the artifacts produced by this very repository and run.
+      repo: ${{ github.repository }}
+      run_id: ${{ github.run_id }}
+      output_dir: crashed_tests_output_logs/
+      artifact_prefix: test-crash-log
+
+  # Hands the crashed tests found by the extraction job to test-sub.yml. The
+  # job only runs when the extraction job reported contains-crashed-tests ==
+  # 'true'; `always()` keeps it eligible even if an upstream job failed.
+  run-crashed-tests-from-full-model-passing-and-failing:
+    if: ${{ always() && needs.extract-crashed-tests-from-full-model-passing-and-failing.outputs.contains-crashed-tests == 'true' }}
+    needs:
+      - docker-build
+      - set-inputs
+      - build
+      - test_full_model_passing
+      - test_full_model_xfailing
+      - extract-crashed-tests-from-full-model-passing-and-failing
+    uses: ./.github/workflows/test-sub.yml
+    secrets: inherit
+    with:
+      test_mark: 'nightly'
+      # Group count, group ids and the test filter all come from the outputs
+      # of the extraction job.
+      test_group_cnt: ${{ needs.extract-crashed-tests-from-full-model-passing-and-failing.outputs.crashed-test-cnt }}
+      test_group_ids: ${{ needs.extract-crashed-tests-from-full-model-passing-and-failing.outputs.crashed-test-ids }}
+      docker-image: ${{ needs.docker-build.outputs.docker-image }}
+      run_id: ${{ needs.build.outputs.run_id }}
+      runs-on: '[{"runs-on": "n150"}]'
+      tests_to_filter: ${{ needs.extract-crashed-tests-from-full-model-passing-and-failing.outputs.crashed-tests }}
+      # NOTE(review): presumably lets failures of these re-runs not fail the
+      # overall run - confirm against test-sub.yml.
+      allow-fail: true
+
   test_sweeps:
     if: ${{ needs.set-inputs.outputs.run_ops_sweeps }}
     needs:
@@ -146,6 +183,8 @@ jobs:
       - build
       - test_full_model_passing
       - test_full_model_xfailing
+      - extract-crashed-tests-from-full-model-passing-and-failing
+      - run-crashed-tests-from-full-model-passing-and-failing
       - test_sweeps
     runs-on: Ubuntu-latest
     outputs: