Skip to content

Commit 3eb14ef

Browse files
Merge branch 'main' into sai_arthi_raguram/model_deps_split
2 parents 49518fc + 7d35604 commit 3eb14ef

File tree

289 files changed

+14639
-14906
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

289 files changed

+14639
-14906
lines changed

.github/model-analysis-config.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,17 @@ declare -A env_vars
88

99
# Model ops test generation
1010
# 1) PR config
11-
env_vars["BRANCH_NAME"]="generate_models_ops_test"
11+
env_vars["BRANCH_NAME"]="generate_models_ops_tests"
1212
env_vars["COMMIT_MESSAGE"]="Generate and update models ops tests"
1313
env_vars["TITLE"]="Generate and update models ops tests"
1414
env_vars["BODY"]="This PR will generate models ops tests by extracting the unique ops configurations across all the models present inside the forge/test/models directory path."
15-
env_vars["OUTPUT_PATH"]="forge/test/models_ops/"
1615

1716
# 2) Script config
1817
env_vars["UNIQUE_OPS_OUTPUT_DIR_PATH"]="models_unique_ops_output/"
1918
env_vars["MODELS_OPS_TEST_OUTPUT_DIR_PATH"]="forge/test"
2019
env_vars["MODELS_OPS_TEST_PACKAGE_NAME"]="models_ops"
2120
env_vars["SCRIPT_OUTPUT_LOG"]="generate_models_ops_test.log"
21+
env_vars["GENERATED_MODELS_OPS_TESTS_PATH"]="forge/test/models_ops/"
2222

2323

2424
for key in "${!env_vars[@]}"; do

.github/workflows/build-image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ permissions:
2626
jobs:
2727

2828
build-image:
29-
runs-on: tt-beta-ubuntu-2204-large
29+
runs-on: tt-ubuntu-2204-large-stable
3030
outputs:
3131
docker-image: ${{ steps.build.outputs.docker-image }}
3232
steps:

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
fail-fast: false
5151
matrix:
5252
build: [
53-
{ runs-on: tt-beta-ubuntu-2204-large, build-type: Release },
53+
{ runs-on: tt-ubuntu-2204-large-stable, build-type: Release },
5454
]
5555

5656
runs-on:
@@ -141,7 +141,7 @@ jobs:
141141
fail-fast: false
142142
matrix:
143143
build: [
144-
{ runs-on: tt-beta-ubuntu-2204-large, build-type: Debug },
144+
{ runs-on: tt-ubuntu-2204-large-stable, build-type: Debug },
145145
]
146146

147147
runs-on:

.github/workflows/model-analysis.yml

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ on:
1818
runs-on:
1919
description: 'Runs on'
2020
required: false
21-
default: runner
21+
default: n150
2222
type: choice
2323
options:
2424
- runner
@@ -39,13 +39,20 @@ on:
3939
required: false
4040
type: boolean
4141
default: false
42+
create_pull_request:
43+
description: 'Automatically create a Pull Request containing the generated model ops tests.'
44+
required: false
45+
type: boolean
46+
default: false
47+
# schedule:
48+
# - cron: '0 0 * * *' # Runs at 00:00 UTC (midnight) every day
4249

4350

4451
permissions:
4552
packages: write
4653
checks: write
4754

48-
run-name: "Model Analysis Ops Test Generation (${{inputs.runs-on}}-${{inputs.test_group_cnt}})"
55+
run-name: "Model Analysis (${{inputs.runs-on}}-${{inputs.test_group_cnt}})"
4956

5057
jobs:
5158

@@ -61,13 +68,23 @@ jobs:
6168
test_group_cnt: ${{ steps.set-inputs.outputs.test_group_cnt }}
6269
test_group_ids: ${{ steps.set-inputs.outputs.test_group_ids }}
6370
runs-on: ${{ steps.set-inputs.outputs.runs-on }}
71+
runner: ${{ steps.set-inputs.outputs.runner}}
72+
create_pr: ${{ steps.set-inputs.outputs.create_pr }}
6473
steps:
6574
- name: Inputs Management
6675
id: set-inputs
6776
run: |
68-
echo "test_group_cnt=${{ inputs.test_group_cnt }}" >> $GITHUB_OUTPUT
69-
echo "test_group_ids=[$(seq -s ',' 1 ${{ inputs.test_group_cnt }})]" >> $GITHUB_OUTPUT
70-
echo "runs-on=[{\"runs-on\": \"${{ inputs.runs-on }}\"}]" >> $GITHUB_OUTPUT
77+
default_test_group_cnt=10
78+
default_runs_on=n150
79+
default_create_pr=false
80+
tgc=$(if [ -z "${{ inputs.test_group_cnt }}" ]; then echo $default_test_group_cnt; else echo ${{ inputs.test_group_cnt }}; fi)
81+
runs_on=$(if [ -z "${{ inputs.runs-on }}" ]; then echo $default_runs_on; else echo ${{ inputs.runs-on}}; fi)
82+
default_create_pr=$(if [ -z "${{ inputs.create_pull_request }}" ]; then echo $default_create_pr; else echo ${{ inputs.create_pull_request }}; fi)
83+
echo "test_group_cnt=$tgc" >> $GITHUB_OUTPUT
84+
echo "test_group_ids=[$(seq -s ',' 1 $tgc)]" >> $GITHUB_OUTPUT
85+
echo "runs-on=[{\"runs-on\": \"$runs_on\"}]" >> $GITHUB_OUTPUT
86+
echo "runner=$runs_on" >> $GITHUB_OUTPUT
87+
echo "create_pr=$default_create_pr" >> $GITHUB_OUTPUT
7188
7289
build:
7390
needs:
@@ -104,7 +121,7 @@ jobs:
104121
- build
105122
- extract-unique-ops-configuration
106123

107-
runs-on: ["in-service", "${{ inputs.runs-on }}"]
124+
runs-on: ["in-service", "${{ needs.set-inputs.outputs.runner }}" ]
108125

109126
container:
110127
image: ${{ needs.docker-build.outputs.docker-image }}
@@ -216,8 +233,16 @@ jobs:
216233
name: models-unique-ops-output
217234
path: ${{ env.UNIQUE_OPS_OUTPUT_DIR_PATH }}
218235

236+
- name: Upload Generated Models Ops Tests
237+
uses: actions/upload-artifact@v4
238+
if: ${{ needs.set-inputs.outputs.create_pr == 'false' }}
239+
with:
240+
name: generated-models-ops-tests
241+
path: ${{ env.GENERATED_MODELS_OPS_TESTS_PATH }}
242+
219243
- name: Create Pull Request
220244
uses: peter-evans/create-pull-request@v7
245+
if: ${{ needs.set-inputs.outputs.create_pr == 'true' }}
221246
with:
222247
branch: ${{ env.BRANCH_NAME }}
223248
committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
@@ -227,7 +252,26 @@ jobs:
227252
title: ${{ env.TITLE }}
228253
body: ${{ env.BODY }}
229254
delete-branch: true
230-
draft: true
231255
token: ${{ secrets.GH_TOKEN }}
232256
add-paths: |
233-
${{ env.OUTPUT_PATH }}
257+
${{ env.GENERATED_MODELS_OPS_TESTS_PATH }}
258+
259+
run-models-ops-tests:
260+
if: ${{ needs.set-inputs.outputs.create_pr == 'false' }}
261+
needs:
262+
- docker-build
263+
- set-inputs
264+
- build
265+
- extract-unique-ops-configuration
266+
- generate-models-ops-tests
267+
uses: ./.github/workflows/test-sub.yml
268+
secrets: inherit
269+
with:
270+
docker-image: ${{ needs.docker-build.outputs.docker-image }}
271+
test_mark: 'nightly_models_ops'
272+
test_group_cnt: ${{ needs.set-inputs.outputs.test_group_cnt }}
273+
test_group_ids: ${{ needs.set-inputs.outputs.test_group_ids }}
274+
runs-on: '[{"runs-on": "n150"}]'
275+
sh-runner: true
276+
run_id: ${{ needs.build.outputs.run_id }}
277+
run_models_ops_tests: true

.github/workflows/test-model-analysis-sub.yml

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ on:
4040
required: false
4141
default: false
4242
type: boolean
43+
sh-runner:
44+
description: 'Run tests using shared runners'
45+
required: false
46+
type: boolean
4347

4448
jobs:
4549
run-tests:
@@ -50,14 +54,12 @@ jobs:
5054
build: ${{ fromJson(inputs.runs-on) }}
5155
test_group_id: ${{ fromJSON(inputs.test_group_ids) }}
5256

53-
runs-on:
54-
- in-service
55-
- ${{ matrix.build.runs-on }}
57+
runs-on: ${{ inputs.sh-runner && format('tt-beta-ubuntu-2204-{0}-large-stable', matrix.build.runs-on) || fromJson(format('["{0}", "in-service"]', matrix.build.runs-on)) }}
5658

5759
continue-on-error: ${{ inputs.allow-fail }}
5860

5961
container:
60-
image: ${{ inputs.docker-image }}
62+
image: ${{ inputs.sh-runner && format('harbor.ci.tenstorrent.net/{0}', inputs.docker-image) || inputs.docker-image }}
6163
options: --device /dev/tenstorrent/0
6264
volumes:
6365
- /dev/hugepages:/dev/hugepages
@@ -70,21 +72,22 @@ jobs:
7072
env:
7173
HF_TOKEN: ${{ secrets.HF_TOKEN }}
7274
HF_HOME: /mnt/dockercache/huggingface
75+
IRD_LF_CACHE: ${{ vars.IRD_LF_CACHE }}
7376
FORGE_MODELS_CACHE: /mnt/dockercache/forge_models_cache
7477
HF_HUB_DISABLE_PROGRESS_BARS: 1
7578
FORGE_DISABLE_REPORTIFY_DUMP: 1
7679
FORGE_EXTRACT_TVM_UNIQUE_OPS_CONFIG: 1
7780
FORGE_EXPORT_TVM_UNIQUE_OPS_CONFIG_DETAILS: 1
7881

79-
name: "run-tests ${{ inputs.test_mark }} (${{ matrix.build.runs-on }}, ${{ matrix.test_group_id }})"
82+
name: "run-tests ${{ inputs.test_mark }} (${{ inputs.sh-runner && format('{0}-shared', matrix.build.runs-on) || (matrix.build.runs-on) }}, ${{ matrix.test_group_id }})"
8083

8184
steps:
8285

8386
- name: Fetch job id
8487
id: fetch-job-id
8588
uses: tenstorrent/tt-github-actions/.github/actions/job_id@main
8689
with:
87-
job_name: "run-tests ${{ inputs.test_mark }} (${{ matrix.build.runs-on }}, ${{ matrix.test_group_id }})"
90+
job_name: "run-tests ${{ inputs.test_mark }} (${{ inputs.sh-runner && format('{0}-shared', matrix.build.runs-on) || (matrix.build.runs-on) }}, ${{ matrix.test_group_id }})"
8891

8992
- name: Set reusable strings
9093
id: strings
@@ -101,12 +104,18 @@ jobs:
101104
- uses: actions/checkout@v4
102105
with:
103106
sparse-checkout: |
107+
.github/workflows/
104108
env/
105109
forge/test
106110
pytest.ini
107111
conftest.py
108112
.test_durations
109113
114+
- name: Setup Forge Models repo
115+
shell: bash
116+
run: |
117+
git submodule update --init --recursive -f third_party/tt_forge_models
118+
110119
# Clean everything from submodules (needed to avoid issues
111120
# with cmake generated files leftover from previous builds)
112121
- name: Cleanup submodules
@@ -146,18 +155,19 @@ jobs:
146155
147156
- name: Extract and Export Unique Ops Configuration
148157
shell: bash
158+
timeout-minutes: 1440 # Set execution time limit to 1 day (default: 6 hours)
149159
run: |
160+
set -o pipefail
150161
source env/activate
151-
162+
echo "Collecting tests for group ${{ matrix.test_group_id }} with mark '${{ inputs.test_mark }}'..."
163+
set +e
152164
pytest_args=(
153165
"--splits" "${{ inputs.test_group_cnt }}"
154166
"--group" "${{ matrix.test_group_id }}"
155167
"--splitting-algorithm" "least_duration"
156168
"-m" "${{ inputs.test_mark }}"
157-
"--log-memory-usage"
158-
"--runxfail"
159-
"--no-skips"
160-
"-vss"
169+
"--collect-only"
170+
"-q"
161171
)
162172
if [ -n "${{ inputs.tests_to_filter }}" ]; then
163173
# Split on commas and trim whitespace
@@ -167,15 +177,34 @@ jobs:
167177
pytest_args+=("$(echo "$tf" | xargs)")
168178
done
169179
fi
170-
171-
pytest forge/test/models/ "${pytest_args[@]}" 2>&1 | tee pytest.log
172-
173-
- name: Upload Test Log
180+
pytest forge/test/models/ "${pytest_args[@]}" \
181+
| sed -n '/^Collected tests /,/^collected /p' | sed '/^[Cc]ollected /d' >.pytest_tests_to_run
182+
183+
if [ $? -ne 0 ]; then
184+
echo "Failed to collect tests. Doing dry run..."
185+
set -e
186+
pytest forge/test/models/ "${pytest_args[@]}"
187+
exit 1
188+
fi
189+
echo "Collected tests."
190+
cat .pytest_tests_to_run
191+
192+
python .github/workflows/test_runner.py \
193+
--continue-after-crash \
194+
--log-memory-usage \
195+
--runxfail \
196+
--no-skips \
197+
-vss \
198+
2>&1 | tee extract-and-export-unique-ops-configs.log
199+
exit_code=${PIPESTATUS[0]}
200+
exit $exit_code
201+
202+
- name: Upload Extract And Export Unique Ops Configs Log
174203
uses: actions/upload-artifact@v4
175204
if: success() || failure()
176205
with:
177-
name: test-log-${{ matrix.build.runs-on }}-${{ matrix.test_group_id }}-${{ inputs.test_mark }}-${{ steps.fetch-job-id.outputs.job_id }}
178-
path: pytest.log
206+
name: extract-and-export-unique-ops-configs-log-${{ matrix.build.runs-on }}-${{ matrix.test_group_id }}-${{ inputs.test_mark }}-${{ steps.fetch-job-id.outputs.job_id }}
207+
path: extract-and-export-unique-ops-configs.log
179208

180209
- name: Upload Memory Usage Log
181210
uses: actions/upload-artifact@v4

.github/workflows/test-sub.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ on:
5252
description: 'Run tests using shared runners'
5353
required: false
5454
type: boolean
55+
run_models_ops_tests:
56+
description: 'Run the generated models ops tests'
57+
required: false
58+
default: false
59+
type: boolean
5560

5661
jobs:
5762
run-tests:
@@ -62,7 +67,7 @@ jobs:
6267
build: ${{ fromJson(inputs.runs-on) }}
6368
test_group_id: ${{ fromJSON(inputs.test_group_ids) }}
6469

65-
runs-on: ${{ inputs.sh-runner && format('tt-beta-ubuntu-2204-{0}-large-stable', matrix.build.runs-on) || fromJson(format('["{0}", "in-service"]', matrix.build.runs-on)) }}
70+
runs-on: ${{ inputs.sh-runner && format('tt-ubuntu-2204-{0}-stable', matrix.build.runs-on) || fromJson(format('["{0}", "in-service"]', matrix.build.runs-on)) }}
6671

6772
continue-on-error: ${{ inputs.allow-fail }}
6873

@@ -151,6 +156,16 @@ jobs:
151156
pip install tt_tvm*.whl --upgrade
152157
pip install tt_forge_fe*.whl --upgrade
153158
159+
- name: Download Generated Models Ops Tests
160+
if: ${{ inputs.run_models_ops_tests }}
161+
continue-on-error: true
162+
uses: tenstorrent/tt-forge/.github/actions/download-artifact@main
163+
with:
164+
name: generated-models-ops-tests
165+
run_id: ${{ inputs.run_id }}
166+
github_token: ${{ secrets.GITHUB_TOKEN }}
167+
path: forge/test/models_ops
168+
154169
- name: Run Test
155170
env:
156171
HF_TOKEN: ${{ secrets.HF_TOKEN }}

0 commit comments

Comments
 (0)