Skip to content

Commit 9e3a7f3

Browse files
authored
Merge branch 'main' into llong/dit_ag_min
2 parents 62d1d85 + ba32283 commit 9e3a7f3

File tree

902 files changed

+50067
-22928
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

902 files changed

+50067
-22928
lines changed

.github/CODEOWNERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,8 @@ tests/ttnn/**/operations/pool/ @tenstorrent/metalium-developers-convolutions @te
244244
tests/sweep_framework/ @stevendae @bbradelTT @ntarafdar @pavlejosipovic @tenstorrent/codeowner-bypass
245245
tests/ttnn/nightly/unit_tests/operations/matmul/ @tenstorrent/metalium-developers-mmfusedreduce @tenstorrent/codeowner-bypass
246246
tests/ttnn/nightly/unit_tests/operations/transformers/ @tenstorrent/metallium-maintainers-llama-models @tenstorrent/codeowner-bypass
247+
tests/ttnn/unit_tests/operations/sdpa/ @tenstorrent/metallium-maintainers-llama-models @tenstorrent/codeowner-bypass
248+
tests/ttnn/nightly/unit_tests/operations/sdpa/ @tenstorrent/metallium-maintainers-llama-models @tenstorrent/codeowner-bypass
247249
tests/ttnn/nightly/unit_tests/operations/eltwise/ @tenstorrent/metalium-developers-eltwise @tenstorrent/codeowner-bypass
248250
tests/nightly/t3000/ @tenstorrent/codeowner-bypass @tenstorrent/metalium-developers-ops-data-movement
249251
tests/nightly/tg/ @tenstorrent/codeowner-bypass @tenstorrent/metalium-developers-ops-data-movement

.github/actions/analyze-workflow-data/owners.json

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -574,10 +574,6 @@
574574
{ "job-name-component": "Galaxy Wan2.2 demo tests", "owner": { "id": "U03FJB5TM5Y", "name": "Colman Glagovich" } },
575575
{ "job-name-component": "Galaxy Mochi demo tests", "owner": { "id": "U09ELB03XRU", "name": "Stephen Osborne" } },
576576
{ "job-name-component": "Galaxy DeepSeek v3 demo tests", "owner": { "id": "U08H32XUS9W", "name": "Yousef Al Rawwash" } },
577-
{ "job-name-component": "ops-sanity-tests / example test", "owner": { "id": "U08DEGUJY3H", "name": "Rose Li" } },
578-
{ "job-name-component": "example test", "owner": { "id": "U08DEGUJY3H", "name": "Rose Li" } },
579-
{ "job-name-component": "ops-sanity-tests / example test BH", "owner": { "id": "U08DEGUJY3H", "name": "Rose Li" } },
580-
{ "job-name-component": "example test BH", "owner": { "id": "U08DEGUJY3H", "name": "Rose Li" } },
581577
{ "job-name-component": "t3000-demo-tests / t3k sd35 large tests", "owner": { "id": "U03FJB5TM5Y", "name": "Colman Glagovich" } },
582578
{ "job-name-component": "t3k sd35 large tests", "owner": { "id": "U03FJB5TM5Y", "name": "Colman Glagovich" } },
583579
{ "job-name-component": "t3000-demo-tests / t3k gpt oss tests", "owner": { "id": "U08TJ70UFRT", "name": "Harry Andrews" } },

.github/actions/analyze-workflow-data/update-owners-from-pipeline.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ const path = require('path');
77
* Examples:
88
* t3k_unit_tests.yaml -> t3000-unit-tests
99
* t3k_demo_tests.yaml -> t3000-demo-tests
10-
* ops/sanity/tests.yaml -> ops-sanity
1110
*/
1211
function getWorkflowPrefix(filePath) {
1312
const relativePath = path.relative(path.join(__dirname, '../../..', 'tests/pipeline_reorg'), filePath);

.github/actions/sweep-run-analysis/scripts/send_slack_notification.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import os
99
import sys
1010
import time
11-
from typing import Optional
1211

1312
import requests
1413

@@ -341,10 +340,15 @@ def build_cancelled_block() -> list[dict]:
341340
]
342341

343342

344-
def build_superset_link_block(run_id: Optional[int]) -> dict:
345-
"""Build the Superset dashboard link block."""
346-
if run_id:
347-
url = f"{SUPERSET_BASE_URL}?run_id={run_id}"
343+
def build_superset_link_block() -> dict:
344+
"""Build the Superset dashboard link block.
345+
346+
Uses gh_run_number (GITHUB_RUN_ID) since it's available immediately,
347+
whereas run_id (database PK) requires Airflow ingestion first.
348+
"""
349+
if GITHUB_RUN_ID:
350+
# Use GitHub run ID - works immediately, dashboard shows data after ingestion
351+
url = f"{SUPERSET_BASE_URL}?gh_run_number={GITHUB_RUN_ID}"
348352
text = f"<{url}|View in Superset>"
349353
else:
350354
text = f"<{GITHUB_ACTIONS_URL}|View in GitHub Actions>"
@@ -419,7 +423,7 @@ def build_slack_message(results: dict, conclusion: str) -> dict:
419423

420424
# Superset link
421425
blocks.append({"type": "divider"})
422-
blocks.append(build_superset_link_block(results.get("run_id")))
426+
blocks.append(build_superset_link_block())
423427

424428
return {"blocks": blocks}
425429

.github/scripts/utils/prepare_test_matrix.py

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,21 @@ def load_tests(tests_yaml_path):
8989

9090
def build_test_matrix(tests, enabled_skus, sku_config):
9191
"""
92-
Filter tests based on enabled SKUs and add runs_on labels.
92+
Filter tests based on enabled SKUs and expand multi-SKU entries into flat matrix entries.
93+
94+
Each test entry may define multiple SKUs in its 'skus' dict. This function
95+
expands each test into one matrix entry per enabled SKU, with the appropriate
96+
timeout and runs_on labels.
9397
9498
Args:
95-
tests: List of test dictionaries
99+
tests: List of test dictionaries (with 'skus' dict)
96100
enabled_skus: List of enabled SKU strings
97101
sku_config: Dictionary mapping SKU names to their configuration
98102
99103
Returns:
100-
Filtered list of test dictionaries with runs_on added
104+
Filtered list of flat test dictionaries. Each entry has all keys from the
105+
test (e.g. name, cmd, model, owner_id, team) with skus removed and sku,
106+
timeout, and runs_on set for the selected SKU.
101107
"""
102108
if not enabled_skus:
103109
print("::error::No SKUs enabled. At least one SKU must be specified.")
@@ -112,25 +118,40 @@ def build_test_matrix(tests, enabled_skus, sku_config):
112118
filtered_tests = []
113119

114120
for test in tests:
115-
test_sku = test.get("sku")
116121
test_name = test.get("name", "Unnamed Test")
122+
test_skus = test.get("skus")
117123

118-
# Skip tests without a SKU
119-
if not test_sku:
120-
print(f"::warning::Test '{test_name}' has no SKU, skipping")
124+
# Skip tests without skus
125+
if not test_skus or not isinstance(test_skus, dict):
126+
print(f"::warning::Test '{test_name}' has no valid 'skus' mapping, skipping")
121127
continue
122128

123-
# Filter: only include tests whose SKU is in the enabled list
124-
if test_sku in enabled_skus:
125-
# Add runs_on from SKU config
126-
if test_sku in sku_config:
127-
test_with_runs_on = test.copy()
128-
test_with_runs_on["runs_on"] = sku_config[test_sku].get("runs_on", [])
129-
filtered_tests.append(test_with_runs_on)
130-
else:
131-
print(f"::warning::SKU '{test_sku}' for test '{test_name}' not found in SKU config, skipping")
129+
# Determine which of this test's SKUs are enabled
130+
matching_skus = [s for s in test_skus if s in enabled_skus]
131+
132+
# Append SKU to name when the same test runs on more than one SKU
133+
append_sku_to_name = len(matching_skus) > 1
134+
135+
for sku_name in matching_skus:
136+
sku_test_config = test_skus[sku_name]
137+
138+
if sku_name not in sku_config:
139+
print(f"::warning::SKU '{sku_name}' for test '{test_name}' not found in SKU config, skipping")
132140
continue
133141

142+
# Start from test copy so all keys (model, arch, etc.) are preserved
143+
entry = test.copy()
144+
entry.pop("skus", None)
145+
entry["sku"] = sku_name
146+
entry["timeout"] = sku_test_config.get("timeout", 0)
147+
entry["runs_on"] = sku_config[sku_name].get("runs_on", [])
148+
if append_sku_to_name:
149+
entry["name"] = f"{test_name} [{sku_name}]"
150+
for key, value in sku_test_config.items():
151+
if key != "timeout" and value is not None:
152+
entry[key] = value
153+
filtered_tests.append(entry)
154+
134155
if not filtered_tests:
135156
print(f"::error::No tests selected for enabled SKUs '{','.join(enabled_skus)}'. Failing pipeline.")
136157
sys.exit(1)

.github/scripts/utils/verify_time_budget.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def verify_timeouts(tests_file, time_budget_file, workflow_name):
3030
test_name = test.get("name", "Unnamed Test")
3131

3232
# Validate that all mandatory keys exist for this test
33-
required_keys = ["timeout", "sku", "team"]
33+
required_keys = ["skus", "team"]
3434
missing_keys = [key for key in required_keys if key not in test]
3535
if missing_keys:
3636
print(
@@ -39,14 +39,29 @@ def verify_timeouts(tests_file, time_budget_file, workflow_name):
3939
errors_found = True
4040
continue # Skip this invalid test
4141

42-
test_timeout = test["timeout"]
43-
test_sku = test["sku"]
42+
test_skus = test["skus"]
4443
test_team = test["team"]
4544

46-
# Use a tuple (team, sku) as the key for summation
47-
budget_key = (test_team, test_sku)
48-
budget_totals[budget_key] += test_timeout
49-
print(f" Test '{test_name}' (Team: {test_team}, SKU: {test_sku}) adds {test_timeout} min.")
45+
if not isinstance(test_skus, dict) or not test_skus:
46+
print(
47+
f" [ERROR] Validation FAILED! Test '{test_name}' has invalid 'skus' field. "
48+
f"Expected a non-empty mapping of SKU names to their config."
49+
)
50+
errors_found = True
51+
continue
52+
53+
for sku_name, sku_config in test_skus.items():
54+
if not isinstance(sku_config, dict) or "timeout" not in sku_config:
55+
print(f" [ERROR] Validation FAILED! Test '{test_name}', SKU '{sku_name}' is missing 'timeout'.")
56+
errors_found = True
57+
continue
58+
59+
test_timeout = sku_config["timeout"]
60+
61+
# Use a tuple (team, sku) as the key for summation
62+
budget_key = (test_team, sku_name)
63+
budget_totals[budget_key] += test_timeout
64+
print(f" Test '{test_name}' (Team: {test_team}, SKU: {sku_name}) adds {test_timeout} min.")
5065

5166
if errors_found:
5267
print(f"\nMissing keys in {tests_file}. Please fix the entries above.")

.github/sku_config.yaml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,26 @@
44
# Each SKU entry contains:
55
# runs_on: Array of GitHub Actions runner labels that tests for this SKU run on.
66
skus:
7+
wh_llmbox:
8+
runs_on:
9+
- arch-wormhole_b0
10+
- config-t3000
11+
- pipeline-functional
12+
- in-service
13+
14+
wh_llmbox_perf:
15+
runs_on:
16+
- arch-wormhole_b0
17+
- config-t3000
18+
- pipeline-perf
19+
- in-service
20+
21+
bh_p100:
22+
runs_on:
23+
- cloud-virtual-machine
24+
- P100
25+
- in-service
26+
727
wh_galaxy:
828
runs_on:
929
- arch-wormhole_b0
@@ -17,3 +37,19 @@ skus:
1737
- pipeline-functional
1838
- topology-6u
1939
- in-service
40+
41+
wh_n150_civ2:
42+
runs_on:
43+
- tt-ubuntu-2204-N150-viommu-stable
44+
45+
wh_n300_civ2:
46+
runs_on:
47+
- tt-ubuntu-2204-N300-viommu-stable
48+
49+
wh_llmbox_civ2:
50+
runs_on:
51+
- tt-ubuntu-2204-N300-llmbox-stable
52+
53+
bh_p150b_civ2:
54+
runs_on:
55+
- tt-ubuntu-2204-P150b-stable

.github/time_budget.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,14 @@ scaleout:
9797

9898
models:
9999
e2e:
100-
wh_llmbox: 50
100+
wh_llmbox: 110
101101
unit:
102-
wh_llmbox: 287
102+
wh_llmbox: 292
103103
integration:
104104
wh_llmbox: 422
105105
perf:
106-
wh_llmbox: 685
106+
wh_llmbox_perf: 685
107107
wh_galaxy: 260
108108
demo:
109-
wh_llmbox: 1090
109+
wh_llmbox_perf: 1090
110110
wh_galaxy: 270

.github/workflows/_auto-retry-post-commit.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ on:
1919
# to want to to
2020
- "zzz TG Quick tests"
2121
- "apc nightly debug run"
22+
- "apc nightly debug run with watcher"
2223
types:
2324
- completed
2425

.github/workflows/_produce-data.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ on:
3737
- "All post-commit tests"
3838
- "PR - All post-commit tests"
3939
- "apc nightly debug run"
40+
- "apc nightly debug run with watcher"
4041
- "(Single-card) Model perf tests"
4142
- "(Single-card) Device perf regressions"
4243
- "(Single-card) Demo tests"

0 commit comments

Comments
 (0)