Skip to content

Commit 28a9863

Browse files
Remove assert on ARCH_NAME in data collection step in workflows (#37300)
We would like to gradually remove reliance on the `ARCH_NAME` env var. Recently, `ARCH_NAME` was removed from the Galaxy demo pipelines. No tests need the env var set, but the data collection step in the workflow still has an assert on `ARCH_NAME`. This PR removes the hard assert on `ARCH_NAME` in the data collection script and instead tries to infer the architecture from the runner name for CIv2 or from the SKU config for CIv1. Testing: - [x] Galaxy demo https://github.com/tenstorrent/tt-metal/actions/runs/21768966673 --------- Co-authored-by: William Ly <williamly@tenstorrent.com>
1 parent 6684e70 commit 28a9863

File tree

4 files changed

+72
-12
lines changed

4 files changed

+72
-12
lines changed
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import sys
2+
13
from infra.data_collection.github.utils import (
24
get_github_partial_benchmark_data_filenames,
35
create_json_with_github_benchmark_environment,
46
)
57

68
if __name__ == "__main__":
    # The workflow may pass the SKU of the test group as the first CLI argument.
    # When no argument is given, None is forwarded.
    cli_args = sys.argv[1:]
    sku_from_test = cli_args[0] if cli_args else None

    # Complete every partial benchmark file with the GitHub environment info.
    for benchmark_data_filename in get_github_partial_benchmark_data_filenames():
        create_json_with_github_benchmark_environment(
            benchmark_data_filename,
            sku_from_test=sku_from_test,
        )

.github/workflows/galaxy-demo-tests-impl.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ jobs:
130130
run: |
131131
if [ -d "generated/benchmark_data" ]; then
132132
echo "has_benchmark_data=true" >> $GITHUB_OUTPUT
133-
python3 .github/scripts/data_analysis/create_benchmark_with_environment_json.py
133+
python3 .github/scripts/data_analysis/create_benchmark_with_environment_json.py "${{ matrix.test-group.sku }}"
134134
else
135135
echo "::warning::Benchmark data directory 'generated/benchmark_data' does not exist"
136136
echo "has_benchmark_data=false" >> $GITHUB_OUTPUT

infra/data_collection/github/utils.py

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from datetime import datetime
99
from typing import Optional, Union
1010

11+
import yaml
1112
from loguru import logger
1213

1314
from infra.data_collection.github.workflows import is_job_hanging_from_job_log
@@ -247,16 +248,14 @@ def get_job_row_from_github_job(github_job, github_job_id_to_annotations, workfl
247248
logger.info("Seems to have no config- label, so assuming no special config requested")
248249
detected_config = None
249250

250-
if labels_have_overlap(["E150", "grayskull", "arch-grayskull"], labels):
251-
detected_arch = "grayskull"
252-
elif labels_have_overlap(["N150", "N300", "wormhole_b0", "arch-wormhole_b0", "config-t3000"], labels):
251+
if labels_have_overlap(["N150", "N300", "wormhole_b0", "arch-wormhole_b0", "config-t3000"], labels):
253252
detected_arch = "wormhole_b0"
254253
elif labels_have_overlap(["BH", "arch-blackhole"], labels):
255254
detected_arch = "blackhole"
256255
else:
257256
detected_arch = None
258257

259-
single_cards_list = ("E150", "N150", "N300", "BH")
258+
single_cards_list = ("N150", "N300", "BH")
260259
single_cards_overlap = get_overlap(single_cards_list, labels)
261260

262261
# In order of preference
@@ -350,11 +349,15 @@ def get_job_rows_from_github_info(workflow_outputs_dir, github_jobs_json, github
350349
return [x for x in job_rows if x is not None]
351350

352351

352+
def _get_repo_root() -> pathlib.Path:
    """Locate the repository root, i.e. the directory that contains infra/."""
    # The fourth ancestor of this module's resolved path is taken as the root
    # (the module lives at infra/data_collection/github/utils.py).
    this_file = pathlib.Path(__file__).resolve()
    return this_file.parents[3]
355+
356+
353357
def get_github_partial_benchmark_data_filenames():
354358
logger.info("We are assuming generated/benchmark_data exists from previous passing test")
355359

356-
current_utils_path = pathlib.Path(__file__)
357-
benchmark_data_dir = current_utils_path.parent.parent.parent.parent / "generated/benchmark_data"
360+
benchmark_data_dir = _get_repo_root() / "generated/benchmark_data"
358361
assert benchmark_data_dir.exists()
359362
assert benchmark_data_dir.is_dir()
360363

@@ -378,7 +381,62 @@ def get_github_runner_environment():
378381
}
379382

380383

381-
def create_json_with_github_benchmark_environment(github_partial_benchmark_data_filename):
384+
def _get_device_type_from_runner_environment(sku_from_test: Optional[str] = None) -> str:
    """
    Infer the device/card type ("wormhole_b0" or "blackhole") from the runner environment.

    Sources are tried in order and the first one that yields an answer wins:

    1. RUNNER_NAME, when it starts with "tt-ubuntu" (assumed to be the CIv2 naming
       scheme): matched against known blackhole / wormhole substrings.
    2. .github/sku_config.yaml, for all other runners when sku_from_test is given:
       the runs_on labels of that SKU are searched for "blackhole" / "wormhole".
    3. The ARCH_NAME env var, when it is set to a supported value. This fallback
       also applies to tt-ubuntu runners whose name could not be parsed.

    Args:
        sku_from_test: SKU name passed in by the workflow. None or an empty string
            skips the sku_config lookup.

    Returns:
        "blackhole", "wormhole_b0", or "unknown" when nothing could be inferred
        (a warning is logged in that case).

    Raises:
        AssertionError: if RUNNER_NAME is not set.
    """
    # Raised explicitly (instead of using `assert`) so the check survives `python -O`.
    if "RUNNER_NAME" not in os.environ:
        raise AssertionError("RUNNER_NAME must be set (GitHub Actions env var)")
    runner_name = os.environ["RUNNER_NAME"]
    runner_lower = runner_name.lower()

    # This assumes all CIv2 runner names start with tt-ubuntu
    if runner_lower.startswith("tt-ubuntu"):
        if any(marker in runner_lower for marker in ("blackhole", "bh-", "p100", "p150")):
            return "blackhole"
        if any(marker in runner_lower for marker in ("n150", "n300", "wormhole")):
            return "wormhole_b0"
        # Unrecognised CIv2 name: fall through to the ARCH_NAME fallback below
        # rather than silently reporting "unknown".
    elif sku_from_test:
        # Not tt-ubuntu: check .github/sku_config.yaml for arch from the SKU's runs_on labels
        sku_config_path = _get_repo_root() / ".github" / "sku_config.yaml"
        if sku_config_path.exists():
            with open(sku_config_path, encoding="utf-8") as f:
                # safe_load returns None for an empty document
                config = yaml.safe_load(f) or {}
            skus = config.get("skus") or {}

            # A SKU that is missing or mapped to null simply yields no labels
            sku_entry = skus.get(sku_from_test) or {}
            runs_on = sku_entry.get("runs_on") or []
            if isinstance(runs_on, str):
                # A scalar runs_on is a single label, not a sequence of characters
                runs_on = [runs_on]

            for label in runs_on:
                label_lower = str(label).lower()
                # Only checks CIv1 style labels in sku_config for now
                if "blackhole" in label_lower:
                    return "blackhole"
                if "wormhole" in label_lower:
                    return "wormhole_b0"

    # Failed to parse from CIv2 runner name and failed to parse from sku_config:
    # Fallback to using ARCH_NAME env var
    arch = os.environ.get("ARCH_NAME")
    if arch in ("wormhole_b0", "blackhole"):
        return arch

    logger.warning(
        f"Could not infer device type from RUNNER_NAME={runner_name!r}. "
        "Set ARCH_NAME env var (wormhole_b0, blackhole) for accurate benchmark data."
    )
    return "unknown"
435+
436+
437+
def create_json_with_github_benchmark_environment(
438+
github_partial_benchmark_data_filename, sku_from_test: Optional[str] = None
439+
):
382440
assert "GITHUB_REPOSITORY" in os.environ
383441
git_repo_name = os.environ["GITHUB_REPOSITORY"]
384442

@@ -411,9 +469,7 @@ def create_json_with_github_benchmark_environment(github_partial_benchmark_data_
411469
logger.warning("Hardcoded null for device_ip")
412470
device_ip = ""
413471

414-
assert "ARCH_NAME" in os.environ
415-
device_type = os.environ["ARCH_NAME"]
416-
assert device_type in ("grayskull", "wormhole_b0", "blackhole")
472+
device_type = _get_device_type_from_runner_environment(sku_from_test=sku_from_test)
417473

418474
logger.warning("Hardcoded null for device_memory_size")
419475
device_memory_size = ""

infra/requirements-infra.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ pydantic
66
toolz
77
defusedxml
88
pytest
9+
PyYAML

0 commit comments

Comments
 (0)