
Commit e3e964b

Merge remote-tracking branch 'origin/main' into bnorris/ci-refactor

2 parents: a3a3ce2 + 021f180

15 files changed: +866, -26 lines

.github/containers/CONTAINER_README.md

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ tt-lang examples are available in `$TTMLIR_TOOLCHAIN_DIR/examples`.
 
 Try running an example:
 ```bash
-python $TTMLIR_TOOLCHAIN_DIR/examples/demo_one.py
+python $TTMLIR_TOOLCHAIN_DIR/examples/tutorial/multicore_grid_auto.py
 ```
 
 ## Available Tools
Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
+# SPDX-License-Identifier: Apache-2.0
+#
+# tt-lang environment activation for installed location
+# This script is used when tt-lang is installed via cmake --install
+
+# Guard against double activation
+if [ "${TTLANG_ENV_ACTIVATED:-0}" = "1" ]; then
+    return 0 2>/dev/null || exit 0
+fi
+
+# Determine the install prefix (parent of env/)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_PREFIX="$(dirname "$SCRIPT_DIR")"
+
+# Default TTMLIR_TOOLCHAIN_DIR if not set (assume same as install prefix for Docker)
+: ${TTMLIR_TOOLCHAIN_DIR:=$INSTALL_PREFIX}
+export TTMLIR_TOOLCHAIN_DIR
+
+# Activate tt-mlir toolchain venv
+if [ -f "${TTMLIR_TOOLCHAIN_DIR}/venv/bin/activate" ]; then
+    . "${TTMLIR_TOOLCHAIN_DIR}/venv/bin/activate"
+fi
+
+# Set paths for installed tt-lang
+export TT_LANG_HOME="$INSTALL_PREFIX"
+export PATH="${INSTALL_PREFIX}/bin:${TTMLIR_TOOLCHAIN_DIR}/bin:$PATH"
+export PYTHONPATH="${INSTALL_PREFIX}/python_packages:${TTMLIR_TOOLCHAIN_DIR}/python_packages:${TTMLIR_TOOLCHAIN_DIR}/python_packages/ttrt/runtime/ttnn:$PYTHONPATH"
+export LD_LIBRARY_PATH="${TTMLIR_TOOLCHAIN_DIR}/lib:$LD_LIBRARY_PATH"
+
+# Set TT_METAL_RUNTIME_ROOT
+export TT_METAL_RUNTIME_ROOT="${TTMLIR_TOOLCHAIN_DIR}/tt-metal"
+export TT_METAL_HOME="$TT_METAL_RUNTIME_ROOT"
+
+export TTLANG_ENV_ACTIVATED=1
+
+cat << 'EOF'
+
+████████╗████████╗ ██╗ █████╗ ███╗ ██╗ ██████╗
+╚══██╔══╝╚══██╔══╝ ██║ ██╔══██╗ ████╗ ██║ ██╔════╝
+██║ ██║ █████╗ ██║ ███████║ ██╔██╗ ██║ ██║ ███╗
+██║ ██║ ╚════╝ ██║ ██╔══██║ ██║╚██╗██║ ██║ ██║
+██║ ██║ ███████╗██║ ██║ ██║ ╚████║ ╚██████╔╝
+╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚═╝ ╚═══╝ ╚═════╝
+EOF
+echo ""
+echo " Toolchain: ${TTMLIR_TOOLCHAIN_DIR}"
+echo " Examples: ${TTMLIR_TOOLCHAIN_DIR}/examples"
+echo ""
+echo " Run an example on:"
+echo " - Python simulator: ttlang-sim $TTMLIR_TOOLCHAIN_DIR/examples/tutorial/multicore_grid_auto.py"
+echo " - TT hardware: python $TTMLIR_TOOLCHAIN_DIR/examples/tutorial/multicore_grid_auto.py"

.github/containers/test-docker-smoke.sh

Lines changed: 2 additions & 2 deletions
@@ -49,7 +49,7 @@ if [ -e /dev/tenstorrent/0 ]; then
        --device=/dev/tenstorrent/0 \
        -v /dev/hugepages:/dev/hugepages \
        -v /dev/hugepages-1G:/dev/hugepages-1G \
-        tt-lang-user-ubuntu-22-04:latest python /opt/ttmlir-toolchain/examples/demo_one.py
+        tt-lang-dist-ubuntu-22-04:latest python /opt/ttmlir-toolchain/examples/tutorial/multicore_grid_auto.py
 else
     echo "Test 3: SKIPPED (no hardware)"
 fi
@@ -64,7 +64,7 @@ echo ""
 # Test 5: Examples in /root
 echo "Test 5: Examples in /root"
 run_test "Examples in /root" "Examples missing" \
-    sudo docker run --rm tt-lang-user-ubuntu-22-04:latest ls /root/examples/demo_one.py
+    sudo docker run --rm tt-lang-dist-ubuntu-22-04:latest ls /root/examples/tutorial/multicore_grid_auto.py
 echo ""
 
 echo "=== Smoke Test Complete ==="

.github/workflows/call-build-ttmlir-toolchain.yml

Lines changed: 12 additions & 6 deletions
@@ -56,24 +56,31 @@ jobs:
   migrate-cache:
     name: Migrate toolchain cache to LLVM-only
     runs-on: ubuntu-latest
-    if: true # TODO: Set to false or remove after running once
+    if: true # TODO: Delete old cache first, then run once, then set to false
+
+    env:
+      TTMLIR_TOOLCHAIN_DIR: ${{ github.workspace }}/ttmlir-toolchain
+
     steps:
-      - name: Checkout
+      - name: Checkout current branch
         uses: actions/checkout@v4
 
       - name: Determine tt-mlir commit
        id: commit
        run: |
          COMMIT=$(cat third-party/tt-mlir.commit | tr -d '[:space:]')
          echo "commit=$COMMIT" >> $GITHUB_OUTPUT
+          echo "Using commit: $COMMIT"
 
       - name: Restore full toolchain cache (from main branch format)
        id: restore-cache
        uses: actions/cache/restore@v4
        with:
-          path: ttmlir-toolchain
+          # Main branch saves with absolute path, so restore with same
+          path: ${{ env.TTMLIR_TOOLCHAIN_DIR }}
          key: Linux-ttlang-ttmlir-toolchain-${{ steps.commit.outputs.commit }}-v2
          restore-keys: |
+            Linux-ttlang-ttmlir-toolchain-${{ steps.commit.outputs.commit }}
            Linux-ttlang-ttmlir-toolchain-
 
       - name: Check cache was restored
@@ -85,9 +92,6 @@ jobs:
          fi
          echo "Cache restored from: ${{ steps.restore-cache.outputs.cache-matched-key }}"
 
-      - name: Cleanup toolchain (create stubs)
-        run: .github/containers/cleanup-toolchain.sh ttmlir-toolchain
-
       - name: Save as LLVM-only cache
        uses: actions/cache/save@v4
        with:
@@ -97,6 +101,8 @@ jobs:
   # First job: Check if caches exist (runs on standard runner)
   check-cache:
     name: Check toolchain cache
+    needs: migrate-cache
+    if: always() # Run even if migrate-cache is skipped
     runs-on: ubuntu-latest
     timeout-minutes: 10
 
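Note: read together, these hunks have migrate-cache restore the toolchain cache that main saved under its absolute workspace path, then re-save it under the LLVM-only key, with the cleanup-stubs step dropped; check-cache now declares needs: migrate-cache, and its if: always() guard keeps the cache check running even once the one-time migration job is disabled or skipped.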

README.md

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ export TT_METAL_HOME=/workspace/tt-mlir/third_party/tt-metal/src/tt-metal
 export TT_METAL_DEVICE_PROFILER=1
 export TT_METAL_PROFILER_MID_RUN_DUMP=1
 export TTLANG_AUTO_PROFILE=1
-python examples/demo_one.py
+python examples/tutorial/multicore_grid_auto.py
 ```
 
 See [docs/auto-profiler-examples/](https://github.com/tenstorrent/tt-lang/tree/main/docs/auto-profiler-examples) for sample profile outputs showing the per-line cycle breakdown format.

examples/broadcast.py

Lines changed: 2 additions & 2 deletions
@@ -5,9 +5,9 @@
 import torch
 
 
-def from_torch(t):
+def from_torch(tensor: ttnn.Tensor):
     return ttnn.from_torch(
-        t,
+        tensor,
         dtype=ttnn.bfloat16,
         layout=ttnn.TILE_LAYOUT,
         device=device,

examples/general_broadcast.py

Lines changed: 2 additions & 2 deletions
@@ -5,9 +5,9 @@
 import torch
 
 
-def from_torch(t):
+def from_torch(tensor: ttnn.Tensor):
     return ttnn.from_torch(
-        t,
+        tensor,
         dtype=ttnn.bfloat16,
         layout=ttnn.TILE_LAYOUT,
         device=device,

examples/tutorial/multicore.py

Lines changed: 163 additions & 0 deletions
@@ -0,0 +1,163 @@
+# SPDX-FileCopyrightText: (c) 2026 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+import ttnn
+import torch
+
+
+def from_torch(tensor: ttnn.Tensor):
+    return ttnn.from_torch(
+        tensor,
+        dtype=ttnn.bfloat16,
+        layout=ttnn.TILE_LAYOUT,
+        device=device,
+        memory_config=ttnn.DRAM_MEMORY_CONFIG,
+    )
+
+
+import ttl
+
+TILE_SIZE = 32
+GRANULARITY = 4
+
+
+@ttl.kernel(grid=(4, 4))
+def __demo_kernel(a: ttnn.Tensor, b: ttnn.Tensor, c: ttnn.Tensor, y: ttnn.Tensor):
+    row_tiles_per_block = GRANULARITY
+    col_tiles_per_block = GRANULARITY
+
+    grid_cols, grid_rows = ttl.grid_size(dims=2)
+
+    rows_per_core = a.shape[0] // TILE_SIZE // row_tiles_per_block // grid_rows
+    cols_per_core = a.shape[1] // TILE_SIZE // col_tiles_per_block // grid_rows
+
+    a_cb = ttl.make_circular_buffer_like(
+        a, shape=(row_tiles_per_block, col_tiles_per_block), buffer_factor=2
+    )
+    b_cb = ttl.make_circular_buffer_like(
+        b, shape=(row_tiles_per_block, col_tiles_per_block), buffer_factor=2
+    )
+    c_cb = ttl.make_circular_buffer_like(
+        c, shape=(row_tiles_per_block, col_tiles_per_block), buffer_factor=2
+    )
+    y_cb = ttl.make_circular_buffer_like(
+        y, shape=(row_tiles_per_block, col_tiles_per_block), buffer_factor=2
+    )
+
+    @ttl.compute()
+    def demo_compute():
+        for _ in range(rows_per_core):
+            for _ in range(cols_per_core):
+                with (
+                    a_cb.wait() as a_blk,
+                    b_cb.wait() as b_blk,
+                    c_cb.wait() as c_blk,
+                    y_cb.reserve() as y_blk,
+                ):
+                    y_blk.store(a_blk * b_blk + c_blk)
+
+    @ttl.datamovement()
+    def demo_read():
+        core_col, core_row = ttl.core(dims=2)
+
+        for local_row in range(rows_per_core):
+            row = core_row * rows_per_core + local_row
+            start_row_tile = row * row_tiles_per_block
+            end_row_tile = (row + 1) * row_tiles_per_block
+
+            for local_col in range(cols_per_core):
+                col = core_col * cols_per_core + local_col
+                start_col_tile = col * col_tiles_per_block
+                end_col_tile = (col + 1) * col_tiles_per_block
+
+                with (
+                    a_cb.reserve() as a_blk,
+                    b_cb.reserve() as b_blk,
+                    c_cb.reserve() as c_blk,
+                ):
+                    tx_a = ttl.copy(
+                        a[
+                            start_row_tile:end_row_tile,
+                            start_col_tile:end_col_tile,
+                        ],
+                        a_blk,
+                    )
+                    tx_b = ttl.copy(
+                        b[
+                            start_row_tile:end_row_tile,
+                            start_col_tile:end_col_tile,
+                        ],
+                        b_blk,
+                    )
+                    tx_c = ttl.copy(
+                        c[
+                            start_row_tile:end_row_tile,
+                            start_col_tile:end_col_tile,
+                        ],
+                        c_blk,
+                    )
+
+                    tx_a.wait()
+                    tx_b.wait()
+                    tx_c.wait()
+
+    @ttl.datamovement()
+    def demo_write():
+        core_col, core_row = ttl.core(dims=2)
+
+        for local_row in range(rows_per_core):
+            row = core_row * rows_per_core + local_row
+            start_row_tile = row * row_tiles_per_block
+            end_row_tile = (row + 1) * row_tiles_per_block
+
+            for local_col in range(cols_per_core):
+                col = core_col * cols_per_core + local_col
+                start_col_tile = col * col_tiles_per_block
+                end_col_tile = (col + 1) * col_tiles_per_block
+
+                with y_cb.wait() as y_blk:
+                    tx = ttl.copy(
+                        y_blk,
+                        y[
+                            start_row_tile:end_row_tile,
+                            start_col_tile:end_col_tile,
+                        ],
+                    )
+                    tx.wait()
+
+
+def demo_kernel(a: ttnn.Tensor, b: ttnn.Tensor, c: ttnn.Tensor):
+    y = from_torch(torch.zeros((a.shape[0], a.shape[1]), dtype=torch.bfloat16))
+    __demo_kernel(a, b, c, y)
+    return y
+
+
+torch.manual_seed(42)
+
+device = ttnn.open_device(device_id=0)
+
+try:
+    shape = (2048, 2048)
+
+    a = torch.rand(shape, dtype=torch.bfloat16)
+    b = torch.rand(shape, dtype=torch.bfloat16)
+    c = torch.rand(shape, dtype=torch.bfloat16)
+    d = torch.rand(shape, dtype=torch.bfloat16)
+
+    expected_y = (a * b + c) * d
+
+    a = from_torch(a)
+    b = from_torch(b)
+    c = from_torch(c)
+    d = from_torch(d)
+
+    y = ttnn.multiply(demo_kernel(a, b, c), d)
+
+    y = ttnn.to_torch(y)
+    print(y)
+    print(expected_y)
+
+    assert torch.allclose(y, expected_y, rtol=1e-2, atol=1e-2), "Tensors do not match"
+
+finally:
+    ttnn.close_device(device)
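Note on the tiling arithmetic above: with shape = (2048, 2048) and TILE_SIZE = 32, each tensor spans 64 x 64 tiles; GRANULARITY = 4 groups these into 16 x 16 blocks, and the 4 x 4 grid leaves rows_per_core = cols_per_core = 16 / 4 = 4, so every core reads, computes, and writes 16 blocks of 4 x 4 tiles. (As committed, cols_per_core divides by grid_rows rather than grid_cols; on the square 4 x 4 grid used here the two coincide.)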
Lines changed: 4 additions & 4 deletions
@@ -5,9 +5,9 @@
 import torch
 
 
-def from_torch(t):
+def from_torch(tensor: ttnn.Tensor):
     return ttnn.from_torch(
-        t,
+        tensor,
         dtype=ttnn.bfloat16,
         layout=ttnn.TILE_LAYOUT,
         device=device,
@@ -22,7 +22,7 @@ def from_torch(t):
 
 
 @ttl.kernel(grid="auto")
-def __demo_kernel(a, b, c, y):
+def __demo_kernel(a: ttnn.Tensor, b: ttnn.Tensor, c: ttnn.Tensor, y: ttnn.Tensor):
     row_tiles_per_block = GRANULARITY
     col_tiles_per_block = GRANULARITY
 
@@ -139,7 +139,7 @@ def demo_write():
                    tx.wait()
 
 
-def demo_kernel(a, b, c):
+def demo_kernel(a: ttnn.Tensor, b: ttnn.Tensor, c: ttnn.Tensor):
     y = from_torch(torch.zeros((a.shape[0], a.shape[1]), dtype=torch.bfloat16))
     __demo_kernel(a, b, c, y)
     return y
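Note: the diff header for this file omits its name; given the @ttl.kernel(grid="auto") decorator it is presumably examples/tutorial/multicore_grid_auto.py, the example referenced throughout this commit. It appears to differ from multicore.py above chiefly in leaving grid selection to tt-lang rather than fixing a 4 x 4 grid; the hunks here add the same type annotations to its signatures.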
