Skip to content

Commit 9779a0d

Browse files
hunhoffe, claude, and jgmelber
authored
NPU2 support for tiling_exploration per_tile and tile_group examples (#3107)
Co-authored-by: Claude Opus 4 (1M context) <noreply@anthropic.com>
Co-authored-by: Joseph Melber <jgmelber@gmail.com>
1 parent 73567dd commit 9779a0d

6 files changed

Lines changed: 72 additions & 8 deletions

File tree

programming_examples/basic/tiling_exploration/per_tile/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
1212

1313
include ${srcdir}/../../../makefile-common
1414

15+
devicename ?= $(if $(filter 1,$(NPU2)),npu2,npu)
1516
tensor_height = 8
1617
tensor_width = 8
1718
tile_height = 2
@@ -25,7 +26,7 @@ all: build/final_${data_str}.xclbin
2526

2627
build/aie_${data_str}.mlir: ${srcdir}/${aie_py_src}
2728
mkdir -p ${@D}
28-
python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} > $@
29+
python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} --device ${devicename} > $@
2930

3031
build/final_${data_str}.xclbin: build/aie_${data_str}.mlir
3132
mkdir -p ${@D}

programming_examples/basic/tiling_exploration/per_tile/per_tile.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,18 @@
99
import numpy as np
1010

1111
from aie.iron import Buffer, ObjectFifo, Program, Runtime, Worker
12-
from aie.iron.device import NPU1Col1
12+
from aie.iron.device import NPU1Col1, NPU2
1313
from aie.iron.controlflow import range_
1414
from aie.helpers.taplib import TensorTiler2D
1515

1616

1717
def generate_module(
18-
tensor_height, tensor_width, tile_height, tile_width, generate_access_map=False
18+
tensor_height,
19+
tensor_width,
20+
tile_height,
21+
tile_width,
22+
generate_access_map=False,
23+
device="npu",
1924
):
2025
tensor_size = tensor_height * tensor_width
2126
tile_size = tile_height * tile_width
@@ -57,8 +62,15 @@ def access_order(of_out, counter_buf):
5762
for t in tiler:
5863
rt.drain(of_out.cons(), tensor_out, t, wait=True)
5964

65+
if device == "npu":
66+
dev = NPU1Col1()
67+
elif device == "npu2":
68+
dev = NPU2()
69+
else:
70+
raise ValueError(f"[ERROR] Device name {device} is unknown")
71+
6072
# Create the program from the device type and runtime
61-
my_program = Program(NPU1Col1(), rt)
73+
my_program = Program(dev, rt)
6274

6375
# Place components (assign them resources on the device) and generate an MLIR module
6476
return my_program.resolve_program()
@@ -71,6 +83,7 @@ def main(opts):
7183
opts.tile_height,
7284
opts.tile_width,
7385
opts.generate_access_map,
86+
opts.device,
7487
)
7588
if not opts.generate_access_map:
7689
print(module)
@@ -87,6 +100,13 @@ def get_arg_parser():
87100
action="store_true",
88101
help="Produce a file showing data access order",
89102
)
103+
p.add_argument(
104+
"-d",
105+
"--device",
106+
default="npu",
107+
choices=["npu", "npu2"],
108+
help="Target NPU device",
109+
)
90110
return p
91111

92112

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// (c) Copyright 2026 Advanced Micro Devices, Inc.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
//
4+
// REQUIRES: ryzen_ai_npu2, peano
5+
//
6+
// RUN: mkdir -p test_stx
7+
// RUN: cd test_stx
8+
// RUN: make -f %S/Makefile clean
9+
// RUN: make -f %S/Makefile devicename=npu2
10+
// RUN: %run_on_npu2% make -f %S/Makefile run_py devicename=npu2
11+
// CHECK: Running...

programming_examples/basic/tiling_exploration/tile_group/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
1212

1313
include ${srcdir}/../../../makefile-common
1414

15+
devicename ?= $(if $(filter 1,$(NPU2)),npu2,npu)
1516
tensor_height = 8
1617
tensor_width = 8
1718
tile_height = 2
@@ -25,7 +26,7 @@ all: build/final_${data_str}.xclbin
2526

2627
build/aie_${data_str}.mlir: ${srcdir}/${aie_py_src}
2728
mkdir -p ${@D}
28-
python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} > $@
29+
python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} --device ${devicename} > $@
2930

3031
build/final_${data_str}.xclbin: build/aie_${data_str}.mlir
3132
mkdir -p ${@D}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// (c) Copyright 2026 Advanced Micro Devices, Inc.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
//
4+
// REQUIRES: ryzen_ai_npu2, peano
5+
//
6+
// RUN: mkdir -p test_stx
7+
// RUN: cd test_stx
8+
// RUN: make -f %S/Makefile clean
9+
// RUN: make -f %S/Makefile devicename=npu2
10+
// RUN: %run_on_npu2% make -f %S/Makefile run_py devicename=npu2
11+
// CHECK: Running...

programming_examples/basic/tiling_exploration/tile_group/tile_group.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,20 @@
99
import numpy as np
1010

1111
from aie.iron import ObjectFifo, Program, Runtime, Worker
12-
from aie.iron.device import NPU1Col1
12+
from aie.iron.device import NPU1Col1, NPU2
1313
from aie.helpers.taplib import TensorTiler2D
1414
from aie.iron.controlflow import range_
1515
import aie.extras.dialects.arith as arith
1616
from aie.helpers.util import np_dtype_to_mlir_type
1717

1818

1919
def generate_module(
20-
tensor_height, tensor_width, tile_height, tile_width, generate_access_map=False
20+
tensor_height,
21+
tensor_width,
22+
tile_height,
23+
tile_width,
24+
generate_access_map=False,
25+
device="npu",
2126
):
2227
# define types
2328
dtype = np.int32
@@ -57,7 +62,14 @@ def access_order(of_out):
5762
rt.start(worker)
5863
rt.drain(of_out.cons(), tensor_out, t, wait=True)
5964

60-
my_program = Program(NPU1Col1(), rt)
65+
if device == "npu":
66+
dev = NPU1Col1()
67+
elif device == "npu2":
68+
dev = NPU2()
69+
else:
70+
raise ValueError(f"[ERROR] Device name {device} is unknown")
71+
72+
my_program = Program(dev, rt)
6173

6274
# Place components (assign them resources on the device) and generate an MLIR module
6375
return my_program.resolve_program()
@@ -70,6 +82,7 @@ def main(opts):
7082
opts.tile_height,
7183
opts.tile_width,
7284
opts.generate_access_map,
85+
opts.device,
7386
)
7487
if not opts.generate_access_map:
7588
print(module)
@@ -86,6 +99,13 @@ def get_arg_parser():
8699
action="store_true",
87100
help="Produce a file showing data access order",
88101
)
102+
p.add_argument(
103+
"-d",
104+
"--device",
105+
default="npu",
106+
choices=["npu", "npu2"],
107+
help="Target NPU device",
108+
)
89109
return p
90110

91111

0 commit comments

Comments
 (0)