From 2534cbd962af62c22175d4cd57e943d7ec129c9d Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Thu, 21 May 2026 23:37:34 -0600 Subject: [PATCH] tiling_exploration: add NPU2 support to per_tile and tile_group Both designs now take a --device {npu,npu2} arg and select NPU1Col1() or NPU2() accordingly; Makefiles thread devicename through to the generator. Adds run_strix_makefile.lit for each, mirroring the pattern used by sibling examples (passthrough_pykernel, matrix_scalar_add, etc.). Addresses the tiling_exploration item in #2092. Co-Authored-By: Claude Opus 4 (1M context) --- .../tiling_exploration/per_tile/Makefile | 3 ++- .../tiling_exploration/per_tile/per_tile.py | 26 ++++++++++++++++--- .../per_tile/run_strix_makefile.lit | 11 ++++++++ .../tiling_exploration/tile_group/Makefile | 3 ++- .../tile_group/run_strix_makefile.lit | 11 ++++++++ .../tile_group/tile_group.py | 26 ++++++++++++++++--- 6 files changed, 72 insertions(+), 8 deletions(-) create mode 100644 programming_examples/basic/tiling_exploration/per_tile/run_strix_makefile.lit create mode 100644 programming_examples/basic/tiling_exploration/tile_group/run_strix_makefile.lit diff --git a/programming_examples/basic/tiling_exploration/per_tile/Makefile b/programming_examples/basic/tiling_exploration/per_tile/Makefile index 1f3c26361c7..fc343079542 100644 --- a/programming_examples/basic/tiling_exploration/per_tile/Makefile +++ b/programming_examples/basic/tiling_exploration/per_tile/Makefile @@ -12,6 +12,7 @@ srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) include ${srcdir}/../../../makefile-common +devicename ?= $(if $(filter 1,$(NPU2)),npu2,npu) tensor_height = 8 tensor_width = 8 tile_height = 2 @@ -25,7 +26,7 @@ all: build/final_${data_str}.xclbin build/aie_${data_str}.mlir: ${srcdir}/${aie_py_src} mkdir -p ${@D} - python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} > $@ + python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} --device ${devicename} > $@ build/final_${data_str}.xclbin: build/aie_${data_str}.mlir mkdir -p ${@D} diff --git a/programming_examples/basic/tiling_exploration/per_tile/per_tile.py b/programming_examples/basic/tiling_exploration/per_tile/per_tile.py index 22334b4d789..b33ec38bc5e 100644 --- a/programming_examples/basic/tiling_exploration/per_tile/per_tile.py +++ b/programming_examples/basic/tiling_exploration/per_tile/per_tile.py @@ -9,13 +9,18 @@ import numpy as np from aie.iron import Buffer, ObjectFifo, Program, Runtime, Worker -from aie.iron.device import NPU1Col1 +from aie.iron.device import NPU1Col1, NPU2 from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D def generate_module( - tensor_height, tensor_width, tile_height, tile_width, generate_access_map=False + tensor_height, + tensor_width, + tile_height, + tile_width, + generate_access_map=False, + device="npu", ): tensor_size = tensor_height * tensor_width tile_size = tile_height * tile_width @@ -57,8 +62,15 @@ def access_order(of_out, counter_buf): for t in tiler: rt.drain(of_out.cons(), tensor_out, t, wait=True) + if device == "npu": + dev = NPU1Col1() + elif device == "npu2": + dev = NPU2() + else: + raise ValueError(f"[ERROR] Device name {device} is unknown") + # Create the program from the device type and runtime - my_program = Program(NPU1Col1(), rt) + my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module return my_program.resolve_program() @@ -71,6 +83,7 @@ def main(opts): opts.tile_height, opts.tile_width, opts.generate_access_map, + opts.device, ) if not opts.generate_access_map: print(module) @@ -87,6 +100,13 @@ def get_arg_parser(): action="store_true", help="Produce a file showing data access order", ) + p.add_argument( + "-d", + "--device", + default="npu", + choices=["npu", "npu2"], + help="Target NPU device", + ) return p diff --git a/programming_examples/basic/tiling_exploration/per_tile/run_strix_makefile.lit b/programming_examples/basic/tiling_exploration/per_tile/run_strix_makefile.lit new file mode 100644 index 00000000000..65b78f3146f --- /dev/null +++ b/programming_examples/basic/tiling_exploration/per_tile/run_strix_makefile.lit @@ -0,0 +1,11 @@ +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai_npu2, peano +// +// RUN: mkdir -p test_stx +// RUN: cd test_stx +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile devicename=npu2 +// RUN: %run_on_npu2% make -f %S/Makefile run_py devicename=npu2 +// CHECK: Running... diff --git a/programming_examples/basic/tiling_exploration/tile_group/Makefile b/programming_examples/basic/tiling_exploration/tile_group/Makefile index 157b373eaba..b336b1d0f58 100644 --- a/programming_examples/basic/tiling_exploration/tile_group/Makefile +++ b/programming_examples/basic/tiling_exploration/tile_group/Makefile @@ -12,6 +12,7 @@ srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) include ${srcdir}/../../../makefile-common +devicename ?= $(if $(filter 1,$(NPU2)),npu2,npu) tensor_height = 8 tensor_width = 8 tile_height = 2 @@ -25,7 +26,7 @@ all: build/final_${data_str}.xclbin build/aie_${data_str}.mlir: ${srcdir}/${aie_py_src} mkdir -p ${@D} - python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} > $@ + python3 $< --tensor-height ${tensor_height} --tensor-width ${tensor_width} --tile-height ${tile_height} --tile-width ${tile_width} --device ${devicename} > $@ build/final_${data_str}.xclbin: build/aie_${data_str}.mlir mkdir -p ${@D} diff --git a/programming_examples/basic/tiling_exploration/tile_group/run_strix_makefile.lit b/programming_examples/basic/tiling_exploration/tile_group/run_strix_makefile.lit new file mode 100644 index 00000000000..65b78f3146f --- /dev/null +++ b/programming_examples/basic/tiling_exploration/tile_group/run_strix_makefile.lit @@ -0,0 +1,11 @@ +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai_npu2, peano +// +// RUN: mkdir -p test_stx +// RUN: cd test_stx +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile devicename=npu2 +// RUN: %run_on_npu2% make -f %S/Makefile run_py devicename=npu2 +// CHECK: Running... diff --git a/programming_examples/basic/tiling_exploration/tile_group/tile_group.py b/programming_examples/basic/tiling_exploration/tile_group/tile_group.py index 74d17ff3f05..a0a09108f6a 100644 --- a/programming_examples/basic/tiling_exploration/tile_group/tile_group.py +++ b/programming_examples/basic/tiling_exploration/tile_group/tile_group.py @@ -9,7 +9,7 @@ import numpy as np from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.device import NPU1Col1 +from aie.iron.device import NPU1Col1, NPU2 from aie.helpers.taplib import TensorTiler2D from aie.iron.controlflow import range_ import aie.extras.dialects.arith as arith @@ -17,7 +17,12 @@ def generate_module( - tensor_height, tensor_width, tile_height, tile_width, generate_access_map=False + tensor_height, + tensor_width, + tile_height, + tile_width, + generate_access_map=False, + device="npu", ): # define types dtype = np.int32 @@ -57,7 +62,14 @@ def access_order(of_out): rt.start(worker) rt.drain(of_out.cons(), tensor_out, t, wait=True) - my_program = Program(NPU1Col1(), rt) + if device == "npu": + dev = NPU1Col1() + elif device == "npu2": + dev = NPU2() + else: + raise ValueError(f"[ERROR] Device name {device} is unknown") + + my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module return my_program.resolve_program() @@ -70,6 +82,7 @@ def main(opts): opts.tile_height, opts.tile_width, opts.generate_access_map, + opts.device, ) if not opts.generate_access_map: print(module) @@ -86,6 +99,13 @@ def get_arg_parser(): action="store_true", help="Produce a file showing data access order", ) + p.add_argument( + "-d", + "--device", + default="npu", + choices=["npu", "npu2"], + help="Target NPU device", + ) return p