Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion amd_triton_npu/backend/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,7 +1154,11 @@ def _generate_elf_launcher(constants, signature, kernel_name):


def compile_module(
launcher_src, kernel_placeholder_name, output_format="xclbin", actual_sizes=None
launcher_src,
kernel_placeholder_name,
output_format="xclbin",
Comment thread
erwei-xilinx marked this conversation as resolved.
actual_sizes=None,
on_cache_resolved=None,
):
py_version = sys.version_info
if platform.system() == "Windows":
Expand Down Expand Up @@ -1216,6 +1220,8 @@ def launch(
key = hashlib.md5(key_data.encode("utf-8")).hexdigest()

cache = get_cache_manager(key)
if on_cache_resolved is not None:
on_cache_resolved(cache.cache_dir)
name = "__npu_dispatch"
filename = f"{name}.so"
cache_path = cache.get_file(filename)
Expand Down Expand Up @@ -1443,17 +1449,70 @@ def _get_constexpr(name):

# Later KERNEL_NAME_PLACEHOLDER will be used to assign the kernel name
# in the following launch function.
self.npu_cache_dir = None

def _on_cache_resolved(cache_dir):
self.npu_cache_dir = cache_dir

self.launch = compile_module(
launcher_src,
kernel_placeholder_name,
self.output_format,
actual_sizes=actual_sizes,
on_cache_resolved=_on_cache_resolved,
)

def __call__(self, gridX, gridY, gridZ, stream, function, *args):
    """Forward a launch request to the callable stored in ``self.launch``.

    The grid dimensions, stream, function handle and any extra kernel
    arguments are passed through positionally, in the same order they
    were received. Whatever ``self.launch`` returns is discarded, so this
    method always returns ``None``.
    """
    # Collect everything first so the forwarding call is a single unpack.
    forwarded = (gridX, gridY, gridZ, stream, function, *args)
    self.launch(*forwarded)


def get_npu_cache_dir(compiled_kernel):
    """Look up the NPU binary cache directory recorded for a compiled kernel.

    Hardware-specific NPU artifacts live in their own cache directory,
    separate from Triton's main compiler cache. Which files end up there
    depends on the output format:

    * xclbin output: ``aie.xclbin``, ``insts.bin``, and
      ``__npu_dispatch.so``
    * elf output: ``aie.elf``, ``elf_kernel_name.txt``, and
      ``__npu_dispatch.so``

    NPU binary compilation is deferred to launch time, so the directory is
    only known (and populated) once the kernel has been invoked at least
    once; before that the launcher's ``npu_cache_dir`` is still ``None``.

    Args:
        compiled_kernel: A triton.compiler.compiler.CompiledKernel instance
            compiled for the NPU backend. Its ``_run`` attribute is
            inspected to find the launcher.

    Returns:
        str | None: The launcher's ``npu_cache_dir`` value, i.e. the path
        to the NPU binary cache directory, or None when the kernel has not
        been launched yet or exposes no launcher through ``_run``.

    Raises:
        TypeError: If ``compiled_kernel._run`` is set but is not an
            ``NPULauncher`` instance.

    Example::

        compiled_kernel = my_kernel[grid](a, b, c, N, BLOCK_SIZE_N=1024)
        npu_cache = get_npu_cache_dir(compiled_kernel)
        print(f"NPU artifacts at: {npu_cache}")
    """
    run_handle = getattr(compiled_kernel, "_run", None)

    # A missing or unset ``_run`` means there is no launcher to query.
    if run_handle is None:
        return None

    if isinstance(run_handle, NPULauncher):
        return run_handle.npu_cache_dir

    # Any other launcher type means a different backend built this kernel.
    found = type(run_handle).__name__
    raise TypeError(
        f"Expected an NPULauncher but got {found}. "
        "Is the NPU backend active?"
    )


class NPUUtils(object):
def __new__(cls):
if not hasattr(cls, "instance"):
Expand Down
Loading