-
Notifications
You must be signed in to change notification settings - Fork 183
Expand file tree
/
Copy pathpassthrough_kernel.py
More file actions
99 lines (83 loc) · 2.96 KB
/
passthrough_kernel.py
File metadata and controls
99 lines (83 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# passthrough_kernel/passthrough_kernel.py -*- Python -*-
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
import numpy as np
import argparse
import sys
from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker
from aie.iron.device import NPU1Col1, NPU2
def my_passthrough_kernel(dev, in1_size, out_size, trace_size):
    """Describe the single-worker passthrough design and return the resolved program.

    One worker acquires an output object and an input object from two
    ObjectFifos, hands both to the external ``passThroughLine`` kernel
    (object file ``passThrough.cc.o``) and releases them again. The runtime
    sequence fills the input fifo from the first host buffer and drains the
    output fifo into the second one.

    Args:
        dev: Device object forwarded to ``Program``.
        in1_size: Input size in bytes; divided by the element byte width
            (``np.uint8``) to obtain the number of elements per buffer.
        out_size: Accepted for interface compatibility; this function does
            not read it (all buffer types are derived from ``in1_size``).
        trace_size: Trace buffer size. Worker tracing is switched on only
            when it is greater than zero; the value itself is always passed
            to ``Runtime.enable_trace``.

    Returns:
        Whatever ``Program(dev, rt).resolve_program()`` returns (the
        generated MLIR module, per the original source comment).
    """
    in1_dtype = np.uint8
    out_dtype = np.uint8  # kept next to in1_dtype; not referenced below

    # Worker-side tracing flag: 1 for a positive trace buffer size, else 0
    if trace_size > 0:
        enable_trace = 1
    else:
        enable_trace = 0

    # Tensor types: 1-D buffers of `num_elems` elements
    elem_bytes = in1_dtype(0).nbytes
    num_elems = in1_size // elem_bytes
    line_type = np.ndarray[(num_elems,), np.dtype[in1_dtype]]
    vector_type = np.ndarray[(num_elems,), np.dtype[in1_dtype]]

    # Dataflow: one fifo towards the worker, one fifo back out
    fifo_in = ObjectFifo(line_type, name="in")
    fifo_out = ObjectFifo(line_type, name="out")

    # Pre-compiled external kernel: (input line, output line, element count)
    kernel_arg_types = [line_type, line_type, np.int32]
    line_kernel = Kernel("passThroughLine", "passThrough.cc.o", kernel_arg_types)

    def core_fn(consumer, producer, kernel):
        """Move one object from the input fifo to the output fifo via the kernel."""
        # Same ordering as before: output slot first, then input, then release
        # input before output.
        dst_elem = producer.acquire(1)
        src_elem = consumer.acquire(1)
        kernel(src_elem, dst_elem, num_elems)
        consumer.release(1)
        producer.release(1)

    # Bind the task to a worker together with its fifo endpoints and kernel
    worker_args = [fifo_in.cons(), fifo_out.prod(), line_kernel]
    worker = Worker(core_fn, worker_args, trace=enable_trace)

    # Host-side sequence moving data to and from the AIE array
    runtime = Runtime()
    with runtime.sequence(vector_type, vector_type, vector_type) as (src, dst, _):
        runtime.enable_trace(trace_size)
        runtime.start(worker)
        runtime.fill(fifo_in.prod(), src)
        runtime.drain(fifo_out.cons(), dst, wait=True)

    # Place components on the device and resolve the program
    program = Program(dev, runtime)
    return program.resolve_program()
# Command-line driver: parse the options, select the target device, validate
# the input buffer size and print the generated program to stdout.
p = argparse.ArgumentParser()
p.add_argument("-d", "--dev", required=True, dest="device", help="AIE Device")
p.add_argument(
    "-i1s", "--in1_size", required=True, dest="in1_size", help="Input 1 size"
)
p.add_argument("-os", "--out_size", required=True, dest="out_size", help="Output size")
p.add_argument(
    "-t",
    "--trace_size",
    required=False,
    dest="trace_size",
    default=0,
    help="Trace buffer size",
)
opts = p.parse_args(sys.argv[1:])

# Map the device name given on the command line to a device object.
if opts.device == "npu":
    dev = NPU1Col1()
elif opts.device == "npu2":
    dev = NPU2()
else:
    raise ValueError("[ERROR] Device name {} is unknown".format(opts.device))

in1_size = int(opts.in1_size)
if in1_size % 64 != 0 or in1_size < 512:
    # stdout carries the generated program (see the final print), so the
    # diagnostic travels with the exception instead of being printed there.
    raise ValueError(
        f"In1 buffer size ({in1_size}) must be a multiple of 64 "
        "and greater than or equal to 512"
    )
out_size = int(opts.out_size)
trace_size = int(opts.trace_size)

# Emit the resolved program on stdout.
print(my_passthrough_kernel(dev, in1_size, out_size, trace_size))