-
Notifications
You must be signed in to change notification settings - Fork 68
[Bug] Directly reading or writing an element of the input matrix gives a dataflow check failure #566
Copy link
Copy link
Open
Description
Describe the bug
When I run https://github.com/cornell-zhang/allo/blob/main/tests/dataflow/test_weight_stationary_gemm.py for hardware emulation, it fails with a dataflow check failure. Is this expected, or is it a bug? Will dataflow programming support directly accessing an element of the input matrix inside a kernel?
Reproduction
import allo
from allo.ir.types import float32, Stream
import allo.dataflow as df
import allo.backend.hls as hls
import numpy as np
# Matrix dimensions: A is M x K, B is K x N, and C is M x N.
M, N, K = 4, 4, 4
# Kernel grid shape: K rows by N + 2 columns. Column 0 feeds A into the array,
# column N + 1 drains it, and columns 1..N do the multiply-accumulate work.
P0, P1 = K, N + 2
# Weight-stationary GEMM systolic array computing C = A @ B on a P0 x P1 grid
# of kernel instances.
#
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation -- confirm against the original test file.
@df.region()
def top(A: float32[M, K], B: float32[K, N], C: float32[M, N]):
    # One stream per grid position. fifo_A carries elements of A along the
    # j axis (each kernel puts to [i, j + 1]); fifo_B carries partial sums
    # along the i axis (each kernel puts to [i + 1, j]).
    # The 4 is presumably the FIFO depth -- TODO confirm.
    fifo_A: Stream[float32, 4][P0, P1]
    fifo_B: Stream[float32, 4][P0, P1]

    @df.kernel(mapping=[P0, P1], args=[A, B, C])
    def gemm(local_A: float32[M, K], local_B: float32[K, N], local_C: float32[M, N]):
        # Weight stationary GEMM systolic array
        # B is the matrix that contains the stationary weights
        i, j = df.get_pid()
        # peripheral kernels
        # Feeder column: push column i of A, one element per m, into row i.
        with allo.meta_if(j == 0):
            for m in range(M):
                fifo_A[i, j + 1].put(local_A[m, i])
        # drain
        # Consume the values forwarded by the last compute column (j == N).
        with allo.meta_elif(j == N + 1):
            for m in range(M):
                fifo_A[i, j].get()
        # compute
        # There are three cases: i == 0, i == K - 1, and the rest
        with allo.meta_elif(i == 0):
            # Does not take partial sum from the previous PE
            # Compute column j maps to column j - 1 of B (column 0 is the feeder).
            b: float32 = local_B[i, j - 1]
            for m in range(M):
                a = fifo_A[i, j].get()
                fifo_A[i, j + 1].put(a)
                fifo_B[i + 1, j].put(a * b)
        with allo.meta_elif(i == K - 1):
            # Does not keep passing the partial sum to the next PE
            # Concludes the computation and writes to the output
            b: float32 = local_B[i, j - 1]
            for m in range(M):
                partial_sum = fifo_B[i, j].get()
                a = fifo_A[i, j].get()
                local_C[m, j - 1] = partial_sum + a * b
                fifo_A[i, j + 1].put(a)
        with allo.meta_else():
            # Continues the computation
            b: float32 = local_B[i, j - 1]
            for m in range(M):
                partial_sum = fifo_B[i, j].get()
                a = fifo_A[i, j].get()
                fifo_A[i, j + 1].put(a)
                fifo_B[i + 1, j].put(partial_sum + a * b)
def test_systolic():
    """Build `top`, run it on random inputs, and compare C against np.dot(A, B).

    The design is first built for the "simulator" target and checked. It is
    then built with the default target and, only when `vitis_hls` is reported
    as available, run and checked again.

    Raises:
        AssertionError: if C differs from np.dot(A, B) by more than atol=1e-5.
    """
    A = np.random.rand(M, K).astype(np.float32)
    B = np.random.rand(K, N).astype(np.float32)
    C = np.zeros((M, N), dtype=np.float32)
    llvm_mod = df.build(top, target="simulator")
    llvm_mod(A, B, C)
    np.testing.assert_allclose(C, np.dot(A, B), atol=1e-5)
    print("Dataflow Simulator Passed!")
    mod = df.build(top)
    # NOTE(review): the statements under this `if` were re-nested from a paste
    # that had lost its indentation -- confirm against the original test file.
    if hls.is_available("vitis_hls"):
        # Start from a fresh zeroed output so the simulator result cannot leak
        # into this check.
        C = np.zeros((M, N), dtype=np.float32)
        mod(A, B, C)
        np.testing.assert_allclose(C, np.dot(A, B), atol=1e-5)
        print("Passed!")
if __name__ == "__main__":
    test_systolic()
Buggy output
Expected behavior
Is accessing an element of the input matrix directly allowed inside a kernel in dataflow programming?
Additional context
No response
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels