Currently, users don't have access to some of the new loops created by reuse_at. For example, consider this 2D convolution with a line buffer and a window buffer:
A = hcl.placeholder((10, 10))
r = hcl.reduce_axis(0, 3, name="r")
c = hcl.reduce_axis(0, 3, name="c")
B = hcl.compute((8, 8), lambda y, x: hcl.sum(A[y + r, x + c], axis=[r, c]))
s = hcl.create_schedule([A, B])
LB = s.reuse_at(A, s[B], B.axis[0])
WB = s.reuse_at(LB, s[B], B.axis[1])
The v9 loop that shifts the window buffer (see the generated HLS code below) is not directly accessible in the B.axis list.
void top(
int32_t v0[10][10],
int32_t v1[8][8]
) {
int32_t tensor_1_reuse_0[3][10]; // L472
int32_t tensor_1_reuse_1[3][3]; // L472
l_tensor_1_y: for (int y = 0; y < 10; y++) { // L472
l_x: for (int x = 0; x < 10; x++) { // L472
int32_t v6 = tensor_1_reuse_0[1][x]; // L472
tensor_1_reuse_0[0][x] = v6; // L472
int32_t v7 = tensor_1_reuse_0[2][x]; // L472
tensor_1_reuse_0[1][x] = v7; // L472
int32_t v8 = v0[y][x]; // L472
tensor_1_reuse_0[2][x] = v8; // L472
if ((y - 2) >= 0) { // L472
// THIS LOOP IS NOT DIRECTLY ACCESSIBLE
for (int v9 = 0; v9 < 3; v9++) { // L256
int32_t v10 = tensor_1_reuse_1[v9][1]; // L256
tensor_1_reuse_1[v9][0] = v10; // L256
int32_t v11 = tensor_1_reuse_1[v9][2]; // L256
tensor_1_reuse_1[v9][1] = v11; // L256
int32_t v12 = tensor_1_reuse_0[v9][x]; // L256
tensor_1_reuse_1[v9][2] = v12; // L256
}
if ((x - 2) >= 0) { // L256
int32_t sum; // L256
sum = 0; // L472
l_r: for (int r = 0; r < 3; r++) { // L256
l_c: for (int c = 0; c < 3; c++) { // L256
if (1) { // L472
int32_t v16 = tensor_1_reuse_1[r][c]; // L6
int32_t v17 = sum; // L256
ap_int<33> v18 = v16; // L472
ap_int<33> v19 = v17; // L472
ap_int<33> v20 = v18 + v19; // L6
int32_t v21 = v20; // L472
sum = v21; // L472
}
}
}
int32_t v22 = sum; // L256
v1[(y - 2)][(x - 2)] = v22; // L472
}
}
}
}
}
I think the axis-list representation of a loop nest makes sense for perfectly nested loops, but I'm not sure it does for imperfect loop nests. Here, loop v9 and loop r are at the same level, both nested in loop x.
Currently, users don't have access to some of the new loops created by
`reuse_at`. For example, in this 2D convolution with a line buffer and a window buffer, the
`v9` loop that shifts the window buffer (see below for generated HLS code) is not directly accessible in the `B.axis` list. I think the axis-list representation of a loop nest makes sense for perfectly nested loops, but I'm not sure it does for imperfect loop nests. Loop
`v9` and loop `r` are at the same level, both nested in loop `x`.