Skip to content

Commit 0e12ba2

Browse files
author
Amir Kiamarzi amirhossein.kiamarz2@unibo.it
committed
hp-fmatmul fix
1 parent a5094d9 commit 0e12ba2

File tree

1 file changed

+14 -14 lines changed

1 file changed

+14 -14 lines changed

sw/spatzBenchmarks/hp-fmatmul/kernel/hp-fmatmul.c

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ void matmul_2xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
4545
while (p < p_end) {
4646
// Calculate the vl
4747
size_t gvl;
48-
asm volatile("vsetvli %[gvl], %[vl], e32, m8, ta, ma"
48+
asm volatile("vsetvli %[gvl], %[vl], e16, m8, ta, ma"
4949
: [gvl] "=r"(gvl)
5050
: [vl] "r"(p_end - p));
5151

@@ -56,7 +56,7 @@ void matmul_2xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
5656
const __fp16 *a_ = a + m * N;
5757
const __fp16 *a__ = a_;
5858

59-
asm volatile("vle32.v v16, (%0);" ::"r"(b_));
59+
asm volatile("vle16.v v16, (%0);" ::"r"(b_));
6060
const __fp16 *b__ = b_ + P;
6161

6262
__fp16 *c__ = c_ + m * P;
@@ -72,7 +72,7 @@ void matmul_2xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
7272
while (n < N) {
7373
a__ = a_ + ++n;
7474

75-
asm volatile("vle32.v v24, (%0);" ::"r"(b__));
75+
asm volatile("vle16.v v24, (%0);" ::"r"(b__));
7676
b__ += P;
7777

7878
if (n == 1) {
@@ -94,7 +94,7 @@ void matmul_2xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
9494
if (n == N)
9595
break;
9696

97-
asm volatile("vle32.v v16, (%0);" ::"r"(b__));
97+
asm volatile("vle16.v v16, (%0);" ::"r"(b__));
9898
b__ += P;
9999

100100
asm volatile("vfmacc.vf v0, %0, v24" ::"f"(t0));
@@ -105,10 +105,10 @@ void matmul_2xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
105105
}
106106

107107
asm volatile("vfmacc.vf v0, %0, v24" ::"f"(t0));
108-
asm volatile("vse32.v v0, (%0);" ::"r"(c__));
108+
asm volatile("vse16.v v0, (%0);" ::"r"(c__));
109109
c__ += P;
110110
asm volatile("vfmacc.vf v8, %0, v24" ::"f"(t1));
111-
asm volatile("vse32.v v8, (%0);" ::"r"(c__));
111+
asm volatile("vse16.v v8, (%0);" ::"r"(c__));
112112
}
113113

114114
p += gvl;
@@ -128,7 +128,7 @@ void matmul_4xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
128128
while (p < p_end) {
129129
// Calculate the vl
130130
size_t gvl;
131-
asm volatile("vsetvli %[gvl], %[vl], e32, m4, ta, ma"
131+
asm volatile("vsetvli %[gvl], %[vl], e16, m4, ta, ma"
132132
: [gvl] "=r"(gvl)
133133
: [vl] "r"(p_end - p));
134134

@@ -139,7 +139,7 @@ void matmul_4xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
139139
const __fp16 *a_ = a + m * N;
140140
const __fp16 *a__ = a_;
141141

142-
asm volatile("vle32.v v16, (%0);" ::"r"(b_));
142+
asm volatile("vle16.v v16, (%0);" ::"r"(b_));
143143
const __fp16 *b__ = b_ + P;
144144

145145
__fp16 *c__ = c_ + m * P;
@@ -157,7 +157,7 @@ void matmul_4xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
157157
unsigned int n = 0;
158158

159159
while (n < N) {
160-
asm volatile("vle32.v v20, (%0);" ::"r"(b__));
160+
asm volatile("vle16.v v20, (%0);" ::"r"(b__));
161161
b__ += P;
162162

163163
a__ = a_ + ++n;
@@ -193,7 +193,7 @@ void matmul_4xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
193193
if (n == N)
194194
break;
195195

196-
asm volatile("vle32.v v16, (%0);" ::"r"(b__));
196+
asm volatile("vle16.v v16, (%0);" ::"r"(b__));
197197
b__ += P;
198198

199199
asm volatile("vfmacc.vf v0, %0, v20" ::"f"(t0));
@@ -210,16 +210,16 @@ void matmul_4xVL(__fp16 *c, const __fp16 *a, const __fp16 *b,
210210
}
211211

212212
asm volatile("vfmacc.vf v0, %0, v20" ::"f"(t0));
213-
asm volatile("vse32.v v0, (%0);" ::"r"(c__));
213+
asm volatile("vse16.v v0, (%0);" ::"r"(c__));
214214
c__ += P;
215215
asm volatile("vfmacc.vf v4, %0, v20" ::"f"(t1));
216-
asm volatile("vse32.v v4, (%0);" ::"r"(c__));
216+
asm volatile("vse16.v v4, (%0);" ::"r"(c__));
217217
c__ += P;
218218
asm volatile("vfmacc.vf v8, %0, v20" ::"f"(t2));
219-
asm volatile("vse32.v v8, (%0);" ::"r"(c__));
219+
asm volatile("vse16.v v8, (%0);" ::"r"(c__));
220220
c__ += P;
221221
asm volatile("vfmacc.vf v12, %0, v20" ::"f"(t3));
222-
asm volatile("vse32.v v12, (%0);" ::"r"(c__));
222+
asm volatile("vse16.v v12, (%0);" ::"r"(c__));
223223
}
224224

225225
p += gvl;

0 commit comments

Comments (0)