Skip to content

Commit db0abfa

Browse files
authored
Merge pull request #5238 from martin-frbg/revert5125
Remove non-vectorized SGEMV transpose reduce path for POWER8, restoring optimizations from PR4880
2 parents 7389b6c + 4ec62d7 commit db0abfa

File tree

1 file changed

+4
-19
lines changed

1 file changed

+4
-19
lines changed

kernel/power/sgemv_t.c

Lines changed: 4 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -77,17 +77,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x,
7777
temp7 += vx * vva7;
7878
}
7979

80-
#if defined(POWER8)
81-
y[0] += alpha * (temp0[0] + temp0[1] + temp0[2] + temp0[3]);
82-
y[1] += alpha * (temp1[0] + temp1[1] + temp1[2] + temp1[3]);
83-
y[2] += alpha * (temp2[0] + temp2[1] + temp2[2] + temp2[3]);
84-
y[3] += alpha * (temp3[0] + temp3[1] + temp3[2] + temp3[3]);
85-
86-
y[4] += alpha * (temp4[0] + temp4[1] + temp4[2] + temp4[3]);
87-
y[5] += alpha * (temp5[0] + temp5[1] + temp5[2] + temp5[3]);
88-
y[6] += alpha * (temp6[0] + temp6[1] + temp6[2] + temp6[3]);
89-
y[7] += alpha * (temp7[0] + temp7[1] + temp7[2] + temp7[3]);
90-
#else
80+
9181
register __vector float t0, t1, t2, t3;
9282
register __vector float a = {alpha, alpha, alpha, alpha};
9383
__vector float vy0 = vec_vsx_ld(0, y);
@@ -116,7 +106,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x,
116106
vy1 += a * temp4;
117107
vec_vsx_st(vy0, 0, y);
118108
vec_vsx_st(vy1, 0, &(y[4]));
119-
#endif
109+
120110
}
121111

122112
static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x,
@@ -143,12 +133,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x,
143133
temp2 += vx * vva2;
144134
temp3 += vx * vva3;
145135
}
146-
#if defined(POWER8)
147-
y[0] += alpha * (temp0[0] + temp0[1] + temp0[2] + temp0[3]);
148-
y[1] += alpha * (temp1[0] + temp1[1] + temp1[2] + temp1[3]);
149-
y[2] += alpha * (temp2[0] + temp2[1] + temp2[2] + temp2[3]);
150-
y[3] += alpha * (temp3[0] + temp3[1] + temp3[2] + temp3[3]);
151-
#else
136+
152137
register __vector float t0, t1, t2, t3;
153138
register __vector float a = {alpha, alpha, alpha, alpha};
154139
__vector float vy0 = vec_vsx_ld(0, y);
@@ -165,7 +150,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x,
165150

166151
vy0 += a * temp0;
167152
vec_vsx_st(vy0, 0, y);
168-
#endif
153+
169154
}
170155

171156
static void sgemv_kernel_4x2(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x,

0 commit comments

Comments
 (0)