Skip to content

Commit e8b11a1

Browse files
authored
Merge pull request #5125 from martin-frbg/issue5122
Fix SGEMV on POWER8 by reverting to the non-vectorized earlier code
2 parents (9a3948d + 81eed86), commit e8b11a1

File tree

2 files changed: 39 additions (+39) and 8 deletions (-8).

kernel/power/sgemv_t.c

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
7878
temp7 += v_x[i] * va7[i];
7979
}
8080

81-
81+
#if defined(POWER8)
82+
y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
83+
y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
84+
y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
85+
y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
86+
87+
y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
88+
y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
89+
y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
90+
y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
91+
#else
8292
register __vector float t0, t1, t2, t3;
8393
register __vector float a = { alpha, alpha, alpha, alpha };
8494
__vector float *v_y = (__vector float*) y;
@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
105115

106116
v_y[0] += a * temp0;
107117
v_y[1] += a * temp4;
108-
118+
#endif
109119
}
110120

111121

@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
132142
temp2 += v_x[i] * va2[i];
133143
temp3 += v_x[i] * va3[i];
134144
}
135-
145+
#if defined(POWER8)
146+
y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
147+
y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
148+
y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
149+
y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
150+
#else
136151
register __vector float t0, t1, t2, t3;
137152
register __vector float a = { alpha, alpha, alpha, alpha };
138153
__vector float *v_y = (__vector float*) y;
@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
148163
temp0 += temp1 + temp2 + temp3;
149164

150165
v_y[0] += a * temp0;
151-
166+
#endif
152167
}
153168

154169

kernel/power/sgemv_t_8.c

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
9999
temp7 += vx1* va7_1 + vx2 * va7_2;
100100
}
101101

102-
102+
#if defined(POWER8)
103+
y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
104+
y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
105+
y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
106+
y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
107+
108+
y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
109+
y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
110+
y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
111+
y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
112+
#else
103113
register __vector float t0, t1, t2, t3;
104114
register __vector float a = { alpha, alpha, alpha, alpha };
105115
__vector float *v_y = (__vector float*) y;
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
126136

127137
v_y[0] += a * temp0;
128138
v_y[1] += a * temp4;
129-
139+
#endif
130140
}
131141

132142

@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
153163
temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
154164
temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1];
155165
}
156-
166+
167+
#if defined(POWER8)
168+
y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
169+
y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
170+
y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
171+
y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
172+
#else
157173
register __vector float t0, t1, t2, t3;
158174
register __vector float a = { alpha, alpha, alpha, alpha };
159175
__vector float *v_y = (__vector float*) y;
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
169185
temp0 += temp1 + temp2 + temp3;
170186

171187
v_y[0] += a * temp0;
172-
188+
#endif
173189
}
174190

175191

0 commit comments

Comments (0)