@@ -77,7 +77,8 @@ static inline void calcBIOSums_Neon(const Pel* srcY0Tmp, const Pel* srcY1Tmp, co
7777 int16x8_t sumAbsGYTmp = vdupq_n_s16 (0 );
7878 int16x8_t sumDIYTmp = vdupq_n_s16 (0 );
7979 int16x8_t sumSignGyGxTmp = vdupq_n_s16 (0 );
80- int16x8_t x = {1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 };
80+ int16_t vals[8 ] = {1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 };
81+ int16x8_t x = vld1q_s16 (vals);
8182
8283 for (int y = 0 ; y < 3 ; y++)
8384 {
@@ -93,9 +94,9 @@ static inline void calcBIOSums_Neon(const Pel* srcY0Tmp, const Pel* srcY1Tmp, co
9394 int16x8_t packTempY = vshrq_n_s16 ( vaddq_s16 (loadGradY0, loadGradY1), 1 );
9495 int16x8_t gX = vabsq_s16 (packTempX);
9596 int16x8_t gY = vabsq_s16 (packTempY);
96- int16x8_t dIX = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 (vcleq_s16 (packTempX, vdupq_n_s16 (0 ))- vcgeq_s16 (packTempX,vdupq_n_s16 (0 ))));
97- int16x8_t dIY = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 (vcleq_s16 (packTempY, vdupq_n_s16 (0 ))- vcgeq_s16 (packTempY,vdupq_n_s16 (0 ))));
98- int16x8_t signGY_GX = vmulq_s16 (packTempX,vreinterpretq_s16_u16 (vcleq_s16 (packTempY, vdupq_n_s16 (0 ))- vcgeq_s16 (packTempY,vdupq_n_s16 (0 ))));
97+ int16x8_t dIX = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 ( vsubq_u16 ( vcleq_s16 (packTempX, vdupq_n_s16 (0 )), vcgeq_s16 (packTempX,vdupq_n_s16 (0 )) )));
98+ int16x8_t dIY = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 ( vsubq_u16 ( vcleq_s16 (packTempY, vdupq_n_s16 (0 )), vcgeq_s16 (packTempY,vdupq_n_s16 (0 )) )));
99+ int16x8_t signGY_GX = vmulq_s16 (packTempX,vreinterpretq_s16_u16 ( vsubq_u16 ( vcleq_s16 (packTempY, vdupq_n_s16 (0 )), vcgeq_s16 (packTempY,vdupq_n_s16 (0 )) )));
99100
100101 sumAbsGXTmp = vaddq_s16 (sumAbsGXTmp, gX );
101102 sumAbsGYTmp = vaddq_s16 (sumAbsGYTmp, gY );
@@ -124,9 +125,9 @@ static inline void calcBIOSums_Neon(const Pel* srcY0Tmp, const Pel* srcY1Tmp, co
124125 gX = vabsq_s16 (packTempX);
125126 gY = vabsq_s16 (packTempY);
126127
127- dIX = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 (vcleq_s16 (packTempX, vdupq_n_s16 (0 ))- vcgeq_s16 (packTempX,vdupq_n_s16 (0 ))));
128- dIY = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 (vcleq_s16 (packTempY, vdupq_n_s16 (0 ))- vcgeq_s16 (packTempY,vdupq_n_s16 (0 ))));
129- signGY_GX = vmulq_s16 (packTempX,vreinterpretq_s16_u16 (vcleq_s16 (packTempY, vdupq_n_s16 (0 ))- vcgeq_s16 (packTempY,vdupq_n_s16 (0 ))));
128+ dIX = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 ( vsubq_u16 ( vcleq_s16 (packTempX, vdupq_n_s16 (0 )), vcgeq_s16 (packTempX,vdupq_n_s16 (0 )) )));
129+ dIY = vmulq_s16 (subTemp1,vreinterpretq_s16_u16 ( vsubq_u16 ( vcleq_s16 (packTempY, vdupq_n_s16 (0 )), vcgeq_s16 (packTempY,vdupq_n_s16 (0 )) )));
130+ signGY_GX = vmulq_s16 (packTempX,vreinterpretq_s16_u16 ( vsubq_u16 ( vcleq_s16 (packTempY, vdupq_n_s16 (0 )), vcgeq_s16 (packTempY,vdupq_n_s16 (0 )) )));
130131
131132 sumAbsGXTmp = vaddq_s16 (sumAbsGXTmp, gX );
132133 sumAbsGYTmp = vaddq_s16 (sumAbsGYTmp, gY );
0 commit comments