@@ -66,6 +66,58 @@ POSSIBILITY OF SUCH DAMAGE.
6666
6767namespace vvenc
6868{
69+ int motionErrorLumaInt_sve ( const Pel* org, const ptrdiff_t origStride, const Pel* buf, const ptrdiff_t buffStride,
70+ const int w, int h, const int besterror )
71+ {
72+ CHECKD ( w % 8 != 0 , " Width must be a multiple of eight" );
73+ CHECKD ( h % 4 != 0 , " Height must be a multiple of four" );
74+
75+ int error = 0 ;
76+ do
77+ {
78+ int64x2_t acc1 = vdupq_n_s64 ( 0 );
79+ int64x2_t acc2 = vdupq_n_s64 ( 0 );
80+
81+ int x1 = 0 ;
82+ do
83+ {
84+ int16x8_t o1 = vld1q_s16 ( org + 0 * origStride + x1 );
85+ int16x8_t b1 = vld1q_s16 ( buf + 0 * origStride + x1 );
86+ int16x8_t o2 = vld1q_s16 ( org + 1 * origStride + x1 );
87+ int16x8_t b2 = vld1q_s16 ( buf + 1 * buffStride + x1 );
88+ int16x8_t o3 = vld1q_s16 ( org + 2 * origStride + x1 );
89+ int16x8_t b3 = vld1q_s16 ( buf + 2 * buffStride + x1 );
90+ int16x8_t o4 = vld1q_s16 ( org + 3 * origStride + x1 );
91+ int16x8_t b4 = vld1q_s16 ( buf + 3 * buffStride + x1 );
92+
93+ int16x8_t diff1 = vabdq_s16 ( o1, b1 );
94+ int16x8_t diff2 = vabdq_s16 ( o2, b2 );
95+ int16x8_t diff3 = vabdq_s16 ( o3, b3 );
96+ int16x8_t diff4 = vabdq_s16 ( o4, b4 );
97+
98+ acc1 = vvenc_sdotq_s16 ( acc1, diff1, diff1 );
99+ acc2 = vvenc_sdotq_s16 ( acc2, diff2, diff2 );
100+ acc1 = vvenc_sdotq_s16 ( acc1, diff3, diff3 );
101+ acc2 = vvenc_sdotq_s16 ( acc2, diff4, diff4 );
102+
103+ x1 += 8 ;
104+ } while ( x1 != w );
105+
106+ int64x2_t diff2_sum = vaddq_s64 ( acc1, acc2 );
107+ error += ( int32_t )vaddvq_s64 ( diff2_sum );
108+ if ( error > besterror )
109+ {
110+ return error;
111+ }
112+
113+ org += 4 * origStride;
114+ buf += 4 * buffStride;
115+ h -= 4 ;
116+ } while ( h != 0 );
117+
118+ return error;
119+ }
120+
69121void applyPlanarCorrection_sve ( const Pel* refPel, const ptrdiff_t refStride, Pel* dstPel, const ptrdiff_t dstStride,
70122 const int32_t w, const int32_t h, const ClpRng& clpRng, const uint16_t motionError )
71123{
@@ -353,6 +405,7 @@ void applyBlock_sve( const CPelBuf& src, PelBuf& dst, const CompArea& blk, const
353405template <>
354406void MCTF::_initMCTF_ARM<SVE>()
355407{
408+ m_motionErrorLumaInt8 = motionErrorLumaInt_sve;
356409 m_applyPlanarCorrection = applyPlanarCorrection_sve;
357410 m_applyBlock = applyBlock_sve;
358411}
0 commit comments