Commit 45271d9

Merge pull request #611 from georges-arm/georges-arm/arm-GetSADwMask_sve

Arm: Add SVE implementation of xGetSADwMask
2 parents 7417f4b + a25eea0 commit 45271d9
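
For context, the distortion being vectorised here is a weighted SAD: absolute differences between the original and the candidate block, each scaled by a per-sample weight, accumulated over every subStep-th row and then compensated by subShift and the bit-depth adjustment. The scalar sketch below is not part of the commit; it is inferred from the SVE kernel in RdCost_sve.cpp further down, sadWithMaskRef is a hypothetical name, and distShift stands in for DISTORTION_PRECISION_ADJUSTMENT( bitDepth ).

#include <cstdint>
#include <cstdlib>

// Scalar reference sketch (illustration only, not vvenc code) of the
// weighted SAD computed by xGetSADwMask_sve.
static uint64_t sadWithMaskRef( const int16_t* org, const int16_t* cur, const int16_t* mask,
                                int rows, int cols, int strideOrg, int strideCur, int strideMask,
                                int subShift, int stepX, unsigned distShift )
{
  const int subStep = 1 << subShift;   // only every subStep-th row is visited
  int64_t sum = 0;
  for( int y = 0; y < rows; y += subStep )
  {
    for( int x = 0; x < cols; x++ )
    {
      // stepX == -1 means the weight mask is walked backwards within the row.
      const int16_t w = ( stepX == -1 ) ? mask[ -x ] : mask[ x ];
      sum += ( int64_t )std::abs( org[ x ] - cur[ x ] ) * w;
    }
    org  += strideOrg  * subStep;
    cur  += strideCur  * subStep;
    mask += strideMask * subStep;
  }
  sum <<= subShift;                    // compensate for the skipped rows
  return ( uint64_t )sum >> distShift;
}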

2 files changed: +157 -0 lines changed
source/Lib/CommonLib/arm/InitARM.cpp

Lines changed: 6 additions & 0 deletions

@@ -113,6 +113,12 @@ void RdCost::initRdCostARM()
   {
     _initRdCostARM<NEON>();
   }
+#if TARGET_SIMD_ARM_SVE
+  if( vext >= SVE )
+  {
+    _initRdCostARM<SVE>();
+  }
+#endif // TARGET_SIMD_ARM_SVE
 }
 #endif
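
The hunk above registers the SVE kernels only when the runtime-detected vector extension level (vext) reaches SVE; otherwise the NEON pointers installed just before it stay in place. How vvenc derives vext is outside this diff; on Linux/AArch64 such a capability check commonly reduces to the auxiliary vector, roughly as sketched below (cpuHasSVE is a hypothetical helper, not a vvenc function).

#include <sys/auxv.h>

#ifndef HWCAP_SVE
#define HWCAP_SVE ( 1 << 22 )   // bit assigned by the Linux UAPI for AArch64
#endif

// Illustration only: ask the kernel whether the CPU implements SVE.
static bool cpuHasSVE()
{
  return ( getauxval( AT_HWCAP ) & HWCAP_SVE ) != 0;
}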

RdCost_sve.cpp (new file)

Lines changed: 151 additions & 0 deletions

@@ -0,0 +1,151 @@
/* -----------------------------------------------------------------------------
The copyright in this software is being made available under the Clear BSD
License, included below. No patent rights, trademark rights and/or
other Intellectual Property Rights other than the copyrights concerning
the Software are granted under this license.

The Clear BSD License

Copyright (c) 2019-2025, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted (subject to the limitations in the disclaimer below) provided that
the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

 * Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from this
   software without specific prior written permission.

NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.


------------------------------------------------------------------------------------------- */

/** \file RdCost_sve.cpp
    \brief RD cost computation class, SVE version
*/

#include <limits>
#include <math.h>

#include "../neon/reverse_neon.h"
#include "../neon/sum_neon.h"
#include "CommonDefARM.h"
#include "CommonLib/CommonDef.h"
#include "CommonLib/RdCost.h"
#include "neon_sve_bridge.h"

#include <arm_sve.h>

namespace vvenc
{

#if ENABLE_SIMD_OPT_DIST && defined( TARGET_SIMD_ARM )

static inline int64x2_t xGetSADwMask_sve_step( const short* src1, const short* src2, const short* weightMask, int stepX,
                                               int x, int64x2_t sum )
{
  int16x8_t vsrc1 = vld1q_s16( src1 + x );
  int16x8_t vsrc2 = vld1q_s16( src2 + x );
  int16x8_t vmask;
  if( stepX == -1 )
  {
    vmask = vld1q_s16( weightMask - x - 7 );
    vmask = reverse_vector_s16x8( vmask );
  }
  else
  {
    vmask = vld1q_s16( weightMask + x );
  }
  int16x8_t diff = vabdq_s16( vsrc1, vsrc2 );
  return vvenc_sdotq_s16( sum, diff, vmask );
}

Distortion xGetSADwMask_sve( const DistParam& rcDtParam )
{
  if( rcDtParam.org.width < 4 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight )
  {
    return RdCost::xGetSADwMask( rcDtParam );
  }

  const short* src1 = ( const short* )rcDtParam.org.buf;
  const short* src2 = ( const short* )rcDtParam.cur.buf;
  const short* weightMask = ( const short* )rcDtParam.mask;
  int rows = rcDtParam.org.height;
  int cols = rcDtParam.org.width;
  int subShift = rcDtParam.subShift;
  int subStep = 1 << subShift;
  const int strideSrc1 = rcDtParam.org.stride * subStep;
  const int strideSrc2 = rcDtParam.cur.stride * subStep;
  const int strideMask = rcDtParam.maskStride * subStep;

  int64x2_t sum0 = vdupq_n_s64( 0 );

  if( cols == 8 )
  {
    do
    {
      sum0 = xGetSADwMask_sve_step( src1, src2, weightMask, rcDtParam.stepX, 0, sum0 );

      src1 += strideSrc1;
      src2 += strideSrc2;
      weightMask += strideMask;
      rows -= subStep;
    } while( rows != 0 );
  }
  else
  {
    int64x2_t sum1 = vdupq_n_s64( 0 );

    do
    {
      int x = 0;
      do
      {
        sum0 = xGetSADwMask_sve_step( src1, src2, weightMask, rcDtParam.stepX, x + 0, sum0 );
        sum1 = xGetSADwMask_sve_step( src1, src2, weightMask, rcDtParam.stepX, x + 8, sum1 );
        x += 16;
      } while( x != cols );

      src1 += strideSrc1;
      src2 += strideSrc2;
      weightMask += strideMask;
      rows -= subStep;
    } while( rows != 0 );

    sum0 = vaddq_s64( sum0, sum1 );
  }

  Distortion sum = horizontal_add_s64x2( sum0 );
  sum <<= subShift;
  return sum >> DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth );
}

template<>
void RdCost::_initRdCostARM<SVE>()
{
  m_afpDistortFunc[0][DF_SAD_WITH_MASK] = xGetSADwMask_sve;
}

#endif // defined( TARGET_SIMD_ARM )

} // namespace vvenc
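
The kernel relies on three helpers that live in headers outside this diff: reverse_vector_s16x8 (reverse_neon.h), horizontal_add_s64x2 (sum_neon.h) and vvenc_sdotq_s16 (neon_sve_bridge.h). Their exact definitions are not shown in the commit; the sketches below show how such helpers are typically written, with vvenc_sdotq_s16 built on the standard arm_neon_sve_bridge.h wrappers so the NEON accumulator can be fed to the SVE SDOT instruction.

#include <arm_neon.h>
#include <arm_neon_sve_bridge.h>

// Sketch only: full reversal of the eight int16 lanes of a NEON vector.
static inline int16x8_t reverse_vector_s16x8( int16x8_t v )
{
  int16x8_t rev = vrev64q_s16( v );   // reverse lanes within each 64-bit half
  return vextq_s16( rev, rev, 4 );    // swap the two halves
}

// Sketch only: reduce the two int64 lanes to a single scalar.
static inline int64_t horizontal_add_s64x2( int64x2_t v )
{
  return vaddvq_s64( v );
}

// Sketch only: signed dot product of int16 lanes accumulated into int64
// lanes, routed through the NEON/SVE bridge so it maps to SVE SDOT.
static inline int64x2_t vvenc_sdotq_s16( int64x2_t acc, int16x8_t x, int16x8_t y )
{
  return svget_neonq_s64( svdot_s64( svset_neonq_s64( svundef_s64(), acc ),
                                     svset_neonq_s16( svundef_s16(), x ),
                                     svset_neonq_s16( svundef_s16(), y ) ) );
}

Since SDOT multiplies four 16-bit pairs per 64-bit lane and accumulates in one instruction, it replaces the widening multiply and pairwise-add chain a pure NEON version would need, which is presumably the motivation for this SVE specialisation.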
