Skip to content

Commit a8432ee

Browse files
authored
Merge pull request #608 from georges-arm/georges-arm/rdcost-neon-deduplicate-reverse
Use reverse_vector_s16x8 in RdCost_neon.cpp
2 parents c802434 + 4d30fb2 commit a8432ee

File tree

1 file changed

+2
-13
lines changed

1 file changed

+2
-13
lines changed

source/Lib/CommonLib/arm/neon/RdCost_neon.cpp

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ POSSIBILITY OF SUCH DAMAGE.
5050
#include "CommonDefARM.h"
5151
#include "CommonLib/CommonDef.h"
5252
#include "CommonLib/RdCost.h"
53+
#include "reverse_neon.h"
5354
#include "sum_neon.h"
5455

5556
#if ENABLE_SIMD_OPT_DIST && defined( TARGET_SIMD_X86 )
@@ -1097,18 +1098,6 @@ Distortion xGetSAD_NxN_neon( const DistParam &rcDtParam )
10971098
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
10981099
}
10991100

1100-
static inline int16x8_t reverse_vector_s16( int16x8_t x )
1101-
{
1102-
#if REAL_TARGET_AARCH64
1103-
static const uint8_t shuffle_table[ 16 ] = { 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 };
1104-
uint8x16_t shuffle_indices = vld1q_u8( shuffle_table );
1105-
return vreinterpretq_s16_u8( vqtbl1q_u8( vreinterpretq_u8_s16( x ), shuffle_indices ) );
1106-
#else
1107-
int16x8_t rev_halves = vrev64q_s16( x );
1108-
return vcombine_s16( vget_high_s16( rev_halves ), vget_low_s16( rev_halves ) );
1109-
#endif
1110-
}
1111-
11121101
Distortion xGetSADwMask_neon( const DistParam& rcDtParam )
11131102
{
11141103
if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
@@ -1141,7 +1130,7 @@ Distortion xGetSADwMask_neon( const DistParam& rcDtParam )
11411130
if (rcDtParam.stepX == -1)
11421131
{
11431132
vmask = vld1q_s16( weightMask - x - 7 );
1144-
vmask = reverse_vector_s16( vmask );
1133+
vmask = reverse_vector_s16x8( vmask );
11451134
}
11461135
else
11471136
{

0 commit comments

Comments
 (0)