Skip to content

Commit 632e4b1

Browse files
authored
Preparing v1.13.0-rc1 (#486)
* Preparing v1.13.0-rc1: improvements and fixes * Bump version
1 parent 19d671e commit 632e4b1

File tree

17 files changed

+218
-170
lines changed

17 files changed

+218
-170
lines changed

.github/workflows/Build.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ jobs:
2929
generators: "Visual Studio 17 2022",
3030
msvc_arch: Win32
3131
}
32+
- {
33+
name: "Windows MSVC arm64",
34+
os: windows-2022,
35+
environment_script: "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Auxiliary/Build/vcvars64.bat",
36+
generators: "Visual Studio 17 2022",
37+
msvc_arch: arm64,
38+
}
3239
- {
3340
name: "Windows MinGW",
3441
os: windows-2022,

.gitlab-ci-internal.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,14 @@ test_vc193x:
265265
tags:
266266
- vc193x
267267

268+
test_vc193x_arm64:
269+
extends: .build_test_msvc_template
270+
variables:
271+
MSVC_ARCH: arm64
272+
CONFIG_OPTIONS: "-DVVENC_OVERRIDE_COMPILER_CHECK=1"
273+
tags:
274+
- vc193x-arm
275+
268276
test_macos-x86_64:
269277
extends: .build_test_macos_template
270278
variables:

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ if( NOT CMAKE_VERSION VERSION_LESS 3.13.0 )
1010
endif()
1111

1212
# project name
13-
project( vvenc VERSION 1.12.1 )
13+
project( vvenc VERSION 1.13.0 )
1414

1515
# set alternative version numbering for release candidates
16-
#set( PROJECT_VERSION_RC rc1 )
16+
set( PROJECT_VERSION_RC rc1 )
1717
if( PROJECT_VERSION_RC )
1818
set( PROJECT_VERSION "${PROJECT_VERSION}-${PROJECT_VERSION_RC}" )
1919
endif()

source/Lib/CommonLib/RdCost.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ class RdCost
255255
template <X86_VEXT vext>
256256
static void xGetSADX5_8xN_SIMD ( const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos );
257257
template <X86_VEXT vext>
258-
static void xGetSADX5_16xN_SIMD ( const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos );
258+
static void xGetSADX5_16xN_SIMD_X86 ( const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos );
259259

260260
template<X86_VEXT vext, bool fastHad>
261261
static Distortion xGetHADs_SIMD ( const DistParam& pcDtParam );
@@ -268,7 +268,7 @@ class RdCost
268268

269269
#ifdef TARGET_SIMD_ARM
270270
template <ARM_VEXT vext>
271-
static void xGetSADX5_16xN_SIMD ( const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos );
271+
static void xGetSADX5_16xN_SIMD_ARM ( const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos );
272272

273273
template <ARM_VEXT vext>
274274
static Distortion xGetHAD2SADs_ARMSIMD( const DistParam &rcDtParam );

source/Lib/CommonLib/arm/BufferARM.h

Lines changed: 137 additions & 130 deletions
Large diffs are not rendered by default.

source/Lib/CommonLib/arm/InterPredARM.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ static inline void calcBIOSums_Neon(const Pel* srcY0Tmp, const Pel* srcY1Tmp, co
7777
int16x8_t sumAbsGYTmp = vdupq_n_s16(0);
7878
int16x8_t sumDIYTmp = vdupq_n_s16(0);
7979
int16x8_t sumSignGyGxTmp = vdupq_n_s16(0);
80-
int16x8_t x = {1, 1, 1, 1, 1, 1, 0, 0};
80+
int16_t vals[8] = {1, 1, 1, 1, 1, 1, 0, 0};
81+
int16x8_t x = vld1q_s16(vals);
8182

8283
for (int y = 0; y < 3; y++)
8384
{
@@ -93,9 +94,9 @@ static inline void calcBIOSums_Neon(const Pel* srcY0Tmp, const Pel* srcY1Tmp, co
9394
int16x8_t packTempY = vshrq_n_s16( vaddq_s16(loadGradY0, loadGradY1), 1 );
9495
int16x8_t gX = vabsq_s16(packTempX);
9596
int16x8_t gY = vabsq_s16(packTempY);
96-
int16x8_t dIX = vmulq_s16(subTemp1,vreinterpretq_s16_u16(vcleq_s16(packTempX, vdupq_n_s16(0))-vcgeq_s16(packTempX,vdupq_n_s16(0))));
97-
int16x8_t dIY = vmulq_s16(subTemp1,vreinterpretq_s16_u16(vcleq_s16(packTempY, vdupq_n_s16(0))-vcgeq_s16(packTempY,vdupq_n_s16(0))));
98-
int16x8_t signGY_GX = vmulq_s16(packTempX,vreinterpretq_s16_u16(vcleq_s16(packTempY, vdupq_n_s16(0))-vcgeq_s16(packTempY,vdupq_n_s16(0))));
97+
int16x8_t dIX = vmulq_s16(subTemp1,vreinterpretq_s16_u16( vsubq_u16( vcleq_s16(packTempX, vdupq_n_s16(0)), vcgeq_s16(packTempX,vdupq_n_s16(0)) )));
98+
int16x8_t dIY = vmulq_s16(subTemp1,vreinterpretq_s16_u16( vsubq_u16( vcleq_s16(packTempY, vdupq_n_s16(0)), vcgeq_s16(packTempY,vdupq_n_s16(0)) )));
99+
int16x8_t signGY_GX = vmulq_s16(packTempX,vreinterpretq_s16_u16( vsubq_u16( vcleq_s16(packTempY, vdupq_n_s16(0)), vcgeq_s16(packTempY,vdupq_n_s16(0)) )));
99100

100101
sumAbsGXTmp = vaddq_s16(sumAbsGXTmp, gX);
101102
sumAbsGYTmp = vaddq_s16(sumAbsGYTmp, gY);
@@ -124,9 +125,9 @@ static inline void calcBIOSums_Neon(const Pel* srcY0Tmp, const Pel* srcY1Tmp, co
124125
gX = vabsq_s16(packTempX);
125126
gY = vabsq_s16(packTempY);
126127

127-
dIX = vmulq_s16(subTemp1,vreinterpretq_s16_u16(vcleq_s16(packTempX, vdupq_n_s16(0))-vcgeq_s16(packTempX,vdupq_n_s16(0))));
128-
dIY = vmulq_s16(subTemp1,vreinterpretq_s16_u16(vcleq_s16(packTempY, vdupq_n_s16(0))-vcgeq_s16(packTempY,vdupq_n_s16(0))));
129-
signGY_GX = vmulq_s16(packTempX,vreinterpretq_s16_u16(vcleq_s16(packTempY, vdupq_n_s16(0))-vcgeq_s16(packTempY,vdupq_n_s16(0))));
128+
dIX = vmulq_s16(subTemp1,vreinterpretq_s16_u16( vsubq_u16( vcleq_s16(packTempX, vdupq_n_s16(0)), vcgeq_s16(packTempX,vdupq_n_s16(0)) )));
129+
dIY = vmulq_s16(subTemp1,vreinterpretq_s16_u16( vsubq_u16( vcleq_s16(packTempY, vdupq_n_s16(0)), vcgeq_s16(packTempY,vdupq_n_s16(0)) )));
130+
signGY_GX = vmulq_s16(packTempX,vreinterpretq_s16_u16( vsubq_u16( vcleq_s16(packTempY, vdupq_n_s16(0)), vcgeq_s16(packTempY,vdupq_n_s16(0)) )));
130131

131132
sumAbsGXTmp = vaddq_s16(sumAbsGXTmp, gX);
132133
sumAbsGYTmp = vaddq_s16(sumAbsGYTmp, gY);

source/Lib/CommonLib/arm/RdCostARM.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -915,7 +915,7 @@ Distortion RdCost::xGetHAD2SADs_ARMSIMD( const DistParam &rcDtParam )
915915
#endif // defined( TARGET_SIMD_X86 )
916916

917917
template<ARM_VEXT vext, bool isCalCentrePos>
918-
void xGetSADX5_16xN_SIMDImp( const DistParam& rcDtParam, Distortion* cost )
918+
void xGetSADX5_16xN_SIMDImp_ARM( const DistParam& rcDtParam, Distortion* cost )
919919
{
920920
int i, j;
921921
const Pel* piOrg = rcDtParam.org.buf;
@@ -990,7 +990,7 @@ void xGetSADX5_16xN_SIMDImp( const DistParam& rcDtParam, Distortion* cost )
990990
}
991991

992992
template <ARM_VEXT vext>
993-
void RdCost::xGetSADX5_16xN_SIMD(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos)
993+
void RdCost::xGetSADX5_16xN_SIMD_ARM(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos)
994994
{
995995
if( rcDtParam.bitDepth > 10 )
996996
{
@@ -999,9 +999,9 @@ void RdCost::xGetSADX5_16xN_SIMD(const DistParam& rcDtParam, Distortion* cost, b
999999
}
10001000

10011001
if( isCalCentrePos )
1002-
xGetSADX5_16xN_SIMDImp<vext, true>( rcDtParam, cost );
1002+
xGetSADX5_16xN_SIMDImp_ARM<vext, true>( rcDtParam, cost );
10031003
else
1004-
xGetSADX5_16xN_SIMDImp<vext, false>( rcDtParam, cost );
1004+
xGetSADX5_16xN_SIMDImp_ARM<vext, false>( rcDtParam, cost );
10051005
}
10061006

10071007
template< int iWidth, ARM_VEXT vext >
@@ -1169,7 +1169,7 @@ Distortion RdCost::xGetSADwMask_ARMSIMD( const DistParam& rcDtParam )
11691169
template<ARM_VEXT vext>
11701170
void RdCost::_initRdCostARM()
11711171
{
1172-
m_afpDistortFuncX5[1] = xGetSADX5_16xN_SIMD<vext>;
1172+
m_afpDistortFuncX5[1] = xGetSADX5_16xN_SIMD_ARM<vext>;
11731173

11741174
#if defined( TARGET_SIMD_X86 )
11751175
m_afpDistortFunc[0][DF_HAD_2SAD ] = RdCost::xGetHAD2SADs_ARMSIMD<vext>;

source/Lib/CommonLib/arm/neon/InterpolationFilter_neon.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,10 @@ static int16x8_t filter8xX_N8_neon( Pel const* src, int16x8_t ch, int32x4_t voff
178178

179179
static int16x8x2_t filter16xX_N8_neon( Pel const* src, int16x8_t ch, int32x4_t voffset1, int32x4_t invshift1st )
180180
{
181-
int16x8_t a = filter8xX_N8_neon( src + 0, ch, voffset1, invshift1st );
182-
int16x8_t b = filter8xX_N8_neon( src + 8, ch, voffset1, invshift1st );
183-
return ( int16x8x2_t ){ a, b };
181+
int16x8x2_t result;
182+
result.val[0] = filter8xX_N8_neon( src + 0, ch, voffset1, invshift1st );
183+
result.val[1] = filter8xX_N8_neon( src + 8, ch, voffset1, invshift1st );
184+
return result; // filled member-wise because MSVC for arm64 does not accept the compound-literal form ( int16x8x2_t ){ a, b }
184185
}
185186

186187
template<bool isLast>

source/Lib/CommonLib/arm/neon/Trafo_neon.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ namespace vvenc
6363

6464
static inline int64_t shift_and_round( int64_t x, int shift )
6565
{
66-
return ( x + ( 1 << ( shift - 1 ) ) ) >> shift;
66+
return ( x + ( static_cast<int64_t>(1) << ( shift - 1 ) ) ) >> shift;
6767
}
6868

6969
template<unsigned trSize>

source/Lib/CommonLib/x86/RdCostX86.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3250,7 +3250,7 @@ void RdCost::xGetSADX5_8xN_SIMD(const DistParam& rcDtParam, Distortion* cost, bo
32503250
}
32513251

32523252
template <X86_VEXT vext, bool isCalCentrePos>
3253-
void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {
3253+
void xGetSADX5_16xN_SIMDImp_X86(const DistParam& rcDtParam, Distortion* cost) {
32543254
int i, j;
32553255
const Pel* piOrg = rcDtParam.org.buf;
32563256
const Pel* piCur = rcDtParam.cur.buf - 4;
@@ -3549,16 +3549,16 @@ void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {
35493549
}
35503550

35513551
template <X86_VEXT vext>
3552-
void RdCost::xGetSADX5_16xN_SIMD(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos) {
3552+
void RdCost::xGetSADX5_16xN_SIMD_X86(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos) {
35533553
if( rcDtParam.bitDepth > 10 ){
35543554
RdCost::xGetSAD16X5( rcDtParam, cost, isCalCentrePos );
35553555
return;
35563556
}
35573557

35583558
if (isCalCentrePos)
3559-
xGetSADX5_16xN_SIMDImp<vext, true>(rcDtParam, cost);
3559+
xGetSADX5_16xN_SIMDImp_X86<vext, true>(rcDtParam, cost);
35603560
else
3561-
xGetSADX5_16xN_SIMDImp<vext, false>(rcDtParam, cost);
3561+
xGetSADX5_16xN_SIMDImp_X86<vext, false>(rcDtParam, cost);
35623562
}
35633563

35643564
template <X86_VEXT vext>
@@ -3611,7 +3611,7 @@ void RdCost::_initRdCostX86()
36113611
m_fxdWtdPredPtr = fixWeightedSSE_SIMD <vext>;
36123612

36133613
m_afpDistortFuncX5[0] = xGetSADX5_8xN_SIMD <vext>;
3614-
m_afpDistortFuncX5[1] = xGetSADX5_16xN_SIMD<vext>;
3614+
m_afpDistortFuncX5[1] = xGetSADX5_16xN_SIMD_X86<vext>;
36153615
}
36163616

36173617
template void RdCost::_initRdCostX86<SIMDX86>();

0 commit comments

Comments
 (0)