@@ -1320,6 +1320,87 @@ static bool test_PelBufferOps()
13201320#endif // ENABLE_SIMD_OPT_BUFFER
13211321
13221322#if ENABLE_SIMD_OPT_MCIF
1323+ template <bool isLast, unsigned width>
1324+ static bool check_filterXxY_N8 ( InterpolationFilter* ref, InterpolationFilter* opt, unsigned num_cases )
1325+ {
1326+ static_assert ( width == 4 || width == 8 || width == 16 , " Width must be either 4, 8, or 16" );
1327+
1328+ DimensionGenerator dim;
1329+
1330+ // Max buffer size for src is ( height + 7 ) * srcStride.
1331+ std::vector<Pel> src ( ( MAX_CU_SIZE + 7 ) * MAX_CU_SIZE );
1332+ std::vector<Pel> dst_ref ( MAX_CU_SIZE * MAX_CU_SIZE );
1333+ std::vector<Pel> dst_opt ( MAX_CU_SIZE * MAX_CU_SIZE );
1334+
1335+ bool passed = true ;
1336+
1337+ // Test 8-bit and 10-bit.
1338+ for ( unsigned bd : { 8 , 10 } )
1339+ {
1340+ ClpRng clpRng{ ( int )bd };
1341+
1342+ InputGenerator<Pel> inp_gen{ bd, /* is_signed=*/ false };
1343+
1344+ std::ostringstream sstm_test;
1345+ sstm_test << " InterpolationFilter::filter" << width << " x" << width << " [0][" << isLast << " ]"
1346+ << " bitDepth=" << bd;
1347+ std::cout << " Testing " << sstm_test.str () << std::endl;
1348+
1349+ for ( unsigned n = 0 ; n < num_cases; n++ )
1350+ {
1351+ unsigned height = width == 4 ? 4 : dim.get ( 4 , MAX_CU_SIZE, 4 );
1352+ unsigned srcStride = dim.get ( width + 7 , MAX_CU_SIZE ); // srcStride >= width + 7
1353+ unsigned dstStride = dim.get ( width, MAX_CU_SIZE );
1354+
1355+ unsigned hCoeff_idx = dim.get ( 0 , LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS );
1356+ unsigned vCoeff_idx = dim.get ( 0 , LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS );
1357+ const TFilterCoeff* pCoeffH =
1358+ width == 4 ? InterpolationFilter::m_lumaFilter4x4[hCoeff_idx] : InterpolationFilter::m_lumaFilter[hCoeff_idx];
1359+ const TFilterCoeff* pCoeffV =
1360+ width == 4 ? InterpolationFilter::m_lumaFilter4x4[vCoeff_idx] : InterpolationFilter::m_lumaFilter[vCoeff_idx];
1361+
1362+ // Fill input buffers with unsigned data.
1363+ std::generate ( src.begin (), src.end (), inp_gen );
1364+
1365+ // Clear output blocks.
1366+ std::fill ( dst_ref.begin (), dst_ref.end (), 0 );
1367+ std::fill ( dst_opt.begin (), dst_opt.end (), 0 );
1368+
1369+ ptrdiff_t src_offset = 3 * ( 1 + srcStride );
1370+
1371+ if ( width == 4 )
1372+ {
1373+ ref->m_filter4x4 [0 ][isLast]( clpRng, src.data () + src_offset, ( int )srcStride, dst_ref.data (),
1374+ ( int )dstStride, ( int )width, ( int )height, pCoeffH, pCoeffV );
1375+ opt->m_filter4x4 [0 ][isLast]( clpRng, src.data () + src_offset, ( int )srcStride, dst_opt.data (),
1376+ ( int )dstStride, ( int )width, ( int )height, pCoeffH, pCoeffV );
1377+ }
1378+ else if ( width == 8 )
1379+ {
1380+ ref->m_filter8x8 [0 ][isLast]( clpRng, src.data () + src_offset, ( int )srcStride, dst_ref.data (),
1381+ ( int )dstStride, ( int )width, ( int )height, pCoeffH, pCoeffV );
1382+ opt->m_filter8x8 [0 ][isLast]( clpRng, src.data () + src_offset, ( int )srcStride, dst_opt.data (),
1383+ ( int )dstStride, ( int )width, ( int )height, pCoeffH, pCoeffV );
1384+ }
1385+ else // width == 16
1386+ {
1387+ ref->m_filter16x16 [0 ][isLast]( clpRng, src.data () + src_offset, ( int )srcStride, dst_ref.data (),
1388+ ( int )dstStride, ( int )width, ( int )height, pCoeffH, pCoeffV );
1389+ opt->m_filter16x16 [0 ][isLast]( clpRng, src.data () + src_offset, ( int )srcStride, dst_opt.data (),
1390+ ( int )dstStride, ( int )width, ( int )height, pCoeffH, pCoeffV );
1391+ }
1392+
1393+ std::ostringstream sstm_subtest;
1394+ sstm_subtest << sstm_test.str () << " srcStride=" << srcStride << " dstStride=" << dstStride << " h=" << height;
1395+
1396+ passed =
1397+ compare_values_2d ( sstm_subtest.str (), dst_ref.data (), dst_opt.data (), height, width, dstStride ) && passed;
1398+ }
1399+ }
1400+
1401+ return passed;
1402+ }
1403+
13231404template <bool isLast, unsigned width>
13241405static bool check_filterXxY_N4 ( InterpolationFilter* ref, InterpolationFilter* opt, unsigned num_cases )
13251406{
@@ -1464,6 +1545,13 @@ static bool test_InterpolationFilter()
14641545 unsigned num_cases = NUM_CASES;
14651546 bool passed = true ;
14661547
1548+ passed = check_filterXxY_N8<false , 4 >( &ref, &opt, num_cases ) && passed;
1549+ passed = check_filterXxY_N8<true , 4 >( &ref, &opt, num_cases ) && passed;
1550+ passed = check_filterXxY_N8<false , 8 >( &ref, &opt, num_cases ) && passed;
1551+ passed = check_filterXxY_N8<true , 8 >( &ref, &opt, num_cases ) && passed;
1552+ passed = check_filterXxY_N8<false , 16 >( &ref, &opt, num_cases ) && passed;
1553+ passed = check_filterXxY_N8<true , 16 >( &ref, &opt, num_cases ) && passed;
1554+
14671555 passed = check_filterXxY_N4<false , 4 >( &ref, &opt, num_cases ) && passed;
14681556 passed = check_filterXxY_N4<true , 4 >( &ref, &opt, num_cases ) && passed;
14691557 passed = check_filterXxY_N4<false , 8 >( &ref, &opt, num_cases ) && passed;
0 commit comments