Preparing v1.10.0-rc1: first IDR at POC31, adaptive MCTF, ~0.5% BDR gains, ... (#336)

adamjw24 · web-flow · commit 36652e4f39a0 · 2023-12-07T17:08:16.000+01:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -10,10 +10,10 @@ if( NOT CMAKE_VERSION VERSION_LESS 3.13.0 )
 endif()
 
 # project name
-project( vvenc VERSION 1.9.1 )
+project( vvenc VERSION 1.10.0 )
 
 # set alternative version numbering for release candidates
-#set( PROJECT_VERSION_RC rc1 )
+set( PROJECT_VERSION_RC rc1 )
 if( PROJECT_VERSION_RC )
     set( PROJECT_VERSION "${PROJECT_VERSION}-${PROJECT_VERSION_RC}" )
 endif()
diff --git a/cfg/experimental/lowdelay_faster.cfg b/cfg/experimental/lowdelay_faster.cfg
@@ -144,4 +144,4 @@ ReduceIntraChromaModesFullRD  : 1      # Reduce modes for chroma full RD intra s
 FastTTSplit                   : 0      # Fast method for TT split
 ReduceFilterME                : 2      # Use reduced filter taps for subpel motion estimation (2: 4-tap, 1: 6-tap)
 SelectiveRDOQ                 : 2      # Only use RDOQ when there are non-zero unquantized coefficients (0: never, 1: always, 2: for natural content)
-FirstPassMode                 : 2      # FirstPassMode (0: default, 1: faster, 2: faster with temporal downsampling)
+FirstPassMode                 : 4      # FirstPassMode (0: default, 1: faster, 2: faster with temporal downsampling, 4: faster with temporal and spatial downsampling)
diff --git a/cfg/randomaccess_faster.cfg b/cfg/randomaccess_faster.cfg
@@ -132,4 +132,4 @@ ReduceIntraChromaModesFullRD  : 1      # Reduce modes for chroma full RD intra s
 FastTTSplit                   : 0      # Fast method for TT split
 ReduceFilterME                : 2      # Use reduced filter taps for subpel motion estimation (2: 4-tap, 1: 6-tap)
 SelectiveRDOQ                 : 2      # Only use RDOQ when there are non-zero unquantized coefficients (0: never, 1: always, 2: for natural content)
-FirstPassMode                 : 2      # FirstPassMode (0: default, 1: faster, 2: faster with temporal downsampling)
+FirstPassMode                 : 4      # FirstPassMode (0: default, 1: faster, 2: faster with temporal downsampling, 4: faster with temporal and spatial downsampling)
diff --git a/changelog.txt b/changelog.txt
@@ -1,3 +1,23 @@
+/////////////////////////////////////////
+tag 1.10.0-rc1
+* libvvenc:
+  - added library parameters:
+    - vvenc_config::m_poc0idr to force POC0 to be an IDR (otherwise, by default, it will now be a RASL picture)
+  - changed default GOP structure of the first GOP to align with other GOPs, ensuring all intra-periods
+    and DASH segments have equal length
+  - enable spatial subsampling for 1st pass in 2-pass RC by default for the faster preset
+  - adaptively extend the number of neighboring frames in MCTF pre-filtering
+  - allow ALF derivation from partial data for FPPLinesSynchro
+  - improved rate matching accuracy at high target rates on easy-to-encode content, especially for HDR
+  - disable compilation for SSE42 and AVX as no specific code is used,
+    only keep explicit support for SSE41 and AVX2
+  - around 0.5% BDR gain for all presets
+  - minor changes and cleanups to rate control, DMVR, MCTF, QPA and others
+
+* vvencFFapp:
+  - added parameter: POC0IDR to control if POC0 is an IDR (=1, default if PicReordering is 1)
+    or RASL (=0, default if PicReordering is 0)
+
 /////////////////////////////////////////
 tag 1.9.1
 * libvvenc:
diff --git a/source/Lib/CommonLib/MCTF.cpp b/source/Lib/CommonLib/MCTF.cpp
@@ -416,7 +416,7 @@ void applyPlanarCorrectionCore( const Pel* refPel, const ptrdiff_t refStride, Pe
   }
 }
 
-void applyBlockCore( const CPelBuf& src, PelBuf& dst, const CompArea& blk, const ClpRng& clpRng, const Pel** correctedPics, int numRefs, const int* verror, const double refStrenghts[4], double weightScaling, double sigmaSq )
+void applyBlockCore( const CPelBuf& src, PelBuf& dst, const CompArea& blk, const ClpRng& clpRng, const Pel** correctedPics, int numRefs, const int* verror, const double* refStrenghts, double weightScaling, double sigmaSq )
 {
   const int         w = blk.width;
   const int         h = blk.height;
@@ -586,7 +586,7 @@ void MCTF::init( const VVEncCfg& encCfg, bool isFinalPass, NoMallocThreadPool* t
   m_area        = Area( 0, 0, m_encCfg->m_PadSourceWidth, m_encCfg->m_PadSourceHeight );
 
   // TLayer (TL) dependent definition of drop frames: TL = 4,  TL = 3,  TL = 2,  TL = 1,  TL = 0
-  const static int sMCTFSpeed[5] { 0, 0, ((3<<12) + (2<<9) + (2<<6) + (0<<3) + 0),   ((3<<12) + (2<<9) + (2<<6) + (0<<3) + 0),   ((3<<12) + (3<<9) + (2<<6) + (2<<3) + 2) };
+  const static int sMCTFSpeed[5] { 0, 0, ((3<<12) + (2<<9) + (2<<6) + (0<<3) + 0),   ((3<<12) + (2<<9) + (2<<6) + (0<<3) + 0),   ((3<<12) + (3<<9) + (3<<6) + (2<<3) + 2) };
 
   m_MCTFSpeedVal     = sMCTFSpeed[ m_encCfg->m_vvencMCTF.MCTFSpeed ];
   m_lowResFltSearch  = m_encCfg->m_vvencMCTF.MCTFSpeed > 0;
@@ -1247,17 +1247,20 @@ bool MCTF::estimateLumaLn( std::atomic_int& blockX_, std::atomic_int* prevLineX,
       }
     }
 
-    const int w = std::min<int>( blockSize, orig.Y().width  - blockX ) & ~7;
-    const int h = std::min<int>( blockSize, orig.Y().height - blockY ) & ~7;
+    if( doubleRes )
+    {
+      const int w = std::min<int>( blockSize, orig.Y().width  - blockX ) & ~7;
+      const int h = std::min<int>( blockSize, orig.Y().height - blockY ) & ~7;
 
-    CHECKD(bitDepth>10, "unsupported internal bit depth (also in calcVar)" );
-    const double bdScale = double(1<<(2*(10-bitDepth)));
-    const double dvar = m_calcVar( orig.Y().bufAt( blockX, blockY ), orig.Y().stride, w, h ) * bdScale;
-    const double mse  = best.error * bdScale / double( w * h );
+      CHECKD(bitDepth>10, "unsupported internal bit depth (also in calcVar)" );
+      const double bdScale = double(1<<(2*(10-bitDepth)));
+      const double dvar = m_calcVar( orig.Y().bufAt( blockX, blockY ), orig.Y().stride, w, h ) * bdScale;
+      const double mse  = best.error * bdScale / double( w * h );
 
-    best.error   = ( int ) ( 20 * ( ( best.error*bdScale + 5.0 ) / ( dvar + 5.0 ) ) + mse / 50.0 );
-    best.rmsme   = uint16_t( 0.5 + sqrt( mse ) );
-    best.overlap = ( ( double ) w * h ) / ( m_mctfUnitSize * m_mctfUnitSize );
+      best.error   = ( int ) ( 20 * ( ( best.error*bdScale + 5.0 ) / ( dvar + 5.0 ) ) + mse / 50.0 );
+      best.rmsme   = uint16_t( 0.5 + sqrt( mse ) );
+      best.overlap = ( ( double ) w * h ) / ( m_mctfUnitSize * m_mctfUnitSize );
+    }
 
     mvs.get(blockX / stepSize, blockY / stepSize) = best;
   }
diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -1151,9 +1151,9 @@ void simdFilterBlkCcAlf( const PelBuf &dstBuf, const CPelUnitBuf &recSrc, const
   if( getChannelTypeScaleX( CH_C, nChromaFormat ) == 1 )
   {
     __m128i xfilterCoeff[4];
-    xfilterCoeff[0] = _mm_set1_epi32( ( filterCoeff[1] & 0xffff ) | ( filterCoeff[2] << 16 ) );
-    xfilterCoeff[1] = _mm_set1_epi32( ( filterCoeff[0] & 0xffff ) | ( filterCoeff[3] << 16 ) );
-    xfilterCoeff[2] = _mm_set1_epi32( ( filterCoeff[4] & 0xffff ) | ( filterCoeff[5] << 16 ) );
+    xfilterCoeff[0] = _mm_set1_epi32( ( filterCoeff[1] & 0xffff ) | ( filterCoeff[2] * (1<< 16 )));
+    xfilterCoeff[1] = _mm_set1_epi32( ( filterCoeff[0] & 0xffff ) | ( filterCoeff[3] * (1<< 16 )));
+    xfilterCoeff[2] = _mm_set1_epi32( ( filterCoeff[4] & 0xffff ) | ( filterCoeff[5] * (1<< 16 )));
     xfilterCoeff[3] = _mm_set1_epi32( ( filterCoeff[6] & 0xffff ) );
 
     for( int i = 0; i < endHeight - startHeight; i += clsSizeY )
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
@@ -2374,8 +2374,8 @@ uint64_t AvgHighPassWithDownsamplingDiff1st_SIMD (const int width, const int hei
   pSrc -= iSrcStride;
   pSrcM1-=iSrcM1Stride;
   pSrcM1-=iSrcM1Stride;
-  uint32_t x;
-  uint32_t y;
+  int32_t x;
+  int32_t y;
   const __m128i scale1 = _mm_set_epi16 (1,1,1,1,1,1,1,1);
   for (y = 2; y < height-2; y += 2)
   {
@@ -2456,8 +2456,8 @@ uint64_t AvgHighPassWithDownsamplingDiff2nd_SIMD (const int width,const int heig
 {
   uint64_t taAct = 0;
   uint16_t act = 0;
-  uint32_t y;
-  uint32_t x;
+  int32_t y;
+  int32_t x;
   pSrc -= iSrcStride;
   pSrc -= iSrcStride;
   pSrcM1-=iSM1Stride;
diff --git a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp
@@ -79,8 +79,8 @@ void InterpolationFilter::initInterpolationFilterX86( /*int iBitDepthY, int iBit
     _initInterpolationFilterX86<AVX2>(/*iBitDepthY, iBitDepthC*/);
     break;
   case AVX:
-    _initInterpolationFilterX86<AVX>(/*iBitDepthY, iBitDepthC*/);
-    break;
+    //_initInterpolationFilterX86<AVX>(/*iBitDepthY, iBitDepthC*/);
+    //break;
   case SSE42:
   case SSE41:
     _initInterpolationFilterX86<SSE41>(/*iBitDepthY, iBitDepthC*/);
@@ -106,8 +106,8 @@ void PelBufferOps::initPelBufOpsX86()
       _initPelBufOpsX86<AVX2>();
       break;
     case AVX:
-      _initPelBufOpsX86<AVX>();
-      break;
+      //_initPelBufOpsX86<AVX>();
+      //break;
     case SSE42:
     case SSE41:
       _initPelBufOpsX86<SSE41>();
@@ -130,8 +130,8 @@ void LoopFilter::initLoopFilterX86()
     _initLoopFilterX86<AVX2>();
     break;
   case AVX:
-    _initLoopFilterX86<AVX>();
-    break;
+    //_initLoopFilterX86<AVX>();
+    //break;
   case SSE42:
   case SSE41:
     _initLoopFilterX86<SSE41>();
@@ -156,8 +156,8 @@ void RdCost::initRdCostX86()
       break;
 #endif
     case AVX:
-      _initRdCostX86<AVX>();
-      break;
+      //_initRdCostX86<AVX>();
+      //break;
     case SSE42:
     case SSE41:
       _initRdCostX86<SSE41>();
@@ -179,8 +179,8 @@ void AdaptiveLoopFilter::initAdaptiveLoopFilterX86()
     _initAdaptiveLoopFilterX86<AVX2>();
     break;
   case AVX:
-    _initAdaptiveLoopFilterX86<AVX>();
-    break;
+    //_initAdaptiveLoopFilterX86<AVX>();
+    //break;
   case SSE42:
   case SSE41:
     _initAdaptiveLoopFilterX86<SSE41>();
@@ -201,8 +201,8 @@ void SampleAdaptiveOffset::initSampleAdaptiveOffsetX86()
       _initSampleAdaptiveOffsetX86<AVX2>();
       break;
     case AVX:
-      _initSampleAdaptiveOffsetX86<AVX>();
-      break;
+      //_initSampleAdaptiveOffsetX86<AVX>();
+      //break;
     case SSE42:
     case SSE41:
       _initSampleAdaptiveOffsetX86<SSE41>();
@@ -224,8 +224,8 @@ void InterPredInterpolation::initInterPredictionX86()
       _initInterPredictionX86<AVX2>();
       break;
     case AVX:
-      _initInterPredictionX86<AVX>();
-      break;
+      //_initInterPredictionX86<AVX>();
+      //break;
     case SSE42:
     case SSE41:
       _initInterPredictionX86<SSE41>();
@@ -246,8 +246,8 @@ void AffineGradientSearch::initAffineGradientSearchX86()
     _initAffineGradientSearchX86<AVX2>();
     break;
   case AVX:
-    _initAffineGradientSearchX86<AVX>();
-    break;
+    //_initAffineGradientSearchX86<AVX>();
+    //break;
   case SSE42:
   case SSE41:
     _initAffineGradientSearchX86<SSE41>();
@@ -268,8 +268,8 @@ void IntraPrediction::initIntraPredictionX86()
       _initIntraPredictionX86<AVX2>();
       break;
     case AVX:
-      _initIntraPredictionX86<AVX>();
-      break;
+      //_initIntraPredictionX86<AVX>();
+      //break;
     case SSE42:
     case SSE41:
       _initIntraPredictionX86<SSE41>();
@@ -290,11 +290,11 @@ void MCTF::initMCTF_X86()
       _initMCTF_X86<AVX2 >();
       break;
     case AVX:
-      _initMCTF_X86<AVX  >();
-      break;
+      //_initMCTF_X86<AVX  >();
+      //break;
     case SSE42:
-      _initMCTF_X86<SSE42>();
-      break;
+      //_initMCTF_X86<SSE42>();
+      //break;
     case SSE41:
       _initMCTF_X86<SSE41>();
       break;
@@ -314,11 +314,11 @@ void TCoeffOps::initTCoeffOpsX86()
       _initTCoeffOpsX86<AVX2 >();
       break;
     case AVX:
-      _initTCoeffOpsX86<AVX  >();
-      break;
+      //_initTCoeffOpsX86<AVX  >();
+      //break;
     case SSE42:
-      _initTCoeffOpsX86<SSE42>();
-      break;
+      //_initTCoeffOpsX86<SSE42>();
+      //break;
     case SSE41:
       _initTCoeffOpsX86<SSE41>();
       break;
@@ -334,17 +334,17 @@ void TrQuant::initTrQuantX86()
   {
   case AVX512:
   case AVX2:
-  _initTrQuantX86<AVX2 >();
-  break;
+    _initTrQuantX86<AVX2 >();
+    break;
   case AVX:
-  _initTrQuantX86<AVX  >();
-  break;
+    //_initTrQuantX86<AVX  >();
+    //break;
   case SSE42:
-  _initTrQuantX86<SSE42>();
-  break;
+    //_initTrQuantX86<SSE42>();
+    //break;
   case SSE41:
-  _initTrQuantX86<SSE41>();
-  break;
+    _initTrQuantX86<SSE41>();
+    break;
   default:
   break;
   }
@@ -363,8 +363,8 @@ void Quant::initQuantX86()
       _initQuantX86<AVX2>();
       break;
     case AVX:
-      _initQuantX86<AVX>();
-      break;
+      //_initQuantX86<AVX>();
+      //break;
     case SSE42:
     case SSE41:
       _initQuantX86<SSE41>();
diff --git a/source/Lib/EncoderLib/BitAllocation.cpp b/source/Lib/EncoderLib/BitAllocation.cpp
@@ -299,7 +299,7 @@ static void clipQPValToEstimatedMinimStats (const uint8_t* minNoiseLevels, const
     return;
   }
 
-  i = std::max (0, apprI3Log2 (std::min (16.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // =6*log2
+  i = std::max (0, apprI3Log2 (std::min (1.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // = 6*log2
   if (QP < i)
   {
     QP = i;
@@ -806,7 +806,8 @@ int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const VVEncCfg*
 }
 
 int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA,
-                                            std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP)
+                                            std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP,
+                                            const bool isBIM)
 {
   if (slice == nullptr || !slice->pps->useDQP || ctuPumpRedQP.size() <= ctuRsAddr) return 0;
 
@@ -824,7 +825,7 @@ int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& o
   }
 
   const double sumAbsRatio = double (uiSadBestForQPA * 3 /*TODO: or 4? fine-tune!*/) / double (sumAbsZmOrig == 0 ? 1 : sumAbsZmOrig);
-  const int pumpingReducQP = (int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0);
+  const int pumpingReducQP = ((isBIM ? -1 : 0) + int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0);
 
   ctuPumpRedQP[ctuRsAddr] += pumpingReducQP;
 
diff --git a/source/Lib/EncoderLib/BitAllocation.h b/source/Lib/EncoderLib/BitAllocation.h
@@ -90,7 +90,8 @@ namespace vvenc {
                                  const Distortion uiSadBestForQPA,
                                  std::vector<int>& ctuPumpRedQP,
                                  const uint32_t ctuRsAddr,
-                                 const int baseQP );
+                                 const int baseQP,
+                                 const bool isBIM );
   }
 
 } // namespace vvenc
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -5726,15 +5726,6 @@ void EncAdaptiveLoopFilter::deriveCcAlfFilter( Picture& pic, CodingStructure& cs
     aps->ccAlfParam.newCcAlfFilter[1] = false;
   }
 
-  // Accumulate ALF statistic
-  const int filterIdx = 0;
-  const int numberOfComponents = getNumberValidComponents( m_chromaFormat );
-  for( int compIdx = 1; compIdx < numberOfComponents; compIdx++ )
-  {
-    const ComponentID compID = ComponentID( compIdx );
-    xGetFrameStatsCcalf( compID, filterIdx + 1, numCtus );
-  }
-
   const TempCtx ctxStartCcAlf( m_CtxCache, SubCtx( Ctx::CcAlfFilterControlFlag, m_CABACEstimator->getCtx() ) );
   const PelUnitBuf orgYuv = cs.picture->getOrigBuf();
   const PelUnitBuf recYuv = cs.getRecoBuf();
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
@@ -2007,7 +2007,8 @@ void EncCu::xCheckRDCostMerge( CodingStructure *&tempCS, CodingStructure *&bestC
     {
       const Picture*    pic = slice.pic;
       const uint32_t rsAddr = getCtuAddr (partitioner.currQgPos, *pic->cs->pcv);
-      const int pumpReducQP = BitAllocation::getCtuPumpingReducingQP (&slice, tempCS->getOrgBuf (COMP_Y), uiSadBestForQPA, *m_globalCtuQpVector, rsAddr, m_pcEncCfg->m_QP);
+      const int pumpReducQP = BitAllocation::getCtuPumpingReducingQP (&slice, tempCS->getOrgBuf (COMP_Y), uiSadBestForQPA, *m_globalCtuQpVector, rsAddr,
+                              m_pcEncCfg->m_QP, m_pcEncCfg->m_RCNumPasses != 2 && m_pcEncCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty());
 
       if (pumpReducQP != 0) // subtract QP offset, reduces Intra-period pumping or overcoding
       {
diff --git a/source/Lib/EncoderLib/RateCtrl.cpp b/source/Lib/EncoderLib/RateCtrl.cpp
@@ -367,14 +367,14 @@ int RateCtrl::getBaseQP()
         sumFrBits = uint64_t (0.5 + sumFrBits * sqrt (hpEner / (hpEnerPic * firstPassData.size())));
       }
     }
-    baseQP = int (24.5 - log (d) / log (2.0)); // QPstart, equivalent to round (24 + 2*log2 (resRatio))
+    baseQP = int (24.5 - log (std::max (1.0, d)) / log (2.0)); // QPstart, round(24 + 2*log2(resRatio))
     d = (double) m_pcEncCfg->m_RCTargetBitrate * (double) firstPassData.size() / (encRCSeq->frameRate * sumFrBits);
     d = firstPassBaseQP - (105.0 / 128.0) * sqrt ((double) std::max (1, firstPassBaseQP)) * log (d) / log (2.0);
     baseQP = int (0.5 + d + 0.5 * std::max (0.0, baseQP - d));
   }
   else if (m_pcEncCfg->m_LookAhead)
   {
-    baseQP = int (24.5 - log (d) / log (2.0)); // QPstart, equivalent to round (24 + 2*log2 (resRatio))
+    baseQP = int (24.5 - log (std::max (1.0, d)) / log (2.0)); // QPstart, round(24 + 2*log2(resRatio))
     d = MAX_QP_PERCEPT_QPA - 2.0 - 1.5 * firstQPOffset - 0.5 * log ((double) encRCSeq->intraPeriod / encRCSeq->gopSize) / log (2.0);
     baseQP = int (0.5 + d + 0.5 * std::max (0.0, baseQP - d));
   }
@@ -1192,7 +1192,7 @@ void RateCtrl::initRateControlPic( Picture& pic, Slice* slice, int& qp, double&
       {
         if ( it->poc == slice->poc && it->numBits > 0 )
         {
-          const double sqrOfResRatio = double( m_pcEncCfg->m_SourceWidth * m_pcEncCfg->m_SourceHeight ) / ( 3840.0 * 2160.0 );
+          const double sqrOfResRatio = std::min( 1.0, double( m_pcEncCfg->m_SourceWidth * m_pcEncCfg->m_SourceHeight ) / ( 3840.0 * 2160.0 ) );
           const int firstPassSliceQP = it->qp;
           const int budgetRelaxScale = ( encRCSeq->maxGopRate + 0.5 < 2.0 * (double)encRCSeq->targetRate * encRCSeq->gopSize / encRCSeq->frameRate ? 2 : 3 ); // quarters
           const bool isRateCapperMax = ( encRCSeq->maxGopRate + 0.5 >= 3.0 * (double)encRCSeq->targetRate * encRCSeq->gopSize / encRCSeq->frameRate );
@@ -1285,7 +1285,7 @@ void RateCtrl::initRateControlPic( Picture& pic, Slice* slice, int& qp, double&
           tmpVal = updateQPstartModelVal() + log (sqrOfResRatio) / log (2.0); // GOP's QPstart
           d /= (double)it->numBits;
           d = firstPassSliceQP - ( 105.0 / 128.0 ) * sqrt( (double)std::max( 1, firstPassSliceQP ) ) * log( d ) / log( 2.0 );
-          sliceQP = int( 0.5 + d + 0.5 * std::max( 0.0, tmpVal - d ) + encRCSeq->qpCorrection[ frameLevel ] );
+          sliceQP = int( 0.5 + d + ( it->isIntra && m_pcEncCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF ? 0.375 : 0.5 ) * std::max( 0.0, tmpVal - d ) + encRCSeq->qpCorrection[ frameLevel ] );
 
           encRcPic->clipTargetQP( getPicList(), ( m_pcEncCfg->m_LookAhead ? getBaseQP() : m_pcEncCfg->m_QP ) + ( it->isIntra ? m_pcEncCfg->m_intraQPOffset : 0 ), 5 - budgetRelaxScale,
                                   ( it->poc < encRCSeq->gopSize ? 0 : ( m_pcEncCfg->m_maxTLayer + 1 ) >> 1 ), sqrOfResRatio, sliceQP, &encRCSeq->lastAverageQP );
diff --git a/source/Lib/apputils/IStreamIO.h b/source/Lib/apputils/IStreamIO.h
diff --git a/source/Lib/apputils/VVEncAppCfg.h b/source/Lib/apputils/VVEncAppCfg.h
diff --git a/source/Lib/vvenc/CMakeLists.txt b/source/Lib/vvenc/CMakeLists.txt
diff --git a/source/Lib/vvenc/vvencCfg.cpp b/source/Lib/vvenc/vvencCfg.cpp

Original file line number	Diff line number	Diff line change
`@@ -299,7 +299,7 @@ static void clipQPValToEstimatedMinimStats (const uint8_t* minNoiseLevels, const`
`299`	`299`	`return;`
`300`	`300`	`}`
`301`	`301`
`302`		`- i = std::max (0, apprI3Log2 (std::min (16.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // =6*log2`
	`302`	`+ i = std::max (0, apprI3Log2 (std::min (1.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // = 6*log2`
`303`	`303`	`if (QP < i)`
`304`	`304`	`{`
`305`	`305`	`QP = i;`
`@@ -806,7 +806,8 @@ int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const VVEncCfg*`
`806`	`806`	`}`
`807`	`807`
`808`	`808`	`int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA,`
`809`		`- std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP)`
	`809`	`+ std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP,`
	`810`	`+ const bool isBIM)`
`810`	`811`	`{`
`811`	`812`	`if (slice == nullptr \|\| !slice->pps->useDQP \|\| ctuPumpRedQP.size() <= ctuRsAddr) return 0;`
`812`	`813`
`@@ -824,7 +825,7 @@ int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& o`
`824`	`825`	`}`
`825`	`826`
`826`	`827`	`const double sumAbsRatio = double (uiSadBestForQPA * 3 /*TODO: or 4? fine-tune!*/) / double (sumAbsZmOrig == 0 ? 1 : sumAbsZmOrig);`
`827`		`- const int pumpingReducQP = (int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0);`
	`828`	`+ const int pumpingReducQP = ((isBIM ? -1 : 0) + int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0);`
`828`	`829`
`829`	`830`	`ctuPumpRedQP[ctuRsAddr] += pumpingReducQP;`
`830`	`831`
Original file line number	Diff line number	Diff line change
`@@ -90,7 +90,8 @@ namespace vvenc {`
`90`	`90`	`const Distortion uiSadBestForQPA,`
`91`	`91`	`std::vector<int>& ctuPumpRedQP,`
`92`	`92`	`const uint32_t ctuRsAddr,`
`93`		`- const int baseQP );`
	`93`	`+ const int baseQP,`
	`94`	`+ const bool isBIM );`
`94`	`95`	`}`
`95`	`96`
`96`	`97`	`} // namespace vvenc`