@@ -3673,6 +3673,187 @@ void GSState::CalculatePrimitiveCoversWithoutGaps()
36733673 m_primitive_covers_without_gaps = SpriteDrawWithoutGaps () ? (m_primitive_covers_without_gaps == GapsFound ? SpriteNoGaps : m_primitive_covers_without_gaps) : GapsFound;
36743674}
36753675
3676+ __forceinline bool GSState::EarlyDetectShuffle (u32 prim)
3677+ {
3678+ // We only handle sprites here and need one sprite in the queue.
3679+ // Texture mapping must be enabled for a shuffle.
3680+ if (m_index.tail < 2 || prim != GS_SPRITE || !PRIM->TME )
3681+ return false ;
3682+
3683+ const GSVertex* RESTRICT vertex = &m_vertex.buff [0 ];
3684+ const u16 * RESTRICT index = &m_index.buff [0 ];
3685+ const GSVector4i& o = m_xyof;
3686+
3687+ if (GSLocalMemory::m_psm[m_context->FRAME .PSM ].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0 .PSM ].bpp == 16 )
3688+ {
3689+ // Handle shuffles where the source and destination are both 16 bits.
3690+
3691+ const int x0 = static_cast <int >(vertex[index[0 ]].XYZ .X ) - static_cast <int >(m_context->XYOFFSET .OFX );
3692+ const int x1 = static_cast <int >(vertex[index[0 ]].XYZ .X ) - static_cast <int >(m_context->XYOFFSET .OFX );
3693+ const int xn = static_cast <int >(m_v.XYZ .X ) - static_cast <int >(static_cast <int >(m_context->XYOFFSET .OFX ));
3694+
3695+ int u0, un;
3696+ if (PRIM->FST )
3697+ {
3698+ u0 = static_cast <int >(vertex[index[0 ]].U );
3699+ un = static_cast <int >(m_v.U );
3700+ }
3701+ else
3702+ {
3703+ const float q0 = vertex[index[0 ]].RGBAQ .Q == 0 .0f ? FLT_MIN : vertex[index[0 ]].RGBAQ .Q ;
3704+ u0 = static_cast <int >((1 << m_context->TEX0 .TW ) * (vertex[index[0 ]].ST .S / q0) * 16 .0f );
3705+ const float qn = m_v.RGBAQ .Q == 0 .0f ? FLT_MIN : m_v.RGBAQ .Q ;
3706+ un = static_cast <int >((1 << m_context->TEX0 .TW ) * (m_v.ST .S / qn) * 16 .0f );
3707+ }
3708+
3709+ // Check that the X-U offsets are the same for the first and current vertex and
3710+ // that the width of the first sprite is at most 16 pixels.
3711+ return std::abs (u0 - x0) == std::abs (un - xn) && std::abs (x1 - x0) <= 0x100 ;
3712+ }
3713+
3714+ if (GSLocalMemory::m_psm[m_context->FRAME .PSM ].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0 .PSM ].bpp == 32 )
3715+ {
3716+ // Handle shuffles where the source is 32/24 bits and destination is 16 bits.
3717+ // Example: The Godfather.
3718+
3719+ // These shuffles usually mask R and G (lower 10 bits in 16 bit format) so that they
3720+ // write only to B and A (top 6 bits in 16 bit format).
3721+ if (GSUtil::GetChannelMask (m_context->FRAME .PSM , m_context->FRAME .FBMSK ) != 0xC )
3722+ return false ;
3723+
3724+ const int x0 = static_cast <int >(vertex[index[0 ]].XYZ .X ) - static_cast <int >(m_context->XYOFFSET .OFX );
3725+ const int y0 = static_cast <int >(vertex[index[0 ]].XYZ .Y ) - static_cast <int >(m_context->XYOFFSET .OFY );
3726+ const int x1 = static_cast <int >(vertex[index[1 ]].XYZ .X ) - static_cast <int >(m_context->XYOFFSET .OFX );
3727+ const int y1 = static_cast <int >(vertex[index[1 ]].XYZ .Y ) - static_cast <int >(m_context->XYOFFSET .OFY );
3728+
3729+ int u0, v0, u1, v1;
3730+
3731+ if (PRIM->FST )
3732+ {
3733+ u0 = static_cast <int >(vertex[index[0 ]].U );
3734+ v0 = static_cast <int >(vertex[index[0 ]].V );
3735+ u1 = static_cast <int >(vertex[index[1 ]].U );
3736+ v1 = static_cast <int >(vertex[index[1 ]].V );
3737+ }
3738+ else
3739+ {
3740+ const float q0 = vertex[index[0 ]].RGBAQ .Q == 0 .0f ? FLT_MIN : vertex[index[0 ]].RGBAQ .Q ;
3741+ u0 = static_cast <int >((1 << m_context->TEX0 .TW ) * (vertex[index[0 ]].ST .S / q0) * 16 .0f );
3742+ v0 = static_cast <int >((1 << m_context->TEX0 .TH ) * (vertex[index[0 ]].ST .T / q0) * 16 .0f );
3743+ const float q1 = vertex[index[1 ]].RGBAQ .Q == 0 .0f ? FLT_MIN : vertex[index[1 ]].RGBAQ .Q ;
3744+ u1 = static_cast <int >((1 << m_context->TEX0 .TW ) * (vertex[index[1 ]].ST .S / q0) * 16 .0f );
3745+ v1 = static_cast <int >((1 << m_context->TEX0 .TH ) * (vertex[index[1 ]].ST .T / q0) * 16 .0f );
3746+ }
3747+
3748+ // Check that the source and destination sprite are exactly 8 pixel squares.
3749+ // We do not use the current vertex in this check because it doesn't have a
3750+ // clean correspondence with the first shuffle for 32->16 bit shuffles
3751+ // (the coordinates manually swizzle between 32 and 16 bits).
3752+ const bool const_spacing =
3753+ (std::abs (x1 - x0) == 0x80 ) && (std::abs (y1 - y0) == 0x80 ) &&
3754+ (std::abs (u1 - u0) == 0x80 ) && (std::abs (v1 - v0) == 0x80 );
3755+
3756+ // The purpose of these shuffles is to write the alpha channel,
3757+ // so the coordinates should write to upper 16 bits regions only.
3758+ const bool write_ba = (std::min (x0, x1) & 0x80 ) != 0 ;
3759+
3760+ return const_spacing && write_ba;
3761+ }
3762+
3763+ if (GSLocalMemory::m_psm[m_context->FRAME .PSM ].bpp == 32 && GSLocalMemory::m_psm[m_context->TEX0 .PSM ].bpp == 16 )
3764+ {
3765+ // Handle shuffles where the source is 16 bits and destination is 32/16 bits.
3766+ // Example: DT Racer.
3767+
3768+ // These shuffles usually mask RGB (lower 24 bits in 32 bit format) so that they
3769+ // write only to A.
3770+ if (GSUtil::GetChannelMask (m_context->FRAME .PSM , m_context->FRAME .FBMSK ) != 8 )
3771+ return false ;
3772+
3773+ const int x0 = static_cast <int >(vertex[index[0 ]].XYZ .X ) - static_cast <int >(m_context->XYOFFSET .OFX );
3774+ const int y0 = static_cast <int >(vertex[index[0 ]].XYZ .Y ) - static_cast <int >(m_context->XYOFFSET .OFY );
3775+ const int x1 = static_cast <int >(vertex[index[1 ]].XYZ .X ) - static_cast <int >(m_context->XYOFFSET .OFX );
3776+ const int y1 = static_cast <int >(vertex[index[1 ]].XYZ .Y ) - static_cast <int >(m_context->XYOFFSET .OFY );
3777+
3778+ int u0, v0, u1, v1;
3779+
3780+ if (PRIM->FST )
3781+ {
3782+ u0 = static_cast <int >(vertex[index[0 ]].U );
3783+ v0 = static_cast <int >(vertex[index[0 ]].V );
3784+ u1 = static_cast <int >(vertex[index[1 ]].U );
3785+ v1 = static_cast <int >(vertex[index[1 ]].V );
3786+ }
3787+ else
3788+ {
3789+ const float q0 = vertex[index[0 ]].RGBAQ .Q == 0 .0f ? FLT_MIN : vertex[index[0 ]].RGBAQ .Q ;
3790+ u0 = static_cast <int >((1 << m_context->TEX0 .TW ) * (vertex[index[0 ]].ST .S / q0) * 16 .0f );
3791+ v0 = static_cast <int >((1 << m_context->TEX0 .TH ) * (vertex[index[0 ]].ST .T / q0) * 16 .0f );
3792+ const float q1 = vertex[index[1 ]].RGBAQ .Q == 0 .0f ? FLT_MIN : vertex[index[1 ]].RGBAQ .Q ;
3793+ u1 = static_cast <int >((1 << m_context->TEX0 .TW ) * (vertex[index[1 ]].ST .S / q0) * 16 .0f );
3794+ v1 = static_cast <int >((1 << m_context->TEX0 .TH ) * (vertex[index[1 ]].ST .T / q0) * 16 .0f );
3795+ }
3796+
3797+ // Check that the source and destination sprite are exactly 8 pixel squares.
3798+ // We do not use the current vertex in this check because it doesn't have a
3799+ // clean correspondence with the first shuffle for 32->16 bit shuffles
3800+ // (the coordinates manually swizzle between 32 and 16 bits).
3801+ const bool const_spacing =
3802+ (std::abs (x1 - x0) == 0x80 ) && (std::abs (y1 - y0) == 0x80 ) &&
3803+ (std::abs (u1 - u0) == 0x80 ) && (std::abs (v1 - v0) == 0x80 );
3804+
3805+ // The purpose of these shuffles is to read the green channel,
3806+ // so the coordinates should read the lower 16 bits only.
3807+ const bool read_rg = (std::min (u0, u1) & 0x80 ) == 0 ;
3808+
3809+ return const_spacing && read_rg;
3810+ }
3811+
3812+ if (m_context->TEX0 .PSM == PSMT8)
3813+ {
3814+ // Handle channel shuffles.
3815+
3816+ // Heuristics to detect channel shuffle based on first sprite and clamp mode.
3817+ const auto CheckWidthOrClampMode = [this ]() -> bool {
3818+ const GSVertex* v = &m_vertex.buff [0 ];
3819+
3820+ const int draw_width = std::abs (v[1 ].XYZ .X - v[0 ].XYZ .X ) >> 4 ;
3821+ const int draw_height = std::abs (v[1 ].XYZ .Y - v[0 ].XYZ .Y ) >> 4 ;
3822+
3823+ // Checks if using region clamp or region repeat for U or V.
3824+ // Might used used when the sprites are 16 pixels wide.
3825+ const bool clamp_region = ((m_context->CLAMP .WMS | m_context->CLAMP .WMT ) & 0x2 ) != 0 ;
3826+
3827+ // Channel shuffles usually draw 8 x 2 sprites.
3828+ const bool draw_match = (draw_height == 2 ) || (draw_width == 8 );
3829+
3830+ return draw_match || clamp_region;
3831+ };
3832+
3833+ const bool single_page_x = temp_draw_rect.width () <= 64 ;
3834+ const bool single_page_y = temp_draw_rect.height () <= 64 ;
3835+ if (single_page_x && single_page_y)
3836+ {
3837+ return CheckWidthOrClampMode ();
3838+ }
3839+ else if (!single_page_x)
3840+ {
3841+ // Not a single page in width.
3842+ return false ;
3843+ }
3844+
3845+ // WRC 4 does channel shuffles in vertical strips. So check for page alignment.
3846+ // Texture TBW should also be twice the framebuffer FBW, because the page is twice as wide.
3847+ if (m_context->TEX0 .TBW == (m_context->FRAME .FBW * 2 ) &&
3848+ GSLocalMemory::IsPageAligned (m_context->FRAME .PSM , temp_draw_rect))
3849+ {
3850+ return CheckWidthOrClampMode ();
3851+ }
3852+ }
3853+
3854+ return false ;
3855+ }
3856+
36763857__forceinline bool GSState::IsAutoFlushDraw (u32 prim, int & tex_layer)
36773858{
36783859 if (!PRIM->TME || (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::SpritesOnly && prim != GS_SPRITE))
@@ -3683,15 +3864,8 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim, int& tex_layer)
36833864 return false ;
36843865
36853866 // Try to detect shuffles, because these will not autoflush, they by design clash.
3686- if (GSLocalMemory::m_psm[m_context->FRAME .PSM ].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0 .PSM ].bpp == 16 )
3687- {
3688- // Pretty confident here...
3689- GSVertex* buffer = &m_vertex.buff [0 ];
3690- const bool const_spacing = std::abs (buffer[m_index.buff [0 ]].U - buffer[m_index.buff [0 ]].XYZ .X ) == std::abs (m_v.U - m_v.XYZ .X ) && std::abs (buffer[m_index.buff [1 ]].XYZ .X - buffer[m_index.buff [0 ]].XYZ .X ) <= 256 ; // Lequal to 16 pixels apart.
3691-
3692- if (const_spacing)
3693- return false ;
3694- }
3867+ if (EarlyDetectShuffle (prim))
3868+ return false ;
36953869
36963870 // Check if one of the texture being used is the same as the FRAME or ZBUF.
36973871 // In the case of possible mip-mapping, we need to check all possible layers.
0 commit comments