Skip to content

Commit edb808c

Browse files
committed
GS: Early detection of shuffles in vertex kick to prevent autoflushing.
1 parent 511c2ea commit edb808c

File tree

2 files changed

+184
-9
lines changed

2 files changed

+184
-9
lines changed

pcsx2/GS/GSState.cpp

Lines changed: 183 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3673,6 +3673,187 @@ void GSState::CalculatePrimitiveCoversWithoutGaps()
36733673
m_primitive_covers_without_gaps = SpriteDrawWithoutGaps() ? (m_primitive_covers_without_gaps == GapsFound ? SpriteNoGaps : m_primitive_covers_without_gaps) : GapsFound;
36743674
}
36753675

3676+
// Heuristically decides, at vertex kick time, whether the sprites being queued look
// like a texture shuffle or a channel shuffle. IsAutoFlushDraw() uses a true result
// to skip autoflushing, since such draws clash with their own target by design.
//
// prim: the GS primitive type of the draw being queued; only GS_SPRITE is handled.
// Returns true when one of the shuffle heuristics below matches, false otherwise.
//
// All vertex coordinates handled here are in 1/16 pixel units (0x80 == 8 pixels,
// 0x100 == 16 pixels).
__forceinline bool GSState::EarlyDetectShuffle(u32 prim)
{
	// We only handle sprites here and need one sprite in the queue.
	// Texture mapping must be enabled for a shuffle.
	if (m_index.tail < 2 || prim != GS_SPRITE || !PRIM->TME)
		return false;

	const GSVertex* RESTRICT vertex = &m_vertex.buff[0];
	const u16* RESTRICT index = &m_index.buff[0];

	const int frame_bpp = GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp;
	const int tex_bpp = GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp;

	// Screen-space X/Y of a vertex relative to the context's XY offset.
	const auto RelX = [this](const auto& v) -> int {
		return static_cast<int>(v.XYZ.X) - static_cast<int>(m_context->XYOFFSET.OFX);
	};
	const auto RelY = [this](const auto& v) -> int {
		return static_cast<int>(v.XYZ.Y) - static_cast<int>(m_context->XYOFFSET.OFY);
	};

	// Texture-space U/V of a vertex. With FST the UV registers are used directly,
	// otherwise S/T are divided by the vertex's own Q (guarding against Q == 0)
	// and scaled by the texture size.
	const auto TexU = [this](const auto& v) -> int {
		if (PRIM->FST)
			return static_cast<int>(v.U);

		const float q = v.RGBAQ.Q == 0.0f ? FLT_MIN : v.RGBAQ.Q;
		return static_cast<int>((1 << m_context->TEX0.TW) * (v.ST.S / q) * 16.0f);
	};
	const auto TexV = [this](const auto& v) -> int {
		if (PRIM->FST)
			return static_cast<int>(v.V);

		const float q = v.RGBAQ.Q == 0.0f ? FLT_MIN : v.RGBAQ.Q;
		return static_cast<int>((1 << m_context->TEX0.TH) * (v.ST.T / q) * 16.0f);
	};

	// True when the first queued sprite is exactly an 8 pixel square in both
	// screen space and texture space.
	const auto FirstSpriteIs8x8 = [&](const GSVertex& a, const GSVertex& b) -> bool {
		return (std::abs(RelX(b) - RelX(a)) == 0x80) && (std::abs(RelY(b) - RelY(a)) == 0x80) &&
		       (std::abs(TexU(b) - TexU(a)) == 0x80) && (std::abs(TexV(b) - TexV(a)) == 0x80);
	};

	const GSVertex& first0 = vertex[index[0]];
	const GSVertex& first1 = vertex[index[1]];

	if (frame_bpp == 16 && tex_bpp == 16)
	{
		// Handle shuffles where the source and destination are both 16 bits.

		const int x0 = RelX(first0);
		// Second vertex of the first sprite. This previously re-read index[0],
		// which made the width check below always pass.
		const int x1 = RelX(first1);
		const int xn = RelX(m_v);

		const int u0 = TexU(first0);
		const int un = TexU(m_v);

		// Check that the X-U offsets are the same for the first and current vertex and
		// that the width of the first sprite is at most 16 pixels.
		return std::abs(u0 - x0) == std::abs(un - xn) && std::abs(x1 - x0) <= 0x100;
	}

	if (frame_bpp == 16 && tex_bpp == 32)
	{
		// Handle shuffles where the source is 32/24 bits and destination is 16 bits.
		// Example: The Godfather.

		// These shuffles usually mask R and G (lower 10 bits in 16 bit format) so that they
		// write only to B and A (top 6 bits in 16 bit format).
		if (GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK) != 0xC)
			return false;

		// Check that the source and destination sprite are exactly 8 pixel squares.
		// We do not use the current vertex in this check because it doesn't have a
		// clean correspondence with the first sprite for 32->16 bit shuffles
		// (the coordinates manually swizzle between 32 and 16 bits).
		const bool const_spacing = FirstSpriteIs8x8(first0, first1);

		// The purpose of these shuffles is to write the alpha channel,
		// so the coordinates should write to upper 16 bits regions only.
		const bool write_ba = (std::min(RelX(first0), RelX(first1)) & 0x80) != 0;

		return const_spacing && write_ba;
	}

	if (frame_bpp == 32 && tex_bpp == 16)
	{
		// Handle shuffles where the source is 16 bits and destination is 32/24 bits.
		// Example: DT Racer.

		// These shuffles usually mask RGB (lower 24 bits in 32 bit format) so that they
		// write only to A.
		if (GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK) != 8)
			return false;

		// Check that the source and destination sprite are exactly 8 pixel squares.
		// We do not use the current vertex in this check because it doesn't have a
		// clean correspondence with the first sprite for 16->32 bit shuffles
		// (the coordinates manually swizzle between 16 and 32 bits).
		const bool const_spacing = FirstSpriteIs8x8(first0, first1);

		// The purpose of these shuffles is to read the green channel,
		// so the coordinates should read the lower 16 bits only.
		const bool read_rg = (std::min(TexU(first0), TexU(first1)) & 0x80) == 0;

		return const_spacing && read_rg;
	}

	if (m_context->TEX0.PSM == PSMT8)
	{
		// Handle channel shuffles.

		// Heuristics to detect channel shuffle based on first sprite and clamp mode.
		const auto CheckWidthOrClampMode = [this]() -> bool {
			const GSVertex* v = &m_vertex.buff[0];

			const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4;
			const int draw_height = std::abs(v[1].XYZ.Y - v[0].XYZ.Y) >> 4;

			// Checks if using region clamp or region repeat for U or V.
			// Might be used when the sprites are 16 pixels wide.
			const bool clamp_region = ((m_context->CLAMP.WMS | m_context->CLAMP.WMT) & 0x2) != 0;

			// Channel shuffles usually draw 8 x 2 sprites.
			const bool draw_match = (draw_height == 2) || (draw_width == 8);

			return draw_match || clamp_region;
		};

		const bool single_page_x = temp_draw_rect.width() <= 64;
		const bool single_page_y = temp_draw_rect.height() <= 64;
		if (single_page_x && single_page_y)
		{
			return CheckWidthOrClampMode();
		}
		else if (!single_page_x)
		{
			// Not a single page in width.
			return false;
		}

		// WRC 4 does channel shuffles in vertical strips. So check for page alignment.
		// Texture TBW should also be twice the framebuffer FBW, because the page is twice as wide.
		if (m_context->TEX0.TBW == (m_context->FRAME.FBW * 2) &&
			GSLocalMemory::IsPageAligned(m_context->FRAME.PSM, temp_draw_rect))
		{
			return CheckWidthOrClampMode();
		}
	}

	return false;
}
3856+
36763857
__forceinline bool GSState::IsAutoFlushDraw(u32 prim, int& tex_layer)
36773858
{
36783859
if (!PRIM->TME || (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::SpritesOnly && prim != GS_SPRITE))
@@ -3683,15 +3864,8 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim, int& tex_layer)
36833864
return false;
36843865

36853866
// Try to detect shuffles, because these will not autoflush, they by design clash.
3686-
if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16)
3687-
{
3688-
// Pretty confident here...
3689-
GSVertex* buffer = &m_vertex.buff[0];
3690-
const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) <= 256; // Lequal to 16 pixels apart.
3691-
3692-
if (const_spacing)
3693-
return false;
3694-
}
3867+
if (EarlyDetectShuffle(prim))
3868+
return false;
36953869

36963870
// Check if one of the texture being used is the same as the FRAME or ZBUF.
36973871
// In the case of possible mip-mapping, we need to check all possible layers.

pcsx2/GS/GSState.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ class GSState : public GSAlignedClass<32>
172172
void GrowVertexBuffer();
173173
bool IsAutoFlushDraw(u32 prim, int& tex_layer);
174174
template<u32 prim> void HandleAutoFlush();
175+
// Returns true when the queued sprite vertices and current context match one of the
// shuffle heuristics; IsAutoFlushDraw() then reports that no autoflush is needed.
bool EarlyDetectShuffle(u32 prim);
175176
void CheckCLUTValidity(u32 prim);
176177

177178
template <u32 prim, bool auto_flush> void VertexKick(u32 skip);

0 commit comments

Comments
 (0)