Skip to content

Commit 11ced6d

Browse files
committed
GS/HW: Add setting to do autoflush draws with a copy loop.
Only applies to certain autoflush draws.
1 parent 4c42672 commit 11ced6d

File tree

14 files changed

+575
-43
lines changed

14 files changed

+575
-43
lines changed

pcsx2-qt/Settings/GraphicsHardwareFixesSettingsTab.ui

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,11 @@
267267
<string>Enabled (All Primitives)</string>
268268
</property>
269269
</item>
270+
<item>
271+
<property name="text">
272+
<string>Batch Enabled (All Primitives)</string>
273+
</property>
274+
</item>
270275
</widget>
271276
</item>
272277
<item row="7" column="0" colspan="2">

pcsx2/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ enum class GSHWAutoFlushLevel : u8
417417
Disabled,
418418
SpritesOnly,
419419
Enabled,
420+
BatchEnabled,
420421
};
421422

422423
enum class GSGPUTargetCLUTMode : u8

pcsx2/GS/GSState.cpp

Lines changed: 174 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,6 +1882,7 @@ void GSState::Flush(GSFlushReason reason)
18821882

18831883
m_dirty_gs_regs = 0;
18841884
temp_draw_rect = GSVector4i::zero();
1885+
ResetAutoFlushList();
18851886
}
18861887

18871888
m_state_flush_reason = GSFlushReason::UNKNOWN;
@@ -2070,6 +2071,11 @@ void GSState::FlushPrim()
20702071
pxAssert((int)unused < GSUtil::GetVertexCount(PRIM->PRIM));
20712072
}
20722073

2074+
if (HasAutoFlushList())
2075+
{
2076+
UpdateAutoFlushList();
2077+
}
2078+
20732079
// If the PSM format of Z is invalid, but it is masked (no write) and ZTST is set to ALWAYS pass (no test, just allow)
20742080
// we can ignore the Z format, since it won't be used in the draw (Star Ocean 3 transitions)
20752081
#ifdef PCSX2_DEVBUILD
@@ -4223,6 +4229,126 @@ GSState::PRIM_OVERLAP GSState::PrimitiveOverlap(bool save_drawlist)
42234229
return GetPrimitiveOverlapDrawlist(save_drawlist);
42244230
}
42254231

4232+
template<u32 primclass, bool fst>
4233+
void GSState::ProcessAutoflushDrawlistImpl(float pos_scale, float tex_scale)
4234+
{
4235+
if (!m_drawlist.empty())
4236+
{
4237+
// Chop the barrier drawlist to fit within each autoflush draw.
4238+
std::vector<size_t> drawlist;
4239+
drawlist.reserve(m_drawlist.capacity());
4240+
for (size_t i = 0, j = 0; i < m_autoflush_list.size(); i++)
4241+
{
4242+
int prims = static_cast<int>(m_autoflush_list[i]);
4243+
while (prims > 0)
4244+
{
4245+
if (m_drawlist[j] > static_cast<size_t>(prims))
4246+
{
4247+
drawlist.push_back(prims);
4248+
m_drawlist[j] -= prims;
4249+
prims = 0;
4250+
}
4251+
else
4252+
{
4253+
drawlist.push_back(m_drawlist[j]);
4254+
prims -= m_drawlist[j];
4255+
m_drawlist[j] = 0;
4256+
j++;
4257+
}
4258+
}
4259+
}
4260+
m_drawlist = std::move(drawlist);
4261+
}
4262+
else
4263+
{
4264+
// If we don't need barrier, simply copy the autoflush list as the drawlist
4265+
// since it makes handling the cases with/without barriers simpler.
4266+
const size_t n_elems = m_autoflush_list.size();
4267+
m_drawlist.resize(n_elems);
4268+
std::memcpy(m_drawlist.data(), m_autoflush_list.data(), sizeof(m_autoflush_list[0]) * n_elems);
4269+
}
4270+
4271+
constexpr int n = GSUtil::GetClassVertexCount(primclass);
4272+
4273+
const GSVertex* RESTRICT verts = m_vertex.buff;
4274+
const u16* RESTRICT index = m_index.buff;
4275+
4276+
const auto ProcessBBox = [](GSVector4 bbox, float scale) {
4277+
bbox += GSVector4(-1.0f, -1.0f, 1.0f, 1.0f); // Expand 1 native pixel.
4278+
bbox *= scale; // Upscaling
4279+
bbox = bbox.floor().xyzw(bbox.ceil()); // Rounding.
4280+
return GSVector4i(bbox);
4281+
};
4282+
4283+
// Compute the texture bboxes.
4284+
for (size_t i = 0, idx = 0; i < m_autoflush_list.size(); i++)
4285+
{
4286+
GSVector4 bbox(FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX);
4287+
4288+
const size_t n_prims = m_autoflush_list[i];
4289+
for (size_t j = 0; j < n_prims; j++, idx += n)
4290+
{
4291+
for (size_t k = 0; k < n; k++)
4292+
{
4293+
const GSVertex& v = verts[index[idx + k]];
4294+
const float q = (primclass == GS_SPRITE_CLASS) ? verts[index[idx + 1]].RGBAQ.Q : v.RGBAQ.Q;
4295+
GSVector4 tex = GetTexCoordsImpl<fst>(v, q);
4296+
bbox = bbox.min(tex).xyzw(bbox.max(tex));
4297+
}
4298+
}
4299+
4300+
m_autoflush_bbox.push_back(ProcessBBox(bbox, tex_scale));
4301+
}
4302+
4303+
// Recompute the position bboxes if needed.
4304+
if (m_drawlist_bbox.size() > 0)
4305+
{
4306+
m_drawlist_bbox.clear();
4307+
4308+
for (size_t i = 0, idx = 0; i < m_drawlist.size(); i++)
4309+
{
4310+
GSVector4i bbox(INT_MAX, INT_MAX, INT_MIN, INT_MIN);
4311+
4312+
const size_t n_prims = m_drawlist[i];
4313+
for (size_t j = 0; j < n_prims; j++, idx += n)
4314+
{
4315+
for (size_t k = 0; k < n; k++)
4316+
{
4317+
bbox = bbox.runion(GetVertexXY(verts[index[idx + k]]));
4318+
}
4319+
}
4320+
4321+
const GSVector4i xyof = m_context->scissor.xyof.xyxy();
4322+
GSVector4 bbox_f = GSVector4(bbox - xyof) / 16.0f;
4323+
m_drawlist_bbox.push_back(ProcessBBox(bbox_f, pos_scale));
4324+
}
4325+
}
4326+
}
4327+
4328+
void GSState::ProcessAutoflushDrawlist(float pos_scale, float tex_scale)
4329+
{
4330+
pxAssertRel(PRIM->TME, "Autoflush drawlist only valid with texture mapping.");
4331+
4332+
switch (m_vt.m_primclass)
4333+
{
4334+
case GS_SPRITE_CLASS:
4335+
if (PRIM->FST)
4336+
ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, true>(pos_scale, tex_scale);
4337+
else
4338+
ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, false>(pos_scale, tex_scale);
4339+
break;
4340+
case GS_TRIANGLE_CLASS:
4341+
if (PRIM->FST)
4342+
ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, true>(pos_scale, tex_scale);
4343+
else
4344+
ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, false>(pos_scale, tex_scale);
4345+
break;
4346+
default:
4347+
pxFail("Autoflush drawlist only for triangles/sprites.");
4348+
break;
4349+
}
4350+
}
4351+
42264352
bool GSState::SpriteDrawWithoutGaps()
42274353
{
42284354
// Check that the height matches. Xenosaga 3 draws a letterbox around
@@ -4790,6 +4916,34 @@ void GSState::GetQuadRasterizedPoints(GSVector4& xy, bool keep_order)
47904916
GetQuadRasterizedPoints(xy, tex_ignore, keep_order);
47914917
}
47924918

4919+
/// Returns true when an autoflush can be recorded in the autoflush list instead of
/// being flushed right away.
__forceinline bool GSState::CanUseAutoFlushList() const
{
	// Must be a recursive color draw: the texture base is the frame buffer block.
	if (m_context->TEX0.TBP0 != m_context->FRAME.Block())
		return false;

	// Source and RT must be basically the same format; masking bit 0 ignores
	// the 24/32 bit difference.
	if ((m_context->TEX0.PSM & ~1) != (m_context->FRAME.PSM & ~1))
		return false;

	// Checked last, exactly as before, and only once the register tests have passed.
	return GSIsHardwareRenderer();
}
4927+
4928+
/// Discards all batched autoflush state: the start-of-batch index goes back to zero
/// and both per-batch lists are emptied.
__forceinline void GSState::ResetAutoFlushList()
{
	m_autoflush_tail = 0;
	m_autoflush_bbox.clear();
	m_autoflush_list.clear();
}
4934+
4935+
/// Closes the current autoflush batch: records how many primitives were queued since the
/// previous batch boundary and moves the boundary to the current index tail.
/// Does nothing when no indices have been queued since the last boundary.
__forceinline void GSState::UpdateAutoFlushList()
{
	const u32 queued_indices = NumQueuedIndices();
	if (queued_indices == 0)
		return;

	// Convert the queued index count into a primitive count for this batch.
	const int n = GSUtil::GetVertexCount(PRIM->PRIM);
	m_autoflush_list.push_back(queued_indices / n);

	// The next batch starts where this one ended.
	m_autoflush_tail = m_index.tail;

	// Reset TEXFLUSH since this is equivalent to starting a new draw.
	m_texflush_flag = false;

	// Reset draw rect since it's used for autoflush overlap.
	temp_draw_rect = GSVector4i::zero();
}
4946+
47934947
__forceinline bool GSState::IsAutoFlushDraw(u32 prim, int& tex_layer)
47944948
{
47954949
if (!PRIM->TME || (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::SpritesOnly && prim != GS_SPRITE))
@@ -4931,9 +5085,20 @@ template<u32 prim>
49315085
__forceinline void GSState::HandleAutoFlush()
49325086
{
49335087
// Kind of a cheat, making the assumption that 2 consecutive fan/strip triangles won't overlap each other (*should* be safe)
4934-
if ((m_index.tail & 1) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
5088+
if ((NumQueuedIndices() & 1) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
49355089
return;
49365090

5091+
const auto DoFlush = [&]() {
5092+
if (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::BatchEnabled && CanUseAutoFlushList())
5093+
{
5094+
UpdateAutoFlushList();
5095+
}
5096+
else
5097+
{
5098+
Flush(GSFlushReason::AUTOFLUSH);
5099+
}
5100+
};
5101+
49375102
// To briefly explain what's going on here, what we are checking for is draws over a texture when the source and destination are themselves.
49385103
// Because one page of the texture gets buffered in the Texture Cache (the PS2's one) if any of those pixels are overwritten, you still read the old data.
49395104
// So we need to calculate if a page boundary is being crossed for the format it is in and if the same part of the texture being written and read inside the draw.
@@ -5133,7 +5298,7 @@ __forceinline void GSState::HandleAutoFlush()
51335298
return;
51345299
else if (m_texflush_flag)
51355300
{
5136-
Flush(GSFlushReason::AUTOFLUSH);
5301+
DoFlush();
51375302
return;
51385303
}
51395304

@@ -5153,8 +5318,9 @@ __forceinline void GSState::HandleAutoFlush()
51535318
const GSVector4i scissor = m_context->scissor.in;
51545319
GSVector4i old_draw_rect = GSVector4i::zero();
51555320
int current_draw_end = m_index.tail;
5321+
const int current_draw_start = static_cast<int>(m_autoflush_tail);
51565322

5157-
while (current_draw_end >= n)
5323+
while (current_draw_end >= current_draw_start + n)
51585324
{
51595325
for (int i = current_draw_end - 1; i >= current_draw_end - n; i--)
51605326
{
@@ -5204,7 +5370,7 @@ __forceinline void GSState::HandleAutoFlush()
52045370
old_draw_rect = tex_rect.rintersect(old_draw_rect);
52055371
if (!old_draw_rect.rintersect(scissor).rempty())
52065372
{
5207-
Flush(GSFlushReason::AUTOFLUSH);
5373+
DoFlush();
52085374
return;
52095375
}
52105376

@@ -5231,10 +5397,10 @@ __forceinline void GSState::HandleAutoFlush()
52315397
area_out = GSVector4i(area_out.x / frame_psm.pgs.x, area_out.y / frame_psm.pgs.y, area_out.z / frame_psm.pgs.x, area_out.w / frame_psm.pgs.y);
52325398

52335399
if (!area_out.rintersect(tex_rect).rempty())
5234-
Flush(GSFlushReason::AUTOFLUSH);
5400+
DoFlush();
52355401
}
52365402
else // Formats are too different so just flush it.
5237-
Flush(GSFlushReason::AUTOFLUSH);
5403+
DoFlush();
52385404
}
52395405
}
52405406
}
@@ -5254,7 +5420,7 @@ __forceinline void GSState::VertexKick(u32 skip)
52545420
return;
52555421
}
52565422

5257-
if (auto_flush && skip == 0 && m_index.tail > 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n)
5423+
if (auto_flush && skip == 0 && NumQueuedIndices() > 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n)
52585424
{
52595425
HandleAutoFlush<prim>();
52605426
}
@@ -5497,7 +5663,7 @@ __forceinline void GSState::VertexKick(u32 skip)
54975663
// Update rectangle for the current draw. We can use the re-integer coordinates from min/max here.
54985664
const GSVector4i draw_min = pmin.zwzw();
54995665
const GSVector4i draw_max = pmax;
5500-
if (m_vertex.tail != n)
5666+
if (NumQueuedIndices() > n)
55015667
temp_draw_rect = temp_draw_rect.min_i32(draw_min).blend32<12>(temp_draw_rect.max_i32(draw_max));
55025668
else
55035669
temp_draw_rect = draw_min.blend32<12>(draw_max);

pcsx2/GS/GSState.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,13 @@ class GSState : public GSAlignedClass<32>
150150
u32 tail;
151151
} m_index = {};
152152

153+
u32 m_autoflush_tail = 0;
154+
155+
/// Number of indices queued since the start of the current autoflush batch
/// (the whole index buffer when m_autoflush_tail is zero).
__forceinline u32 NumQueuedIndices() const
{
	const u32 batch_start = m_autoflush_tail;
	return m_index.tail - batch_start;
}
159+
153160
struct
154161
{
155162
GSVertex* buff;
@@ -171,6 +178,10 @@ class GSState : public GSAlignedClass<32>
171178
void UpdateVertexKick();
172179

173180
void GrowVertexBuffer();
181+
bool CanUseAutoFlushList() const;
182+
/// True once at least one autoflush batch boundary has been recorded (m_autoflush_tail is non-zero).
__forceinline bool HasAutoFlushList() const
{
	return m_autoflush_tail != 0;
}
183+
void ResetAutoFlushList();
184+
void UpdateAutoFlushList();
174185
bool IsAutoFlushDraw(u32 prim, int& tex_layer);
175186
template<u32 prim> void HandleAutoFlush();
176187
bool EarlyDetectShuffle(u32 prim);
@@ -376,6 +387,9 @@ class GSState : public GSAlignedClass<32>
376387
std::vector<size_t> m_drawlist;
377388
std::vector<GSVector4i> m_drawlist_bbox;
378389

390+
std::vector<size_t> m_autoflush_list;
391+
std::vector<GSVector4i> m_autoflush_bbox;
392+
379393
struct GSPCRTCRegs
380394
{
381395
struct PCRTCDisplay
@@ -510,6 +524,9 @@ class GSState : public GSAlignedClass<32>
510524
PRIM_OVERLAP GetPrimitiveOverlapDrawlistImpl(bool save_drawlist = false, bool save_bbox = false, float bbox_scale = 1.0f);
511525
PRIM_OVERLAP GetPrimitiveOverlapDrawlist(bool save_drawlist = false, bool save_bbox = false, float bbox_scale = 1.0f);
512526
PRIM_OVERLAP PrimitiveOverlap(bool save_drawlist = false);
527+
template<u32 primclass, bool fst>
528+
void ProcessAutoflushDrawlistImpl(float pos_scale, float tex_scale);
529+
void ProcessAutoflushDrawlist(float pos_scale, float tex_scale);
513530
bool SpriteDrawWithoutGaps();
514531
void CalculatePrimitiveCoversWithoutGaps();
515532
GIFRegTEX0 GetTex0Layer(u32 lod);

pcsx2/GS/Renderers/Common/GSDevice.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,9 @@ struct alignas(16) GSHWDrawConfig
758758
u32 indices_per_prim; ///< Number of indices that make up one primitive
759759
const std::vector<size_t>* drawlist; ///< For reducing barriers on sprites
760760
const std::vector<GSVector4i>* drawlist_bbox; ///< For RT copy when barriers not available.
761+
const std::vector<size_t>* autoflush_list; ///< For batched autoflush drawing.
762+
const std::vector<GSVector4i>* autoflush_bbox; ///< For batched autoflush drawing.
763+
bool autoflush; ///< Do a batched autoflush draw.
761764
GSVector4i scissor; ///< Scissor rect
762765
GSVector4i drawarea; ///< Area in the framebuffer which will be modified.
763766
Topology topology; ///< Draw topology

0 commit comments

Comments
 (0)