Skip to content

Commit e751f36

Browse files
TJnotJT authored and lightningterror committed
GS/SW: Rewrite vertex ST values if they might overflow the rasterizer fixed point format.
1 parent 13f2d87 commit e751f36

2 files changed

Lines changed: 141 additions & 0 deletions

File tree

pcsx2/GS/Renderers/SW/GSRendererSW.cpp

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,125 @@ void ConvertVertexBuffer(const GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTR
287287
}
288288
}
289289

290+
// Fix ST coordinates that would overflow the rasterizer fixed point format by rewriting the vertices.
291+
template <u32 primclass>
292+
void GSRendererSW::RewriteVerticesIfSTOverflow()
293+
{
294+
if (PRIM->TME && PRIM->FST == 0)
295+
{
296+
const GSVector4 tsize = GSVector4(
297+
static_cast<float>(1 << m_context->TEX0.TW),
298+
static_cast<float>(1 << m_context->TEX0.TH),
299+
1.0f,
300+
1.0f);
301+
302+
// SW rasterizer stores UV in 1.15.16 format so clamp to +/- (2^15 - 2) (-2 so bilinear doesn't overflow).
303+
// Do the division by texture size here to avoid divisions for each vertex.
304+
const GSVector4 OVERFLOW_VAL = GSVector4::cxpr(static_cast<float>((1 << 15) - 2)) / tsize;
305+
306+
// Only rewrite big/small S or T when the clamping mode is CLAMP or REGION_CLAMP.
307+
const GSVector4i clamp_mode = GSVector4i(
308+
(m_context->CLAMP.WMS == CLAMP_CLAMP || m_context->CLAMP.WMS == CLAMP_REGION_CLAMP) ? 0xFFFFFFFF : 0,
309+
(m_context->CLAMP.WMT == CLAMP_CLAMP || m_context->CLAMP.WMT == CLAMP_REGION_CLAMP) ? 0xFFFFFFFF : 0,
310+
0,
311+
0);
312+
313+
const bool st_overflow =
314+
((GSVector4i::cast(m_vt.m_min.t <= -OVERFLOW_VAL * tsize) & clamp_mode).mask() & 3) ||
315+
((GSVector4i::cast(m_vt.m_max.t >= OVERFLOW_VAL * tsize) & clamp_mode).mask() & 3) ||
316+
m_vt.nan.value;
317+
318+
if (st_overflow)
319+
{
320+
constexpr int n = GSUtil::GetClassVertexCount(primclass);
321+
322+
// Make sure the copy buffer is large enough.
323+
while (m_vertex.maxcount < m_index.tail)
324+
GrowVertexBuffer();
325+
326+
GSVertex* RESTRICT vertex = m_vertex.buff;
327+
GSVertex* RESTRICT vertex_copy = m_vertex.buff_copy;
328+
u16* RESTRICT index = m_index.buff;
329+
330+
for (int i = 0; i < static_cast<int>(m_index.tail); i += n)
331+
{
332+
GSVector4 stcq[n];
333+
334+
// Load STQ for this primitive.
335+
for (int j = 0; j < n; j++)
336+
stcq[j] = GSVector4::cast(GSVector4i(vertex[index[i + j]].m[0]));
337+
338+
// Perform Q division and see which values need to be rewritten.
339+
GSVector4 uv[n];
340+
GSVector4i small{}, big{}, nan{};
341+
for (int j = 0; j < n; j++)
342+
{
343+
// For sprites always use Q of second vertex.
344+
const GSVector4 q = primclass == GS_SPRITE_CLASS ? stcq[1].wwww() : stcq[j].wwww();
345+
uv[j] = (stcq[j] / q).xyzw(GSVector4::zero());
346+
small |= GSVector4i::cast(uv[j] <= -OVERFLOW_VAL);
347+
big |= GSVector4i::cast(uv[j] >= OVERFLOW_VAL);
348+
nan |= GSVector4i::cast(uv[j] != uv[j]);
349+
}
350+
351+
// Get the new values for fields that will be rewritten.
352+
// The follows rules are used:
353+
// 1. If there are small values but not big or nans, make all vertices small.
354+
// 2. If there are big values but not small or nans, make all vertices big.
355+
// 3. If there are both big and small values, or nans, make all vertices zero.
356+
GSVector4 uv_new = GSVector4::zero();
357+
uv_new = uv_new.blend32(-OVERFLOW_VAL, GSVector4::cast(small));
358+
uv_new = uv_new.blend32(OVERFLOW_VAL, GSVector4::cast(big));
359+
uv_new = uv_new.blend32(GSVector4::zero(), GSVector4::cast((small & big) | nan));
360+
361+
const GSVector4i rewrite = (((small | big) & clamp_mode) | nan).upl64(GSVector4i::zero());
362+
363+
// If both S and T are rewritten, no point in keeping Q. Just set it to 1.0f;
364+
if ((GSVector4::cast(rewrite).mask() & 3) == 3)
365+
{
366+
for (int j = 0; j < n; j++)
367+
stcq[j] = stcq[j].template insert32<0, 3>(GSVector4::m_one);
368+
}
369+
370+
// Rewrite the fields that require it and write to the copy buffer.
371+
for (int j = 0; j < n; j++)
372+
{
373+
// For sprites always use Q of second vertex.
374+
const GSVector4 q = (primclass == GS_SPRITE_CLASS) ? stcq[1].wwww() : stcq[j].wwww();
375+
stcq[j] = stcq[j].blend32(uv_new * q, GSVector4::cast(rewrite));
376+
377+
vertex_copy[i + j].m[0] = GSVector4i::cast(stcq[j]).m;
378+
vertex_copy[i + j].m[1] = vertex[index[i + j]].m[1];
379+
index[i + j] = i + j;
380+
}
381+
}
382+
383+
// Swap the buffers and fix the counts.
384+
std::swap(m_vertex.buff, m_vertex.buff_copy);
385+
m_vertex.head = m_vertex.next = m_vertex.tail = m_index.tail;
386+
387+
// Recalculate ST min/max/eq in the vertex trace.
388+
GSVector4 tmin = GSVector4::cxpr(FLT_MAX);
389+
GSVector4 tmax = GSVector4::cxpr(-FLT_MAX);
390+
for (int i = 0; i < static_cast<int>(m_index.tail); i += n)
391+
{
392+
for (int j = 0; j < n; j++)
393+
{
394+
GSVector4 stcq = GSVector4::cast(GSVector4i(m_vertex.buff[i + j].m[0]));
395+
const float Q = (primclass == GS_SPRITE_CLASS) ? stcq.w : m_vertex.buff[i + 1].RGBAQ.Q;
396+
stcq = (stcq / Q).xyzw(stcq);
397+
398+
tmin = tmin.min(stcq);
399+
tmax = tmax.max(stcq);
400+
}
401+
}
402+
m_vt.m_min.t = tmin.xyww() * tsize;
403+
m_vt.m_max.t = tmax.xyww() * tsize;
404+
m_vt.m_eq.stq = (m_vt.m_min.t == m_vt.m_max.t).mask();
405+
}
406+
}
407+
}
408+
290409
void GSVertexSWInitStatic()
291410
{
292411
#define InitCVB4(P, T, F, Q) GSVertexSW::s_cvb[P][T][F][Q] = ConvertVertexBuffer<P, T, F, Q>;
@@ -309,6 +428,25 @@ void GSRendererSW::Draw()
309428
{
310429
const GSDrawingContext* context = m_context;
311430

431+
switch (m_vt.m_primclass)
432+
{
433+
case GS_POINT_CLASS:
434+
RewriteVerticesIfSTOverflow<GS_POINT_CLASS>();
435+
break;
436+
case GS_LINE_CLASS:
437+
RewriteVerticesIfSTOverflow<GS_LINE_CLASS>();
438+
break;
439+
case GS_TRIANGLE_CLASS:
440+
RewriteVerticesIfSTOverflow<GS_TRIANGLE_CLASS>();
441+
break;
442+
case GS_SPRITE_CLASS:
443+
RewriteVerticesIfSTOverflow<GS_SPRITE_CLASS>();
444+
break;
445+
default:
446+
pxFailRel("Unknown primitive class.");
447+
break;
448+
}
449+
312450
auto data = m_vertex_heap.make_shared<SharedData>().cast<GSRasterizerData>();
313451
SharedData* sd = static_cast<SharedData*>(data.get());
314452

pcsx2/GS/Renderers/SW/GSRendererSW.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ class GSRendererSW final : public GSRenderer
7979

8080
bool GetScanlineGlobalData(SharedData* data);
8181

82+
template <u32 primclass>
83+
void RewriteVerticesIfSTOverflow();
84+
8285
public:
8386
GSRendererSW(int threads);
8487
~GSRendererSW() override;

0 commit comments

Comments
 (0)