@@ -287,6 +287,125 @@ void ConvertVertexBuffer(const GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTR
287287 }
288288}
289289
290+ // Fix ST coordinates that would overflow the rasterizer fixed point format by rewriting the vertices.
291+ template <u32 primclass>
292+ void GSRendererSW::RewriteVerticesIfSTOverflow ()
293+ {
294+ if (PRIM->TME && PRIM->FST == 0 )
295+ {
296+ const GSVector4 tsize = GSVector4 (
297+ static_cast <float >(1 << m_context->TEX0 .TW ),
298+ static_cast <float >(1 << m_context->TEX0 .TH ),
299+ 1 .0f ,
300+ 1 .0f );
301+
302+ // SW rasterizer stores UV in 1.15.16 format so clamp to +/- (2^15 - 2) (-2 so bilinear doesn't overflow).
303+ // Do the division by texture size here to avoid divisions for each vertex.
304+ const GSVector4 OVERFLOW_VAL = GSVector4::cxpr (static_cast <float >((1 << 15 ) - 2 )) / tsize;
305+
306+ // Only rewrite big/small S or T when the clamping mode is CLAMP or REGION_CLAMP.
307+ const GSVector4i clamp_mode = GSVector4i (
308+ (m_context->CLAMP .WMS == CLAMP_CLAMP || m_context->CLAMP .WMS == CLAMP_REGION_CLAMP) ? 0xFFFFFFFF : 0 ,
309+ (m_context->CLAMP .WMT == CLAMP_CLAMP || m_context->CLAMP .WMT == CLAMP_REGION_CLAMP) ? 0xFFFFFFFF : 0 ,
310+ 0 ,
311+ 0 );
312+
313+ const bool st_overflow =
314+ ((GSVector4i::cast (m_vt.m_min .t <= -OVERFLOW_VAL * tsize) & clamp_mode).mask () & 3 ) ||
315+ ((GSVector4i::cast (m_vt.m_max .t >= OVERFLOW_VAL * tsize) & clamp_mode).mask () & 3 ) ||
316+ m_vt.nan .value ;
317+
318+ if (st_overflow)
319+ {
320+ constexpr int n = GSUtil::GetClassVertexCount (primclass);
321+
322+ // Make sure the copy buffer is large enough.
323+ while (m_vertex.maxcount < m_index.tail )
324+ GrowVertexBuffer ();
325+
326+ GSVertex* RESTRICT vertex = m_vertex.buff ;
327+ GSVertex* RESTRICT vertex_copy = m_vertex.buff_copy ;
328+ u16 * RESTRICT index = m_index.buff ;
329+
330+ for (int i = 0 ; i < static_cast <int >(m_index.tail ); i += n)
331+ {
332+ GSVector4 stcq[n];
333+
334+ // Load STQ for this primitive.
335+ for (int j = 0 ; j < n; j++)
336+ stcq[j] = GSVector4::cast (GSVector4i (vertex[index[i + j]].m [0 ]));
337+
338+ // Perform Q division and see which values need to be rewritten.
339+ GSVector4 uv[n];
340+ GSVector4i small{}, big{}, nan{};
341+ for (int j = 0 ; j < n; j++)
342+ {
343+ // For sprites always use Q of second vertex.
344+ const GSVector4 q = primclass == GS_SPRITE_CLASS ? stcq[1 ].wwww () : stcq[j].wwww ();
345+ uv[j] = (stcq[j] / q).xyzw (GSVector4::zero ());
346+ small |= GSVector4i::cast (uv[j] <= -OVERFLOW_VAL);
347+ big |= GSVector4i::cast (uv[j] >= OVERFLOW_VAL);
348+ nan |= GSVector4i::cast (uv[j] != uv[j]);
349+ }
350+
351+ // Get the new values for fields that will be rewritten.
352+ // The follows rules are used:
353+ // 1. If there are small values but not big or nans, make all vertices small.
354+ // 2. If there are big values but not small or nans, make all vertices big.
355+ // 3. If there are both big and small values, or nans, make all vertices zero.
356+ GSVector4 uv_new = GSVector4::zero ();
357+ uv_new = uv_new.blend32 (-OVERFLOW_VAL, GSVector4::cast (small));
358+ uv_new = uv_new.blend32 (OVERFLOW_VAL, GSVector4::cast (big));
359+ uv_new = uv_new.blend32 (GSVector4::zero (), GSVector4::cast ((small & big) | nan));
360+
361+ const GSVector4i rewrite = (((small | big) & clamp_mode) | nan).upl64 (GSVector4i::zero ());
362+
363+ // If both S and T are rewritten, no point in keeping Q. Just set it to 1.0f;
364+ if ((GSVector4::cast (rewrite).mask () & 3 ) == 3 )
365+ {
366+ for (int j = 0 ; j < n; j++)
367+ stcq[j] = stcq[j].template insert32 <0 , 3 >(GSVector4::m_one);
368+ }
369+
370+ // Rewrite the fields that require it and write to the copy buffer.
371+ for (int j = 0 ; j < n; j++)
372+ {
373+ // For sprites always use Q of second vertex.
374+ const GSVector4 q = (primclass == GS_SPRITE_CLASS) ? stcq[1 ].wwww () : stcq[j].wwww ();
375+ stcq[j] = stcq[j].blend32 (uv_new * q, GSVector4::cast (rewrite));
376+
377+ vertex_copy[i + j].m [0 ] = GSVector4i::cast (stcq[j]).m ;
378+ vertex_copy[i + j].m [1 ] = vertex[index[i + j]].m [1 ];
379+ index[i + j] = i + j;
380+ }
381+ }
382+
383+ // Swap the buffers and fix the counts.
384+ std::swap (m_vertex.buff , m_vertex.buff_copy );
385+ m_vertex.head = m_vertex.next = m_vertex.tail = m_index.tail ;
386+
387+ // Recalculate ST min/max/eq in the vertex trace.
388+ GSVector4 tmin = GSVector4::cxpr (FLT_MAX);
389+ GSVector4 tmax = GSVector4::cxpr (-FLT_MAX);
390+ for (int i = 0 ; i < static_cast <int >(m_index.tail ); i += n)
391+ {
392+ for (int j = 0 ; j < n; j++)
393+ {
394+ GSVector4 stcq = GSVector4::cast (GSVector4i (m_vertex.buff [i + j].m [0 ]));
395+ const float Q = (primclass == GS_SPRITE_CLASS) ? stcq.w : m_vertex.buff [i + 1 ].RGBAQ .Q ;
396+ stcq = (stcq / Q).xyzw (stcq);
397+
398+ tmin = tmin.min (stcq);
399+ tmax = tmax.max (stcq);
400+ }
401+ }
402+ m_vt.m_min .t = tmin.xyww () * tsize;
403+ m_vt.m_max .t = tmax.xyww () * tsize;
404+ m_vt.m_eq .stq = (m_vt.m_min .t == m_vt.m_max .t ).mask ();
405+ }
406+ }
407+ }
408+
290409void GSVertexSWInitStatic ()
291410{
292411#define InitCVB4 (P, T, F, Q ) GSVertexSW::s_cvb[P][T][F][Q] = ConvertVertexBuffer<P, T, F, Q>;
@@ -309,6 +428,25 @@ void GSRendererSW::Draw()
309428{
310429 const GSDrawingContext* context = m_context;
311430
431+ switch (m_vt.m_primclass )
432+ {
433+ case GS_POINT_CLASS:
434+ RewriteVerticesIfSTOverflow<GS_POINT_CLASS>();
435+ break ;
436+ case GS_LINE_CLASS:
437+ RewriteVerticesIfSTOverflow<GS_LINE_CLASS>();
438+ break ;
439+ case GS_TRIANGLE_CLASS:
440+ RewriteVerticesIfSTOverflow<GS_TRIANGLE_CLASS>();
441+ break ;
442+ case GS_SPRITE_CLASS:
443+ RewriteVerticesIfSTOverflow<GS_SPRITE_CLASS>();
444+ break ;
445+ default :
446+ pxFailRel (" Unknown primitive class." );
447+ break ;
448+ }
449+
312450 auto data = m_vertex_heap.make_shared <SharedData>().cast <GSRasterizerData>();
313451 SharedData* sd = static_cast <SharedData*>(data.get ());
314452
0 commit comments