@@ -1882,6 +1882,7 @@ void GSState::Flush(GSFlushReason reason)
18821882
18831883 m_dirty_gs_regs = 0 ;
18841884 temp_draw_rect = GSVector4i::zero ();
1885+ ResetAutoFlushList ();
18851886 }
18861887
18871888 m_state_flush_reason = GSFlushReason::UNKNOWN;
@@ -2070,6 +2071,11 @@ void GSState::FlushPrim()
20702071 pxAssert ((int )unused < GSUtil::GetVertexCount (PRIM->PRIM ));
20712072 }
20722073
2074+ if (HasAutoFlushList ())
2075+ {
2076+ UpdateAutoFlushList ();
2077+ }
2078+
20732079 // If the PSM format of Z is invalid, but it is masked (no write) and ZTST is set to ALWAYS pass (no test, just allow)
20742080 // we can ignore the Z format, since it won't be used in the draw (Star Ocean 3 transitions)
20752081#ifdef PCSX2_DEVBUILD
@@ -4223,6 +4229,126 @@ GSState::PRIM_OVERLAP GSState::PrimitiveOverlap(bool save_drawlist)
42234229 return GetPrimitiveOverlapDrawlist (save_drawlist);
42244230}
42254231
4232+ template <u32 primclass, bool fst>
4233+ void GSState::ProcessAutoflushDrawlistImpl (float pos_scale, float tex_scale)
4234+ {
4235+ if (!m_drawlist.empty ())
4236+ {
4237+ // Chop the barrier drawlist to fit within each autoflush draw.
4238+ std::vector<size_t > drawlist;
4239+ drawlist.reserve (m_drawlist.capacity ());
4240+ for (size_t i = 0 , j = 0 ; i < m_autoflush_list.size (); i++)
4241+ {
4242+ int prims = static_cast <int >(m_autoflush_list[i]);
4243+ while (prims > 0 )
4244+ {
4245+ if (m_drawlist[j] > static_cast <size_t >(prims))
4246+ {
4247+ drawlist.push_back (prims);
4248+ m_drawlist[j] -= prims;
4249+ prims = 0 ;
4250+ }
4251+ else
4252+ {
4253+ drawlist.push_back (m_drawlist[j]);
4254+ prims -= m_drawlist[j];
4255+ m_drawlist[j] = 0 ;
4256+ j++;
4257+ }
4258+ }
4259+ }
4260+ m_drawlist = std::move (drawlist);
4261+ }
4262+ else
4263+ {
4264+ // If we don't need barrier, simply copy the autoflush list as the drawlist
4265+ // since it makes handling the cases with/without barriers simpler.
4266+ const size_t n_elems = m_autoflush_list.size ();
4267+ m_drawlist.resize (n_elems);
4268+ std::memcpy (m_drawlist.data (), m_autoflush_list.data (), sizeof (m_autoflush_list[0 ]) * n_elems);
4269+ }
4270+
4271+ constexpr int n = GSUtil::GetClassVertexCount (primclass);
4272+
4273+ const GSVertex* RESTRICT verts = m_vertex.buff ;
4274+ const u16 * RESTRICT index = m_index.buff ;
4275+
4276+ const auto ProcessBBox = [](GSVector4 bbox, float scale) {
4277+ bbox += GSVector4 (-1 .0f , -1 .0f , 1 .0f , 1 .0f ); // Expand 1 native pixel.
4278+ bbox *= scale; // Upscaling
4279+ bbox = bbox.floor ().xyzw (bbox.ceil ()); // Rounding.
4280+ return GSVector4i (bbox);
4281+ };
4282+
4283+ // Compute the texture bboxes.
4284+ for (size_t i = 0 , idx = 0 ; i < m_autoflush_list.size (); i++)
4285+ {
4286+ GSVector4 bbox (FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX);
4287+
4288+ const size_t n_prims = m_autoflush_list[i];
4289+ for (size_t j = 0 ; j < n_prims; j++, idx += n)
4290+ {
4291+ for (size_t k = 0 ; k < n; k++)
4292+ {
4293+ const GSVertex& v = verts[index[idx + k]];
4294+ const float q = (primclass == GS_SPRITE_CLASS) ? verts[index[idx + 1 ]].RGBAQ .Q : v.RGBAQ .Q ;
4295+ GSVector4 tex = GetTexCoordsImpl<fst>(v, q);
4296+ bbox = bbox.min (tex).xyzw (bbox.max (tex));
4297+ }
4298+ }
4299+
4300+ m_autoflush_bbox.push_back (ProcessBBox (bbox, tex_scale));
4301+ }
4302+
4303+ // Recompute the position bboxes if needed.
4304+ if (m_drawlist_bbox.size () > 0 )
4305+ {
4306+ m_drawlist_bbox.clear ();
4307+
4308+ for (size_t i = 0 , idx = 0 ; i < m_drawlist.size (); i++)
4309+ {
4310+ GSVector4i bbox (INT_MAX, INT_MAX, INT_MIN, INT_MIN);
4311+
4312+ const size_t n_prims = m_drawlist[i];
4313+ for (size_t j = 0 ; j < n_prims; j++, idx += n)
4314+ {
4315+ for (size_t k = 0 ; k < n; k++)
4316+ {
4317+ bbox = bbox.runion (GetVertexXY (verts[index[idx + k]]));
4318+ }
4319+ }
4320+
4321+ const GSVector4i xyof = m_context->scissor .xyof .xyxy ();
4322+ GSVector4 bbox_f = GSVector4 (bbox - xyof) / 16 .0f ;
4323+ m_drawlist_bbox.push_back (ProcessBBox (bbox_f, pos_scale));
4324+ }
4325+ }
4326+ }
4327+
4328+ void GSState::ProcessAutoflushDrawlist (float pos_scale, float tex_scale)
4329+ {
4330+ pxAssertRel (PRIM->TME , " Autoflush drawlist only valid with texture mapping." );
4331+
4332+ switch (m_vt.m_primclass )
4333+ {
4334+ case GS_SPRITE_CLASS:
4335+ if (PRIM->FST )
4336+ ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, true >(pos_scale, tex_scale);
4337+ else
4338+ ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, false >(pos_scale, tex_scale);
4339+ break ;
4340+ case GS_TRIANGLE_CLASS:
4341+ if (PRIM->FST )
4342+ ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, true >(pos_scale, tex_scale);
4343+ else
4344+ ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, false >(pos_scale, tex_scale);
4345+ break ;
4346+ default :
4347+ pxFail (" Autoflush drawlist only for triangles/sprites." );
4348+ break ;
4349+ }
4350+ }
4351+
42264352bool GSState::SpriteDrawWithoutGaps ()
42274353{
42284354 // Check that the height matches. Xenosaga 3 draws a letterbox around
@@ -4790,6 +4916,34 @@ void GSState::GetQuadRasterizedPoints(GSVector4& xy, bool keep_order)
47904916 GetQuadRasterizedPoints (xy, tex_ignore, keep_order);
47914917}
47924918
4919+ __forceinline bool GSState::CanUseAutoFlushList () const
4920+ {
4921+ // Can combine if recursive color draw and source/RT are basically the same
4922+ // format (aside from 24/32 bit difference).
4923+ return m_context->TEX0 .TBP0 == m_context->FRAME .Block () &&
4924+ (m_context->TEX0 .PSM & ~1 ) == (m_context->FRAME .PSM & ~1 ) &&
4925+ GSIsHardwareRenderer ();
4926+ }
4927+
4928+ __forceinline void GSState::ResetAutoFlushList ()
4929+ {
4930+ m_autoflush_list.clear ();
4931+ m_autoflush_bbox.clear ();
4932+ m_autoflush_tail = 0 ;
4933+ }
4934+
4935+ __forceinline void GSState::UpdateAutoFlushList ()
4936+ {
4937+ if (NumQueuedIndices () > 0 )
4938+ {
4939+ const int n = GSUtil::GetVertexCount (PRIM->PRIM );
4940+ m_autoflush_list.push_back (NumQueuedIndices () / n);
4941+ m_autoflush_tail = m_index.tail ;
4942+ temp_draw_rect = GSVector4i::zero (); // Reset draw rect since it's used for autoflush overlap.
4943+ m_texflush_flag = false ; // Reset TEXFLUSH since this is equivalent to starting a new draw.
4944+ }
4945+ }
4946+
47934947__forceinline bool GSState::IsAutoFlushDraw (u32 prim, int & tex_layer)
47944948{
47954949 if (!PRIM->TME || (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::SpritesOnly && prim != GS_SPRITE))
@@ -4931,9 +5085,20 @@ template<u32 prim>
49315085__forceinline void GSState::HandleAutoFlush ()
49325086{
49335087 // Kind of a cheat, making the assumption that 2 consecutive fan/strip triangles won't overlap each other (*should* be safe)
4934- if ((m_index. tail & 1 ) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
5088+ if ((NumQueuedIndices () & 1 ) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
49355089 return ;
49365090
5091+ const auto DoFlush = [&]() {
5092+ if (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::BatchEnabled && CanUseAutoFlushList ())
5093+ {
5094+ UpdateAutoFlushList ();
5095+ }
5096+ else
5097+ {
5098+ Flush (GSFlushReason::AUTOFLUSH);
5099+ }
5100+ };
5101+
49375102 // To briefly explain what's going on here, what we are checking for is draws over a texture when the source and destination are themselves.
49385103 // Because one page of the texture gets buffered in the Texture Cache (the PS2's one) if any of those pixels are overwritten, you still read the old data.
49395104 // So we need to calculate if a page boundary is being crossed for the format it is in and if the same part of the texture being written and read inside the draw.
@@ -5133,7 +5298,7 @@ __forceinline void GSState::HandleAutoFlush()
51335298 return ;
51345299 else if (m_texflush_flag)
51355300 {
5136- Flush (GSFlushReason::AUTOFLUSH );
5301+ DoFlush ( );
51375302 return ;
51385303 }
51395304
@@ -5153,8 +5318,9 @@ __forceinline void GSState::HandleAutoFlush()
51535318 const GSVector4i scissor = m_context->scissor .in ;
51545319 GSVector4i old_draw_rect = GSVector4i::zero ();
51555320 int current_draw_end = m_index.tail ;
5321+ const int current_draw_start = static_cast <int >(m_autoflush_tail);
51565322
5157- while (current_draw_end >= n)
5323+ while (current_draw_end >= current_draw_start + n)
51585324 {
51595325 for (int i = current_draw_end - 1 ; i >= current_draw_end - n; i--)
51605326 {
@@ -5204,7 +5370,7 @@ __forceinline void GSState::HandleAutoFlush()
52045370 old_draw_rect = tex_rect.rintersect (old_draw_rect);
52055371 if (!old_draw_rect.rintersect (scissor).rempty ())
52065372 {
5207- Flush (GSFlushReason::AUTOFLUSH );
5373+ DoFlush ( );
52085374 return ;
52095375 }
52105376
@@ -5231,10 +5397,10 @@ __forceinline void GSState::HandleAutoFlush()
52315397 area_out = GSVector4i (area_out.x / frame_psm.pgs .x , area_out.y / frame_psm.pgs .y , area_out.z / frame_psm.pgs .x , area_out.w / frame_psm.pgs .y );
52325398
52335399 if (!area_out.rintersect (tex_rect).rempty ())
5234- Flush (GSFlushReason::AUTOFLUSH );
5400+ DoFlush ( );
52355401 }
52365402 else // Formats are too different so just flush it.
5237- Flush (GSFlushReason::AUTOFLUSH );
5403+ DoFlush ( );
52385404 }
52395405 }
52405406 }
@@ -5254,7 +5420,7 @@ __forceinline void GSState::VertexKick(u32 skip)
52545420 return ;
52555421 }
52565422
5257- if (auto_flush && skip == 0 && m_index. tail > 0 && ((m_vertex.tail + 1 ) - m_vertex.head ) >= n)
5423+ if (auto_flush && skip == 0 && NumQueuedIndices () > 0 && ((m_vertex.tail + 1 ) - m_vertex.head ) >= n)
52585424 {
52595425 HandleAutoFlush<prim>();
52605426 }
@@ -5497,7 +5663,7 @@ __forceinline void GSState::VertexKick(u32 skip)
54975663 // Update rectangle for the current draw. We can use the re-integer coordinates from min/max here.
54985664 const GSVector4i draw_min = pmin.zwzw ();
54995665 const GSVector4i draw_max = pmax;
5500- if (m_vertex. tail != n)
5666+ if (NumQueuedIndices () > n)
55015667 temp_draw_rect = temp_draw_rect.min_i32 (draw_min).blend32 <12 >(temp_draw_rect.max_i32 (draw_max));
55025668 else
55035669 temp_draw_rect = draw_min.blend32 <12 >(draw_max);
0 commit comments