diff --git a/bmedll/Hudwarp.cpp b/bmedll/Hudwarp.cpp index 0ab29d5..0cba53c 100644 --- a/bmedll/Hudwarp.cpp +++ b/bmedll/Hudwarp.cpp @@ -27,6 +27,9 @@ ID3D11PixelShader* pPixelShader; #define MAINVP 0 D3D11_VIEWPORT viewport{ 0 }; +// render hud at 1440p on 1080p and similar scale +#define HUD_RENDER_RES 1.333f + HudwarpProcess::HudwarpProcess(ID3D11Device* pDevice, ID3D11DeviceContext** ppID3D11DeviceContext) : m_pDevice(pDevice), m_pContext(*ppID3D11DeviceContext) { // Initialize shaders @@ -70,8 +73,8 @@ HudwarpProcess::HudwarpProcess(ID3D11Device* pDevice, ID3D11DeviceContext** ppID m_height = *reinterpret_cast(vguimatsurfacedllBaseAddress + 0x290DD8 + 4); // We add a border to the texture so that the HUD can't get cut off by the texture boundaries - unsigned int widthWithBorder = m_width * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f); - unsigned int heightWithBorder = m_height * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f); + unsigned int widthWithBorder = m_width * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f) * HUD_RENDER_RES; + unsigned int heightWithBorder = m_height * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f) * HUD_RENDER_RES; // Setup the texture descriptor D3D11_TEXTURE2D_DESC textureDesc; @@ -125,6 +128,7 @@ HudwarpProcess::HudwarpProcess(ID3D11Device* pDevice, ID3D11DeviceContext** ppID mOrtho = XMMatrixOrthographicLH(1.0f, 1.0f, 0.0f, 1.0f); ConstantBuffer cb; cb.mProjection = mOrtho; + cb.aspectRatio = (float)m_width / (float)m_height; cb.xWarp = 0.0f; cb.xScale = 1.0f; cb.yWarp = 0.0f; @@ -246,8 +250,8 @@ void HudwarpProcess::Resize(unsigned int w, unsigned int h) m_height = *reinterpret_cast(vguimatsurfacedllBaseAddress + 0x290DD8 + 4); // We add a border to the texture so that the HUD can't get cut off by the texture boundaries - unsigned int widthWithBorder = m_width * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f); - unsigned int heightWithBorder = m_height * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f); + unsigned int widthWithBorder = m_width * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f) * 
HUD_RENDER_RES; + unsigned int heightWithBorder = m_height * (1.0f + HUD_TEX_BORDER_SIZE * 2.0f) * HUD_RENDER_RES; // Setup the texture descriptor D3D11_TEXTURE2D_DESC textureDesc; @@ -263,6 +267,20 @@ void HudwarpProcess::Resize(unsigned int w, unsigned int h) textureDesc.CPUAccessFlags = 0; textureDesc.MiscFlags = 0; + // Update the constant buffer + ConstantBuffer cb; + cb.mProjection = mOrtho; + cb.aspectRatio = (float)m_width / (float)m_height; + cb.xWarp = m_hudwarpSettings.xWarp; + cb.xScale = m_hudwarpSettings.xScale; + cb.yWarp = m_hudwarpSettings.yWarp; + cb.yScale = m_hudwarpSettings.yScale; + cb.viewDist = m_hudwarpSettings.viewDist; + + m_pContext->UpdateSubresource(m_pConstantBuffer, 0, 0, &cb, 0, 0); + + m_shouldUpdateConstantBuffer = false; + // Create the render texture m_pDevice->CreateTexture2D(&textureDesc, NULL, &m_pRenderTexture); @@ -289,6 +307,7 @@ void HudwarpProcess::Resize(unsigned int w, unsigned int h) void HudwarpProcess::UpdateSettings(HudwarpSettings* hudwarpSettings) { m_hudwarpSettings = *hudwarpSettings; + m_shouldUpdateConstantBuffer = true; } void HudwarpProcess::Begin() @@ -357,16 +376,21 @@ void HudwarpProcess::Finish() m_pContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); // Update the constant buffer - ConstantBuffer cb; - cb.mProjection = mOrtho; - cb.aspectRatio = (float)m_width / (float)m_height; - cb.xWarp = m_hudwarpSettings.xWarp; - cb.xScale = m_hudwarpSettings.xScale; - cb.yWarp = m_hudwarpSettings.yWarp; - cb.yScale = m_hudwarpSettings.yScale; - cb.viewDist = m_hudwarpSettings.viewDist; - - m_pContext->UpdateSubresource(m_pConstantBuffer, 0, 0, &cb, 0, 0); + if (m_shouldUpdateConstantBuffer) + { + ConstantBuffer cb; + cb.mProjection = mOrtho; + cb.aspectRatio = (float)m_width / (float)m_height; + cb.xWarp = m_hudwarpSettings.xWarp; + cb.xScale = m_hudwarpSettings.xScale; + cb.yWarp = m_hudwarpSettings.yWarp; + cb.yScale = m_hudwarpSettings.yScale; + cb.viewDist = m_hudwarpSettings.viewDist; + + 
m_pContext->UpdateSubresource(m_pConstantBuffer, 0, 0, &cb, 0, 0); + + m_shouldUpdateConstantBuffer = false; + } // Set shader resources m_pContext->VSSetConstantBuffers(0, 1, &m_pConstantBuffer); diff --git a/bmedll/Hudwarp.h b/bmedll/Hudwarp.h index 4f1914a..8cf9252 100644 --- a/bmedll/Hudwarp.h +++ b/bmedll/Hudwarp.h @@ -43,6 +43,7 @@ class HudwarpProcess ID3D11InputLayout* m_pVertexLayout = nullptr; ID3D11Buffer* m_pIndexBuffer = nullptr; ID3D11Buffer* m_pConstantBuffer = nullptr; + bool m_shouldUpdateConstantBuffer = false; ID3D11RasterizerState* m_pCWcullMode = NULL; @@ -77,8 +78,10 @@ struct Vertex XMFLOAT2 texCoord; }; -// Must match hudScale in shader +// Used for hudScale in shader, prevents cut off HUD +// These values must match or the HUD will end up scaled wrong #define HUD_TEX_BORDER_SIZE 0.025f +#define HUD_TEX_BORDER_SIZE_STR "0.025f" constexpr const char* hudwarpShader = R"( cbuffer ConstantBuffer : register(b0) @@ -116,7 +119,7 @@ sampler Sampler : register(s0); float2 UndoHudTexBorder(float2 texCoord) { // IMPORTANT: must match value of HUD_TEX_BORDER_SIZE - float hudTexBorderSize = 0.025f; + float hudTexBorderSize = )" HUD_TEX_BORDER_SIZE_STR R"(; float hudScale = 1.0f + 2.0f * hudTexBorderSize; float hudOffset = 0.5f - (0.5f / hudScale); diff --git a/bmedll/MiscRenderHooks.cpp b/bmedll/MiscRenderHooks.cpp index 54e5102..5313bb9 100644 --- a/bmedll/MiscRenderHooks.cpp +++ b/bmedll/MiscRenderHooks.cpp @@ -6,98 +6,6 @@ #include -bool shouldUseGPUHudwarp = false; -bool isHudwarpDisabled = false; - -bool isRenderingHud = false; -HudwarpProcess* hudwarpProcess = nullptr; - -typedef void(__fastcall* sub_18000BAC0_type)(float*, float*, float*); -sub_18000BAC0_type sub_18000BAC0_org = nullptr; -void __fastcall sub_18000BAC0(float* a1, float* a2, float* a3) -{ - // Ported from TFORevive by Barnaby - - if (!shouldUseGPUHudwarp) - { - // If hudwarp is disabled and running on CPU do just the scaling - if (isHudwarpDisabled) - { - // Still perform scaling 
for HUD when warping is disabled - float viewWidth = a1[2]; - float viewHeight = a1[3]; - - float xScale = a1[7]; - float yScale = a1[9]; - - a3[0] = (a2[0] - 0.5f * viewWidth) * xScale + 0.5f * viewWidth; - a3[1] = (a2[1] - 0.5f * viewHeight) * yScale + 0.5f * viewHeight; - a3[2] = a2[2]; - return; - } - - return sub_18000BAC0_org(a1, a2, a3); - } - - // We prevent the hud from reaching bounds of render texture by adding a border of 0.025 * width/height to the texture - // We need to offset the verts to account for that here - const float hudOffset = HUD_TEX_BORDER_SIZE; - const float hudScale = 1.0f - 2.0f * hudOffset; - - float viewWidth = a1[2]; - float viewHeight = a1[3]; - - a3[0] = a2[0] * hudScale + hudOffset * viewWidth; - a3[1] = a2[1] * hudScale + hudOffset * viewHeight; - a3[2] = a2[2]; -} - -typedef void(__fastcall* CMatSystemSurface__ApplyHudwarpSettings_type)(void*, HudwarpSettings*, unsigned int, unsigned int); -CMatSystemSurface__ApplyHudwarpSettings_type CMatSystemSurface__ApplyHudwarpSettings_org = nullptr; -void __fastcall CMatSystemSurface__ApplyHudwarpSettings(void* thisptr, HudwarpSettings* hudwarpSettings, unsigned int screenX, unsigned int screenY) -{ - // Ported from TFORevive by Barnaby - - static ConVarRef hudwarp_chopsize{ "hudwarp_chopsize" }; - unsigned int originalChopsize = hudwarp_chopsize->GetInt(); - - static ConVarRef hudwarp_disable{ "hudwarp_disable" }; - HudwarpSettings newSettings = *hudwarpSettings; - if (hudwarp_disable->GetInt()) - { - // Override hudwarp settings if hudwarp_disable is 1. - // NOTE: Comment below refers to original CPU version, we can set them to 0 when using our shader. - // Stuff breaks if you set the warp values to 0. 
- // Respawn set them to a min of 1deg in rads (0.017453292), we can do that too because it'll result in so little distortion you won't notice it :) - newSettings.xWarp = 0.0f; - if (newSettings.xScale > 1.0f) newSettings.xScale = 1.0f; - newSettings.yWarp = 0.0f; - if (newSettings.yScale > 1.0f) newSettings.yScale = 1.0f; - } - - if (hudwarpProcess) - hudwarpProcess->UpdateSettings(&newSettings); - - static ConVarRef hudwarp_use_gpu{ "hudwarp_use_gpu" }; - // If using GPU hudwarp or hudwarp is disabled do this - // Replace chopsize, it gets set from the cvar in CMatSystemSurface__ApplyHudwarpSettings - if (hudwarp_use_gpu->GetInt() || hudwarp_disable->GetInt()) - { - if (screenX > screenY) [[likely]] - { - hudwarp_chopsize->SetValueInt(screenX); - } - else - { - hudwarp_chopsize->SetValueInt(screenY); - } - } - - CMatSystemSurface__ApplyHudwarpSettings_org(thisptr, &newSettings, screenX, screenY); - - hudwarp_chopsize->SetValueInt(originalChopsize); -} - ID3D11Device* pDevice; ID3D11DeviceContext** ppID3D11DeviceContext; ID3DUserDefinedAnnotation* pPerf; @@ -132,6 +40,13 @@ void QueueEndEvent() SetPixMarker(queuedRenderContext, 222222, ""); // hijack color field for our purposes } +bool isRenderingHud = false; + +bool shouldUseGPUHudwarp = false; +bool isHudwarpDisabled = false; + +HudwarpProcess* hudwarpProcess = nullptr; + void HudRenderStart() { if (!shouldUseGPUHudwarp) @@ -150,16 +65,6 @@ void HudRenderFinish() hudwarpProcess->Finish(); } -typedef void(__fastcall* OnWindowSizeChanged_type)(unsigned int, unsigned int, bool); -OnWindowSizeChanged_type OnWindowSizeChanged_org = nullptr; -void __fastcall OnWindowSizeChanged(unsigned int w, unsigned int h, bool isInGame) -{ - OnWindowSizeChanged_org(w, h, isInGame); - - if (hudwarpProcess) - hudwarpProcess->Resize(w, h); -} - HookedFuncStaticWithType sub_5ADC0("materialsystem_dx11.dll", 0x5ADC0); __int64 __fastcall sub_5ADC0_Hook(__int64 queuedRenderContext, unsigned long color, const char* pszName) { @@ -215,6 
+120,268 @@ void __fastcall RenderHud_Hook(__int64 a1, __int64 a2, __int64 a3) QueueEndEvent(); } +const __m128 vConstHalf = _mm_set_ps1(0.5f); + +static const float hudOffset = HUD_TEX_BORDER_SIZE; +static const float hudScale = 1.0f - 2.0f * hudOffset; +const __m128 vHudwarpHudOffset = _mm_set_ps1(hudOffset); // Const since hudOffset is never gonna change +const __m128 vHudwarpHudScale = _mm_set_ps1(hudScale); // Const since hudScale is never gonna change + +__m128 vHudwarpScreenSize = _mm_set_ps(1080.0f, 1920.0f, 1080.0f, 1920.0f); +__m128 vHudwarpHalfScreenSize = _mm_mul_ps(vHudwarpScreenSize, vConstHalf); +__m128 vHudwarpOffset = _mm_mul_ps(vHudwarpScreenSize, vHudwarpHudOffset); + +__m128 vHudwarpScale = _mm_set_ps1(0.0f); + +// Holds halfScreenSize * scale - halfScreenSize, i.e. hss * (scale - 1), refreshed whenever screen size or scale changes +// This lets us simplify hudwarp_disable simd stuff to 1 instruction +__m128 vHudwarpHalfScreenSizeScaleSub1 = _mm_set_ps1(0.0f); + +void UpdateHudwarpScreenSizeVectors(float screenWidth, float screenHeight) +{ + static float prevScreenWidth = 1920.0f; + static float prevScreenHeight = 1080.0f; + + if (prevScreenWidth != screenWidth || prevScreenHeight != screenHeight) + { + prevScreenWidth = screenWidth; + prevScreenHeight = screenHeight; + + vHudwarpScreenSize = _mm_set_ps(screenHeight, screenWidth, screenHeight, screenWidth); + vHudwarpHalfScreenSize = _mm_mul_ps(vHudwarpScreenSize, vConstHalf); + vHudwarpOffset = _mm_mul_ps(vHudwarpScreenSize, vHudwarpHudOffset); + + vHudwarpHalfScreenSizeScaleSub1 = _mm_fmsub_ps(vHudwarpHalfScreenSize, vHudwarpScale, vHudwarpHalfScreenSize); + } +} + +void UpdateHudwarpScaleVector(float scaleX, float scaleY) +{ + static float prevScaleX = 0.0f; + static float prevScaleY = 0.0f; + + if (prevScaleX != scaleX || prevScaleY != scaleY) + { + prevScaleX = scaleX; + prevScaleY = scaleY; + + vHudwarpScale = _mm_set_ps(scaleY, scaleX, scaleY, scaleX); + + vHudwarpHalfScreenSizeScaleSub1 = _mm_fmsub_ps(vHudwarpHalfScreenSize, vHudwarpScale, vHudwarpHalfScreenSize); + } +} + +bool 
shouldDoublePumpHudwarpVerts = false; +int hudwarpVertsProcessed = 0; + +// Hudwarp vertex manipulation +// When hudwarp is disabled or we're using GPU we can run 2 at once +void HudwarpVertexMath_DoublePump(float* warpSettings, float* vertA, float* vertB) +{ + // Ported from TFORevive by Barnaby + + if (shouldUseGPUHudwarp) + { + // Perform SIMD math :3 + __m128 vPos = _mm_set_ps(vertB[1], vertB[0], vertA[1], vertA[0]); + __m128 vOutPos = _mm_fmadd_ps(vHudwarpHudScale, vPos, vHudwarpOffset); + + // Unpack the vectorised data + *(__int64*)vertA = vOutPos.m128_i64[0]; + *(__int64*)vertB = vOutPos.m128_i64[1]; + } + else + { + // Short-circuit when not scaling + float xScale = warpSettings[7]; + float yScale = warpSettings[9]; + if (xScale == 1.0f && yScale == 1.0f) + { + return; + } + + __m128 vPos = _mm_set_ps(vertB[1], vertB[0], vertA[1], vertA[0]); + + // Rearranged from: (pos - hss) * scale + hss + // To: pos * scale - hss*(scale - 1) + // Allows use of _mm_fmsub_ps + __m128 vOutPos = _mm_fmsub_ps(vPos, vHudwarpScale, vHudwarpHalfScreenSizeScaleSub1); + + // Unpack the vectorised data + *(__int64*)vertA = vOutPos.m128_i64[0]; + *(__int64*)vertB = vOutPos.m128_i64[1]; + } +} + +typedef void(__fastcall* sub_18000BAC0_type)(float*, float*, float*); +sub_18000BAC0_type sub_18000BAC0_org = nullptr; +void __fastcall sub_18000BAC0(float* a1, float* a2, float* a3) +{ + // Ported from TFORevive by Barnaby + + if (shouldDoublePumpHudwarpVerts) + { + hudwarpVertsProcessed++; + + if (hudwarpVertsProcessed % 2 == 0) + { + HudwarpVertexMath_DoublePump(a1, a2 - 6, a2); + } + + return; + } + + // Some wacky SIMD stuff is done in here now, if we wanted we could possibly look into + // double pumping this since we only use 2 of the 4 f32 slots in each __m128. 
+ // That would need us to hook the 2 calling functions and rewrite a lot more tbh + + if (!shouldUseGPUHudwarp) + { + // If hudwarp is disabled and running on CPU do just the scaling + if (isHudwarpDisabled) + { + // Short-circuit when not scaling, only works cause a2 == a3 + float xScale = a1[7]; + float yScale = a1[9]; + if (xScale == 1.0f && yScale == 1.0f) + { + return; + } + + // The following code is equivalent to: + // a3[0] = (a2[0] - 0.5f * viewWidth) * xScale + 0.5f * viewWidth; + // a3[1] = (a2[1] - 0.5f * viewHeight) * yScale + 0.5f * viewHeight; + // a3[2] = a2[2]; + + __m128 vPos = _mm_set_ps(0.0f, 0.0f, a2[1], a2[0]); + + // Rearranged from: (pos - hss) * scale + hss + // To: pos * scale - hss*(scale - 1) + // Allows use of _mm_fmsub_ps + __m128 vOutPos = _mm_fmsub_ps(vPos, vHudwarpScale, vHudwarpHalfScreenSizeScaleSub1); + + // Unpack the vectorised data + // Using silly optim since x and y are adjacent + *(__int64*)a3 = vOutPos.m128_i64[0]; + // Equiv to these two + // a3[0] = vOutPos.m128_f32[0]; + // a3[1] = vOutPos.m128_f32[1]; + // a3[2] = a2[2]; // This isn't needed because in all cases a2 == a3 + + return; + } + + return sub_18000BAC0_org(a1, a2, a3); + } + + // We prevent the hud from reaching bounds of render texture by adding a border of 0.025 * width/height to the texture + // We need to offset the verts to account for that here + + // The following code is equivalent to: + // a3[0] = a2[0] * hudScale + hudOffset * viewWidth; + // a3[1] = a2[1] * hudScale + hudOffset * viewHeight; + // a3[2] = a2[2]; + + // Perform SIMD math :3 + __m128 vPos = _mm_set_ps(0.0f, 0.0f, a2[1], a2[0]); + __m128 vOutPos = _mm_fmadd_ps(vHudwarpHudScale, vPos, vHudwarpOffset); + + // Unpack the vectorised data + *(__int64*)a3 = vOutPos.m128_i64[0]; + // a3[2] = a2[2]; // This isn't needed because in all cases a2 == a3 +} + +typedef float* (*__fastcall sub_18000BE60_type)(float* a1, float* a2, float* a3, float* a4, float a5, float* a6, float* a7, _DWORD* a8); 
+HookedFuncStaticWithType sub_18000BE60_org("vguimatsurface.dll", 0xBE60); + +float* __fastcall sub_18000BE60(float* a1, float* a2, float* a3, float* a4, float a5, float* a6, float* a7, _DWORD* a8) +{ + shouldDoublePumpHudwarpVerts = shouldUseGPUHudwarp || isHudwarpDisabled; + hudwarpVertsProcessed = 0; + + float* ret = sub_18000BE60_org(a1, a2, a3, a4, a5, a6, a7, a8); + + shouldDoublePumpHudwarpVerts = false; + + return ret; +} + +typedef void** (*__fastcall sub_1800154A0_type)(__int64 a1, int a2, int a3, int a4, int a5); +HookedFuncStaticWithType sub_1800154A0_org("vguimatsurface.dll", 0x154A0); + +void** __fastcall sub_1800154A0(__int64 a1, int a2, int a3, int a4, int a5) +{ + shouldDoublePumpHudwarpVerts = shouldUseGPUHudwarp || isHudwarpDisabled; + hudwarpVertsProcessed = 0; + + void** ret = sub_1800154A0_org(a1, a2, a3, a4, a5); + + shouldDoublePumpHudwarpVerts = false; + + return ret; +} + +typedef void(__fastcall* OnWindowSizeChanged_type)(unsigned int, unsigned int, bool); +OnWindowSizeChanged_type OnWindowSizeChanged_org = nullptr; +void __fastcall OnWindowSizeChanged(unsigned int w, unsigned int h, bool isInGame) +{ + OnWindowSizeChanged_org(w, h, isInGame); + + UpdateHudwarpScreenSizeVectors((float)w, (float)h); + + if (hudwarpProcess) + hudwarpProcess->Resize(w, h); +} + +typedef void(__fastcall* CMatSystemSurface__ApplyHudwarpSettings_type)(void*, HudwarpSettings*, unsigned int, unsigned int); +CMatSystemSurface__ApplyHudwarpSettings_type CMatSystemSurface__ApplyHudwarpSettings_org = nullptr; +void __fastcall CMatSystemSurface__ApplyHudwarpSettings(void* thisptr, HudwarpSettings* hudwarpSettings, unsigned int screenX, unsigned int screenY) +{ + // Ported from TFORevive by Barnaby + + static ConVarRef hudwarp_chopsize{ "hudwarp_chopsize" }; + unsigned int originalChopsize = hudwarp_chopsize->GetInt(); + + static ConVarRef hudwarp_disable{ "hudwarp_disable" }; + + HudwarpSettings newSettings = *hudwarpSettings; + if (hudwarp_disable->GetInt()) + { + // 
Override hudwarp settings if hudwarp_disable is 1. + // NOTE: Comment below refers to original CPU version, we can set them to 0 when using our shader. + // Stuff breaks if you set the warp values to 0. + // Respawn set them to a min of 1deg in rads (0.017453292), we can do that too because it'll result in so little distortion you won't notice it :) + newSettings.xWarp = 0.0f; + if (newSettings.xScale > 1.0f) newSettings.xScale = 1.0f; + newSettings.yWarp = 0.0f; + if (newSettings.yScale > 1.0f) newSettings.yScale = 1.0f; + } + + UpdateHudwarpScaleVector(newSettings.xScale, newSettings.yScale); + + if (hudwarpProcess) + hudwarpProcess->UpdateSettings(&newSettings); + + static ConVarRef hudwarp_use_gpu{ "hudwarp_use_gpu" }; + // If using GPU hudwarp or hudwarp is disabled do this + // Replace chopsize, it gets set from the cvar in CMatSystemSurface__ApplyHudwarpSettings + if (hudwarp_use_gpu->GetInt() || hudwarp_disable->GetInt()) + { + if (screenX > screenY) [[likely]] + { + hudwarp_chopsize->SetValueInt(screenX); + } + else + { + hudwarp_chopsize->SetValueInt(screenY); + } + } + + CMatSystemSurface__ApplyHudwarpSettings_org(thisptr, &newSettings, screenX, screenY); + + hudwarp_chopsize->SetValueInt(originalChopsize); +} + void SetupHudwarp() { pDevice = *(ID3D11Device**)(Util::GetModuleBaseAddress("materialsystem_dx11.dll") + 0x290D88); @@ -252,4 +419,8 @@ void DoMiscRenderHooks() sub_5ADC0.Hook(sub_5ADC0_Hook); CreateMiscHook(materialsystem_dx11dllBaseAddress, 0x63D0, &sub_63D0, reinterpret_cast(&sub_63D0_org)); + + sub_18000BE60_org.Hook(sub_18000BE60); + // Don't hook sub_180011420, causes visual bugs + sub_1800154A0_org.Hook(sub_1800154A0); } \ No newline at end of file