Skip to content

Commit e46d739

Browse files
committed
Add a performance optimization that applies FSR only within a given radius from the image center. Pixels outside the radius fall back to simple bilinear upsampling, which is cheaper, and the difference is hard to notice at the edges of the image.
1 parent 470ed9e commit e46d739

File tree

6 files changed

+86
-23
lines changed

6 files changed

+86
-23
lines changed

src/fsr/fsr_easu.hlsl

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ cbuffer cb : register(b0) {
1010
uint4 Const1;
1111
uint4 Const2;
1212
uint4 Const3;
13-
//uint4 Sample;
13+
uint4 Centre;
14+
uint4 Radius;
1415
};
1516

1617
SamplerState samLinearClamp : register(s0);
@@ -23,23 +24,41 @@ AF4 FsrEasuBF(AF2 p) { AF4 res = InputTexture.GatherBlue(samLinearClamp, p, int2
2324

2425
#include "ffx_fsr1.h"
2526

26-
void Sharpen(int2 pos) {
27+
void Upscale(int2 pos) {
2728
AF3 c;
2829
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
29-
//if (Sample.x == 1)
30-
// c *= c;
30+
OutputTexture[pos] = AF4(c, 1);
31+
}
32+
33+
void Bilinear(int2 pos) {
34+
AF3 c = InputTexture.SampleLevel(samLinearClamp, float2(pos) / Radius.zw, 0).rgb;
3135
OutputTexture[pos] = AF4(c, 1);
3236
}
3337

3438
[numthreads(64, 1, 1)]
3539
void main(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) {
3640
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
3741
AU2 gxy = ARmp8x8(LocalThreadId.x) + AU2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
38-
Sharpen(gxy);
39-
gxy.x += 8u;
40-
Sharpen(gxy);
41-
gxy.y += 8u;
42-
Sharpen(gxy);
43-
gxy.x -= 8u;
44-
Sharpen(gxy);
42+
AU2 groupCentre = AU2((WorkGroupId.x << 4u) + 8u, (WorkGroupId.y << 4u) + 8u);
43+
AU2 dc1 = Centre.xy - groupCentre;
44+
AU2 dc2 = Centre.zw - groupCentre;
45+
if (dot(dc1, dc1) <= Radius.y || dot(dc2, dc2) <= Radius.y) {
46+
// only do the expensive EASU for workgroups inside the given radius
47+
Upscale(gxy);
48+
gxy.x += 8u;
49+
Upscale(gxy);
50+
gxy.y += 8u;
51+
Upscale(gxy);
52+
gxy.x -= 8u;
53+
Upscale(gxy);
54+
} else {
55+
// resort to cheaper bilinear sampling
56+
Bilinear(gxy);
57+
gxy.x += 8u;
58+
Bilinear(gxy);
59+
gxy.y += 8u;
60+
Bilinear(gxy);
61+
gxy.x -= 8u;
62+
Bilinear(gxy);
63+
}
4564
}

src/fsr/fsr_rcas.hlsl

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
cbuffer cb : register(b0) {
99
uint4 Const0;
10-
//uint4 Sample;
10+
uint4 Centre;
11+
uint4 Radius;
1112
};
1213

1314
SamplerState samLinearClamp : register(s0);
@@ -22,20 +23,32 @@ void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
2223
void Sharpen(int2 pos) {
2324
AF3 c;
2425
FsrRcasF(c.r, c.g, c.b, pos, Const0);
25-
//if (Sample.x == 1)
26-
// c *= c;
2726
OutputTexture[pos] = AF4(c, 1);
2827
}
2928

3029
[numthreads(64, 1, 1)]
3130
void main(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) {
3231
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
3332
AU2 gxy = ARmp8x8(LocalThreadId.x) + AU2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
34-
Sharpen(gxy);
35-
gxy.x += 8u;
36-
Sharpen(gxy);
37-
gxy.y += 8u;
38-
Sharpen(gxy);
39-
gxy.x -= 8u;
40-
Sharpen(gxy);
33+
AU2 groupCentre = AU2((WorkGroupId.x << 4u) + 8u, (WorkGroupId.y << 4u) + 8u);
34+
AU2 dc1 = Centre.xy - groupCentre;
35+
AU2 dc2 = Centre.zw - groupCentre;
36+
if (dot(dc1, dc1) <= Radius.y || dot(dc2, dc2) <= Radius.y) {
37+
// only do RCAS for workgroups inside the given radius
38+
Sharpen(gxy);
39+
gxy.x += 8u;
40+
Sharpen(gxy);
41+
gxy.y += 8u;
42+
Sharpen(gxy);
43+
gxy.x -= 8u;
44+
Sharpen(gxy);
45+
} else {
46+
OutputTexture[gxy] = InputTexture[gxy];
47+
gxy.x += 8u;
48+
OutputTexture[gxy] = InputTexture[gxy];
49+
gxy.y += 8u;
50+
OutputTexture[gxy] = InputTexture[gxy];
51+
gxy.x -= 8u;
52+
OutputTexture[gxy] = InputTexture[gxy];
53+
}
4154
}

src/openvr_mod.cfg

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,17 @@
1818
// tune sharpness, values range from 0 to 1
1919
"sharpness": 0.9,
2020

21+
// Only apply FSR within the given radius around the center of the image.
22+
// Anything outside this radius is upscaled by simple bilinear filtering,
23+
// which is cheaper and thus saves a bit of performance. Due to the design
24+
// of current HMD lenses, you can experiment with fairly small radii and may
25+
// still not see a noticeable difference.
26+
// Sensible values probably lie in the range [0.2, 1.0]. However, note
27+
// that, since the image is not circular, even a value of 1.0 technically still
28+
// skips some pixels in the corners of the image, so if you want to completely
29+
// disable this optimization, you can choose a value of 2.
30+
"radius": 0.5,
31+
2132
// if enabled, applies a negative LOD bias to texture MIP levels
2233
// should theoretically improve texture detail in the upscaled image
2334
"applyMIPBias": true

src/postprocess/Config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ struct Config {
1010
bool applyMIPBias = true;
1111
float renderScale = 1.f;
1212
float sharpness = 0.75f;
13+
float radius = 0.5f;
1314

1415
static Config Load() {
1516
Config config;
@@ -24,6 +25,7 @@ struct Config {
2425
if (config.sharpness < 0) config.sharpness = 0;
2526
config.renderScale = fsr.get("renderScale", 1.0).asFloat();
2627
config.applyMIPBias = fsr.get("applyMIPBias", true).asBool();
28+
config.radius = fsr.get("radius", 0.5).asFloat();
2729
}
2830
} catch (...) {
2931
Log() << "Could not read config file.\n";

src/postprocess/PostProcessor.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ namespace vr {
2222
}
2323

2424
DXGI_FORMAT TranslateTypelessFormats(DXGI_FORMAT format) {
25-
Log() << "Mapping format " << std::hex << format << std::dec << std::endl;
2625
switch (format) {
2726
case DXGI_FORMAT_R32G32B32A32_TYPELESS:
2827
return DXGI_FORMAT_R32G32B32A32_FLOAT;
@@ -82,6 +81,7 @@ namespace vr {
8281
}
8382
if (!initialized) {
8483
try {
84+
textureContainsOnlyOneEye = std::abs(pBounds->uMax - pBounds->uMin) > .5f;
8585
PrepareResources(texture, pTexture->eColorSpace);
8686
} catch (...) {
8787
Log() << "Resource creation failed, disabling\n";
@@ -90,7 +90,6 @@ namespace vr {
9090
}
9191
}
9292

93-
bool textureContainsOnlyOneEye = std::abs(pBounds->uMax - pBounds->uMin) > .5f;
9493
// if a single shared texture is used for both eyes, only apply effects on the first Submit
9594
if (eyeCount == 0 || textureContainsOnlyOneEye || texture != lastSubmittedTexture) {
9695
ApplyPostProcess(texture);
@@ -191,6 +190,8 @@ namespace vr {
191190
AU1 const1[4];
192191
AU1 const2[4];
193192
AU1 const3[4];
193+
AU1 imageCentre[4];
194+
AU1 radius[4];
194195
};
195196

196197
void PostProcessor::PrepareUpscalingResources() {
@@ -199,6 +200,13 @@ namespace vr {
199200
UpscaleConstants constants;
200201
// create shader constants buffers
201202
FsrEasuCon(constants.const0, constants.const1, constants.const2, constants.const3, inputWidth, inputHeight, inputWidth, inputHeight, outputWidth, outputHeight);
203+
constants.imageCentre[1] = constants.imageCentre[3] = outputHeight / 2;
204+
constants.imageCentre[0] = textureContainsOnlyOneEye ? outputWidth / 2 : outputWidth / 4;
205+
constants.imageCentre[2] = textureContainsOnlyOneEye ? outputWidth / 2 : 3 * outputWidth / 4;
206+
constants.radius[0] = 0.5f * Config::Instance().radius * outputHeight;
207+
constants.radius[1] = constants.radius[0] * constants.radius[0];
208+
constants.radius[2] = outputWidth;
209+
constants.radius[3] = outputHeight;
202210
D3D11_BUFFER_DESC bd;
203211
bd.Usage = D3D11_USAGE_IMMUTABLE;
204212
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
@@ -252,6 +260,8 @@ namespace vr {
252260

253261
struct SharpenConstants {
254262
AU1 const0[4];
263+
AU1 imageCentre[4];
264+
AU1 radius[4];
255265
};
256266

257267
void PostProcessor::PrepareSharpeningResources() {
@@ -260,6 +270,13 @@ namespace vr {
260270
SharpenConstants constants;
261271
float sharpness = AClampF1( Config::Instance().sharpness, 0, 1 );
262272
FsrRcasCon(constants.const0, 2.f - 2*sharpness);
273+
constants.imageCentre[1] = constants.imageCentre[3] = outputHeight / 2;
274+
constants.imageCentre[0] = textureContainsOnlyOneEye ? outputWidth / 2 : outputWidth / 4;
275+
constants.imageCentre[2] = textureContainsOnlyOneEye ? outputWidth / 2 : 3 * outputWidth / 4;
276+
constants.radius[0] = 0.5f * Config::Instance().radius * outputHeight;
277+
constants.radius[1] = constants.radius[0] * constants.radius[0];
278+
constants.radius[2] = outputWidth;
279+
constants.radius[3] = outputHeight;
263280
D3D11_BUFFER_DESC bd;
264281
bd.Usage = D3D11_USAGE_IMMUTABLE;
265282
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;

src/postprocess/PostProcessor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ namespace vr {
1919
uint32_t inputHeight = 0;
2020
uint32_t outputWidth = 0;
2121
uint32_t outputHeight = 0;
22+
bool textureContainsOnlyOneEye = true;
2223
bool requiresCopy = false;
2324
bool inputIsSrgb = false;
2425
ComPtr<ID3D11Device> device;

0 commit comments

Comments
 (0)