-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpush.comp
165 lines (131 loc) · 5.33 KB
/
push.comp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#version 430
#extension GL_ARB_shading_language_include : require
#include </data/shaders/common/floatpacking.glsl>
// When LEVEL_ZERO is defined, readInput() fetches from pullSameLevelTexture and
// writeOutput() stores only the red channel into imgOutputLastStage. Otherwise
// readInput() loads from pullSameLevelImage and writeOutput() stores into img_output.
#define LEVEL_ZERO
// Each work group consists of 8x8 invocations.
layout (local_size_x = 8, local_size_y = 8) in;
// Same-level input as an unsigned-integer texture; only read when LEVEL_ZERO is defined.
// (Sampler bindings and image bindings are separate binding spaces, so sharing
// index 0 with pullSameLevelImage is not a clash.)
layout (binding = 0) uniform usampler2D pullSameLevelTexture;
// Same-level input as an rgba32f image; only read when LEVEL_ZERO is not defined.
layout (rgba32f, binding = 0) restrict readonly uniform image2D pullSameLevelImage;
// Coarser level that main() reads a 2x2 block from.
// main() interprets the channels as r = depth, g = max depth, b = radius, a = packed 2D displacement.
layout (rgba32f, binding = 1) restrict readonly uniform image2D coarserLevel;
// Destination for the full four-channel result (used when LEVEL_ZERO is not defined).
layout (rgba32f, binding = 2) restrict writeonly uniform image2D img_output;
// Single-channel destination written when LEVEL_ZERO is defined; receives only the depth (red) value.
layout (r16, binding = 3) restrict writeonly uniform image2D imgOutputLastStage;
// Level index of the output; main() uses it for the ISM border test (shift by 6 - (level + 1))
// and to scale the splat radius by 2^-level.
uniform int level;
// Fetches the existing same-level sample at pixelCoordinate.
//
// With LEVEL_ZERO defined, the red channel of pullSameLevelTexture (mip 0) is read,
// its low 8 bits are dropped, and the remaining value is divided by 2^24; the result
// is returned in .r with the other channels set to zero.
// Without LEVEL_ZERO, the rgba32f texel of pullSameLevelImage is returned unchanged.
//
// NOTE(review): the shifted value can be at most 2^24 - 1, so the level-zero depth
// appears to stay strictly below 1.0, while main() tests the returned depth against
// exactly 1.0 -- confirm that this is intended.
vec4 readInput(ivec2 pixelCoordinate)
{
#ifdef LEVEL_ZERO
    uint rawTexel = texelFetch(pullSameLevelTexture, pixelCoordinate, 0).r;
    uint depthBits = rawTexel >> 8;
    float normalizedDepth = float(depthBits) / (1 << 24);
    return vec4(normalizedDepth, 0.0, 0.0, 0.0);
#else
    vec4 sameLevelSample = imageLoad(pullSameLevelImage, pixelCoordinate);
    return sameLevelSample;
#endif
}
// Stores the result for one output pixel.
//
// With LEVEL_ZERO defined, only result.r is kept and written (remaining channels
// zeroed) to imgOutputLastStage. Without it, the full vec4 goes to img_output.
void writeOutput(ivec2 pixelCoordinate, vec4 result)
{
    // Debug aid: overwrite the red channel with a checkerboard pattern.
    // bool checkerboardWhite = ((pixelCoordinate.x % 2) + (pixelCoordinate.y % 2)) % 2 == 0;
    // result.r = float(checkerboardWhite);
#ifdef LEVEL_ZERO
    // Uncomment to also fill the last push stage:
    // imageStore(img_output, pixelCoordinate, result);
    vec4 depthOnly = vec4(result.r, 0.0, 0.0, 0.0);
    imageStore(imgOutputLastStage, pixelCoordinate, depthOnly);
#else
    imageStore(img_output, pixelCoordinate, result);
#endif
}
// Push step: fills pixels of the current level from the next-coarser level.
//
// Each invocation loads one 2x2 block of coarserLevel texels (channels: r = depth,
// g = max depth, b = radius, a = packed 2D displacement) and produces the four
// output pixels that are interpolated from exactly these four texels.
// For every output pixel the four coarse samples get bilinear-style integer weights
// (9/3/3/1), and a sample's weight is dropped to zero when
//   - it lies in a different ISM tile than the output pixel,
//   - its depth equals 1.0 (treated as "no data"),
//   - the output pixel is farther from the sample's displaced position than the
//     sample's radius (scaled by 2^-level), or
//   - its depth exceeds the max depth stored with the nearest remaining sample.
// The weighted average replaces the existing same-level sample only if at least one
// coarse sample survived AND the existing sample is invalid (depth == 1.0) or lies
// behind the max depth of a surviving coarse sample; otherwise the existing sample
// is written back unchanged.
void main()
{
    // Corners of the 2x2 block, ordered cyclically so that indices i and i+2 are diagonal.
    const ivec2 offsets[4] = ivec2[4](ivec2(0, 0), ivec2(0, 1), ivec2(1, 1), ivec2(1, 0));
    // Weight by cyclic distance between output corner and sample corner:
    // same corner 9, adjacent 3, diagonal 1, adjacent 3.
    const int cornerWeights[4] = int[4](9, 3, 1, 3);

    // gl_GlobalInvocationID == gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID.
    ivec2 invocation = ivec2(gl_GlobalInvocationID.xy);
    ivec2 coarserLowerLeftPixel = invocation - 1;

    // Loop-invariant values derived from the level uniform.
    // ISMs are 64px wide at the lowest level, so 6 coordinate bits address a pixel
    // inside a tile there; the samples come from level+1, which leaves 6-(level+1) bits.
    // NOTE(review): assumes level <= 5, otherwise the shift amount becomes negative.
    int tileShift = 6 - (level + 1);
    float radiusScale = pow(2.0, float(-level)); // scale radius with mip level

    // read four pixels from coarser level
    float depths[4];
    float maxDepths[4];
    float radiuses[4];
    vec2 displacementVectorsCoarse[4];
    for (int i = 0; i < 4; i++) {
        vec4 coarserSample = imageLoad(coarserLevel, coarserLowerLeftPixel + offsets[i]);
        depths[i] = coarserSample.r;
        maxDepths[i] = coarserSample.g;
        radiuses[i] = coarserSample.b;
        displacementVectorsCoarse[i] = unpack2FloatsFromFloat(coarserSample.a);
    }

    // each invocation processes those four output pixels that have the same input pixels
    for (int outputPixel = 0; outputPixel < 4; outputPixel++)
    {
        ivec2 pixelCoordinate = invocation * 2 - 1 + offsets[outputPixel];
        ivec2 origTexCoord = pixelCoordinate / 2;            // output pixel in coarse-level coordinates
        vec2 thisTexCoord = vec2(pixelCoordinate) + 0.5;     // output pixel centre

        int weights[4];
        vec2 displacementVectors[4];
        for (int i = 0; i < 4; i++) {
            ivec2 inputPixelCoords = coarserLowerLeftPixel + offsets[i];

            // Vector from this pixel's centre to the sample's displaced position,
            // expressed in this level's pixel units (hence the factors of two).
            vec2 coarserTexCoord = (vec2(inputPixelCoords) + 0.5) * 2.0;
            displacementVectors[i] = coarserTexCoord - thisTexCoord + displacementVectorsCoarse[i] * 2.0;

            // don't go over ISM borders
            bool otherTile = (origTexCoord >> tileShift) != (inputPixelCoords >> tileShift);
            // ignore pixels with invalid depth
            bool invalidDepth = depths[i] == 1.0;
            // radius check
            bool outsideRadius = length(displacementVectors[i]) > radiuses[i] * radiusScale;

            weights[i] = (otherTile || invalidDepth || outsideRadius)
                ? 0
                : cornerWeights[(i - outputPixel + 4) % 4];
        }

        // depth range check: find the nearest surviving sample and use its max depth
        // as the upper bound for all samples.
        float minimum = 9001;
        // Initialised so the comparison below never reads an undefined value when
        // every weight is already zero; the sentinel rejects nothing in that case.
        float maxDepth = minimum;
        for (int i = 0; i < 4; i++) {
            if (weights[i] == 0)
                continue;
            minimum = min(depths[i], minimum);
            if (minimum == depths[i])
                maxDepth = maxDepths[i];
        }
        for (int i = 0; i < 4; i++) {
            if (depths[i] > maxDepth)
                weights[i] = 0;
        }

        // weighted sums over the surviving samples
        float depthAcc = 0.0;
        float maxDepthAcc = 0.0;
        float radiusAcc = 0.0;
        vec2 displacementAcc = vec2(0.0);
        int weightAcc = 0;
        for (int i = 0; i < 4; i++) {
            float w = float(weights[i]);
            depthAcc += depths[i] * w;
            maxDepthAcc += maxDepths[i] * w;
            radiusAcc += radiuses[i] * w;
            displacementAcc += displacementVectors[i] * w;
            weightAcc += weights[i];
        }

        vec4 origSample = readInput(pixelCoordinate);
        bool invalid = origSample.r == 1.0;
        bool occluded = false;
        for (int i = 0; i < 4; i++) {
            occluded = occluded || (weights[i] > 0 && origSample.r > maxDepths[i]);
        }
        bool allSamplesInvalid = weightAcc <= 0;

        // Keep the existing sample unless there is something to interpolate from
        // and the existing sample is either empty or hidden.
        vec4 result = origSample;
        if (!allSamplesInvalid && (invalid || occluded)) {
            // weightAcc > 0 is guaranteed here, so the divisions are well defined.
            float totalWeight = float(weightAcc);
            result = vec4(depthAcc / totalWeight,
                          maxDepthAcc / totalWeight,
                          radiusAcc / totalWeight,
                          pack2FloatsToFloat(displacementAcc / totalWeight));
        }

        writeOutput(pixelCoordinate, result);
    }
}