@@ -1274,7 +1274,6 @@ inline float VMFHemisphericalIntegral(const float cosine, const float sharpness)
12741274 return lerp (e, 1.0 , lerpFactor) / (e + 1.0 );
12751275}
12761276
1277-
12781277struct SGLobe {
12791278 float3 axis ;
12801279 float sharpness ;
@@ -1304,7 +1303,7 @@ inline float UpperSGClampedCosineIntegralOverTwoPi(const float sharpness)
13041303 return (((((((-1.0 / 362880.0 ) * sharpness + 1.0 / 40320.0 ) * sharpness - 1.0 / 5040.0 ) * sharpness + 1.0 / 720.0 ) * sharpness - 1.0 / 120.0 ) * sharpness + 1.0 / 24.0 ) * sharpness - 1.0 / 6.0 ) * sharpness + 0.5 ;
13051304 }
13061305
1307- return (expm1 (-sharpness) + sharpness) / (sharpness * sharpness);
1306+ return (expm1 (-sharpness) + sharpness) * rcp (sharpness * sharpness);
13081307}
13091308
13101309// [Tokuyoshi et al. 2024 "Hierarchical Light Sampling with Accurate Spherical Gaussian Lighting (Supplementary Document)" Listing. 6]
@@ -1319,7 +1318,7 @@ inline float LowerSGClampedCosineIntegralOverTwoPi(const float sharpness)
13191318 return e * (((((((((1.0 / 403200.0 ) * sharpness - 1.0 / 45360.0 ) * sharpness + 1.0 / 5760.0 ) * sharpness - 1.0 / 840.0 ) * sharpness + 1.0 / 144.0 ) * sharpness - 1.0 / 30.0 ) * sharpness + 1.0 / 8.0 ) * sharpness - 1.0 / 3.0 ) * sharpness + 0.5 );
13201319 }
13211320
1322- return e * (-expm1 (-sharpness) - sharpness * e) / (sharpness * sharpness);
1321+ return e * (-expm1 (-sharpness) - sharpness * e) * rcp (sharpness * sharpness);
13231322}
13241323
13251324// Approximate product integral of an SG and clamped cosine / pi.
@@ -1354,28 +1353,28 @@ inline float SGClampedCosineProductIntegralOverPi2024(const float cosine, const
13541353 const float B5 = 4. 90735564e-3 ;
13551354 const float B6 = -5. 58853149e-4 ;
13561355
1357-
1356+ float a;
13581357 if (abs (tz) >= 4.0 ) {
13591358 ERFCTZ = mulsign (1.0 , -tz);
13601359 } else if (abs (tz) > 1.0 ) {
1361- const float a = abs (tz);
1362- const float y = 1.0 - exp2 (-(((((((A7 * a + A6) * a + A5) * a + A4) * a + A3) * a + A2) * a + A1) * a));
1360+ a = abs (tz);
1361+ a = 1.0 - exp2 (-(((((((A7 * a + A6) * a + A5) * a + A4) * a + A3) * a + A2) * a + A1) * a));
13631362
1364- ERFCTZ = mulsign (y , -tz);
1363+ ERFCTZ = mulsign (a , -tz);
13651364 } else {
1366- const float x2 = tz * tz;
1367- ERFCTZ = (((((B6 * x2 + B5) * x2 + B4) * x2 + B3) * x2 + B2) * x2 + B1) * -tz;
1365+ a = tz * tz;
1366+ ERFCTZ = (((((B6 * a + B5) * a + B4) * a + B3) * a + B2) * a + B1) * -tz;
13681367 }
13691368
13701369 [branch]if (t >= 4.0 ) {
13711370 ERFCT = mulsign (1.0 , t);
13721371 } else if (t > 1.0 ) {
1373- const float y = 1.0 - exp2 (-(((((((A7 * t + A6) * t + A5) * t + A4) * t + A3) * t + A2) * t + A1) * t));
1372+ a = 1.0 - exp2 (-(((((((A7 * t + A6) * t + A5) * t + A4) * t + A3) * t + A2) * t + A1) * t));
13741373
1375- ERFCT = mulsign (y , t);
1374+ ERFCT = mulsign (a , t);
13761375 } else {
1377- const float x2 = t * t;
1378- ERFCT = (((((B6 * x2 + B5) * x2 + B4) * x2 + B3) * x2 + B2) * x2 + B1) * t;
1376+ a = t * t;
1377+ ERFCT = (((((B6 * a + B5) * a + B4) * a + B3) * a + B2) * a + B1) * t;
13791378 }
13801379 ERFCT = 1.0f - ERFCT;
13811380 ERFCTZ = 1.0f - ERFCTZ;
@@ -1386,7 +1385,7 @@ inline float SGClampedCosineProductIntegralOverPi2024(const float cosine, const
13861385
13871386
13881387
1389- const float lerpFactor = saturate (max (0.5 * (cosine * ERFCTZ + ERFCT) - 0.5 * INV_SQRTPI * exp (-tz * tz) * expm1 (t * t * (cosine * cosine - 1.0 )) / t , CLAMPING_THRESHOLD));
1388+ const float lerpFactor = saturate (max (0.5f * (cosine * ERFCTZ + ERFCT) - 0.5f * INV_SQRTPI * exp (-tz * tz) * expm1 (t * t * (cosine * cosine - 1.0 )) * rcp (t) , CLAMPING_THRESHOLD));
13901389
13911390 // Interpolation between lower and upper hemispherical integrals.
13921391 const float lowerIntegral = LowerSGClampedCosineIntegralOverTwoPi (sharpness);
@@ -1410,7 +1409,7 @@ inline float SGGX(const float3 m, const float2x2 roughnessMat)
14101409// [Tokuyoshi et al. 2024 "Hierarchical Light Sampling with Accurate Spherical Gaussian Lighting", Section 5.2]
14111410inline float SGGXReflectionPDF (const float3 wi, const float3 m, const float2x2 roughnessMat)
14121411{
1413- return SGGX (m, roughnessMat) / max (4.0 * sqrt (dot (wi.xz, mul (roughnessMat, wi.xz)) + wi.y * wi.y), EPSILON); // TODO: Use Kahan's algorithm for precise mul and dot. [https://pharr.org/matt/blog/2019/11/03/difference-of-floats]
1412+ return SGGX (m, roughnessMat) * rcp ( max (4.0 * sqrt (dot (wi.xz, mul (roughnessMat, wi.xz)) + wi.y * wi.y), EPSILON) ); // TODO: Use Kahan's algorithm for precise mul and dot. [https://pharr.org/matt/blog/2019/11/03/difference-of-floats]
14141413}
14151414
14161415
@@ -1427,10 +1426,10 @@ inline float expm1_over_x(const float x)
14271426
14281427 if (abs (x) < 1.0 )
14291428 {
1430- return y / log (u);
1429+ return y * rcp ( log (u) );
14311430 }
14321431
1433- return y / x ;
1432+ return y * rcp (x) ;
14341433}
14351434
14361435
@@ -1455,33 +1454,29 @@ inline float SGIntegral(const float sharpness)
14551454
14561455 */
14571456
1458- inline bool IsFinite (float x)
1457+ bool IsFinite (float x)
14591458{
14601459 return (asuint (x) & 0x7F800000 ) != 0x7F800000 ;
14611460}
14621461
1463- inline float SGImportance (const GaussianTreeNode TargetNode, const float3 viewDir , const float3 p, const float3 n, const float2 projRoughness2, const float reflecSharpness , const float3x3 tangentFrame, const float metallic ) {
1462+ inline float SGImportance (const GaussianTreeNode TargetNode, const float3 viewDirTS , const float3 p, const float3 n, const float2 projRoughness2, const float3 reflecVec , const float3x3 tangentFrame) {
14641463 // if(TargetNode.intensity <= 0) return 0;
14651464 float3 to_light = TargetNode.position - p;
14661465 const float squareddist = dot (to_light, to_light);
14671466
1468- const float c = clamp (dot (n, -(to_light)) / sqrt (squareddist), 0 , 1 );
1467+ // const float c = clamp(dot(n, -(to_light)) / sqrt(squareddist), 0, 1);
14691468
14701469 // Compute the Jacobian J for the transformation between halfvetors and reflection vectors at halfvector = normal.
1471- const float3 viewDirTS = mul (tangentFrame, viewDir);//their origional one constructs the tangent frame from N,T,BT, whereas mine constructs it from T,N,BT; problem? I converted all .y to .z and vice versa, but...
1470+ // const float3 viewDirTS = mul(tangentFrame, viewDir);//their origional one constructs the tangent frame from N,T,BT, whereas mine constructs it from T,N,BT; problem? I converted all .y to .z and vice versa, but...
14721471
14731472 const float vlen = length (viewDirTS.xz);
14741473 const float2 v = (vlen != 0.0 ) ? (viewDirTS.xz / vlen) : float2 (1.0 , 0.0 );
1475- const float2x2 reflecJacobianMat = mul (float2x2 (v.x, -v.y, v.y, v.x), float2x2 (0.5 , 0.0 , 0.0 , 0.5 / viewDirTS.y));
1474+ const float2x2 reflecJacobianMat = mul (float2x2 (v.x, -v.y, v.y, v.x), float2x2 (0.5 , 0.0 , 0.0 , 0.5f / viewDirTS.y));
14761475
14771476 // Compute JJ^T matrix.
14781477 const float2x2 jjMat = mul (reflecJacobianMat, transpose (reflecJacobianMat));
1479- const float detJJ4 = 1.0 / (4.0 * viewDirTS.y * viewDirTS.y); // = 4 * determiant(JJ^T).
1478+ const float detJJ4 = rcp (4.0 * viewDirTS.y * viewDirTS.y); // = 4 * determiant(JJ^T).
14801479
1481- // Preprocess for the lobe visibility.
1482- // Approximate the reflection lobe with an SG whose axis is the perfect specular reflection vector.
1483- // We use a conservative sharpness to filter the visibility.
1484- const float3 reflecVec = reflect (-viewDir, n) * reflecSharpness;
14851480
14861481 // TargetNode.variance = 0.5f * TargetNode.radius * TargetNode.radius;
14871482 const float Variance = max (TargetNode.variance, (1.0f / pow (2 , 31 )) * squareddist);// * (1.0f - c) + 0.5f * (TargetNode.radius * TargetNode.radius) * c;
@@ -1517,11 +1512,11 @@ inline float SGImportance(const GaussianTreeNode TargetNode, const float3 viewDi
15171512 // NDF filtering in a numerically stable manner.
15181513 // See the supplementary document (Section 5.2) of the paper for the derivation.
15191514 const float tr = filteredProjRoughnessMat._11 + filteredProjRoughnessMat._22;
1520- const float2x2 filteredRoughnessMat = IsFinite (1.0 + tr + det) ? min (filteredProjRoughnessMat + float2x2 (det, 0.0 , 0.0 , det), FLT_MAX) / (1.0 + tr + det) : (float2x2 (min (filteredProjRoughnessMat._11, FLT_MAX) / min (filteredProjRoughnessMat._11 + 1.0 , FLT_MAX), 0.0 , 0.0 , min (filteredProjRoughnessMat._22, FLT_MAX) / min (filteredProjRoughnessMat._22 + 1.0 , FLT_MAX)));
1515+ const float2x2 filteredRoughnessMat = min (filteredProjRoughnessMat + float2x2 (det, 0.0 , 0.0 , det), FLT_MAX) * rcp ( 1.0 + tr + det); // IsFinite(1.0 + tr + det) ? min(filteredProjRoughnessMat + float2x2(det, 0.0, 0.0, det), FLT_MAX) / (1.0 + tr + det) : (float2x2(min(filteredProjRoughnessMat._11, FLT_MAX) / min(filteredProjRoughnessMat._11 + 1.0, FLT_MAX), 0.0, 0.0, min(filteredProjRoughnessMat._22, FLT_MAX) / min(filteredProjRoughnessMat._22 + 1.0, FLT_MAX)));
15211516
15221517 // Evaluate the filtered distribution.
15231518 const float3 halfvecUnormalized = viewDirTS + mul (tangentFrame, LightLobe.axis);
1524- const float3 halfvec = halfvecUnormalized / max (length (halfvecUnormalized), EPSILON);
1519+ const float3 halfvec = halfvecUnormalized * rsqrt ( max (dot (halfvecUnormalized, halfvecUnormalized ), EPSILON) );
15251520 const float pdf = SGGXReflectionPDF (viewDirTS, halfvec, filteredRoughnessMat);
15261521
15271522 // Visibility of the SG light in the upper hemisphere.
@@ -1625,6 +1620,8 @@ void CalcLightPDF(inout float lightPDF, float3 p, float3 p2, float3 n, const int
16251620 const float2 roughness2 = sharpness * sharpness;
16261621 const float2 ProjRoughness2 = roughness2 / max (1.0 - roughness2, EPSILON);
16271622 const float reflecSharpness = (1.0 - max (roughness2.x, roughness2.y)) / max (2.0f * max (roughness2.x, roughness2.y), EPSILON);
1623+ float3 viewDirTS = mul (tangentFrame, viewDir);//their origional one constructs the tangent frame from N,T,BT, whereas mine constructs it from T,N,BT; problem? I converted all .y to .z and vice versa, but...
1624+ float3 reflecVec = reflect (-viewDir, n) * reflecSharpness;
16281625
16291626 while (Reps < 100 ) {
16301627 Reps++;
@@ -1636,8 +1633,8 @@ void CalcLightPDF(inout float lightPDF, float3 p, float3 p2, float3 n, const int
16361633 [branch]if (node.left >= 0 ) {
16371634#ifdef UseSGTree
16381635 const float2 ci = float2 (
1639- SGImportance (SGTree[node.left + NodeOffset], viewDir , p, n, ProjRoughness2, reflecSharpness , tangentFrame, metallic ),
1640- SGImportance (SGTree[node.left + 1 + NodeOffset], viewDir , p, n, ProjRoughness2, reflecSharpness , tangentFrame, metallic )
1636+ SGImportance (SGTree[node.left + NodeOffset], viewDirTS , p, n, ProjRoughness2, reflecVec , tangentFrame),
1637+ SGImportance (SGTree[node.left + 1 + NodeOffset], viewDirTS , p, n, ProjRoughness2, reflecVec , tangentFrame)
16411638 );
16421639#else
16431640 const float2 ci = float2 (
@@ -1688,10 +1685,12 @@ void CalcLightPDF(inout float lightPDF, float3 p, float3 p2, float3 n, const int
16881685 float3 Scale = pow (rcp (float3 (scalex, scaley, scalez)),2 );
16891686 n = normalize (mul (_MeshData[MeshIndex].W2L, float4 (n,0 )).xyz / Scale);
16901687 viewDir = normalize (mul (_MeshData[MeshIndex].W2L, float4 (viewDir,0 )).xyz / Scale);
1688+ tangentFrame = GetTangentSpace2 (n);//Need to maybe check to see if this holds up when the traversal backtracks due to dead end
1689+ viewDirTS = mul (tangentFrame, viewDir);
1690+ reflecVec = reflect (-viewDir, n) * reflecSharpness;
16911691 NodeOffset = _MeshData[MeshIndex].LightNodeOffset;
16921692 node_index = NodeOffset;
16931693 HasHitTLAS = true ;
1694- tangentFrame = GetTangentSpace2 (n);//Need to maybe check to see if this holds up when the traversal backtracks due to dead end
16951694 if (MeshIndex != _LightMeshes[-(node.left+1 )].LockedMeshIndex) {
16961695 lightPDF = 1 ;
16971696 return ;
@@ -1719,6 +1718,8 @@ int SampleLightBVH(float3 p, float3 n, inout float pmf, const int pixel_index, i
17191718 const float2 roughness2 = sharpness * sharpness;
17201719 const float2 ProjRoughness2 = roughness2 / max (1.0 - roughness2, EPSILON);
17211720 const float reflecSharpness = (1.0 - max (roughness2.x, roughness2.y)) / max (2.0f * max (roughness2.x, roughness2.y), EPSILON);
1721+ float3 viewDirTS = mul (tangentFrame, viewDir);//their origional one constructs the tangent frame from N,T,BT, whereas mine constructs it from T,N,BT; problem? I converted all .y to .z and vice versa, but...
1722+ float3 reflecVec = reflect (-viewDir, n) * reflecSharpness;
17221723 float RandNum = random (264 + Reps, pixel_index).x;
17231724 while (Reps < 222 ) {
17241725 Reps++;
@@ -1730,8 +1731,8 @@ int SampleLightBVH(float3 p, float3 n, inout float pmf, const int pixel_index, i
17301731 [branch]if (node.left >= 0 ) {
17311732#ifdef UseSGTree
17321733 const float2 ci = float2 (
1733- SGImportance (SGTree[node.left + NodeOffset], viewDir , p, n, ProjRoughness2, reflecSharpness , tangentFrame, metallic ),
1734- SGImportance (SGTree[node.left + 1 + NodeOffset], viewDir , p, n, ProjRoughness2, reflecSharpness , tangentFrame, metallic )
1734+ SGImportance (SGTree[node.left + NodeOffset], viewDirTS , p, n, ProjRoughness2, reflecVec , tangentFrame),
1735+ SGImportance (SGTree[node.left + 1 + NodeOffset], viewDirTS , p, n, ProjRoughness2, reflecVec , tangentFrame)
17351736 );
17361737#else
17371738 const float2 ci = float2 (
@@ -1774,6 +1775,8 @@ int SampleLightBVH(float3 p, float3 n, inout float pmf, const int pixel_index, i
17741775 n = normalize (mul (_MeshData[MeshIndex].W2L, float4 (n,0 )).xyz / Scale);
17751776 viewDir = normalize (mul (_MeshData[MeshIndex].W2L, float4 (viewDir,0 )).xyz / Scale);
17761777 tangentFrame = GetTangentSpace2 (n);
1778+ viewDirTS = mul (tangentFrame, viewDir);
1779+ reflecVec = reflect (-viewDir, n) * reflecSharpness;
17771780 NodeOffset = _MeshData[MeshIndex].LightNodeOffset;
17781781 node_index = NodeOffset;
17791782 HasHitTLAS = true ;
0 commit comments