Skip to content

Commit 2c60f75

Browse files
committed
fix SIMD overread
1 parent 884ff06 commit 2c60f75

1 file changed

Lines changed: 23 additions & 12 deletions

File tree

src/culverin/culverin_math.cpp

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,32 +74,43 @@ void culverin_math_interpolate_character_transform(const PosStride *__restrict s
7474
float *__restrict out_r) {
7575
using namespace JPH;
7676

77-
// Use RVec3 for position to respect double-precision worlds
77+
// 1. POSITION LERP
78+
// SAFETY: Explicitly construct from components.
79+
// This prevents the Jolt SIMD engine from trying to
80+
// perform a 32-byte (4-double) load on a 24-byte (3-double) C-struct.
7881
const RVec3 p1(static_cast<Real>(start_p->x), static_cast<Real>(start_p->y),
7982
static_cast<Real>(start_p->z));
8083

81-
// Pointer reinterpretation is safe here as Jolt types are POD-like
82-
const auto &p2 = *reinterpret_cast<const RVec3 *>(end_p);
84+
const RVec3 p2(static_cast<Real>(end_p->x), static_cast<Real>(end_p->y),
85+
static_cast<Real>(end_p->z));
86+
8387
const auto p_res = p1 + (p2 - p1) * static_cast<Real>(alpha);
8488

85-
// Rotation is always float[4] regardless of JPH_DOUBLE_PRECISION
86-
const auto v1 = Vec4::sLoadFloat4(reinterpret_cast<const Float4 *>(start_r));
87-
const auto &q2 = *reinterpret_cast<const Quat *>(end_r);
88-
const auto v2 = q2.mValue;
89+
// 2. ROTATION NLERP
90+
// start_r is AuxStride (float[4], 16 bytes). sLoadFloat4 is safe here.
91+
const auto v1 = Vec4::sLoadFloat4(reinterpret_cast<const Float4 *>(start_r));
8992

90-
// SIMD Dot and shortest path
91-
const float dot = v1.Dot(v2);
93+
// end_r is JPH_Quat (float[4], 16 bytes).
94+
// Construct explicitly to avoid any potential AVX-512 over-reads.
9295
const Quat q1(v1);
93-
const Quat q2_shortest = (dot < 0.0F) ? -q2 : q2;
96+
const Quat q2(end_r->x, end_r->y, end_r->z, end_r->w);
97+
98+
const auto v2 = q2.mValue;
99+
const float dot = v1.Dot(v2);
100+
101+
// Shortest path hemisphere check
102+
const Quat q2_shortest = (dot < 0.0f) ? -q2 : q2;
94103

95-
// NLerp + Normalize
104+
// NLerp toward the hemisphere-corrected target: (q1 + (q2_shortest - q1) * alpha).Normalized()
96105
const Quat q_res = (q1 + (q2_shortest - q1) * alpha).Normalized();
97106

98-
// Final stores
107+
// 3. STORE RESULTS
108+
// Cast back to float for the renderer
99109
out_p[0] = static_cast<float>(p_res.GetX());
100110
out_p[1] = static_cast<float>(p_res.GetY());
101111
out_p[2] = static_cast<float>(p_res.GetZ());
102112

113+
// out_r is float[4] (16 bytes). StoreFloat4 is safe.
103114
q_res.mValue.StoreFloat4(reinterpret_cast<Float4 *>(out_r));
104115
}
105116

0 commit comments

Comments
 (0)