Skip to content

Commit daef67c

Browse files
committed
Support for row major hsMatrix
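Swap the operand order of the matrix multiplications in the shaders so that they consume row-major hsMatrix44 data directly, and remove the swapOrder (transpose) option from hsMatrix2SIMD. This change has not yet been carried over to the dynamic effects, which receive their uniforms through a single large buffer provided by the engine itself.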
1 parent b3f999c commit daef67c

File tree

5 files changed

+49
-75
lines changed

5 files changed

+49
-75
lines changed

Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal

+34-34
Original file line numberDiff line numberDiff line change
@@ -265,9 +265,9 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]],
265265
half3 LAmbient = half3(0.0, 0.0, 0.0);
266266
half3 LDiffuse = half3(0.0, 0.0, 0.0);
267267

268-
const float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz;
268+
const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz;
269269

270-
float4 position = (uniforms.localToWorldMatrix * float4(in.position, 1.0));
270+
float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix);
271271
if(temp_hasOnlyWeight1) {
272272
const float4 position2 = blendMatrix1 * float4(in.position, 1.0);
273273
position = (in.weight1 * position) + ((1.0f - in.weight1) * position2);
@@ -317,7 +317,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]],
317317
abs(uniforms.invVtxAlpha - MDiffuse.a));
318318

319319
out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a));
320-
const float4 vCamPosition = uniforms.worldToCameraMatrix * position;
320+
const float4 vCamPosition = position * uniforms.worldToCameraMatrix;
321321
//out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0));
322322

323323
//Fog
@@ -333,13 +333,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]],
333333
}
334334
out.fogColor.rgb = uniforms.fogColor;
335335

336-
const float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0));
336+
const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix;
337337

338338
for(size_t layer=0; layer<num_layers; layer++) {
339339
(&out.texCoord1)[layer] = uniforms.sampleLocation(layer, &in.texCoord1, normal, vCamPosition);
340340
}
341341

342-
out.position = uniforms.projectionMatrix * vCamPosition;
342+
out.position = vCamPosition * uniforms.projectionMatrix;
343343

344344
return out;
345345
}
@@ -354,35 +354,35 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co
354354
//Note: If we want to require newer versions of Metal/newer hardware we could pass function pointers instead of doing these ifs.
355355
if (flags & (kMiscUseReflectionXform | kMiscUseRefractionXform)) {
356356
matrix = cameraToWorldMatrix;
357-
matrix[3][0] = matrix[3][1] = matrix[3][2] = 0;
357+
matrix[0][3] = matrix[1][3] = matrix[2][3] = 0;
358358

359359
// This is just a rotation about X of Pi/2 (y = z, z = -y),
360360
// followed by flipping Z to reflect back towards us (z = -z).
361361

362362
// swap mat[1][0] and mat[2][0]
363363
float temp;
364-
temp = matrix[0][1];
365-
matrix[0][1] = matrix[0][2];
366-
matrix[0][2] = temp;
364+
temp = matrix[1][0];
365+
matrix[1][0] = matrix[2][0];
366+
matrix[2][0] = temp;
367367

368368
// swap mat[1][1] and mat[2][1]
369369
temp = matrix[1][1];
370-
matrix[1][1] = matrix[1][2];
371-
matrix[1][2] = temp;
370+
matrix[1][1] = matrix[2][1];
371+
matrix[2][1] = temp;
372372

373373
// swap mat[1][2] and mat[2][2]
374-
temp = matrix[2][1];
375-
matrix[2][1] = matrix[2][2];
374+
temp = matrix[1][2];
375+
matrix[1][2] = matrix[2][2];
376376
matrix[2][2] = temp;
377377

378378
if (flags & kMiscUseRefractionXform) {
379379
// Same as reflection, but then matrix = matrix * scaleMatNegateZ.
380380

381381
// mat[0][2] = -mat[0][2];
382-
matrix[2][0] = -matrix[2][0];
382+
matrix[0][2] = -matrix[0][2];
383383

384384
// mat[1][2] = -mat[1][2];
385-
matrix[2][1] = -matrix[2][1];
385+
matrix[1][2] = -matrix[1][2];
386386

387387
// mat[2][2] = -mat[2][2];
388388
matrix[2][2] = -matrix[2][2];
@@ -398,10 +398,10 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co
398398
matrix_float4x4 scaleMatrix = matrix_float4x4(1.0);
399399

400400
// hsVector3 camTrans(0.5f, 0.5f, 0.f);
401-
scaleMatrix[3][0] = 0.5f;
402-
scaleMatrix[3][1] = -0.5f;
401+
scaleMatrix[0][3] = 0.5f;
402+
scaleMatrix[1][3] = -0.5f;
403403

404-
matrix = scaleMatrix * translationMatrix;
404+
matrix = translationMatrix * scaleMatrix;
405405

406406
// The scale and trans move us from NDC to Screen space. We need to swap
407407
// the Z and W coordinates so that the texture projection will divide by W
@@ -410,50 +410,50 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co
410410

411411
// swap mat[2][2] and mat[3][2]
412412
temp = matrix[2][2];
413-
matrix[2][2] = matrix[2][3];
414-
matrix[2][3] = temp;
413+
matrix[2][2] = matrix[3][2];
414+
matrix[3][2] = temp;
415415

416416
// swap mat[2][3] and mat[3][3]
417-
temp = matrix[3][2];
418-
matrix[3][2] = matrix[3][3];
417+
temp = matrix[2][3];
418+
matrix[2][3] = matrix[3][3];
419419
matrix[3][3] = temp;
420420

421421
// Multiply by the projection matrix
422-
matrix = matrix * projectionMatrix;
422+
matrix = projectionMatrix * matrix;
423423
} else if (flags & kMiscProjection) {
424424
matrix_float4x4 cam2World = cameraToWorldMatrix;
425425
if( !(UVWSrc & kUVWPosition) ) {
426-
cam2World.columns[3][0] = 0;
427-
cam2World.columns[3][1] = 0;
428-
cam2World.columns[3][2] = 0;
426+
cam2World.columns[0][3] = 0;
427+
cam2World.columns[1][3] = 0;
428+
cam2World.columns[2][3] = 0;
429429
}
430430

431-
matrix = matrix * cam2World;
431+
matrix = cam2World * matrix;
432432
}
433433

434434
float4 sampleCoord;
435435

436436
switch (UVWSrc) {
437437
case kUVWNormal:
438438
{
439-
sampleCoord = matrix * normal;
439+
sampleCoord = normal * matrix;
440440
}
441441
break;
442442
case kUVWPosition:
443443
{
444-
sampleCoord = matrix * camPosition;
444+
sampleCoord = camPosition * matrix;
445445
}
446446
break;
447447
case kUVWReflect:
448448
{
449-
sampleCoord = matrix * reflect(normalize(camPosition), normalize(normal));
449+
sampleCoord = reflect(normalize(camPosition), normalize(normal)) * matrix;
450450
}
451451
break;
452452
default:
453453
{
454454
const int index = UVWSrc & 0x0F;
455455
if (index < num_uvs) {
456-
sampleCoord = matrix * float4(texCoords[index], 1.0);
456+
sampleCoord = float4(texCoords[index], 1.0) * matrix;
457457
} else {
458458
//The DX engine will use a UV co-ord of 0,0 if the index is out of range
459459
sampleCoord = float4(0.0);
@@ -649,13 +649,13 @@ vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]],
649649
{
650650
ShadowCasterInOut out;
651651

652-
const float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0));
652+
const float4 vCamPosition = (float4(in.position, 1.0) * uniforms.localToWorldMatrix) * uniforms.worldToCameraMatrix;
653653

654654
const float4x4 matrix = uniforms.uvTransforms[0].transform;
655655

656-
out.texCoord1 = (matrix * vCamPosition).xyz;
656+
out.texCoord1 = (vCamPosition * matrix).xyz;
657657

658-
out.position = uniforms.projectionMatrix * vCamPosition;
658+
out.position = vCamPosition * uniforms.projectionMatrix;
659659

660660
return out;
661661
}

Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal

+4-2
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,11 @@ vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]],
7777
ColorInOut out;
7878

7979
float4 position = float4(in.position, 0.0, 1.0);
80-
position = uniforms.projectionMatrix * position;
81-
out.position = (uniforms.localToWorldMatrix * position);
80+
position = position * uniforms.projectionMatrix;
81+
out.position = ( position * uniforms.localToWorldMatrix);
82+
out.position.y *= -1.0f;
8283
out.texCoord = (float4(in.texCoord, 1.0) * uniforms.uvTransforms[0].transform).xyz;
84+
out.texCoord.y = 1.0 - out.texCoord.y;
8385
out.normal = float4(0.0, 0.0, 1.0, 0.0);
8486

8587
return out;

Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp

+5-29
Original file line numberDiff line numberDiff line change
@@ -62,39 +62,15 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
6262

6363
#include "plMetalPipelineState.h"
6464

65-
matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder)
65+
matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst)
6666
{
6767
if (src.fFlags & hsMatrix44::kIsIdent)
6868
{
6969
memcpy(dst, &matrix_identity_float4x4, sizeof(float) * 16);
7070
}
7171
else
7272
{
73-
//SIMD is column major, hsMatrix44 is row major.
74-
//We need to flip.
75-
if(swapOrder) {
76-
dst->columns[0][0] = src.fMap[0][0];
77-
dst->columns[1][0] = src.fMap[0][1];
78-
dst->columns[2][0] = src.fMap[0][2];
79-
dst->columns[3][0] = src.fMap[0][3];
80-
81-
dst->columns[0][1] = src.fMap[1][0];
82-
dst->columns[1][1] = src.fMap[1][1];
83-
dst->columns[2][1] = src.fMap[1][2];
84-
dst->columns[3][1] = src.fMap[1][3];
85-
86-
dst->columns[0][2] = src.fMap[2][0];
87-
dst->columns[1][2] = src.fMap[2][1];
88-
dst->columns[2][2] = src.fMap[2][2];
89-
dst->columns[3][2] = src.fMap[2][3];
90-
91-
dst->columns[0][3] = src.fMap[3][0];
92-
dst->columns[1][3] = src.fMap[3][1];
93-
dst->columns[2][3] = src.fMap[3][2];
94-
dst->columns[3][3] = src.fMap[3][3];
95-
} else {
96-
memcpy(dst, &src.fMap, sizeof(matrix_float4x4));
97-
}
73+
memcpy(dst, &src.fMap, sizeof(matrix_float4x4));
9874
}
9975

10076
return dst;
@@ -957,13 +933,13 @@ void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src)
957933
hsMatrix2SIMD(inv, &fMatrixC2W);
958934
}
959935

960-
void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder)
936+
void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src)
961937
{
962938
hsMatrix44 inv;
963939
src.GetInverse(&inv);
964940

965-
hsMatrix2SIMD(src, &fMatrixL2W, swapOrder);
966-
hsMatrix2SIMD(inv, &fMatrixW2L, swapOrder);
941+
hsMatrix2SIMD(src, &fMatrixL2W);
942+
hsMatrix2SIMD(inv, &fMatrixW2L);
967943
}
968944

969945
void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable)

Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ class plCubicEnvironmap;
6767
class plLayerInterface;
6868
class plMetalPipelineState;
6969

70-
matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder = true);
70+
//NOTE: Results of this will be row major
71+
matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst);
7172

7273
class plMetalDevice
7374
{
@@ -149,7 +150,7 @@ class plMetalDevice
149150

150151
void SetProjectionMatrix(const hsMatrix44& src);
151152
void SetWorldToCameraMatrix(const hsMatrix44& src);
152-
void SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder = true);
153+
void SetLocalToWorldMatrix(const hsMatrix44& src);
153154

154155
void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice);
155156
uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap);

Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp

+3-8
Original file line numberDiff line numberDiff line change
@@ -2554,15 +2554,10 @@ void plMetalPipeline::IDrawPlate(plPlate* plate)
25542554
fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState);
25552555
fState.fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState;
25562556

2557-
//column major layout
25582557
simd_float4x4 projMat = matrix_identity_float4x4;
2559-
//projMat.columns[2][3] = 1.0f;
2560-
//projMat.columns[3][1] = -0.5f;
2561-
projMat.columns[3][2] = 0.0f;
2562-
projMat.columns[1][1] = 1.0f;
25632558

25642559
/// Set up the transform directly
2565-
fDevice.SetLocalToWorldMatrix(plate->GetTransform(), false);
2560+
fDevice.SetLocalToWorldMatrix(plate->GetTransform());
25662561

25672562
IPushPiggyBacks(material);
25682563

@@ -4440,8 +4435,8 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette,
44404435
hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix);
44414436
if (weights[j]) {
44424437
//Note: This bit is different than GL/DirectX. It's using Accelerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine.
4443-
destPt_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)pt_buf);
4444-
destNorm_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)vec_buf);
4438+
destPt_buf += simd_mul(*(simd_float4 *)pt_buf, simdMatrix) * weights[j];
4439+
destNorm_buf += simd_mul(*(simd_float4 *)vec_buf, simdMatrix) * weights[j];
44454440
}
44464441
//ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf);
44474442
indices >>= 8;

0 commit comments

Comments
 (0)