Skip to content

Commit 4341e6c

Browse files
Merge pull request #2487 from KhronosGroup/fix-2473
Implement MSL 3.2 coherent/atomic_thread_fence
2 parents 969e75f + cec2e40 commit 4341e6c

11 files changed

Lines changed: 589 additions & 13 deletions
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#include <metal_stdlib>
2+
#include <simd/simd.h>
3+
4+
using namespace metal;
5+
6+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
7+
8+
kernel void main0()
9+
{
10+
atomic_thread_fence(mem_flags::mem_threadgroup, memory_order_seq_cst, thread_scope_threadgroup);
11+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
12+
atomic_thread_fence(mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
13+
atomic_thread_fence(mem_flags::mem_device, memory_order_seq_cst, thread_scope_device);
14+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_threadgroup);
15+
atomic_thread_fence(mem_flags::mem_threadgroup, memory_order_seq_cst, thread_scope_threadgroup);
16+
threadgroup_barrier(mem_flags::mem_threadgroup);
17+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
18+
threadgroup_barrier(mem_flags::mem_threadgroup);
19+
atomic_thread_fence(mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
20+
threadgroup_barrier(mem_flags::mem_threadgroup);
21+
atomic_thread_fence(mem_flags::mem_device, memory_order_seq_cst, thread_scope_device);
22+
threadgroup_barrier(mem_flags::mem_threadgroup);
23+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_threadgroup);
24+
threadgroup_barrier(mem_flags::mem_threadgroup);
25+
threadgroup_barrier(mem_flags::mem_threadgroup);
26+
}
27+
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#include <metal_stdlib>
2+
#include <simd/simd.h>
3+
4+
using namespace metal;
5+
6+
struct BDA;
7+
8+
struct BDA
9+
{
10+
float v;
11+
};
12+
13+
struct SSBO
14+
{
15+
uint v;
16+
};
17+
18+
struct Registers
19+
{
20+
coherent device BDA* ssbo;
21+
};
22+
23+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
24+
25+
struct spvDescriptorSetBuffer0
26+
{
27+
coherent device SSBO* ssbos [[id(0)]][4];
28+
texture2d<float, access::write, memory_coherence_device> img [[id(4)]];
29+
};
30+
31+
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _80 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
32+
{
33+
spvDescriptorSet0.ssbos[gl_WorkGroupID.x]->v += 10u;
34+
spvDescriptorSet0.ssbos[gl_WorkGroupID.x]->v += 2u;
35+
spvDescriptorSet0.img.write(float4(8.0), uint2(int2(4)));
36+
spvDescriptorSet0.img.write(float4(1.0), uint2(int2(1)));
37+
spvDescriptorSet0.img.write(float4(4.0), uint2(int2(2)));
38+
_80.ssbo->v += 4.0;
39+
_80.ssbo->v += 6.0;
40+
}
41+
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#include <metal_stdlib>
2+
#include <simd/simd.h>
3+
4+
using namespace metal;
5+
6+
struct BDA;
7+
8+
struct BDA
9+
{
10+
float v;
11+
};
12+
13+
struct SSBO
14+
{
15+
uint v;
16+
};
17+
18+
struct Registers
19+
{
20+
coherent device BDA* ssbo;
21+
};
22+
23+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
24+
25+
kernel void main0(coherent device SSBO& _25 [[buffer(0)]], constant Registers& _69 [[buffer(1)]], texture2d<float, access::write, memory_coherence_device> img [[texture(0)]])
26+
{
27+
_25.v += 10u;
28+
_25.v += 2u;
29+
img.write(float4(8.0), uint2(int2(4)));
30+
img.write(float4(1.0), uint2(int2(1)));
31+
img.write(float4(4.0), uint2(int2(2)));
32+
_69.ssbo->v += 4.0;
33+
_69.ssbo->v += 6.0;
34+
}
35+
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#pragma clang diagnostic ignored "-Wmissing-prototypes"
2+
3+
#include <metal_stdlib>
4+
#include <simd/simd.h>
5+
6+
using namespace metal;
7+
8+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
9+
10+
static inline __attribute__((always_inline))
11+
void barrier_shared()
12+
{
13+
atomic_thread_fence(mem_flags::mem_threadgroup, memory_order_seq_cst, thread_scope_threadgroup);
14+
}
15+
16+
static inline __attribute__((always_inline))
17+
void full_barrier()
18+
{
19+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
20+
}
21+
22+
static inline __attribute__((always_inline))
23+
void image_barrier()
24+
{
25+
atomic_thread_fence(mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
26+
}
27+
28+
static inline __attribute__((always_inline))
29+
void buffer_barrier()
30+
{
31+
atomic_thread_fence(mem_flags::mem_device, memory_order_seq_cst, thread_scope_device);
32+
}
33+
34+
static inline __attribute__((always_inline))
35+
void group_barrier()
36+
{
37+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_threadgroup);
38+
}
39+
40+
static inline __attribute__((always_inline))
41+
void barrier_shared_exec()
42+
{
43+
atomic_thread_fence(mem_flags::mem_threadgroup, memory_order_seq_cst, thread_scope_threadgroup);
44+
threadgroup_barrier(mem_flags::mem_threadgroup);
45+
}
46+
47+
static inline __attribute__((always_inline))
48+
void full_barrier_exec()
49+
{
50+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
51+
threadgroup_barrier(mem_flags::mem_threadgroup);
52+
}
53+
54+
static inline __attribute__((always_inline))
55+
void image_barrier_exec()
56+
{
57+
atomic_thread_fence(mem_flags::mem_texture, memory_order_seq_cst, thread_scope_device);
58+
threadgroup_barrier(mem_flags::mem_threadgroup);
59+
}
60+
61+
static inline __attribute__((always_inline))
62+
void buffer_barrier_exec()
63+
{
64+
atomic_thread_fence(mem_flags::mem_device, memory_order_seq_cst, thread_scope_device);
65+
threadgroup_barrier(mem_flags::mem_threadgroup);
66+
}
67+
68+
static inline __attribute__((always_inline))
69+
void group_barrier_exec()
70+
{
71+
atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture, memory_order_seq_cst, thread_scope_threadgroup);
72+
threadgroup_barrier(mem_flags::mem_threadgroup);
73+
}
74+
75+
static inline __attribute__((always_inline))
76+
void exec_barrier()
77+
{
78+
threadgroup_barrier(mem_flags::mem_threadgroup);
79+
}
80+
81+
kernel void main0()
82+
{
83+
barrier_shared();
84+
full_barrier();
85+
image_barrier();
86+
buffer_barrier();
87+
group_barrier();
88+
barrier_shared_exec();
89+
full_barrier_exec();
90+
image_barrier_exec();
91+
buffer_barrier_exec();
92+
group_barrier_exec();
93+
exec_barrier();
94+
}
95+
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#pragma clang diagnostic ignored "-Wmissing-prototypes"
2+
3+
#include <metal_stdlib>
4+
#include <simd/simd.h>
5+
6+
using namespace metal;
7+
8+
struct BDA;
9+
10+
struct BDA
11+
{
12+
float v;
13+
};
14+
15+
struct SSBO
16+
{
17+
uint v;
18+
};
19+
20+
struct Registers
21+
{
22+
coherent device BDA* ssbo;
23+
};
24+
25+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
26+
27+
struct spvDescriptorSetBuffer0
28+
{
29+
coherent device SSBO* ssbos [[id(0)]][4];
30+
texture2d<float, access::write, memory_coherence_device> img [[id(4)]];
31+
};
32+
33+
static inline __attribute__((always_inline))
34+
void foo(coherent device SSBO* constant (&ssbos)[4], thread uint3& gl_WorkGroupID, texture2d<float, access::write, memory_coherence_device> img)
35+
{
36+
ssbos[gl_WorkGroupID.x]->v += 2u;
37+
img.write(float4(8.0), uint2(int2(4)));
38+
}
39+
40+
static inline __attribute__((always_inline))
41+
void write_in_func(texture2d<float, access::write, memory_coherence_device> img)
42+
{
43+
img.write(float4(4.0), uint2(int2(2)));
44+
}
45+
46+
static inline __attribute__((always_inline))
47+
void write_in_func(coherent device BDA* const thread & s)
48+
{
49+
s->v += 6.0;
50+
}
51+
52+
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _80 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
53+
{
54+
spvDescriptorSet0.ssbos[gl_WorkGroupID.x]->v += 10u;
55+
foo(spvDescriptorSet0.ssbos, gl_WorkGroupID, spvDescriptorSet0.img);
56+
spvDescriptorSet0.img.write(float4(1.0), uint2(int2(1)));
57+
write_in_func(spvDescriptorSet0.img);
58+
coherent device BDA* tmp = _80.ssbo;
59+
tmp->v += 4.0;
60+
coherent device BDA* param = tmp;
61+
write_in_func(param);
62+
}
63+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#pragma clang diagnostic ignored "-Wmissing-prototypes"
2+
3+
#include <metal_stdlib>
4+
#include <simd/simd.h>
5+
6+
using namespace metal;
7+
8+
struct BDA;
9+
10+
struct BDA
11+
{
12+
float v;
13+
};
14+
15+
struct SSBO
16+
{
17+
uint v;
18+
};
19+
20+
struct Registers
21+
{
22+
coherent device BDA* ssbo;
23+
};
24+
25+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
26+
27+
static inline __attribute__((always_inline))
28+
void foo(coherent device SSBO& _25, texture2d<float, access::write, memory_coherence_device> img)
29+
{
30+
_25.v += 2u;
31+
img.write(float4(8.0), uint2(int2(4)));
32+
}
33+
34+
static inline __attribute__((always_inline))
35+
void write_in_func(texture2d<float, access::write, memory_coherence_device> img)
36+
{
37+
img.write(float4(4.0), uint2(int2(2)));
38+
}
39+
40+
static inline __attribute__((always_inline))
41+
void write_in_func(coherent device BDA* const thread & s)
42+
{
43+
s->v += 6.0;
44+
}
45+
46+
kernel void main0(coherent device SSBO& _25 [[buffer(0)]], constant Registers& _69 [[buffer(1)]], texture2d<float, access::write, memory_coherence_device> img [[texture(0)]])
47+
{
48+
_25.v += 10u;
49+
foo(_25, img);
50+
img.write(float4(1.0), uint2(int2(1)));
51+
write_in_func(img);
52+
coherent device BDA* tmp = _69.ssbo;
53+
tmp->v += 4.0;
54+
coherent device BDA* param = tmp;
55+
write_in_func(param);
56+
}
57+

0 commit comments

Comments
 (0)