Skip to content

Commit 939ba67

Browse files
authored
Merge branch 'develop' into task/rhornung67/fix-warning
2 parents e7be820 + a1c0983 commit 939ba67

5 files changed

Lines changed: 48 additions & 0 deletions

File tree

src/apps/MASS3DPA_ATOMIC-Cuda.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
4848
}
4949
}
5050
}
51+
__syncthreads();
5152

5253

5354
GPU_FOREACH_THREAD_DIRECT(dz, z, mpa_at::D1D) {
@@ -57,6 +58,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
5758
}
5859
}
5960
}
61+
__syncthreads();
6062

6163

6264
GPU_FOREACH_THREAD_DIRECT(dz, z, mpa_at::D1D) {
@@ -66,6 +68,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
6668
}
6769
}
6870
}
71+
__syncthreads();
6972

7073
GPU_FOREACH_THREAD_DIRECT(qz, z, mpa_at::Q1D) {
7174
GPU_FOREACH_THREAD_DIRECT(qy, y, mpa_at::Q1D) {
@@ -74,6 +77,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
7477
}
7578
}
7679
}
80+
__syncthreads();
7781

7882
GPU_FOREACH_THREAD_DIRECT(qz, z, mpa_at::Q1D) {
7983
GPU_FOREACH_THREAD_DIRECT(qy, y, mpa_at::Q1D) {
@@ -82,6 +86,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
8286
}
8387
}
8488
}
89+
__syncthreads();
8590

8691
GPU_FOREACH_THREAD_DIRECT(qz, z, mpa_at::Q1D) {
8792
GPU_FOREACH_THREAD_DIRECT(dy, y, mpa_at::D1D) {
@@ -90,6 +95,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
9095
}
9196
}
9297
}
98+
__syncthreads();
9399

94100
GPU_FOREACH_THREAD_DIRECT(dz, z, mpa_at::D1D) {
95101
GPU_FOREACH_THREAD_DIRECT(dy, y, mpa_at::D1D) {
@@ -191,6 +197,7 @@ void MASS3DPA_ATOMIC::runCudaVariantImpl(VariantID vid) {
191197
); // RAJA::loop<inner_y>
192198
} // lambda ()
193199
); // RAJA::loop<inner_z>
200+
ctx.teamSync();
194201

195202

196203
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
@@ -206,6 +213,7 @@ void MASS3DPA_ATOMIC::runCudaVariantImpl(VariantID vid) {
206213
); // RAJA::loop<inner_y>
207214
} // lambda (dz)
208215
); // RAJA::loop<inner_z>
216+
ctx.teamSync();
209217

210218
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
211219
[&](Index_type dz) {
@@ -220,6 +228,7 @@ void MASS3DPA_ATOMIC::runCudaVariantImpl(VariantID vid) {
220228
); // RAJA::loop<inner_y>
221229
} // lambda (dz)
222230
); // RAJA::loop<inner_z>
231+
ctx.teamSync();
223232

224233
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
225234
[&](Index_type qz) {
@@ -234,6 +243,7 @@ void MASS3DPA_ATOMIC::runCudaVariantImpl(VariantID vid) {
234243
); // RAJA::loop<inner_y>
235244
} // lambda (qz)
236245
); // RAJA::loop<inner_z>
246+
ctx.teamSync();
237247

238248
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
239249
[&](Index_type qz) {
@@ -248,6 +258,7 @@ void MASS3DPA_ATOMIC::runCudaVariantImpl(VariantID vid) {
248258
); // RAJA::loop<inner_y>
249259
} // lambda (dz)
250260
); // RAJA::loop<inner_z>
261+
ctx.teamSync();
251262

252263
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
253264
[&](Index_type qz) {
@@ -262,6 +273,7 @@ void MASS3DPA_ATOMIC::runCudaVariantImpl(VariantID vid) {
262273
); // RAJA::loop<inner_y>
263274
} // lambda (dz)
264275
); // RAJA::loop<inner_z>
276+
ctx.teamSync();
265277

266278

267279
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),

src/apps/MASS3DPA_ATOMIC-Hip.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
4848
}
4949
}
5050
}
51+
__syncthreads();
5152

5253

5354
GPU_FOREACH_THREAD_DIRECT(dz, z, mpa_at::D1D) {
@@ -57,6 +58,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
5758
}
5859
}
5960
}
61+
__syncthreads();
6062

6163

6264
GPU_FOREACH_THREAD_DIRECT(dz, z, mpa_at::D1D) {
@@ -66,6 +68,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
6668
}
6769
}
6870
}
71+
__syncthreads();
6972

7073
GPU_FOREACH_THREAD_DIRECT(qz, z, mpa_at::Q1D) {
7174
GPU_FOREACH_THREAD_DIRECT(qy, y, mpa_at::Q1D) {
@@ -74,6 +77,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
7477
}
7578
}
7679
}
80+
__syncthreads();
7781

7882
GPU_FOREACH_THREAD_DIRECT(qz, z, mpa_at::Q1D) {
7983
GPU_FOREACH_THREAD_DIRECT(qy, y, mpa_at::Q1D) {
@@ -82,6 +86,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
8286
}
8387
}
8488
}
89+
__syncthreads();
8590

8691
GPU_FOREACH_THREAD_DIRECT(qz, z, mpa_at::Q1D) {
8792
GPU_FOREACH_THREAD_DIRECT(dy, y, mpa_at::D1D) {
@@ -90,6 +95,7 @@ __global__ void Mass3DPA_Atomic(const Real_ptr B,
9095
}
9196
}
9297
}
98+
__syncthreads();
9399

94100
GPU_FOREACH_THREAD_DIRECT(dz, z, mpa_at::D1D) {
95101
GPU_FOREACH_THREAD_DIRECT(dy, y, mpa_at::D1D) {
@@ -191,6 +197,7 @@ void MASS3DPA_ATOMIC::runHipVariantImpl(VariantID vid) {
191197
); // RAJA::loop<inner_y>
192198
} // lambda ()
193199
); // RAJA::loop<inner_z>
200+
ctx.teamSync();
194201

195202

196203
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
@@ -206,6 +213,7 @@ void MASS3DPA_ATOMIC::runHipVariantImpl(VariantID vid) {
206213
); // RAJA::loop<inner_y>
207214
} // lambda (dz)
208215
); // RAJA::loop<inner_z>
216+
ctx.teamSync();
209217

210218
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
211219
[&](Index_type dz) {
@@ -220,6 +228,7 @@ void MASS3DPA_ATOMIC::runHipVariantImpl(VariantID vid) {
220228
); // RAJA::loop<inner_y>
221229
} // lambda (dz)
222230
); // RAJA::loop<inner_z>
231+
ctx.teamSync();
223232

224233
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
225234
[&](Index_type qz) {
@@ -234,6 +243,7 @@ void MASS3DPA_ATOMIC::runHipVariantImpl(VariantID vid) {
234243
); // RAJA::loop<inner_y>
235244
} // lambda (qz)
236245
); // RAJA::loop<inner_z>
246+
ctx.teamSync();
237247

238248
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
239249
[&](Index_type qz) {
@@ -248,6 +258,7 @@ void MASS3DPA_ATOMIC::runHipVariantImpl(VariantID vid) {
248258
); // RAJA::loop<inner_y>
249259
} // lambda (dz)
250260
); // RAJA::loop<inner_z>
261+
ctx.teamSync();
251262

252263
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
253264
[&](Index_type qz) {
@@ -262,6 +273,7 @@ void MASS3DPA_ATOMIC::runHipVariantImpl(VariantID vid) {
262273
); // RAJA::loop<inner_y>
263274
} // lambda (dz)
264275
); // RAJA::loop<inner_z>
276+
ctx.teamSync();
265277

266278

267279
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),

src/apps/MASS3DPA_ATOMIC-OMP.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ void MASS3DPA_ATOMIC::runOpenMPVariant(VariantID vid) {
136136
); // RAJA::loop<inner_y>
137137
} // lambda ()
138138
); // RAJA::loop<inner_z>
139+
ctx.teamSync();
139140

140141
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
141142
[&](Index_type dz) {
@@ -150,6 +151,7 @@ void MASS3DPA_ATOMIC::runOpenMPVariant(VariantID vid) {
150151
); // RAJA::loop<inner_y>
151152
} // lambda (dz)
152153
); // RAJA::loop<inner_z>
154+
ctx.teamSync();
153155

154156
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
155157
[&](Index_type dz) {
@@ -164,6 +166,7 @@ void MASS3DPA_ATOMIC::runOpenMPVariant(VariantID vid) {
164166
); // RAJA::loop<inner_y>
165167
} // lambda (dz)
166168
); // RAJA::loop<inner_z>
169+
ctx.teamSync();
167170

168171
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
169172
[&](Index_type qz) {
@@ -178,6 +181,7 @@ void MASS3DPA_ATOMIC::runOpenMPVariant(VariantID vid) {
178181
); // RAJA::loop<inner_y>
179182
} // lambda (qz)
180183
); // RAJA::loop<inner_z>
184+
ctx.teamSync();
181185

182186
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
183187
[&](Index_type qz) {
@@ -192,6 +196,7 @@ void MASS3DPA_ATOMIC::runOpenMPVariant(VariantID vid) {
192196
); // RAJA::loop<inner_y>
193197
} // lambda (dz)
194198
); // RAJA::loop<inner_z>
199+
ctx.teamSync();
195200

196201
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
197202
[&](Index_type qz) {
@@ -206,6 +211,7 @@ void MASS3DPA_ATOMIC::runOpenMPVariant(VariantID vid) {
206211
); // RAJA::loop<inner_y>
207212
} // lambda (dz)
208213
); // RAJA::loop<inner_z>
214+
ctx.teamSync();
209215

210216

211217
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),

src/apps/MASS3DPA_ATOMIC-Seq.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ void MASS3DPA_ATOMIC::runSeqVariant(VariantID vid) {
138138
); // RAJA::loop<inner_y>
139139
} // lambda ()
140140
); // RAJA::loop<inner_z>
141+
ctx.teamSync();
141142

142143

143144
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
@@ -153,6 +154,7 @@ void MASS3DPA_ATOMIC::runSeqVariant(VariantID vid) {
153154
); // RAJA::loop<inner_y>
154155
} // lambda (dz)
155156
); // RAJA::loop<inner_z>
157+
ctx.teamSync();
156158

157159
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
158160
[&](Index_type dz) {
@@ -167,6 +169,7 @@ void MASS3DPA_ATOMIC::runSeqVariant(VariantID vid) {
167169
); // RAJA::loop<inner_y>
168170
} // lambda (dz)
169171
); // RAJA::loop<inner_z>
172+
ctx.teamSync();
170173

171174
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
172175
[&](Index_type qz) {
@@ -181,6 +184,7 @@ void MASS3DPA_ATOMIC::runSeqVariant(VariantID vid) {
181184
); // RAJA::loop<inner_y>
182185
} // lambda (qz)
183186
); // RAJA::loop<inner_z>
187+
ctx.teamSync();
184188

185189
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
186190
[&](Index_type qz) {
@@ -195,6 +199,7 @@ void MASS3DPA_ATOMIC::runSeqVariant(VariantID vid) {
195199
); // RAJA::loop<inner_y>
196200
} // lambda (dz)
197201
); // RAJA::loop<inner_z>
202+
ctx.teamSync();
198203

199204
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
200205
[&](Index_type qz) {
@@ -209,6 +214,7 @@ void MASS3DPA_ATOMIC::runSeqVariant(VariantID vid) {
209214
); // RAJA::loop<inner_y>
210215
} // lambda (dz)
211216
); // RAJA::loop<inner_z>
217+
ctx.teamSync();
212218

213219

214220
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),

src/apps/MASS3DPA_ATOMIC-Sycl.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
9898
}
9999
}
100100
}
101+
itm.barrier(::sycl::access::fence_space::local_space);
101102

102103
SYCL_FOREACH_THREAD_DIRECT(dz, 0, mpa_at::D1D) {
103104
SYCL_FOREACH_THREAD_DIRECT(dy, 1, mpa_at::D1D) {
@@ -106,6 +107,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
106107
}
107108
}
108109
}
110+
itm.barrier(::sycl::access::fence_space::local_space);
109111

110112
SYCL_FOREACH_THREAD_DIRECT(dz, 0, mpa_at::D1D) {
111113
SYCL_FOREACH_THREAD_DIRECT(qy, 1, mpa_at::Q1D) {
@@ -114,6 +116,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
114116
}
115117
}
116118
}
119+
itm.barrier(::sycl::access::fence_space::local_space);
117120

118121
SYCL_FOREACH_THREAD_DIRECT(qz, 0, mpa_at::Q1D) {
119122
SYCL_FOREACH_THREAD_DIRECT(qy, 1, mpa_at::Q1D) {
@@ -122,6 +125,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
122125
}
123126
}
124127
}
128+
itm.barrier(::sycl::access::fence_space::local_space);
125129

126130
SYCL_FOREACH_THREAD_DIRECT(qz, 0, mpa_at::Q1D) {
127131
SYCL_FOREACH_THREAD_DIRECT(qy, 1, mpa_at::Q1D) {
@@ -130,6 +134,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
130134
}
131135
}
132136
}
137+
itm.barrier(::sycl::access::fence_space::local_space);
133138

134139
SYCL_FOREACH_THREAD_DIRECT(qz, 0, mpa_at::Q1D) {
135140
SYCL_FOREACH_THREAD_DIRECT(dy, 1, mpa_at::D1D) {
@@ -138,6 +143,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
138143
}
139144
}
140145
}
146+
itm.barrier(::sycl::access::fence_space::local_space);
141147

142148
SYCL_FOREACH_THREAD_DIRECT(dz, 0, mpa_at::D1D) {
143149
SYCL_FOREACH_THREAD_DIRECT(dy, 2, mpa_at::D1D) {
@@ -247,6 +253,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
247253
); // RAJA::loop<inner_y>
248254
} // lambda ()
249255
); // RAJA::loop<inner_z>
256+
ctx.teamSync();
250257

251258

252259
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
@@ -262,6 +269,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
262269
); // RAJA::loop<inner_y>
263270
} // lambda (dz)
264271
); // RAJA::loop<inner_z>
272+
ctx.teamSync();
265273

266274
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),
267275
[&](Index_type dz) {
@@ -276,6 +284,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
276284
); // RAJA::loop<inner_y>
277285
} // lambda (dz)
278286
); // RAJA::loop<inner_z>
287+
ctx.teamSync();
279288

280289
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
281290
[&](Index_type qz) {
@@ -290,6 +299,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
290299
); // RAJA::loop<inner_y>
291300
} // lambda (qz)
292301
); // RAJA::loop<inner_z>
302+
ctx.teamSync();
293303

294304
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
295305
[&](Index_type qz) {
@@ -304,6 +314,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
304314
); // RAJA::loop<inner_y>
305315
} // lambda (dz)
306316
); // RAJA::loop<inner_z>
317+
ctx.teamSync();
307318

308319
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::Q1D),
309320
[&](Index_type qz) {
@@ -318,6 +329,7 @@ void MASS3DPA_ATOMIC::runSyclVariantImpl(VariantID vid) {
318329
); // RAJA::loop<inner_y>
319330
} // lambda (dz)
320331
); // RAJA::loop<inner_z>
332+
ctx.teamSync();
321333

322334

323335
RAJA::loop<inner_z>(ctx, RAJA::RangeSegment(0, mpa_at::D1D),

0 commit comments

Comments
 (0)