Skip to content

Commit 4447765

Browse files
Fixes for CUDA build.
1 parent 888015a commit 4447765

File tree

5 files changed

+51
-43
lines changed

5 files changed

+51
-43
lines changed

Diff for: src/CudaHelp.hh

+5-1
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@
66
#define MIN_CTAS_PER_SM 4
77
#define MAX_REDUCTION_CTAS 1024
88

9-
#ifdef __CUDACC__
9+
#ifdef USE_CUDA
1010
#include <cuda_runtime.h>
1111
#include "legion.h"
12+
#ifndef __CUDA_HD__
1213
#define __CUDA_HD__ __host__ __device__
14+
#endif
1315

16+
#ifdef __CUDACC__
1417
template<typename REDUCTION>
1518
__device__ __forceinline__
1619
void reduce_double(Legion::DeferredReduction<REDUCTION> result, double value)
@@ -39,6 +42,7 @@ void reduce_double(Legion::DeferredReduction<REDUCTION> result, double value)
3942
__threadfence_system();
4043
}
4144
}
45+
#endif
4246

4347
#else
4448
#define __CUDA_HD__

Diff for: src/Hydro.cc

+5-5
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ Hydro::Hydro(
217217
tts = new TTS(inp, this);
218218
qcs = new QCS(inp, this);
219219

220-
const double2 vfixx = double2(1., 0.);
221-
const double2 vfixy = double2(0., 1.);
220+
const double2 vfixx = make_double2(1., 0.);
221+
const double2 vfixy = make_double2(0., 1.);
222222
for (int i = 0; i < bcx.size(); ++i)
223223
bcs.push_back(new HydroBC(mesh, vfixx, bcx[i], true/*xplane*/));
224224
for (int i = 0; i < bcy.size(); ++i)
@@ -331,7 +331,7 @@ void Hydro::init() {
331331
}
332332
else
333333
{
334-
const double2 zero2(0., 0.);
334+
const double2 zero2 = make_double2(0., 0.);
335335
FillLauncher launcher(lrp, lrp, TaskArgument(&zero2,sizeof(zero2)));
336336
launcher.add_field(FID_PU);
337337
runtime->fill_fields(ctx, launcher);
@@ -375,7 +375,7 @@ Future Hydro::doCycle(
375375
launchffd.argument = TaskArgument(ffdargs, sizeof(ffdargs));
376376
launchffd.predicate = p_not_done;
377377

378-
double2 ffd2args[] = { double2(0., 0.) };
378+
double2 ffd2args[] = { make_double2(0., 0.) };
379379
IndexFillLauncher launchffd2;
380380
launchffd2.launch_space = ispc;
381381
launchffd2.projection = 0;
@@ -1802,7 +1802,7 @@ void Hydro::initRadialVelTask(
18021802
if (pmag > args->eps)
18031803
acc_pu[*itr] = args->vel * px / pmag;
18041804
else
1805-
acc_pu[*itr] = double2(0., 0.);
1805+
acc_pu[*itr] = make_double2(0., 0.);
18061806
}
18071807
}
18081808

Diff for: src/Mesh.cc

+4-4
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ const int SumOp<int>::identity = 0;
163163
template <>
164164
const double SumOp<double>::identity = 0.;
165165
template <>
166-
const double2 SumOp<double2>::identity = double2(0., 0.);
166+
const double2 SumOp<double2>::identity = make_double2(0., 0.);
167167
template <>
168168
const double MinOp<double>::identity = DBL_MAX;
169169
template <>
@@ -817,7 +817,7 @@ void Mesh::calcCtrsTask(
817817

818818
const IndexSpace& isz = task->regions[1].region.get_index_space();
819819
for (PointIterator itr(runtime, isz); itr(); itr++)
820-
acc_zx[*itr] = double2(0., 0.);
820+
acc_zx[*itr] = make_double2(0., 0.);
821821

822822
const IndexSpace& iss = task->regions[0].region.get_index_space();
823823
for (PointIterator itr(runtime, iss); itr(); itr++)
@@ -863,7 +863,7 @@ void Mesh::calcCtrsOMPTask(
863863
const Rect<1> rectz = runtime->get_index_space_domain(isz);
864864
#pragma omp parallel for
865865
for (coord_t z = rectz.lo[0]; z <= rectz.hi[0]; z++)
866-
acc_zx[z] = double2(0., 0.);
866+
acc_zx[z] = make_double2(0., 0.);
867867

868868
const IndexSpace& iss = task->regions[0].region.get_index_space();
869869
// This will assert if it is not dense
@@ -1288,7 +1288,7 @@ void Mesh::calcCtrs(
12881288

12891289
int zfirst = mapsz[sfirst];
12901290
int zlast = (slast < nums ? mapsz[slast] : numz);
1291-
fill(&zx[zfirst], &zx[zlast], double2(0., 0.));
1291+
fill(&zx[zfirst], &zx[zlast], make_double2(0., 0.));
12921292

12931293
for (int s = sfirst; s < slast; ++s) {
12941294
int p1 = mapsp1[s];

Diff for: src/QCS.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ void QCS::setCornerDivTask(
146146
// [1] Compute a zone-centered velocity
147147
const IndexSpace& isz = task->regions[1].region.get_index_space();
148148
for (PointIterator itz(runtime, isz); itz(); itz++)
149-
acc_zuc[*itz] = double2(0., 0.);
149+
acc_zuc[*itz] = make_double2(0., 0.);
150150

151151
const IndexSpace& iss = task->regions[0].region.get_index_space();
152152
for (PointIterator its(runtime, iss); its(); its++)
@@ -488,7 +488,7 @@ void QCS::setCornerDivOMPTask(
488488
const Rect<1> rectz = runtime->get_index_space_domain(isz);
489489
#pragma omp parallel for
490490
for (coord_t z = rectz.lo[0]; z <= rectz.hi[0]; z++)
491-
acc_zuc[z] = double2(0., 0.);
491+
acc_zuc[z] = make_double2(0., 0.);
492492

493493
const IndexSpace& iss = task->regions[0].region.get_index_space();
494494
// This will assert if it is not dense

Diff for: src/Vec2.hh

+35-31
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
// This struct is defined with all functions inline,
2121
// to give the compiler maximum opportunity to optimize.
2222

23-
#ifndef __CUDACC__
23+
#ifndef USE_CUDA
2424
struct double2
2525
{
2626
typedef double value_type;
@@ -37,38 +37,10 @@ struct double2
3737
return(*this);
3838
}
3939

40-
inline double2& operator+=(const double2& v2)
41-
{
42-
x += v2.x;
43-
y += v2.y;
44-
return(*this);
45-
}
46-
47-
inline double2& operator-=(const double2& v2)
48-
{
49-
x -= v2.x;
50-
y -= v2.y;
51-
return(*this);
52-
}
53-
54-
inline double2& operator*=(const double& r)
55-
{
56-
x *= r;
57-
y *= r;
58-
return(*this);
59-
}
60-
61-
inline double2& operator/=(const double& r)
62-
{
63-
x /= r;
64-
y /= r;
65-
return(*this);
66-
}
67-
6840
}; // double2
69-
#endif // __CUDACC__
41+
#endif // USE_CUDA
7042

71-
#ifndef __CUDACC__
43+
#ifndef USE_CUDA
7244
// Already has a declaration in CUDA
7345
inline double2 make_double2(double x_, double y_) {
7446
return(double2(x_, y_));
@@ -119,20 +91,44 @@ inline double2 operator+(const double2& v1, const double2& v2)
11991
return make_double2(v1.x + v2.x, v1.y + v2.y);
12092
}
12193

94+
__CUDA_HD__
95+
inline double2& operator+=(double2& v1, const double2& v2)
96+
{
97+
v1.x += v2.x;
98+
v1.y += v2.y;
99+
return v1;
100+
}
101+
122102
// subtract
123103
__CUDA_HD__
124104
inline double2 operator-(const double2& v1, const double2& v2)
125105
{
126106
return make_double2(v1.x - v2.x, v1.y - v2.y);
127107
}
128108

109+
__CUDA_HD__
110+
inline double2& operator-=(double2& v1, const double2& v2)
111+
{
112+
v1.x -= v2.x;
113+
v1.y -= v2.y;
114+
return v1;
115+
}
116+
129117
// multiply vector by scalar
130118
__CUDA_HD__
131119
inline double2 operator*(const double2& v, const double& r)
132120
{
133121
return make_double2(v.x * r, v.y * r);
134122
}
135123

124+
__CUDA_HD__
125+
inline double2& operator*=(double2& v, const double& r)
126+
{
127+
v.x *= r;
128+
v.y *= r;
129+
return v;
130+
}
131+
136132
// multiply scalar by vector
137133
__CUDA_HD__
138134
inline double2 operator*(const double& r, const double2& v)
@@ -148,6 +144,14 @@ inline double2 operator/(const double2& v, const double& r)
148144
return make_double2(v.x * rinv, v.y * rinv);
149145
}
150146

147+
__CUDA_HD__
148+
inline double2& operator/=(double2& v, const double& r)
149+
{
150+
double rinv = (double) 1. / r;
151+
v.x *= rinv;
152+
v.y *= rinv;
153+
return v;
154+
}
151155

152156
// other vector operations:
153157

0 commit comments

Comments
 (0)