Skip to content

Commit c994a83

Browse files
committed
rxmesh construction timing
1 parent 51871d9 commit c994a83

File tree

1 file changed

+56
-27
lines changed

1 file changed

+56
-27
lines changed

include/rxmesh/rxmesh.cpp

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -87,37 +87,37 @@ void RXMesh::init(const std::vector<std::vector<uint32_t>>& fv,
8787
m_timers.add("buildHT");
8888
m_timers.add("cudaMalloc");
8989
m_timers.add("malloc");
90+
m_timers.add("hashtable.move");
91+
m_timers.add("cudaMemcpy");
92+
m_timers.add("bitmask.cudaMemcpy");
9093

94+
// 1)
9195
m_timers.add("build");
9296
m_timers.start("build");
9397
build(fv, patcher_file);
9498
m_timers.stop("build");
95-
RXMESH_INFO("build time = {} (ms)", m_timers.elapsed_millis("build"));
9699

100+
// 2)
97101
m_timers.add("populate_patch_stash");
98102
m_timers.start("populate_patch_stash");
99103
populate_patch_stash();
100104
m_timers.stop("populate_patch_stash");
101-
RXMESH_INFO("populate_patch_stash time = {} (ms)",
102-
m_timers.elapsed_millis("populate_patch_stash"));
103105

106+
// 3)
104107
m_timers.add("coloring");
105108
m_timers.start("coloring");
106109
patch_graph_coloring();
107110
m_timers.stop("coloring");
108111
RXMESH_INFO("Num colors = {}", m_num_colors);
109-
RXMESH_INFO("patch graph coloring time = {} (ms)",
110-
m_timers.elapsed_millis("coloring"));
111-
112112

113+
// 4)
113114
m_timers.add("build_device");
114115
m_timers.start("build_device");
115116
build_device();
116117
m_timers.stop("build_device");
117-
RXMESH_INFO("build_device time = {} (ms)",
118-
m_timers.elapsed_millis("build_device"));
119118

120119

120+
// 5)
121121
m_timers.add("PatchScheduler");
122122
m_timers.start("PatchScheduler");
123123
PatchScheduler sch;
@@ -126,18 +126,16 @@ void RXMesh::init(const std::vector<std::vector<uint32_t>>& fv,
126126
BYTES_TO_MEGABYTES(sizeof(uint32_t) * get_max_num_patches());
127127
sch.refill(get_num_patches());
128128
m_timers.stop("PatchScheduler");
129-
RXMESH_INFO("PatchScheduler time = {} (ms)",
130-
m_timers.elapsed_millis("PatchScheduler"));
131129

132130

131+
// 6)
133132
m_timers.add("allocate_extra_patches");
134133
m_timers.start("allocate_extra_patches");
135134
// Allocate extra patches
136135
allocate_extra_patches();
137136
m_timers.stop("allocate_extra_patches");
138-
RXMESH_INFO("allocate_extra_patches time = {} (ms)",
139-
m_timers.elapsed_millis("allocate_extra_patches"));
140137

138+
// 7)
141139
m_timers.add("context.init");
142140
m_timers.start("context.init");
143141
// Allocate and copy the context to the gpu
@@ -159,8 +157,6 @@ void RXMesh::init(const std::vector<std::vector<uint32_t>>& fv,
159157
m_d_patches_info,
160158
sch);
161159
m_timers.stop("context.init");
162-
RXMESH_INFO("context.init time = {} (ms)",
163-
m_timers.elapsed_millis("context.init"));
164160

165161

166162
RXMESH_INFO("#Vertices = {}, #Faces= {}, #Edges= {}, #Patches = {}",
@@ -181,19 +177,39 @@ void RXMesh::init(const std::vector<std::vector<uint32_t>>& fv,
181177
RXMESH_INFO("per-patch maximum vertex count = {}",
182178
m_max_vertices_per_patch);
183179

184-
RXMESH_INFO("cudaMalloc time = {} (ms)",
185-
m_timers.elapsed_millis("cudaMalloc"));
186-
187-
RXMESH_INFO("malloc time = {} (ms)", m_timers.elapsed_millis("malloc"));
188-
189-
RXMESH_INFO("buildHT time = {} (ms)", m_timers.elapsed_millis("buildHT"));
190-
RXMESH_INFO("bitmask time = {} (ms)", m_timers.elapsed_millis("bitmask"));
191-
RXMESH_INFO("lower_bound time = {} (ms)",
180+
////
181+
RXMESH_INFO("1) build time = {} (ms)", m_timers.elapsed_millis("build"));
182+
RXMESH_INFO("2) populate_patch_stash time = {} (ms)",
183+
m_timers.elapsed_millis("populate_patch_stash"));
184+
RXMESH_INFO("3) patch graph coloring time = {} (ms)",
185+
m_timers.elapsed_millis("coloring"));
186+
RXMESH_INFO("4) build_device time = {} (ms)",
187+
m_timers.elapsed_millis("build_device"));
188+
RXMESH_INFO(" -buildHT time = {} (ms)", m_timers.elapsed_millis("buildHT"));
189+
RXMESH_INFO(" --lower_bound time = {} (ms)",
192190
m_timers.elapsed_millis("lower_bound"));
193-
RXMESH_INFO("ht.insert time = {} (ms)",
191+
RXMESH_INFO(" --ht.insert time = {} (ms)",
194192
m_timers.elapsed_millis("ht.insert"));
195-
RXMESH_INFO("LPHashTable time = {} (ms)",
193+
RXMESH_INFO(" --hashtable.move time = {} (ms)",
194+
m_timers.elapsed_millis("hashtable.move"));
195+
RXMESH_INFO(" --LPHashTable time = {} (ms)",
196196
m_timers.elapsed_millis("LPHashTable"));
197+
RXMESH_INFO(" -bitmask time = {} (ms)", m_timers.elapsed_millis("bitmask"));
198+
RXMESH_INFO(" --bitmask.cudaMemcpy time = {} (ms)",
199+
m_timers.elapsed_millis("bitmask.cudaMemcpy"));
200+
201+
RXMESH_INFO("5) PatchScheduler time = {} (ms)",
202+
m_timers.elapsed_millis("PatchScheduler"));
203+
RXMESH_INFO("6) allocate_extra_patches time = {} (ms)",
204+
m_timers.elapsed_millis("allocate_extra_patches"));
205+
RXMESH_INFO("7) context.init time = {} (ms)",
206+
m_timers.elapsed_millis("context.init"));
207+
208+
RXMESH_INFO("cudaMemcpy time = {} (ms)",
209+
m_timers.elapsed_millis("cudaMemcpy"));
210+
RXMESH_INFO("cudaMalloc time = {} (ms)",
211+
m_timers.elapsed_millis("cudaMalloc"));
212+
RXMESH_INFO("malloc time = {} (ms)", m_timers.elapsed_millis("malloc"));
197213
}
198214

199215
RXMesh::~RXMesh()
@@ -980,7 +996,7 @@ void RXMesh::build_device()
980996
BYTES_TO_MEGABYTES(get_max_num_patches() * sizeof(PatchInfo));
981997

982998

983-
#pragma omp parallel for
999+
// #pragma omp parallel for
9841000
for (int p = 0; p < static_cast<int>(get_num_patches()); ++p) {
9851001

9861002
const uint16_t p_num_vertices =
@@ -1094,6 +1110,7 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
10941110
BYTES_TO_MEGABYTES(PatchStash::stash_size * sizeof(uint32_t));
10951111

10961112
// copy count and capacities
1113+
m_timers.start("cudaMemcpy");
10971114
CUDA_ERROR(cudaMemcpy(d_patch.num_faces,
10981115
h_patch_info.num_faces,
10991116
sizeof(uint16_t),
@@ -1106,6 +1123,7 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
11061123
h_patch_info.num_vertices,
11071124
sizeof(uint16_t),
11081125
cudaMemcpyHostToDevice));
1126+
m_timers.stop("cudaMemcpy");
11091127

11101128
// allocate and copy patch topology to the device
11111129
// we realloc the host h_patch_info EV and FE to ensure that both host and
@@ -1122,10 +1140,12 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
11221140
h_patch_info.ev, p_edges_capacity * 2 * sizeof(LocalVertexT));
11231141

11241142
if (p_num_edges > 0) {
1143+
m_timers.start("cudaMemcpy");
11251144
CUDA_ERROR(cudaMemcpy(d_patch.ev,
11261145
h_patch_info.ev,
11271146
p_num_edges * 2 * sizeof(LocalVertexT),
11281147
cudaMemcpyHostToDevice));
1148+
m_timers.stop("cudaMemcpy");
11291149
}
11301150

11311151
m_timers.start("cudaMalloc");
@@ -1140,10 +1160,12 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
11401160
h_patch_info.fe, p_faces_capacity * 3 * sizeof(LocalEdgeT));
11411161

11421162
if (p_num_faces > 0) {
1163+
m_timers.start("cudaMemcpy");
11431164
CUDA_ERROR(cudaMemcpy(d_patch.fe,
11441165
h_patch_info.fe,
11451166
p_num_faces * 3 * sizeof(LocalEdgeT),
11461167
cudaMemcpyHostToDevice));
1168+
m_timers.stop("cudaMemcpy");
11471169
}
11481170

11491171
m_timers.start("cudaMalloc");
@@ -1183,8 +1205,10 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
11831205
}
11841206
}
11851207

1208+
m_timers.start("bitmask.cudaMemcpy");
11861209
CUDA_ERROR(
11871210
cudaMemcpy(d_mask, h_mask, num_bytes, cudaMemcpyHostToDevice));
1211+
m_timers.stop("bitmask.cudaMemcpy");
11881212

11891213
m_timers.stop("bitmask");
11901214
};
@@ -1229,10 +1253,12 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
12291253

12301254
// Copy PatchStash
12311255
if (patch_id != INVALID32) {
1256+
m_timers.start("cudaMemcpy");
12321257
CUDA_ERROR(cudaMemcpy(d_patch.patch_stash.m_stash,
12331258
h_patch_info.patch_stash.m_stash,
12341259
PatchStash::stash_size * sizeof(uint32_t),
12351260
cudaMemcpyHostToDevice));
1261+
m_timers.stop("cudaMemcpy");
12361262
}
12371263

12381264

@@ -1298,7 +1324,9 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
12981324
}
12991325
}
13001326

1327+
m_timers.start("hashtable.move");
13011328
d_hashtable.move(h_hashtable);
1329+
m_timers.stop("hashtable.move");
13021330

13031331
m_timers.stop("buildHT");
13041332
};
@@ -1339,9 +1367,10 @@ void RXMesh::build_device_single_patch(const uint32_t patch_id,
13391367
h_patch_info.lp_f,
13401368
d_patch.lp_f);
13411369

1342-
1370+
m_timers.start("cudaMemcpy");
13431371
CUDA_ERROR(cudaMemcpy(
13441372
&d_patch_info, &d_patch, sizeof(PatchInfo), cudaMemcpyHostToDevice));
1373+
m_timers.stop("cudaMemcpy");
13451374
}
13461375

13471376
void RXMesh::allocate_extra_patches()
@@ -1351,7 +1380,7 @@ void RXMesh::allocate_extra_patches()
13511380
const uint16_t p_edges_capacity = get_per_patch_max_edge_capacity();
13521381
const uint16_t p_faces_capacity = get_per_patch_max_face_capacity();
13531382

1354-
// #pragma omp parallel for
1383+
#pragma omp parallel for
13551384
for (int p = get_num_patches(); p < static_cast<int>(get_max_num_patches());
13561385
++p) {
13571386

0 commit comments

Comments
 (0)