Skip to content

Commit 61ae0aa

Browse files
committed
Added Kokkos Profiling Regions
1 parent 0d1ddfe commit 61ae0aa

4 files changed

Lines changed: 65 additions & 44 deletions

File tree

cuda-scorec-config.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Tell nvcc_wrapper which host compiler to use. The substitution is quoted so
# the path is not word-split by shells that do not treat `export` specially.
export NVCC_WRAPPER_DEFAULT_COMPILER="$(which mpicxx)"
22
cmake -B build-cuda -S . \
3-
-DCMAKE_BUILD_TYPE=Debug \
3+
-DCMAKE_BUILD_TYPE=Release \
44
-DCMAKE_INSTALL_PREFIX=build-cuda/install \
55
-DCMAKE_CXX_COMPILER=/lore/hasanm4/practice_projects/advanced_comp/assignment2_deps/kokkos/bin/nvcc_wrapper \
66
-DKokkos_ROOT=../assignment2_deps/kokkos/build-cuda/install/ \
Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,52 @@
1-
#include "CalculateStiffnessMatrixAndLoadVector.hpp"
2-
3-
4-
Results calculateAllElementStiffnessMatrixAndLoadVector(const Mesh& mesh, double k) {
5-
6-
int numElements = mesh.GetNumElements();
7-
8-
int numNodes = mesh.GetNumNodesPerElement();
9-
10-
int sizePerElement = numNodes * numNodes;
11-
12-
View1D allElementStiffnessMatrix("stores all element stiffness matrix", numElements * sizePerElement);
13-
View1D allElementLoadVector("stores all element load vector", numElements * numNodes);
14-
15-
Kokkos::parallel_for("CalculateStiffness", numElements,
16-
KOKKOS_LAMBDA(const int elemIdx) {
17-
18-
double stiffnessMatrixPerElement[MAX_STIFFNESS_MATRIX_SIZE] = {};
19-
double loadVectorPerElement[MAX_LOAD_VECTOR_SIZE] = {};
20-
if (numNodes == 3) { // Triangle element
21-
TriElement triElem(mesh, elemIdx);
22-
triElem.setMaterialProperty(k);
23-
triElem.computeElementStiffnessMatrix(stiffnessMatrixPerElement);
24-
triElem.computeElementLoadVector(loadVectorPerElement);
25-
} else { // Quad element
26-
QuadElement quadElem(mesh, elemIdx);
27-
quadElem.setMaterialProperty(k);
28-
quadElem.computeElementStiffnessMatrix(stiffnessMatrixPerElement);
29-
quadElem.computeElementLoadVector(loadVectorPerElement);
1+
#include "CalculateStiffnessMatrixAndLoadVector.hpp"
2+
3+
Results calculateAllElementStiffnessMatrixAndLoadVector(const Mesh& mesh,
4+
double k) {
5+
int numElements = mesh.GetNumElements();
6+
7+
int numNodes = mesh.GetNumNodesPerElement();
8+
9+
int sizePerElement = numNodes * numNodes;
10+
11+
Kokkos::Profiling::pushRegion(
12+
"Allocate Element Stiffness Matrix and Load Vector");
13+
View1D allElementStiffnessMatrix("stores all element stiffness matrix",
14+
numElements * sizePerElement);
15+
View1D allElementLoadVector("stores all element load vector",
16+
numElements * numNodes);
17+
Kokkos::Profiling::popRegion();
18+
19+
Kokkos::Profiling::pushRegion(
20+
"Compute Element Stiffness Matrix and Load Vector");
21+
Kokkos::parallel_for(
22+
"CalculateStiffness", numElements, KOKKOS_LAMBDA(const int elemIdx) {
23+
double stiffnessMatrixPerElement[MAX_STIFFNESS_MATRIX_SIZE] = {};
24+
double loadVectorPerElement[MAX_LOAD_VECTOR_SIZE] = {};
25+
if (numNodes == 3) { // Triangle element
26+
TriElement triElem(mesh, elemIdx);
27+
triElem.setMaterialProperty(k);
28+
triElem.computeElementStiffnessMatrix(stiffnessMatrixPerElement);
29+
triElem.computeElementLoadVector(loadVectorPerElement);
30+
} else { // Quad element
31+
QuadElement quadElem(mesh, elemIdx);
32+
quadElem.setMaterialProperty(k);
33+
quadElem.computeElementStiffnessMatrix(stiffnessMatrixPerElement);
34+
quadElem.computeElementLoadVector(loadVectorPerElement);
3035
}
31-
32-
int base_stiffness_idx = elemIdx * sizePerElement;
33-
for (int i = 0; i < sizePerElement; ++i){
34-
allElementStiffnessMatrix(base_stiffness_idx + i) = stiffnessMatrixPerElement[i];
35-
}
36-
37-
int base_load_idx = elemIdx * numNodes;
3836

39-
for (int i = 0; i < numNodes; ++i){
40-
allElementLoadVector(base_load_idx + i) = loadVectorPerElement[i];
41-
}
37+
int base_stiffness_idx = elemIdx * sizePerElement;
38+
for (int i = 0; i < sizePerElement; ++i) {
39+
allElementStiffnessMatrix(base_stiffness_idx + i) =
40+
stiffnessMatrixPerElement[i];
41+
}
42+
43+
int base_load_idx = elemIdx * numNodes;
4244

43-
});
45+
for (int i = 0; i < numNodes; ++i) {
46+
allElementLoadVector(base_load_idx + i) = loadVectorPerElement[i];
47+
}
48+
});
49+
Kokkos::Profiling::popRegion();
4450

45-
return Results{allElementStiffnessMatrix, allElementLoadVector};
51+
return Results{allElementStiffnessMatrix, allElementLoadVector};
4652
}

src/StiffnessMatrix.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,17 @@ void ElementStiffnessMatrix::createOOROOC(Mesh mesh) {
4141
}
4242

4343
void ElementStiffnessMatrix::sortDataByRowCol(Kokkos::View<double *> data) {
44+
Kokkos::Profiling::pushRegion("Sort Element Stiffness Matrix");
4445
assert(rowColCOO_.size() == data.size());
4546
auto rowColCoo_l = rowColCOO_;
4647

4748
Kokkos::Experimental::sort_by_key(Kokkos::DefaultExecutionSpace(),
4849
rowColCoo_l, data, gIDComparator());
50+
Kokkos::Profiling::popRegion();
4951
}
5052

5153
void StiffnessMatrix::assemble(Kokkos::View<double *> data) {
54+
Kokkos::Profiling::pushRegion("Create CSR Row Index");
5255
auto rowColCOO_l = elementStiffnessMatrix.rowColCOO_;
5356
size_t coo_size = rowColCOO_l.size();
5457

@@ -89,15 +92,20 @@ void StiffnessMatrix::assemble(Kokkos::View<double *> data) {
8992
csrDataSize_);
9093
Kokkos::fence();
9194
printf("Total unique entries: %zu\n", csrDataSize_);
95+
Kokkos::Profiling::popRegion();
9296

97+
Kokkos::Profiling::pushRegion("Allocate CSR");
9398
Kokkos::resize(csrColIds_, csrDataSize_);
9499
Kokkos::resize(csrValues_, csrDataSize_);
100+
Kokkos::Profiling::popRegion();
101+
95102
auto csrColIds_l = csrColIds_;
96103
auto csrValues_l = csrValues_;
97104

98105
// * Fill CSR row index
99106
// TODO: Multilevel parallelism
100107
auto nDof_l = nDof_;
108+
Kokkos::Profiling::pushRegion("Fill Stiffness CSR");
101109
Kokkos::parallel_for(
102110
"fill CSR", nDof_, KOKKOS_LAMBDA(const size_t row) {
103111
size_t row_start = unique_row_start_index(row);
@@ -113,6 +121,7 @@ void StiffnessMatrix::assemble(Kokkos::View<double *> data) {
113121
csrValues_l(csr_data_index) += data(coo_i);
114122
}
115123
});
124+
Kokkos::Profiling::popRegion();
116125
}
117126

118127
void StiffnessMatrix::printStiffnessMatrix() const {

src/main.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44

55
#include <CalculateStiffnessMatrixAndLoadVector.hpp>
66
#include <Kokkos_Core.hpp>
7+
#include <chrono>
78
#include <iostream>
89

910
#include "StiffnessMatrix.h"
1011

1112
int main(int argc, char** argv) {
13+
auto start = std::chrono::steady_clock::now();
1214
#ifdef KOKKOS_ENABLE_OPENMP
1315
printf("Using Kokkos OpenMP backend\n");
1416
#endif
@@ -52,14 +54,18 @@ int main(int argc, char** argv) {
5254
stiffnessMatrix.sortDataByRowCol(element_stiffness);
5355
stiffnessMatrix.assemble(element_stiffness);
5456

55-
stiffnessMatrix.printStiffnessMatrix();
56-
5757
#ifndef NDEBUG
58+
stiffnessMatrix.printStiffnessMatrix();
5859
printf("=>----------- Dense Matrix -----------<=\n");
5960
stiffnessMatrix.printDenseMatrix();
6061
#endif
6162
}
6263
Kokkos::finalize();
6364

65+
auto end = std::chrono::steady_clock::now();
66+
auto elapsed_time =
67+
std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
68+
// duration::rep is implementation-defined (often `long`), so cast explicitly
// to keep the argument type in agreement with the %lld conversion.
printf("Total execution time: %lld ms\n",
       static_cast<long long>(elapsed_time.count()));
69+
6470
return 0;
6571
}

0 commit comments

Comments
 (0)