Skip to content

Commit b657cac

Browse files
committed
#10: allow for different N for each benchmark
1 parent 6b798d6 commit b657cac

File tree

3 files changed

+39
-11
lines changed

3 files changed

+39
-11
lines changed

src/benchmarks.cc

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ namespace benchmarks {
1111

1212
template <>
1313
std::string typeToString<double>() {
14-
return "double";
14+
return "double ";
1515
}
1616

1717
template <>
@@ -61,7 +61,7 @@ benchmark_results_t runBenchmarkLevel1(int N, int iters) {
6161

6262
int rank = -1;
6363
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
64-
std::cout << "[level1 " << typeToString<T>() << "] rank: " << rank << ", total_time=" << total_time << std::endl;
64+
std::cout << "[level1 " << typeToString<T>() << "] (N=" << N << ") rank: " << rank << ", total_time=" << total_time << std::endl;
6565

6666
return std::make_tuple(iter_timings, total_time);
6767
}
@@ -101,7 +101,7 @@ benchmark_results_t runBenchmarkLevel2(int M, int N, int iters) {
101101

102102
int rank = -1;
103103
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
104-
std::cout << "[level2 " << typeToString<T>() << "] rank: " << rank << ", total_time=" << total_time << std::endl;
104+
std::cout << "[level2 " << typeToString<T>() << "] (M=" << M << ", N=" << N << ") rank: " << rank << ", total_time=" << total_time << std::endl;
105105

106106
return std::make_tuple(iter_timings, total_time);
107107
}
@@ -142,7 +142,7 @@ benchmark_results_t runBenchmarkLevel3(int M, int N, int K, int iters) {
142142
int rank = -1;
143143
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
144144

145-
std::cout << "[level3 " << typeToString<T>() << "] rank: " << rank << ", total_time=" << total_time << std::endl;
145+
std::cout << "[level3 " << typeToString<T>() << "] (M=" << M << ", N=" << N << ", K=" << K << ") rank: " << rank << ", total_time=" << total_time << std::endl;
146146

147147
return std::make_tuple(iter_timings, total_time);
148148
}
@@ -191,7 +191,7 @@ benchmark_results_t runBenchmarkDPOTRF(int N, int iters) {
191191
int rank = -1;
192192
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
193193

194-
std::cout << "[dpotrf " << typeToString<T>() << "] rank: " << rank << ", total_time=" << total_time << std::endl;
194+
std::cout << "[dpotrf " << typeToString<T>() << "] (N=" << N << ") rank: " << rank << ", total_time=" << total_time << std::endl;
195195

196196
return std::make_tuple(iter_timings, total_time);
197197
}
@@ -212,9 +212,11 @@ benchmark_results_t runBenchmark(benchmark_types b, int M, int N, int K, int ite
212212
}
213213
}
214214

215-
all_results_t runAllBenchmarks(int M, int N, int K, int iters) {
215+
all_results_t runAllBenchmarks(int M, int N1, int N2, int N3, int K, int iters) {
216216
all_results_t all_results;
217+
int N;
217218
for (int i=0; i < benchmark_types::num_benchmarks; i++) {
219+
N = i == 0 ? N1 : (i == 1 ? N2 : N3);
218220
auto b = static_cast<benchmark_types>(i);
219221
std::string benchmark_str = benchmarkToString(b);
220222
all_results[benchmark_str + "_double"] = runBenchmark<double>(b, M, N, K, iters);

src/benchmarks.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ benchmark_results_t runBenchmarkDPOTRF(int N, int iters);
3737
template <typename T>
3838
benchmark_results_t runBenchmark(benchmark_types b, int M, int N, int K, int iters);
3939

40-
all_results_t runAllBenchmarks(int M, int N, int K, int iters);
40+
all_results_t runAllBenchmarks(int M, int N1, int N2, int N3, int K, int iters);
4141

4242
void printBenchmarkOutput(all_results_t benchmark_results, int iters);
4343

src/slow_node.cc

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,41 @@
88

99
static int iters = 100;
1010
static int M = 128;
11-
static int N = 128;
11+
static int N1 = 128;
12+
static int N2 = 128;
13+
static int N3 = 128;
1214
static int K = 128;
1315

16+
/*
17+
* USAGE: ./slow_node <iters> <M/K> [<N1> [<N2> [<N3>]]]
18+
*
19+
* M and K share the same value, given by the <M/K> slot.
20+
*
21+
* If <N1> <N2> and <N3> are not provided, the value given for
22+
* M and K is also used for all N.
23+
*
24+
* Similarly, if <N2> is not provided, the value given for <N1>
25+
* is used for N2 and N3; likewise, if <N3> is not provided, it takes the value given for <N2>.
26+
*/
27+
1428
int main(int argc, char** argv) {
29+
1530
if (argc > 1) {
1631
iters = atoi(argv[1]) + 1; // add one iteration since we will drop the first one
17-
M = N = K = atoi(argv[2]);
32+
M = N1 = N2 = N3 = K = atoi(argv[2]);
33+
if (argc > 3) {
34+
N1 = N2 = N3 = atoi(argv[3]);
35+
if (argc > 4) {
36+
N2 = N3 = atoi(argv[4]);
37+
if (argc > 5) {
38+
N3 = atoi(argv[5]);
39+
}
40+
}
41+
}
1842
}
19-
std::cout << "iters: " << iters << ", M=N=K=" << M << std::endl;
43+
44+
std::cout << "iters: " << iters << ", M=K=" << M << std::endl;
45+
std::cout << "N1=" << N1 << ", N2=" << N2 << ", N3=" << N3 << std::endl;
2046

2147
MPI_Init(&argc, &argv);
2248
Kokkos::initialize(argc, argv);
@@ -27,7 +53,7 @@ int main(int argc, char** argv) {
2753

2854
// Loop through all available benchmarks
2955
sensors::runSensorsAndReduceOutput(processor_name, "pre");
30-
auto output = benchmarks::runAllBenchmarks(M, N, K, iters);
56+
auto output = benchmarks::runAllBenchmarks(M, N1, N2, N3, K, iters);
3157
sensors::runSensorsAndReduceOutput(processor_name, "post");
3258
benchmarks::printBenchmarkOutput(output, iters);
3359

0 commit comments

Comments
 (0)