Skip to content

Commit ff08fe0

Browse files
Merge pull request #1 from NVIDIA/release_1.1
Release 1.1
2 parents b22e1fc + 0771c05 commit ff08fe0

20 files changed

+922
-125
lines changed

CHANGELOG.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,28 @@
11
# Changelog
22

3+
## [1.1.0] - 2025-05-22
4+
5+
### Added
6+
7+
- Heatmap plotter
8+
- Support for CUDA Error Log Management
9+
- Retry mechanism for CUDA multicast allocations
10+
- Nvloom_cli argument (`--gpuToRackSamples`) to set the number of samples in gpu-to-rack testcases
11+
- Nvloom_cli now prints its version, the git commit it was built from, and the specified buffer size
12+
- Nvloom_cli now prints units when reporting results
13+
- Native compilation for sm_103 on CUDA 12.9 toolkits
14+
15+
### Changed
16+
17+
- Expanded README.md
18+
- Rack-to-rack testcases are now available in both unidir and bidir variants; bidir rack-to-rack testcases are symmetry-optimized.
19+
20+
### Fixed
21+
22+
- Bug where requesting allocations over 4 GiB would fail with CUDA_ERROR_OUT_OF_MEMORY
23+
324
## [1.0.0] - 2025-03-17
425

526
### Added
627

7-
Initial release
28+
- Initial release

CMakeLists.txt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,24 @@ set(src
4242
library/kernels.cu
4343
)
4444

45+
execute_process(
46+
COMMAND git describe --always --tags
47+
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
48+
OUTPUT_VARIABLE GIT_COMMIT
49+
OUTPUT_STRIP_TRAILING_WHITESPACE
50+
)
51+
4552
add_executable(nvloom_cli ${src})
4653
target_include_directories(nvloom_cli PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CMAKE_CURRENT_SOURCE_DIR}/library .)
4754
target_link_libraries(nvloom_cli cuda nvidia-ml Boost::program_options)
4855

49-
set_target_properties(nvloom_cli PROPERTIES CUDA_ARCHITECTURES "86;90;100")
56+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.9")
57+
set_target_properties(nvloom_cli PROPERTIES CUDA_ARCHITECTURES "86;90;100;103")
58+
else()
59+
set_target_properties(nvloom_cli PROPERTIES CUDA_ARCHITECTURES "86;90;100")
60+
endif()
5061

5162
find_package(MPI REQUIRED)
5263
include_directories(SYSTEM ${MPI_INCLUDE_PATH})
5364
target_link_libraries(nvloom_cli MPI::MPI_CXX)
54-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMULTINODE")
65+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGIT_COMMIT=\\\"\"${GIT_COMMIT}\"\\\"")

README.md

Lines changed: 264 additions & 15 deletions
Large diffs are not rendered by default.

cli/nvloom_cli.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,13 @@
2424
#include <iostream>
2525
#include <memory>
2626

27+
#define NVLOOM_VERSION "1.1.0"
28+
#ifndef GIT_COMMIT
29+
#define GIT_COMMIT "unknown"
30+
#endif
31+
2732
bool richOutput = false;
33+
int gpuToRackSamples = 5;
2834

2935
int run_program(int argc, char **argv) {
3036
boost::program_options::options_description opts("nvloom CLI");
@@ -43,7 +49,8 @@ int run_program(int argc, char **argv) {
4349
("suite,s", boost::program_options::value<std::vector<std::string>>(&suitesToRun)->multitoken(), suitesOptionDescription.c_str())
4450
("listTestcases,l", boost::program_options::bool_switch(&listTestcases)->default_value(listTestcases), "List testcases")
4551
("richOutput,r", boost::program_options::bool_switch(&richOutput)->default_value(richOutput), "Rich output")
46-
("allocatorStrategy,a", boost::program_options::value<std::string>(&allocatorStrategyString)->default_value("reuse"), "Allocator strategy: choose between unique and reuse");
52+
("allocatorStrategy,a", boost::program_options::value<std::string>(&allocatorStrategyString)->default_value("reuse"), "Allocator strategy: choose between unique and reuse")
53+
("gpuToRackSamples", boost::program_options::value<int>(&gpuToRackSamples)->default_value(gpuToRackSamples), "Number of per-rack samples to use in gpu_to_rack testcases")
4754
;
4855

4956
boost::program_options::variables_map vm;
@@ -62,6 +69,9 @@ int run_program(int argc, char **argv) {
6269
return 0;
6370
}
6471

72+
OUTPUT << "nvloom_cli " << NVLOOM_VERSION << std::endl;
73+
OUTPUT << "git commit: " << GIT_COMMIT << std::endl;
74+
6575
AllocatorStrategy allocatorStrategy;
6676
if (allocatorStrategyString == "reuse"){
6777
allocatorStrategy = ALLOCATOR_STRATEGY_REUSE;
@@ -77,11 +87,13 @@ int run_program(int argc, char **argv) {
7787

7888
size_t bufferSizeInB = (size_t) bufferSizeInMiB * 1024 * 1024;
7989

90+
OUTPUT << "Buffer size: " << bufferSizeInMiB << " MiB" << std::endl;
91+
8092
auto [testcases, suites] = buildTestcases(allocatorStrategy);
8193

8294
if (listTestcases) {
8395
OUTPUT << "Available testcases: " << std::endl;
84-
for (auto const& [testcaseName, testcaseFunction] : testcases) {
96+
for (auto const& [testcaseName, testcaseFunction] : testcases) {
8597
OUTPUT << testcaseName << std::endl;
8698
}
8799
return 0;
@@ -96,13 +108,13 @@ int run_program(int argc, char **argv) {
96108
for (auto suite: suitesToRun) {
97109
if (suites.count(suite) == 0) {
98110
std::cerr << "No such suite as \"" << suite << "\"\n";
99-
return 1;
111+
return 1;
100112
}
101113
for (auto testcase: suites[suite]) {
102114
testcasesToRunSet.insert(testcase);
103115
}
104116
}
105-
117+
106118
for (auto testcase: testcasesToRun) {
107119
if (testcases.count(testcase) == 0) {
108120
std::cerr << "No such testcase as \"" << testcase << "\"\n";
@@ -123,7 +135,7 @@ int run_program(int argc, char **argv) {
123135
testcases[testcase]->filterRun(bufferSizeInB);
124136
clearAllocationPools();
125137
auto endTime = std::chrono::high_resolution_clock::now();
126-
OUTPUT << "ExecutionTime[s] " << testcase << " " << std::chrono::duration<double>(endTime - startTime).count() << std::endl;
138+
OUTPUT << "ExecutionTime " << testcase << " " << std::chrono::duration<double>(endTime - startTime).count() << " s" << std::endl;
127139
OUTPUT << "Done " << testcase << std::endl;
128140
OUTPUT << std::endl;
129141
}

0 commit comments

Comments
 (0)