6 changes: 5 additions & 1 deletion ci/build_wheel_python.sh
@@ -34,8 +34,12 @@ rapids-telemetry-record sccache-stats.txt sccache --show-adv-stats

EXCLUDE_ARGS=(
--exclude "librapids_logger.so"
--exclude "librmm.so"
)
python -m auditwheel repair "${EXCLUDE_ARGS[@]}" -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" dist/*
python -m auditwheel repair \
"${EXCLUDE_ARGS[@]}" \
-w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \
dist/*

../../ci/validate_wheel.sh "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}"
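The new `--exclude "librmm.so"` flag tells auditwheel not to vendor librmm.so into the repaired wheel, presumably because it is provided by a separate librmm wheel at runtime (as is already done for librapids_logger.so). A minimal sanity check, not part of the CI script and purely illustrative, could confirm the exclusion:

```bash
# Hypothetical check (not in this PR): verify the repaired wheel does not
# bundle librmm.so after `auditwheel repair --exclude "librmm.so"`.
for whl in "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}"/*.whl; do
  if unzip -l "${whl}" | grep -q 'librmm\.so'; then
    echo "ERROR: ${whl} unexpectedly bundles librmm.so" >&2
    exit 1
  fi
done
```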

12 changes: 12 additions & 0 deletions conda/recipes/librmm/recipe.yaml
@@ -69,19 +69,31 @@ outputs:
- cmake --install cpp/build
dynamic_linking:
overlinking_behavior: "error"
prefix_detection:
ignore:
# See https://github.com/rapidsai/build-planning/issues/160
- lib/librmm.so
requirements:
build:
- cmake ${{ cmake_version }}
- ${{ stdlib("c") }}
host:
- cuda-version =${{ cuda_version }}
- if: cuda_major == "11"
then: cudatoolkit
else: cuda-cudart-dev
run:
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
- if: cuda_major == "11"
then: cudatoolkit
else: cuda-cudart
- rapids-logger =0.1
run_exports:
- ${{ pin_subpackage("librmm", upper_bound="x.x") }}
ignore_run_exports:
from_package:
- if: cuda_major != "11"
then: cuda-cudart-dev
by_name:
- cuda-version
- if: cuda_major == "11"
33 changes: 21 additions & 12 deletions cpp/CMakeLists.txt
@@ -83,31 +83,40 @@ include(cmake/thirdparty/get_nvtx.cmake)
# ##################################################################################################
# * library targets --------------------------------------------------------------------------------

add_library(rmm INTERFACE)
add_library(rmm src/aligned.cpp src/cuda_device.cpp src/cuda_stream_pool.cpp
src/cuda_stream_view.cpp src/cuda_stream.cpp)
add_library(rmm::rmm ALIAS rmm)

target_include_directories(
rmm
INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>" "$<INSTALL_INTERFACE:include>")
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>"
INTERFACE "$<INSTALL_INTERFACE:include>")

if(CUDA_STATIC_RUNTIME)
message(STATUS "RMM: Enabling static linking of cudart")
target_link_libraries(rmm INTERFACE CUDA::cudart_static)
target_link_libraries(rmm PUBLIC CUDA::cudart_static)
else()
target_link_libraries(rmm INTERFACE CUDA::cudart)
target_link_libraries(rmm PUBLIC CUDA::cudart)
endif()

target_link_libraries(rmm INTERFACE CCCL::CCCL)
target_link_libraries(rmm INTERFACE dl)
target_link_libraries(rmm INTERFACE nvtx3::nvtx3-cpp)
target_link_libraries(rmm INTERFACE rapids_logger::rapids_logger)
target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
target_compile_definitions(rmm INTERFACE LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
target_link_libraries(rmm PUBLIC CCCL::CCCL ${CMAKE_DL_LIBS} nvtx3::nvtx3-cpp
rapids_logger::rapids_logger)

set_target_properties(
rmm
PROPERTIES BUILD_RPATH "\$ORIGIN"
INSTALL_RPATH "\$ORIGIN"
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CXX_VISIBILITY_PRESET hidden
POSITION_INDEPENDENT_CODE ON
INTERFACE_POSITION_INDEPENDENT_CODE ON)
Contributor: We may be able to drop this eventually depending on how much of rmm stops being header-only. Ideally we wouldn't be prescribing this for consumers, I think. @robertmaynard, any thoughts?

Contributor: Ideally, once RMM is a proper library we can drop INTERFACE_POSITION_INDEPENDENT_CODE, but currently having it won't hurt downstream projects.

target_compile_definitions(rmm PUBLIC LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

# Enable NVTX if necessary
if(RMM_NVTX)
target_compile_definitions(rmm INTERFACE RMM_NVTX)
target_compile_definitions(rmm PUBLIC RMM_NVTX)
endif()
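Since rmm is now a real (non-INTERFACE) library, its usage requirements are declared PUBLIC and propagate to consumers through the rmm::rmm target: include directories, the CUDA runtime, CCCL, NVTX, and rapids_logger. A minimal downstream consumer sketch, assuming rmm is installed and discoverable via find_package (project and target names here are illustrative, not part of this PR):

```cmake
# Illustrative consumer project; not part of this change.
cmake_minimum_required(VERSION 3.26)
project(rmm_consumer LANGUAGES CXX CUDA)

find_package(rmm REQUIRED)

add_executable(app main.cpp)
# Linking rmm::rmm pulls in its PUBLIC include directories, the CUDA runtime,
# CCCL, NVTX, and rapids_logger usage requirements automatically.
target_link_libraries(app PRIVATE rmm::rmm)
```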

# ##################################################################################################
42 changes: 10 additions & 32 deletions cpp/include/rmm/aligned.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
#include <cstddef>
#include <cstdint>

namespace RMM_NAMESPACE {
namespace RMM_EXPORT rmm {
Contributor: Is the idea to get rid of RMM_NAMESPACE altogether eventually?

Contributor (author): Yes, see: #1896 (comment)

/**
* @addtogroup utilities
@@ -49,10 +49,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
*
* @return True if the input is a power of two with non-negative integer exponent, false otherwise.
*/
[[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept
{
return (value != 0U) && ((value & (value - 1)) == 0U);
}
[[nodiscard]] bool is_pow2(std::size_t value) noexcept;

/**
* @brief Returns whether or not `alignment` is a valid memory alignment.
@@ -61,10 +58,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
*
* @return True if the alignment is valid, false otherwise.
*/
[[nodiscard]] constexpr bool is_supported_alignment(std::size_t alignment) noexcept
{
return is_pow2(alignment);
}
[[nodiscard]] bool is_supported_alignment(std::size_t alignment) noexcept;

/**
* @brief Align up to nearest multiple of specified power of 2
@@ -74,11 +68,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
*
* @return the aligned value
*/
[[nodiscard]] constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
{
assert(is_supported_alignment(alignment));
return (value + (alignment - 1)) & ~(alignment - 1);
}
[[nodiscard]] std::size_t align_up(std::size_t value, std::size_t alignment) noexcept;

/**
* @brief Align down to the nearest multiple of specified power of 2
@@ -88,11 +78,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
*
* @return the aligned value
*/
[[nodiscard]] constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
{
assert(is_supported_alignment(alignment));
return value & ~(alignment - 1);
}
[[nodiscard]] std::size_t align_down(std::size_t value, std::size_t alignment) noexcept;

/**
* @brief Checks whether a value is aligned to a multiple of a specified power of 2
@@ -102,11 +88,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
*
* @return true if aligned
*/
[[nodiscard]] constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept
{
assert(is_supported_alignment(alignment));
return value == align_down(value, alignment);
}
[[nodiscard]] bool is_aligned(std::size_t value, std::size_t alignment) noexcept;

/**
* @brief Checks whether the provided pointer is aligned to a specified @p alignment
@@ -116,13 +98,9 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
*
* @return true if the pointer is aligned
*/
[[nodiscard]] inline bool is_pointer_aligned(
void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) noexcept
{
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
return is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment);
}
[[nodiscard]] bool is_pointer_aligned(void* ptr,
std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) noexcept;

/** @} */ // end of group

} // namespace RMM_NAMESPACE
} // namespace RMM_EXPORT rmm
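With the alignment utilities no longer defined inline in the header, their definitions move to src/aligned.cpp (added to the rmm target in cpp/CMakeLists.txt above). The file itself is not shown in this excerpt; a plausible sketch of its contents, mirroring the inline bodies removed from this header, would be:

```cpp
// Sketch of src/aligned.cpp based on the inline bodies removed above;
// the actual file in the PR may differ in details.
#include <rmm/aligned.hpp>

#include <cassert>
#include <cstddef>
#include <cstdint>

namespace rmm {

[[nodiscard]] bool is_pow2(std::size_t value) noexcept
{
  return (value != 0U) && ((value & (value - 1)) == 0U);
}

[[nodiscard]] bool is_supported_alignment(std::size_t alignment) noexcept
{
  return is_pow2(alignment);
}

[[nodiscard]] std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  return (value + (alignment - 1)) & ~(alignment - 1);
}

[[nodiscard]] std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  return value & ~(alignment - 1);
}

[[nodiscard]] bool is_aligned(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  return value == align_down(value, alignment);
}

[[nodiscard]] bool is_pointer_aligned(void* ptr, std::size_t alignment) noexcept
{
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
  return is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment);
}

}  // namespace rmm
```

Note that the functions lose constexpr: they can no longer be evaluated at compile time once their definitions are hidden behind a translation unit, which is an intentional consequence of making rmm a compiled library.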
51 changes: 9 additions & 42 deletions cpp/include/rmm/cuda_device.hpp
@@ -16,18 +16,15 @@
#pragma once

#include <rmm/aligned.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/export.hpp>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <utility>

namespace RMM_NAMESPACE {
namespace RMM_EXPORT rmm {

struct cuda_device_id;
inline cuda_device_id get_current_cuda_device();
cuda_device_id get_current_cuda_device();

/**
* @addtogroup cuda_device_management
@@ -96,37 +93,21 @@ struct cuda_device_id {
*
* @return `cuda_device_id` for the current device
*/
inline cuda_device_id get_current_cuda_device()
{
cuda_device_id::value_type dev_id{-1};
RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id));
return cuda_device_id{dev_id};
}
cuda_device_id get_current_cuda_device();

/**
* @brief Returns the number of CUDA devices in the system
*
* @return Number of CUDA devices in the system
*/
inline int get_num_cuda_devices()
{
cuda_device_id::value_type num_dev{-1};
RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev));
return num_dev;
}
int get_num_cuda_devices();

/**
* @brief Returns the available and total device memory in bytes for the current device
*
* @return The available and total device memory in bytes for the current device as a std::pair.
*/
inline std::pair<std::size_t, std::size_t> available_device_memory()
{
std::size_t free{};
std::size_t total{};
RMM_CUDA_TRY(cudaMemGetInfo(&free, &total));
return {free, total};
}
std::pair<std::size_t, std::size_t> available_device_memory();

/**
* @brief Returns the approximate specified percent of available device memory on the current CUDA
@@ -136,13 +117,7 @@ inline std::pair<std::size_t, std::size_t> available_device_memory()
*
* @return The recommended initial device memory pool size in bytes.
*/
inline std::size_t percent_of_free_device_memory(int percent)
{
[[maybe_unused]] auto const [free, total] = rmm::available_device_memory();
auto fraction = static_cast<double>(percent) / 100.0;
return rmm::align_down(static_cast<std::size_t>(static_cast<double>(free) * fraction),
rmm::CUDA_ALLOCATION_ALIGNMENT);
}
std::size_t percent_of_free_device_memory(int percent);

/**
* @brief RAII class that sets the current CUDA device to the specified device on construction
@@ -154,19 +129,11 @@ struct cuda_set_device_raii {
*
* @param dev_id The device to set as the current CUDA device
*/
explicit cuda_set_device_raii(cuda_device_id dev_id)
: old_device_{get_current_cuda_device()},
needs_reset_{dev_id.value() >= 0 && old_device_ != dev_id}
{
if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); }
}
explicit cuda_set_device_raii(cuda_device_id dev_id);
/**
* @brief Reactivates the previous CUDA device
*/
~cuda_set_device_raii() noexcept
{
if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); }
}
~cuda_set_device_raii() noexcept;

cuda_set_device_raii(cuda_set_device_raii const&) = delete;
cuda_set_device_raii& operator=(cuda_set_device_raii const&) = delete;
@@ -179,4 +146,4 @@ struct cuda_set_device_raii {
};

/** @} */ // end of group
} // namespace RMM_NAMESPACE
} // namespace RMM_EXPORT rmm
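Likewise, the device-management helpers and the cuda_set_device_raii special members are now only declared here, with definitions moved to src/cuda_device.cpp (also added to the rmm target). A plausible sketch of that file, reconstructed from the inline bodies removed above (the actual source is not shown in this diff):

```cpp
// Sketch of src/cuda_device.cpp based on the inline bodies removed above;
// the actual file in the PR may differ in details.
#include <rmm/aligned.hpp>
#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <utility>

namespace rmm {

cuda_device_id get_current_cuda_device()
{
  cuda_device_id::value_type dev_id{-1};
  RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id));
  return cuda_device_id{dev_id};
}

int get_num_cuda_devices()
{
  cuda_device_id::value_type num_dev{-1};
  RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev));
  return num_dev;
}

std::pair<std::size_t, std::size_t> available_device_memory()
{
  std::size_t free{};
  std::size_t total{};
  RMM_CUDA_TRY(cudaMemGetInfo(&free, &total));
  return {free, total};
}

std::size_t percent_of_free_device_memory(int percent)
{
  [[maybe_unused]] auto const [free, total] = rmm::available_device_memory();
  auto fraction = static_cast<double>(percent) / 100.0;
  return rmm::align_down(static_cast<std::size_t>(static_cast<double>(free) * fraction),
                         rmm::CUDA_ALLOCATION_ALIGNMENT);
}

cuda_set_device_raii::cuda_set_device_raii(cuda_device_id dev_id)
  : old_device_{get_current_cuda_device()},
    needs_reset_{dev_id.value() >= 0 && old_device_ != dev_id}
{
  if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); }
}

cuda_set_device_raii::~cuda_set_device_raii() noexcept
{
  if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); }
}

}  // namespace rmm
```

Moving these definitions out of the header also lets the header drop its includes of rmm/detail/error.hpp and cuda_runtime_api.h, which now only need to be visible to the translation unit.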