Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions kernel-builder/src/templates/get_gpu_lang.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,8 @@
# Get the GPU language from Torch.
#
# Arguments:
#   OUT - name of the variable, set in the caller's scope, that receives the
#         standard output of cmake/get_gpu_lang.py.
#
# The configure step is aborted with a fatal error if the script fails.
function(get_gpu_lang OUT)
  # Run the script exactly once through the shared helper; an additional
  # inline execute_process() call would only repeat the work and have its
  # result overwritten.
  run_python_script(PYTHON_OUT
    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/get_gpu_lang.py"
    "Cannot detect GPU language")
  # Quoted so that an empty result defines OUT as an empty string instead of
  # unsetting it in the caller's scope.
  set(${OUT} "${PYTHON_OUT}" PARENT_SCOPE)
endfunction()
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""Detect the CUDA compute capability of the first available GPU device.

Exits with code 0 and prints the capability (e.g. "8.6") on success.
Exits with code 1 if the capability cannot be determined.
"""

import ctypes
import ctypes.util
import sys

# Definitions from cuda.h
# Attribute identifiers handed to cuDeviceGetAttribute to query the major and
# minor parts of a device's compute capability. They are pre-wrapped in
# ctypes.c_int so they can be passed straight through as C int arguments.
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = ctypes.c_int(75)
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = ctypes.c_int(76)


def check_result(ret: int, msg: str) -> None:
    """Terminate the script when a call reported a non-zero status.

    Args:
        ret: Status code returned by the call; ``0`` means success.
        msg: Description of the failed call, used as the start of the
            diagnostic written to standard error.

    Raises:
        SystemExit: With exit code 1 when ``ret`` is not zero.
    """
    if ret == 0:
        return

    diagnostic = f"{msg} (error code {ret})"
    print(diagnostic, file=sys.stderr)
    sys.exit(1)


def find_libcuda() -> "ctypes.CDLL | None":
    """Load the CUDA driver library and return its handle.

    A fixed list of well-known names and paths is tried first, in order. Only
    when none of them can be loaded is ``ctypes.util.find_library("cuda")``
    consulted, because that lookup may have to run external programs.

    The return annotation is a string so that it is not evaluated when the
    function is defined; the ``X | None`` form is not supported at runtime by
    interpreters older than Python 3.10.

    Returns:
        The loaded library, or ``None`` if no candidate could be loaded.
    """
    well_known = (
        "libcuda.so.1",
        "libcuda.so",
        "/run/opengl-driver/lib/libcuda.so.1",
        "/run/opengl-driver/lib/libcuda.so",
    )
    for name in well_known:
        try:
            return ctypes.CDLL(name)
        except OSError:
            continue

    # Last resort: ask the platform's library search for a match.
    located = ctypes.util.find_library("cuda")
    if located is not None:
        try:
            return ctypes.CDLL(located)
        except OSError:
            pass
    return None


def main() -> int:
    """Print the compute capability of CUDA device 0 as ``major.minor``.

    Returns:
        0 after the capability has been printed to standard output, or 1 when
        the CUDA driver library could not be loaded. A failing driver call
        ends the process through ``check_result`` instead of returning.
    """
    cuda = find_libcuda()
    if cuda is None:
        print("Could not load libcuda.so", file=sys.stderr)
        return 1

    # The driver API has to be initialised before any other call is made.
    status = cuda.cuInit(ctypes.c_uint(0))
    check_result(status, "cuInit failed")

    # Obtain the handle of the device with ordinal 0.
    dev = ctypes.c_int(0)
    status = cuda.cuDeviceGet(ctypes.byref(dev), ctypes.c_int(0))
    check_result(status, "cuDeviceGet failed")

    # Query the major part first, then the minor part, stopping at the first
    # failure.
    queries = (
        (
            CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
            "cuDeviceGetAttribute (major) failed",
        ),
        (
            CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
            "cuDeviceGetAttribute (minor) failed",
        ),
    )
    parts = []
    for attribute, failure_msg in queries:
        value = ctypes.c_int(0)
        status = cuda.cuDeviceGetAttribute(ctypes.byref(value), attribute, dev)
        check_result(status, failure_msg)
        parts.append(str(value.value))

    print(".".join(parts))
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit code.
    sys.exit(main())
10 changes: 7 additions & 3 deletions kernel-builder/src/templates/tvm_ffi/preamble.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ endif()
set(KERNEL_REVISION "{{ revision }}" CACHE STRING "Kernel revision, must be unique")
set(OPS_NAME "_{{python_name}}_${BACKEND}_{{ revision }}")

# NOTE(review): BUILD_ALL_SUPPORTED_ARCHS is declared twice below with
# different defaults. option() leaves a variable that is already defined
# untouched, so only one of these defaults can take effect - confirm which
# default is intended and drop the other declaration.
option(BUILD_ALL_SUPPORTED_ARCHS "Build all supported architectures" on)
option(BUILD_ALL_SUPPORTED_ARCHS "Build all supported architectures" off)

if(GPU_LANG STREQUAL "CUDA")
enable_language(CUDA)
Expand All @@ -62,8 +62,12 @@ if(GPU_LANG STREQUAL "CUDA")
# Select the CUDA architectures to build kernels for: either the full default
# list, or only the capability of the GPU found on the build machine.
if(BUILD_ALL_SUPPORTED_ARCHS)
  set(CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}")
else()
  # Detect the compute capability of the first available GPU device. The
  # helper appends ": <stderr>" to the message, so it is phrased without a
  # trailing period.
  run_python_script(DETECTED_CUDA_CAPABILITY
    "${CMAKE_CURRENT_LIST_DIR}/cmake/cuda/detect-cuda-capability.py"
    "Cannot detect CUDA device capability (set BUILD_ALL_SUPPORTED_ARCHS=ON to disable detection)")

  # The script prints "<major>.<minor>"; refuse anything else (including an
  # empty result) rather than configuring with a bogus architecture list.
  if(NOT "${DETECTED_CUDA_CAPABILITY}" MATCHES "^[0-9]+\\.[0-9]+$")
    message(FATAL_ERROR
      "Unexpected CUDA device capability \"${DETECTED_CUDA_CAPABILITY}\". Set BUILD_ALL_SUPPORTED_ARCHS=ON to disable detection.")
  endif()

  message(STATUS "Detected CUDA device capability: ${DETECTED_CUDA_CAPABILITY}")
  set(CUDA_KERNEL_ARCHS "${DETECTED_CUDA_CAPABILITY}")
endif()

{% if cuda_minver %}
Expand Down
20 changes: 20 additions & 0 deletions kernel-builder/src/templates/utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,26 @@ function (run_python OUT EXPR ERR_MSG)
set(${OUT} ${PYTHON_OUT} PARENT_SCOPE)
endfunction()

#
# Run `SCRIPT_PATH` in Python. The standard output of Python is stored in
# `OUT` (in the caller's scope) and has trailing whitespace stripped. If the
# interpreter cannot be started or the script exits with a non-zero code, a
# fatal message is issued that consists of `ERR_MSG`, the process result and
# the script's standard error.
#
# Arguments:
#   OUT         - name of the variable that receives the script's output.
#   SCRIPT_PATH - path of the Python script to run.
#   ERR_MSG     - text that starts the fatal message on failure.
#
function (run_python_script OUT SCRIPT_PATH ERR_MSG)
  execute_process(
    COMMAND
      "${Python_EXECUTABLE}" "${SCRIPT_PATH}"
    OUTPUT_VARIABLE PYTHON_OUT
    RESULT_VARIABLE PYTHON_ERROR_CODE
    ERROR_VARIABLE PYTHON_STDERR
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_STRIP_TRAILING_WHITESPACE)

  # The result is either the exit code or, when the process could not be
  # launched at all, a string describing the error. In the latter case
  # standard error is empty, so the result itself is reported as well.
  if(NOT PYTHON_ERROR_CODE EQUAL 0)
    message(FATAL_ERROR
      "${ERR_MSG} (result: ${PYTHON_ERROR_CODE}): ${PYTHON_STDERR}")
  endif()

  # Quoted so that empty output defines OUT as an empty string instead of
  # unsetting it in the caller's scope.
  set(${OUT} "${PYTHON_OUT}" PARENT_SCOPE)
endfunction()

#
# Run `EXPR` in python. The standard output of python is stored in `OUT` and
# has trailing whitespace stripped. If an error is encountered when running
Expand Down
7 changes: 7 additions & 0 deletions kernel-builder/src/tvm_ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ static CMAKE_KERNEL: &str = include_str!("../templates/kernel.cmake");
// Template and helper sources compiled into the binary via include_str!.
static CMAKE_UTILS: &str = include_str!("../templates/utils.cmake");
static COMPAT_PY: &str = include_str!("../templates/compat.py");
static OPS_PY_IN: &str = include_str!("../templates/tvm_ffi/_ops.py.in");
// Python script for CUDA capability detection; write_cmake_helpers emits it
// under the name "cuda/detect-cuda-capability.py".
static DETECT_CUDA_CAPABILITY_PY: &str =
include_str!("../templates/tvm_ffi/cuda/detect-cuda-capability.py");

fn write_cmake_helpers(file_set: &mut FileSet) {
write_cmake_file(file_set, "utils.cmake", CMAKE_UTILS.as_bytes());
Expand All @@ -26,6 +28,11 @@ fn write_cmake_helpers(file_set: &mut FileSet) {
BUILD_VARIANTS_UTILS.as_bytes(),
);
write_cmake_file(file_set, "_ops.py.in", OPS_PY_IN.as_bytes());
write_cmake_file(
file_set,
"cuda/detect-cuda-capability.py",
DETECT_CUDA_CAPABILITY_PY.as_bytes(),
);
}

pub fn write_compat_py(file_set: &mut FileSet) -> Result<()> {
Expand Down
Loading