forked from tile-ai/tilelang
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
485 lines (428 loc) · 18.7 KB
/
CMakeLists.txt
File metadata and controls
485 lines (428 loc) · 18.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
# Learned a lot from the MLC-LLM project:
# https://github.com/mlc-ai/mlc-llm/blob/main/CMakeLists.txt
cmake_minimum_required(VERSION 3.26)
project(TILE_LANG C CXX)

# Global defaults applied to every target created after this point.
# CMAKE_CXX_STANDARD alone is only a preference: without
# CMAKE_CXX_STANDARD_REQUIRED CMake silently decays to an older standard when
# the compiler cannot provide C++17, which then fails much later with confusing
# compile errors. Make the requirement explicit.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Only applied for GCC when the CIBUILDWHEEL environment variable holds a true
# value at configure time.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND "$ENV{CIBUILDWHEEL}")
  # Warning came from tvm submodule
  string(APPEND CMAKE_CXX_FLAGS " -Wno-dangling-reference")
endif()

# Make the helper modules under ./cmake visible to include()/find_package().
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# When configuring from a git checkout that declares submodules (both `.git`
# and `.gitmodules` exist in the source directory), run
# `git submodule update --init --recursive` at configure time.
# Trees without those entries skip this step entirely.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.gitmodules"
   AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
  find_package(Git QUIET)

  # Guard clause: FATAL_ERROR stops processing, so everything below only runs
  # when a git executable was found.
  if(NOT Git_FOUND)
    message(
      FATAL_ERROR
        "Git is required to initialize TileLang submodules. "
        "Please install git or fetch the submodules manually."
    )
  endif()

  execute_process(
    COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    RESULT_VARIABLE TILELANG_GIT_SUBMODULE_RESULT
  )

  # A non-zero exit status from git aborts the configuration.
  if(NOT TILELANG_GIT_SUBMODULE_RESULT EQUAL 0)
    message(
      FATAL_ERROR
        "Failed to initialize git submodules. Please run "
        "`git submodule update --init --recursive` and re-run CMake."
    )
  endif()
endif()
# Compiler launcher selection: prefer ccache, fall back to sccache, otherwise
# leave the launchers untouched. The launcher is stored in the cache for C, CXX
# and CUDA without FORCE, so a value the user already provided is kept.
unset(_tilelang_compiler_launcher)
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
  if(APPLE)
    # Passing configs like `ccache base_dir=/xxx cc ...` is supported
    # (likely) since ccache 4.x, which has been provided by homebrew.
    # Our Linux builder image (manylinux2014 & manylinux_2_28) still
    # provides ccache 3.x and does not support this form.
    # `cibuildwheel` uses a fixed folder on Linux (`/project`) as working
    # directory, so the cache works there without setting `base_dir`.
    #
    # Only report `base_dir` here, where it is actually passed to ccache.
    message(STATUS "Using ccache: ${CCACHE_PROGRAM} with base_dir=${CMAKE_SOURCE_DIR}")
    set(CCACHE_PROGRAM "${CCACHE_PROGRAM};base_dir=${CMAKE_SOURCE_DIR}")
  else()
    message(STATUS "Using ccache: ${CCACHE_PROGRAM}")
  endif()
  set(_tilelang_compiler_launcher "${CCACHE_PROGRAM}")
else()
  find_program(SCCACHE_PROGRAM sccache)
  if(SCCACHE_PROGRAM)
    message(STATUS "Using sccache: ${SCCACHE_PROGRAM}")
    set(_tilelang_compiler_launcher "${SCCACHE_PROGRAM}")
  endif()
endif()

if(DEFINED _tilelang_compiler_launcher)
  foreach(_tilelang_lang IN ITEMS C CXX CUDA)
    set(CMAKE_${_tilelang_lang}_COMPILER_LAUNCHER "${_tilelang_compiler_launcher}"
        CACHE STRING "${_tilelang_lang} compiler launcher")
  endforeach()
  unset(_tilelang_lang)
  unset(_tilelang_compiler_launcher)
endif()
# Configs
# Backends handled by this file; each one is controlled by a USE_<BACKEND> option.
set(TILELANG_BACKENDS CUDA ROCM METAL)
# Help strings used for the USE_<BACKEND> cache entries (looked up by backend name).
set(TILELANG_BACKEND_DOC_CUDA "Enable CUDA backend (ON/OFF/or CUDA SDK path)")
set(TILELANG_BACKEND_DOC_ROCM "Enable ROCm backend (ON/OFF/or ROCm SDK path)")
set(TILELANG_BACKEND_DOC_METAL "Enable Metal backend")
# TVM's config.cmake redefines USE_* options later, so we cache the user's choice
# (including explicit -DUSE_XXX arguments) before we include TVM and restore it
# afterwards.
#
# tilelang_define_backend_option(<BACKEND>)
#   Declares the USE_<BACKEND> option and records two things:
#     * TILELANG_USER_OVERRIDE_USE_<BACKEND> (INTERNAL cache entry): ON when the
#       option is considered to have been set explicitly by the user.
#     * TILELANG_OPTION_USE_<BACKEND> (normal variable): the option's value as
#       seen right after option() ran.
#   This is a macro, not a function: every helper variable it sets
#   (_backend_var, _doc, _user_override_var, _user_override, _cache_type,
#   _default) is written into the caller's scope.
macro(tilelang_define_backend_option BACKEND)
set(_backend_var "USE_${BACKEND}")
set(_doc "${TILELANG_BACKEND_DOC_${BACKEND}}")
set(_user_override_var "TILELANG_USER_OVERRIDE_${_backend_var}")
# Start from OFF, then reuse the flag remembered by an earlier configure run, if any.
set(_user_override OFF)
if(DEFINED ${_user_override_var})
set(_user_override "${${_user_override_var}}")
endif()
# A cache entry whose type is still UNINITIALIZED exists before option() below
# has assigned it a type; this is treated as "set explicitly by the user"
# (presumably via -DUSE_<BACKEND>=... on the command line).
if(DEFINED CACHE{${_backend_var}})
get_property(_cache_type CACHE ${_backend_var} PROPERTY TYPE)
if(_cache_type STREQUAL "UNINITIALIZED")
set(_user_override ON)
endif()
endif()
# Default for option(): the current value when the variable is already defined,
# otherwise OFF.
set(_default OFF)
if(DEFINED ${_backend_var})
set(_default "${${_backend_var}}")
endif()
option(${_backend_var} "${_doc}" "${_default}")
# Remember if the user explicitly set this option so that later logic
# won't auto-toggle backends they configured on the command line.
set(${_user_override_var} ${_user_override} CACHE INTERNAL
"User explicitly set ${_backend_var} during configuration" FORCE)
# Snapshot of the value, used later to restore USE_<BACKEND>.
set(TILELANG_OPTION_${_backend_var} "${${_backend_var}}")
endmacro()
# Declare the option for every known backend.
foreach(BACKEND IN LISTS TILELANG_BACKENDS)
tilelang_define_backend_option(${BACKEND})
endforeach()
set(PREBUILD_CYTHON ON)

# CUDA stub libraries (cuda/cudart/nvrtc) are used to build wheels that can run
# across different CUDA Toolkit major versions and/or on CPU-only machines by
# avoiding hard DT_NEEDED dependencies on versioned CUDA SONAMEs.
#
# The stubs are currently POSIX-only (dlopen/dlsym via <dlfcn.h>), so the
# option defaults to ON everywhere except native Windows (Cygwin counts as
# POSIX here).
set(_TILELANG_USE_CUDA_STUBS_DEFAULT ON)
if(WIN32 AND NOT CYGWIN)
  set(_TILELANG_USE_CUDA_STUBS_DEFAULT OFF)
endif()

option(
  TILELANG_USE_CUDA_STUBS
  "Use POSIX dlopen-based CUDA stub libraries (cuda/cudart/nvrtc) for portable wheels"
  ${_TILELANG_USE_CUDA_STUBS_DEFAULT}
)

# The default is only needed to seed the option; do not leave it behind.
unset(_TILELANG_USE_CUDA_STUBS_DEFAULT)
# Configs end
# Locate TVM and pull in its default configuration.
# NOTE(review): TVM_SOURCE is not set anywhere in this file; presumably
# cmake/load_tvm.cmake defines it - confirm there.
include(cmake/load_tvm.cmake)
if(NOT EXISTS "${TVM_SOURCE}/cmake/config.cmake")
  # Name the path that was checked so the failure is actionable.
  message(
    FATAL_ERROR
      "TVM sources not found: `${TVM_SOURCE}/cmake/config.cmake` does not exist. "
      "Neither a TVM source tree was provided nor was the TVM submodule checked out."
  )
endif()
include("${TVM_SOURCE}/cmake/config.cmake")

# Re-apply TileLang's preferred backend settings after TVM's config may have
# overridden the USE_* cache entries. Both the cache entry and the normal
# variable are written so the restored value wins however USE_<BACKEND> is
# looked up afterwards. The value is quoted so that an empty or list-valued
# setting is passed through as a single argument.
foreach(BACKEND IN LISTS TILELANG_BACKENDS)
  set(_backend_var "USE_${BACKEND}")
  set(_doc "${TILELANG_BACKEND_DOC_${BACKEND}}")
  set(${_backend_var} "${TILELANG_OPTION_${_backend_var}}" CACHE STRING "${_doc}" FORCE)
  set(${_backend_var} "${TILELANG_OPTION_${_backend_var}}")
endforeach()
# Include directories for TileLang
# NOTE(review): TVM_INCLUDES is not defined in this file; presumably it comes
# from cmake/load_tvm.cmake - confirm there.
set(TILE_LANG_INCLUDES ${TVM_INCLUDES})

# Collect the backend-independent source files.
# CONFIGURE_DEPENDS makes the build re-check these globs, so adding or removing
# a .cc file triggers a regeneration instead of being silently missed until
# someone re-runs CMake by hand.
file(GLOB TILE_LANG_SRCS CONFIGURE_DEPENDS
  src/*.cc
  src/layout/*.cc
  src/transform/*.cc
  src/transform/common/*.cc
  src/op/*.cc
  src/target/utils.cc
  src/target/codegen_c_host.cc
  src/target/codegen_cpp.cc
  src/target/rt_mod_cpp.cc
  # intrin_rule doesn't have system dependency
  src/target/intrin_rule*.cc
)

# Always include CPU-safe runtime helpers
list(APPEND TILE_LANG_SRCS
  src/runtime/error_helpers.cc
)
# Determine whether a backend was already chosen: either some USE_<BACKEND> is
# truthy, or its TILELANG_USER_OVERRIDE_USE_<BACKEND> flag is set.
set(TILELANG_BACKEND_USER_SELECTED OFF)
foreach(BACKEND IN LISTS TILELANG_BACKENDS)
  set(_backend_var "USE_${BACKEND}")
  set(_override_var "TILELANG_USER_OVERRIDE_${_backend_var}")
  if(${_backend_var} OR ${_override_var})
    set(TILELANG_BACKEND_USER_SELECTED ON)
  endif()
endforeach()

# Otherwise pick a backend automatically. The first matching rule wins:
#   1. environment USE_METAL is true      -> Metal
#   2. building on an Apple platform      -> Metal
#   3. environment USE_ROCM is true       -> ROCm
#   4. environment USE_CUDA is true       -> CUDA
#   5. environment USE_CUDA set but false -> CPU-only (CUDA off)
#   6. anything else                      -> CUDA
if(NOT TILELANG_BACKEND_USER_SELECTED)
  if($ENV{USE_METAL})
    set(USE_METAL ON)
  elseif(APPLE)
    message(STATUS "Enable Metal support by default.")
    set(USE_METAL ON)
  elseif($ENV{USE_ROCM})
    set(USE_ROCM ON)
  elseif($ENV{USE_CUDA})
    set(USE_CUDA ON)
  elseif(DEFINED ENV{USE_CUDA} AND NOT $ENV{USE_CUDA})
    # Build CPU-only when we explicitly disable CUDA
    set(USE_CUDA OFF)
  else()
    message(STATUS "Enable CUDA support by default.")
    set(USE_CUDA ON)
  endif()
endif()
# tilelang_add_cuda_stub(<target> <source> <export_definition>)
#
# Creates the shared library <target> from the single file <source>:
#   * compiled against the CUDA Toolkit headers,
#   * with <export_definition> defined privately (the stub sources use it to
#     export their symbols with default visibility while being built),
#   * linked against ${CMAKE_DL_LIBS} because the stubs load the real library
#     through dlopen/dlsym,
#   * written to <build>/lib under the output name <target>.
function(tilelang_add_cuda_stub target source export_definition)
  add_library(${target} SHARED ${source})
  target_include_directories(${target} PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
  target_compile_definitions(${target} PRIVATE ${export_definition})
  target_link_libraries(${target} PRIVATE ${CMAKE_DL_LIBS})
  set_target_properties(${target} PROPERTIES
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
    OUTPUT_NAME "${target}"
  )
endfunction()

# Backend-specific sources and toolchain setup. Exactly one branch is taken,
# checked in the order Metal, ROCm, CUDA.
if(USE_METAL)
  file(GLOB TILE_LANG_METAL_SRCS
    src/target/rt_mod_metal.cc
  )
  list(APPEND TILE_LANG_SRCS ${TILE_LANG_METAL_SRCS})
  # FIXME: CIBW failed with backtrace, why???
  set(TVM_FFI_USE_LIBBACKTRACE OFF)

elseif(USE_ROCM)
  set(CMAKE_HIP_STANDARD 17)
  # find_rocm() is provided by TVM's FindROCM module; USE_ROCM is forwarded to
  # it as-is (ON or an SDK path).
  include(${TVM_SOURCE}/cmake/utils/FindROCM.cmake)
  find_rocm(${USE_ROCM})
  add_compile_definitions(__HIP_PLATFORM_AMD__ __HIP_PLATFORM_HCC__=1)

  file(GLOB TILE_LANG_HIP_SRCS
    src/target/codegen_hip.cc
    src/target/rt_mod_hip.cc
  )
  list(APPEND TILE_LANG_SRCS ${TILE_LANG_HIP_SRCS})
  list(APPEND TILE_LANG_INCLUDES ${ROCM_INCLUDE_DIRS})

elseif(USE_CUDA)
  set(CMAKE_CUDA_STANDARD 17)
  find_package(CUDAToolkit REQUIRED)
  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc")
  add_compile_definitions("CUDA_MAJOR_VERSION=${CUDAToolkit_VERSION_MAJOR}")

  # Set `USE_CUDA=/usr/local/cuda-x.y`, i.e. the parent of the toolkit's bin
  # directory.
  cmake_path(GET CUDAToolkit_BIN_DIR PARENT_PATH USE_CUDA)

  if(TILELANG_USE_CUDA_STUBS)
    if(WIN32 AND NOT CYGWIN)
      message(FATAL_ERROR "TILELANG_USE_CUDA_STUBS=ON is not supported on Windows. "
                          "Please configure with -DTILELANG_USE_CUDA_STUBS=OFF.")
    endif()

    # ------------------------------------------------------------------------
    # CUDA Driver stub (libcuda_stub.so)
    # ------------------------------------------------------------------------
    # Drop-in replacements for the CUDA driver API (cuModuleLoadData,
    # cuLaunchKernel, cuMemsetD32_v2, ...). Linking directly against libcuda.so
    # would fail on CPU-only machines; the stub instead loads libcuda.so lazily
    # at runtime on the first API call. The exported functions are plain global
    # C functions, so they can be called without wrapper macros.
    tilelang_add_cuda_stub(cuda_stub src/target/stubs/cuda.cc TILELANG_CUDA_STUB_EXPORTS)

    # ------------------------------------------------------------------------
    # CUDA Runtime stub (libcudart_stub.so)
    # ------------------------------------------------------------------------
    # libcudart's SONAME includes its major version (e.g. libcudart.so.11.0 /
    # .12 / .13). Linking against this stub instead lets a single wheel run
    # where different libcudart major versions are installed. It exports the
    # minimal set of CUDA Runtime entry points used by TVM and loads libcudart
    # lazily on the first API call.
    tilelang_add_cuda_stub(cudart_stub src/target/stubs/cudart.cc TILELANG_CUDART_STUB_EXPORTS)

    # Make TVM link against our CUDA Runtime stub instead of the real libcudart.
    #
    # NOTE: TVM's `find_cuda()` calls `find_library(CUDA_CUDART_LIBRARY cudart ...)`.
    # `find_library()` will not override an already-cached variable, so setting
    # it here ensures TVM doesn't record a DT_NEEDED on `libcudart.so.<major>`.
    set(CUDA_CUDART_LIBRARY cudart_stub CACHE STRING "CUDART library to link against" FORCE)

    # ------------------------------------------------------------------------
    # NVRTC stub (libnvrtc_stub.so)
    # ------------------------------------------------------------------------
    # NVRTC's SONAME includes its major version (e.g. libnvrtc.so.11.2 / .12 /
    # .13). Same approach as for cudart: a minimal set of NVRTC C API entry
    # points used by TVM, with libnvrtc loaded lazily on the first API call.
    tilelang_add_cuda_stub(nvrtc_stub src/target/stubs/nvrtc.cc TILELANG_NVRTC_STUB_EXPORTS)

    # Make TVM link against our NVRTC stub instead of the real libnvrtc.
    #
    # NOTE: TVM's `find_cuda()` calls `find_library(CUDA_NVRTC_LIBRARY nvrtc ...)`.
    # `find_library()` will not override an already-cached variable, so setting
    # it here ensures TVM doesn't record a DT_NEEDED on `libnvrtc.so.<major>`.
    set(CUDA_NVRTC_LIBRARY nvrtc_stub CACHE STRING "NVRTC library to link against" FORCE)
  endif()

  file(GLOB TILE_LANG_CUDA_SRCS
    src/runtime/runtime.cc
    src/target/ptx.cc
    src/target/codegen_cuda.cc
    src/target/codegen_py.cc
    src/target/codegen_utils.cc
    src/target/codegen_cutedsl.cc
    src/target/rt_mod_cuda.cc
    src/target/rt_mod_cutedsl.cc
  )
  list(APPEND TILE_LANG_SRCS ${TILE_LANG_CUDA_SRCS})
  list(APPEND TILE_LANG_INCLUDES ${CUDAToolkit_INCLUDE_DIRS})
endif()
# Z3 support. When both switches are on, Z3 is located through the find module
# shipped in cmake/pypi-z3.
set(USE_Z3 ON CACHE STRING "Use Z3 SMT solver for TileLang optimizations")
set(USE_PYPI_Z3 ON CACHE BOOL "Use Z3 provided by PyPI z3-solver package")
if(USE_Z3 AND USE_PYPI_Z3)
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/pypi-z3")
  find_package(Z3 REQUIRED)
endif()

# Include tvm after configs have been populated
add_subdirectory(${TVM_SOURCE} tvm EXCLUDE_FROM_ALL)

# Resolve compile warnings in tvm
add_compile_definitions(DMLC_USE_LOGGING_LIBRARY=<tvm/runtime/logging.h>)

# TileLang sources are compiled once into an object library and then wrapped
# into the shared library below.
add_library(tilelang_objs OBJECT ${TILE_LANG_SRCS})

# Enable the TVM debug option (TVM_LOG_DEBUG) for Debug builds.
# A generator expression is used instead of testing CMAKE_BUILD_TYPE so the
# definition is also applied to the Debug configuration of multi-config
# generators (Visual Studio, Xcode, Ninja Multi-Config), where
# CMAKE_BUILD_TYPE is empty at configure time.
target_compile_definitions(tilelang_objs PRIVATE "$<$<CONFIG:Debug>:TVM_LOG_DEBUG>")
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  message(STATUS "Building TileLang with DEBUG mode")
endif()

target_include_directories(tilelang_objs PRIVATE ${TILE_LANG_INCLUDES})

add_library(tilelang SHARED $<TARGET_OBJECTS:tilelang_objs>)
target_link_libraries(tilelang PUBLIC tvm)

# Place dev build outputs under build/lib for consistency
set_target_properties(tilelang PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
  ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
)
# Build cython extension
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT})

# Input .pyx and generated C++ file, named once and reused below.
set(_tilelang_cython_pyx
    "${CMAKE_CURRENT_SOURCE_DIR}/tilelang/jit/adapter/cython/cython_wrapper.pyx")
set(_tilelang_cython_cpp "${CMAKE_BINARY_DIR}/tilelang_cython_wrapper.cpp")

# Build rule: translate the .pyx into C++ with `python -m cython`. The rule is
# re-run whenever the .pyx changes.
add_custom_command(
  OUTPUT "${_tilelang_cython_cpp}"
  COMMAND Python::Interpreter -m cython
          "${_tilelang_cython_pyx}"
          --module-name tilelang_cython_wrapper
          --cplus --output-file "${_tilelang_cython_cpp}"
  DEPENDS "${_tilelang_cython_pyx}"
  COMMENT "Cythoning tilelang/jit/adapter/cython/cython_wrapper.pyx"
  VERBATIM
)

# Forward a stable-ABI version to python_add_library() only when
# SKBUILD_SABI_VERSION is non-empty; otherwise USE_SABI stays untouched and
# contributes no arguments.
if(NOT "${SKBUILD_SABI_VERSION}" STREQUAL "")
  set(USE_SABI USE_SABI ${SKBUILD_SABI_VERSION})
endif()
python_add_library(
  tilelang_cython_wrapper MODULE
  "${_tilelang_cython_cpp}"
  ${USE_SABI}
  WITH_SOABI
)

# Ensure dev builds drop the extension into build/lib alongside other shared libs
set_target_properties(tilelang_cython_wrapper PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
  ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
)

# Install the extension into tilelang/lib inside the wheel
install(
  TARGETS tilelang_cython_wrapper
  LIBRARY DESTINATION tilelang/lib
  RUNTIME DESTINATION tilelang/lib
  ARCHIVE DESTINATION tilelang/lib
)

unset(_tilelang_cython_pyx)
unset(_tilelang_cython_cpp)
# Copy libz3.so to build folder to workaround isolated build env issue.
# The copy is registered as an install rule whose destination is <build>/lib,
# and tvm's build-tree RPATH is pointed at its own directory so the library is
# found next to it.
if(USE_Z3 AND USE_PYPI_Z3)
  get_target_property(Z3_LIBRARY_PATH z3::libz3 IMPORTED_LOCATION)
  install(FILES "${Z3_LIBRARY_PATH}" DESTINATION "${CMAKE_BINARY_DIR}/lib")

  # "Directory of the loading binary" is spelled differently per platform.
  if(APPLE)
    set(_tilelang_tvm_build_rpath "@loader_path")
  else()
    set(_tilelang_tvm_build_rpath "\$ORIGIN")
  endif()
  set_property(TARGET tvm PROPERTY BUILD_RPATH "${_tilelang_tvm_build_rpath}")
  unset(_tilelang_tvm_build_rpath)
endif()
# Targets that receive the shared RPATH / output-directory settings and are
# installed into the wheel.
set(TILELANG_OUTPUT_TARGETS tilelang tvm)

if(USE_CUDA AND TILELANG_USE_CUDA_STUBS)
  # Link against CUDA stub library instead of libcuda.so
  # This enables lazy loading of libcuda.so at runtime, allowing
  # `import tilelang` to succeed on CPU-only machines.
  # At this point the list holds exactly `tilelang` and `tvm`.
  target_link_libraries(tilelang PUBLIC cuda_stub)
  target_link_libraries(tvm PUBLIC cuda_stub)

  # Include CUDA stubs in output targets for RPATH configuration
  list(APPEND TILELANG_OUTPUT_TARGETS cuda_stub cudart_stub nvrtc_stub)
endif()
# Drop any cached patchelf location so it is looked up afresh below.
unset(PATCHELF_EXECUTABLE CACHE)

# Compute the install-time RPATH shared by all output targets.
#   * Apple: a ;-separated list of @loader_path entries.
#   * Other UNIX: a single :-separated string of $ORIGIN entries.
#   * Anything else: TILELANG_INSTALL_RPATH is left untouched.
if(APPLE)
  set(TILELANG_INSTALL_RPATH
      "@loader_path"
      "@loader_path/../../tvm_ffi/lib")
  if(USE_Z3 AND USE_PYPI_Z3)
    # Some z3 is placed in lib/ and some in bin/, we add both in rpath
    list(APPEND TILELANG_INSTALL_RPATH
         "@loader_path/../../z3/lib"
         "@loader_path/../../z3/bin")
  endif()
elseif(UNIX)
  set(_tilelang_rpath_entries
      "\$ORIGIN"
      "\$ORIGIN/../../tvm_ffi/lib")
  if(USE_Z3 AND USE_PYPI_Z3)
    list(APPEND _tilelang_rpath_entries "\$ORIGIN/../../z3/lib")
  endif()
  if(USE_CUDA)
    list(APPEND _tilelang_rpath_entries
         "\$ORIGIN/../../nvidia/cu${CUDAToolkit_VERSION_MAJOR}/lib")
  endif()
  list(JOIN _tilelang_rpath_entries ":" TILELANG_INSTALL_RPATH)
  unset(_tilelang_rpath_entries)

  # patchelf is optional; a later step is skipped when it is missing.
  find_program(PATCHELF_EXECUTABLE patchelf)
  if(NOT PATCHELF_EXECUTABLE)
    message(STATUS "`patchelf` not found.")
  endif()
endif()

# Let libtilelang search for tvm in the same directory: every output target
# gets the same install RPATH and is written to <build>/lib.
set_target_properties(${TILELANG_OUTPUT_TARGETS} PROPERTIES
  INSTALL_RPATH "${TILELANG_INSTALL_RPATH}"
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
  ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
)
# Exclude libcuda.so to allow importing on a CPU-only machine
# This step only runs for CUDA builds that use the stub libraries and only when
# a `patchelf` executable was found.
if(USE_CUDA AND TILELANG_USE_CUDA_STUBS AND PATCHELF_EXECUTABLE)
# Run `patchelf` on built libraries to remove libcuda.so dependency.
# Use `install(CODE ...)` instead of `add_custom_command(... POST_BUILD ...)`
# to avoid race conditions during linking.
#
# One install script fragment is registered per output target. Inside the
# quoted script, `${PATCHELF_EXECUTABLE}`, `${target}` and
# `${CMAKE_INSTALL_PREFIX}` are expanded now, at configure time, while
# `$<TARGET_FILE:...>` is a generator expression that resolves to the built
# file of the target. `\"` keeps literal quotes in the generated script.
# A failing patchelf run only produces a warning; it does not abort the install.
foreach(target IN LISTS TILELANG_OUTPUT_TARGETS)
install(CODE "
execute_process(
COMMAND ${PATCHELF_EXECUTABLE}
--remove-needed libcuda.so.1
--remove-needed libcuda.so
\"$<TARGET_FILE:${target}>\"
WORKING_DIRECTORY \"${CMAKE_INSTALL_PREFIX}\"
RESULT_VARIABLE patchelf_result
)
if(patchelf_result EQUAL 0)
message(STATUS \"`patchelf` successfully removed dependency `libcuda.so` from $<TARGET_FILE:${target}>\")
else()
message(WARNING \"`patchelf` failed to remove dependency `libcuda.so` from $<TARGET_FILE:${target}>\")
endif()
")
endforeach()
endif()
# Install every output target into tilelang/lib. This rule is declared after
# the patchelf fragments above, so the files are patched before being copied.
install(
TARGETS ${TILELANG_OUTPUT_TARGETS}
LIBRARY DESTINATION tilelang/lib
RUNTIME DESTINATION tilelang/lib
ARCHIVE DESTINATION tilelang/lib
)