-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
71 lines (60 loc) · 2.61 KB
/
Copy pathCMakeLists.txt
File metadata and controls
71 lines (60 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
cmake_minimum_required(VERSION 3.20)

# ============================================================================
# Build Configuration
# ============================================================================

# Default GPU architectures. This is set before project() and only when the
# user has not chosen architectures themselves, so a value passed with
# -DCMAKE_CUDA_ARCHITECTURES=... or through the CUDAARCHS environment variable
# takes precedence over this list.
#
# For the fastest build, target only your own GPU, for example:
#   cmake -DCMAKE_CUDA_ARCHITECTURES=120 ..   # RTX 5080 (Blackwell)
#   cmake -DCMAKE_CUDA_ARCHITECTURES=89  ..   # RTX 4090 (Ada)
#   cmake -DCMAKE_CUDA_ARCHITECTURES=86  ..   # RTX 3090 (Ampere)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
  set(CMAKE_CUDA_ARCHITECTURES "80;86;89;90;120")
endif()

project(flash_attention_cuda LANGUAGES CUDA CXX)

# Build CUDA and host C++ code as C++17. The *_STANDARD_REQUIRED switches make
# configuration fail instead of silently falling back to an older standard.
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Global CUDA flags: they apply to every CUDA target created after this point.
string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math -lineinfo")

# Release optimisation flags. The host C++ override uses GCC/Clang spelling,
# so it is skipped for MSVC, where CMake's default release flags are kept.
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG")
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
endif()
# ============================================================================
# Dependencies
# ============================================================================
# CUDAToolkit supplies the CUDA::cudart imported target that the library links
# against; REQUIRED stops configuration if no CUDA toolkit can be found.
find_package(CUDAToolkit REQUIRED)
# ============================================================================
# Flash Attention Library
# ============================================================================
# Static library built from the flash attention CUDA kernel source.
add_library(flash_attention STATIC
  kernels/flash_attention.cu
)

# Expose include/ to this target and to everything that links it.
# PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps the path correct when
# this project is pulled into a larger build with add_subdirectory().
target_include_directories(flash_attention PUBLIC
  "${PROJECT_SOURCE_DIR}/include"
)

# CUDA::cudart carries the toolkit include directories as a usage requirement,
# so they do not need to be listed separately.
target_link_libraries(flash_attention PUBLIC
  CUDA::cudart
)

# nvcc-only flags. The generator expression is quoted so that it reaches CMake
# as a single argument instead of being split at whitespace.
target_compile_options(flash_attention PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr;--expt-extended-lambda>"
)

# Let CMake add the platform's position-independent-code flag (nothing is
# added where the platform does not need one) instead of passing
# -Xcompiler=-fPIC by hand.
set_target_properties(flash_attention PROPERTIES
  POSITION_INDEPENDENT_CODE ON
)
# ============================================================================
# Demo — Runs kernel, dumps visualization data
# ============================================================================
# Demo executable, linked against the flash_attention library.
add_executable(flash_demo
  src/demo.cu
)
target_link_libraries(flash_demo PRIVATE
  flash_attention
)
# PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so the header path still resolves
# when this project is built as a subdirectory of another one.
target_include_directories(flash_demo PRIVATE
  "${PROJECT_SOURCE_DIR}/include"
)
# ============================================================================
# Benchmark — Correctness tests + performance suite
# ============================================================================
# Benchmark executable, linked against the flash_attention library.
add_executable(flash_bench
  src/bench.cu
)
target_link_libraries(flash_bench PRIVATE
  flash_attention
)
# PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so the header path still resolves
# when this project is built as a subdirectory of another one.
target_include_directories(flash_bench PRIVATE
  "${PROJECT_SOURCE_DIR}/include"
)
# ============================================================================
# Info
# ============================================================================
# Configure-time summary of the effective settings.
message(STATUS "CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

# CMAKE_BUILD_TYPE is only meaningful for single-config generators; multi-config
# generators choose the configuration at build time, so list the available
# configurations there instead of printing an empty value.
get_property(flash_attention_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(flash_attention_is_multi_config)
  message(STATUS "Build configurations: ${CMAKE_CONFIGURATION_TYPES}")
elseif(CMAKE_BUILD_TYPE)
  message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
else()
  message(STATUS "Build type: (not set)")
endif()