@@ -47,6 +47,8 @@ set(SYCL_SUPPORTED_ARCHS "intel_gpu_pvc;intel_gpu_bmg_g21")
4747#
4848set (TORCH_SUPPORTED_VERSION_XPU "2.8.0" )
4949
# Build switch for the flash-attention (FA2) extension. Declared as an option
# (default ON) so it can be turned off at configure time with
# -DFA2_ENABLED=OFF; a plain set() would always force it back to ON.
option(FA2_ENABLED "Build the flash-attention (FA2) extension" ON)
51+
5052#
5153# Try to find python package with an executable that exactly matches
5254# `VLLM_PYTHON_EXECUTABLE` and is one of the supported versions.
@@ -155,12 +157,60 @@ if(VLLM_GPU_LANG STREQUAL "SYCL")
155157 "csrc/quantization/fp8/fp8_quant.cpp"
156158 )
157159 include_directories ("/usr/include" )
158- set (CMPLR_ROOT $ENV{CMPLR_ROOT} )
159- set (CMAKE_CXX_COMPILER icpx)
160+ list (APPEND VLLM_INCLUDE_DIR ${CMPLR_ROOT} /include /)
161+ list (APPEND VLLM_INCLUDE_DIR ${CMPLR_ROOT} /include /sycl/)
162+ list (APPEND VLLM_INCLUDE_DIR ${CMPLR_ROOT} /include /syclcompat/)
163+ message (STATUS "VLLM_INCLUDE_DIR: ${VLLM_INCLUDE_DIR} " )
160164 set (VLLM_EXTRA_INCLUDE_DIRECTORIES ${CMPLR_ROOT} /include/sycl)
161165 list (APPEND VLLM_GPU_FLAGS "-DVLLM_BUILD_XPU_OPS" )
162- list (APPEND VLLM_GPU_LINK_FLAGS "-fsycl" "-fsycl-targets=spir64" )
166+ list (APPEND VLLM_GPU_LINK_FLAGS "-fsycl" "-fsycl-targets=spir64" "-Xspirv-translator" "-spirv-ext=+SPV_INTEL_split_barrier" )
163167 list (APPEND VLLM_LINK_LIBRARIES "sycl" "OpenCL" "pthread" "m" "dl" "torch" )
168+
169+
170+ # add cutlass dependency
171+ set (CUTLASS_ENABLE_HEADERS_ONLY "ON" CACHE BOOL "Enable only the header library" )
172+
173+ # Set CUTLASS_REVISION. Used for FetchContent. Also fixes some bogus messages when building.
174+ set (CUTLASS_REVISION "main" CACHE STRING "CUTLASS revision to use" )
175+
176+ # Fetch the CUTLASS-SYCL sources from GitHub at CUTLASS_REVISION (no local source-directory override is handled here)
177+ FetchContent_Declare (
178+ cutlass-sycl
179+ GIT_REPOSITORY https://github.com/intel/cutlass-sycl
180+ # Taken directly from CUTLASS_REVISION above, so the two cannot drift apart.
181+ GIT_TAG ${CUTLASS_REVISION}
182+ GIT_PROGRESS TRUE
183+
184+ # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
185+ # Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags.
186+ # So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE
187+ GIT_SHALLOW TRUE
188+ )
189+
190+ # cutlass compilation flags
191+ set (CUTLASS_ENABLE_SYCL "ON" )
192+ # set(DPCPP_SYCL_TARGET "intel_gpu_pvc;intel_gpu_bmg_g21" CACHE STRING "DPC++ SYCL target architectures")
193+ set (CMAKE_EXPORT_COMPILE_COMMANDS "ON" )
194+ set (CUTLASS_ENABLE_BENCHMARKS "OFF" )
195+ # disable cuda
196+ set (CUTLASS_ENABLE_GDC_FOR_SM100_DEFAULT OFF CACHE BOOL "DISABLE CUDA" )
197+ # list(APPEND CMAKE_CXX_FLAGS "-ftemplate-backtrace-limit=0 " )
198+ # list(APPEND CMAKE_CXX_FLAGS "-fdiagnostics-color=always " )
199+
200+
201+ FetchContent_MakeAvailable (cutlass-sycl)
202+ set (CUTLASS_INCLUDE_DIR ${cutlass-sycl_SOURCE_DIR }/include CACHE PATH "CUTLASS Header Library" )
203+ set (CUTLASS_TOOLS_UTIL_INCLUDE_DIR ${cutlass-sycl_SOURCE_DIR }/tools/util/include CACHE INTERNAL "" )
204+ set (CUTLASS_APP_INCLUDE_DIR ${cutlass-sycl_SOURCE_DIR }/applications CACHE INTERNAL "" )
205+ message (STATUS "cutlass dir: ${CUTLASS_INCLUDE_DIR} and ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR} and ${CUTLASS_APP_INCLUDE_DIR} " )
206+
207+ # header only library
208+ list (APPEND VLLM_GPU_FLAGS "-DCUTLASS_ENABLE_SYCL" )
209+ list (APPEND VLLM_GPU_FLAGS "-DSYCL_INTEL_TARGET" )
210+ list (APPEND VLLM_GPU_FLAGS "-DCUTLASS_VERSIONS_GENERATED" )
211+ list (APPEND VLLM_GPU_FLAGS "-ftemplate-backtrace-limit=0" )
212+ list (APPEND VLLM_GPU_FLAGS "-fdiagnostics-color=always" )
213+
164214endif ()
165215
166216message (STATUS "Enabling C extension." )
@@ -174,9 +224,48 @@ define_gpu_extension_target(
174224 ARCHITECTURES ${VLLM_GPU_ARCHES}
175225 INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
176226 INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
227+ INCLUDE_DIRECTORIES ${CUTLASS_APP_INCLUDE_DIR}
228+ INCLUDE_DIRECTORIES ${VLLM_INCLUDE_DIR}
177229 USE_SABI 3
178230 WITH_SOABI )
179231
#
# flash attention _C extension
#
# Builds the `_vllm_fa2_C` module from the sources in csrc/flash_attn when
# FA2_ENABLED is true. Compile and link flags start from the common
# VLLM_GPU_FLAGS / VLLM_GPU_LINK_FLAGS and are extended with the
# spir64_gen-specific settings below.
#

if(FA2_ENABLED)
  message(STATUS "Enabling fa2 extension.")

  # Device name forwarded to the spir64_gen backend through `-device`.
  # Kept as a cache variable so a different device can be selected with
  # -DVLLM_FA2_AOT_DEVICE=<name>; the default matches the previously
  # hard-coded value.
  set(VLLM_FA2_AOT_DEVICE "bmg-g21-a0" CACHE STRING
      "Device passed to the spir64_gen backend (-device) for the fa2 extension")

  # Collect every .cpp in csrc/flash_attn. CONFIGURE_DEPENDS makes the build
  # re-check the glob so newly added files are noticed. flash_api.cpp is
  # named explicitly in SOURCES below, so it is removed from the glob result
  # to avoid listing the same file twice.
  file(GLOB FA2_GEN_SRCS CONFIGURE_DEPENDS "csrc/flash_attn/*.cpp")
  list(FILTER FA2_GEN_SRCS EXCLUDE REGEX "/flash_api\\.cpp$")

  set(CUTLASS_GPU_FLAGS ${VLLM_GPU_FLAGS})
  set(CUTLASS_LINK_FLAGS ${VLLM_GPU_LINK_FLAGS})

  # XPU FLAGS
  list(APPEND CUTLASS_GPU_FLAGS "-O3" "-DNDEBUG")
  list(APPEND CUTLASS_GPU_FLAGS "-gline-tables-only")
  list(APPEND CUTLASS_GPU_FLAGS
       "-fsycl" "-fsycl-targets=spir64_gen" "-ftemplate-backtrace-limit=10")
  # The SYCL section of this file appends -ftemplate-backtrace-limit=0 to
  # VLLM_GPU_FLAGS; drop that inherited copy so only the value chosen for
  # this target (10) is passed.
  list(REMOVE_ITEM CUTLASS_GPU_FLAGS "-ftemplate-backtrace-limit=0")

  # NOTE(review): VLLM_GPU_LINK_FLAGS may already contain
  # -fsycl-targets=spir64, so both target lists end up on the link line --
  # confirm this combination is intended.
  list(APPEND CUTLASS_LINK_FLAGS "-fsycl" "-fsycl-targets=spir64_gen")
  # The backend options must stay a single quoted argument so they follow
  # -Xsycl-target-backend as one token.
  list(APPEND CUTLASS_LINK_FLAGS
       -Xsycl-target-backend=spir64_gen
       "-device ${VLLM_FA2_AOT_DEVICE} -internal_options -cl-intel-256-GRF-per-thread")

  define_gpu_extension_target(
    _vllm_fa2_C
    DESTINATION vllm_xpu_kernels
    LANGUAGE ${VLLM_GPU_LANG}
    SOURCES
      csrc/flash_attn/flash_api.cpp
      ${FA2_GEN_SRCS}
    COMPILE_FLAGS ${CUTLASS_GPU_FLAGS}
    LINK_FLAGS ${CUTLASS_LINK_FLAGS}
    ARCHITECTURES ${VLLM_GPU_ARCHES}
    INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
    INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
    INCLUDE_DIRECTORIES ${CUTLASS_APP_INCLUDE_DIR}
    INCLUDE_DIRECTORIES ${VLLM_INCLUDE_DIR}
    USE_SABI 3
    WITH_SOABI)
endif()
268+
180269#
181270# xpu only ops/kernels, implemented with cutlass/onednn/sycl.
182271#
0 commit comments