Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions cmake/SetupRajaConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,218 @@ set(RAJA_ENABLE_HIP ${ENABLE_HIP})
set(RAJA_ENABLE_SYCL ${ENABLE_SYCL})
set(RAJA_ENABLE_CUB ${ENABLE_CUB})

# Scan a C++ header for alias declarations named `<prefix>_<suffix>` and hand
# their source text back to the caller.
#
# A recognized declaration is a line starting with `using <prefix>_<suffix> =`
# that runs until a line ending in `;` (optionally followed by a `//` comment).
# A directly preceding `template<...>` header (possibly spanning several lines,
# with only `//` comment lines in between) is kept as part of the block.
#
# Arguments:
#   header      Path of the header to scan.
#   prefix      Alias name prefix to look for (inserted into regular
#               expressions as-is).
#   out_prefix  Prefix of the result variables.
#
# Results (set in the caller's scope):
#   <out_prefix>_SUFFIXES        List of unique suffixes, in first-seen order.
#   <out_prefix>_BLOCK_<suffix>  Text of the declaration; the last one wins if
#                                a suffix is declared more than once.
#
# Note: lines are split with CMake list semantics, so a line containing an
# unbalanced `[` is merged with the lines that follow it until the bracket is
# closed.
function(raja_collect_prefixed_alias_blocks header prefix out_prefix)
  # The header is parsed at configure time; re-run CMake when it changes so
  # that anything generated from the collected blocks does not go stale.
  set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${header}")

  file(READ "${header}" _content)
  # Normalize line endings, protect literal semicolons, then split into lines.
  string(REPLACE "\r\n" "\n" _content "${_content}")
  string(REPLACE "\r" "\n" _content "${_content}")
  string(REPLACE ";" "\\;" _content "${_content}")
  string(REPLACE "\n" ";" _lines "${_content}")

  set(_suffixes "")
  set(_pending_template "")
  set(_collecting_template FALSE)
  set(_collecting FALSE)
  set(_alias_block "")

  foreach(_line IN LISTS _lines)
    string(STRIP "${_line}" _stripped)

    # Inside a multi-line `template<...>` header: keep accumulating until the
    # line that ends with the closing `>`.
    if(_collecting_template)
      string(APPEND _pending_template "${_line}\n")
      if(_stripped MATCHES ">[ \t]*$")
        set(_collecting_template FALSE)
      endif()
      continue()
    endif()

    if(NOT _collecting)
      if(_stripped MATCHES "^template[ \t]*<")
        string(APPEND _pending_template "${_line}\n")
        if(NOT _stripped MATCHES ">[ \t]*$")
          set(_collecting_template TRUE)
        endif()
        continue()
      endif()

      if(NOT _stripped MATCHES "^using[ \t]+${prefix}_[A-Za-z0-9_]+[ \t]*=")
        # Anything other than a `//` comment separates a template header from
        # a later alias, so the pending header no longer applies.
        if(NOT _stripped MATCHES "^//")
          set(_pending_template "")
        endif()
        continue()
      endif()

      # First line of a matching alias: start a block with the pending
      # template header (if any) and fall through to the shared handling.
      set(_alias_block "${_pending_template}")
      set(_pending_template "")
      set(_collecting TRUE)
    endif()

    string(APPEND _alias_block "${_line}\n")
    if(_stripped MATCHES ";[ \t]*(//.*)?$")
      # The declaration is complete; record it under its suffix.
      string(REGEX MATCH "using[ \t]+${prefix}_([A-Za-z0-9_]+)[ \t]*="
             _match
             "${_alias_block}")
      set(_suffix "${CMAKE_MATCH_1}")
      # Compare against the empty string instead of testing truthiness, so
      # suffixes such as "0", "N" or "OFF" are not silently discarded.
      if(NOT _suffix STREQUAL "")
        list(APPEND _suffixes "${_suffix}")
        set(${out_prefix}_BLOCK_${_suffix} "${_alias_block}" PARENT_SCOPE)
      endif()
      set(_alias_block "")
      set(_collecting FALSE)
    endif()
  endforeach()

  list(REMOVE_DUPLICATES _suffixes)
  set(${out_prefix}_SUFFIXES "${_suffixes}" PARENT_SCOPE)
endfunction()

# Produce the text of `device_<suffix>` alias declarations from previously
# collected backend alias blocks.
#
# Arguments:
#   backend_prefix  Backend alias prefix to rename (e.g. cuda or hip).
#   block_prefix    Prefix of the `<block_prefix>_BLOCK_<suffix>` variables
#                   holding the collected declarations.
#   suffixes        NAME of the list variable with the suffixes to render.
#   out_var         Name of the caller's variable receiving the rendered text.
#
# In every block, `using <backend_prefix>_<suffix> =` is rewritten to
# `using device_<suffix> =`; the rest of the block is copied unchanged and
# each rendered block is followed by one extra newline.
function(raja_render_device_alias_blocks backend_prefix block_prefix suffixes out_var)
  set(_output "")
  foreach(_name IN LISTS ${suffixes})
    set(_source_var "${block_prefix}_BLOCK_${_name}")
    string(REGEX REPLACE
           "using[ \t]+${backend_prefix}_${_name}([ \t]*=)"
           "using device_${_name}\\1"
           _renamed
           "${${_source_var}}")
    set(_output "${_output}${_renamed}\n")
  endforeach()
  set(${out_var} "${_output}" PARENT_SCOPE)
endfunction()

# Translate CUDA/HIP-style axis letters into SYCL dimension digits.
#
# Every `x` in `xyz` becomes `2`, every `y` becomes `1` and every `z` becomes
# `0`; all other characters are copied unchanged. The result is stored in the
# caller's variable named by `out_var`.
function(raja_map_xyz_to_sycl_dims xyz out_var)
  set(_axis_letters x y z)
  set(_dim_digits 2 1 0)
  set(_translated "${xyz}")
  foreach(_index RANGE 2)
    list(GET _axis_letters ${_index} _letter)
    list(GET _dim_digits ${_index} _digit)
    string(REPLACE "${_letter}" "${_digit}" _translated "${_translated}")
  endforeach()
  set(${out_var} "${_translated}" PARENT_SCOPE)
endfunction()

# Collect the `cuda_*` and `hip_*` alias declarations from the backend policy
# headers.
raja_collect_prefixed_alias_blocks(
  "${PROJECT_SOURCE_DIR}/include/RAJA/policy/cuda/policy.hpp"
  cuda
  RAJA_CUDA_ALIAS)
raja_collect_prefixed_alias_blocks(
  "${PROJECT_SOURCE_DIR}/include/RAJA/policy/hip/policy.hpp"
  hip
  RAJA_HIP_ALIAS)

# Only suffixes declared by BOTH backends get a `device_*` alias. The list
# keeps the order in which the suffixes appear in the CUDA header.
set(RAJA_DEVICE_COMMON_ALIAS_SUFFIXES "")
foreach(_suffix IN LISTS RAJA_CUDA_ALIAS_SUFFIXES)
  if(DEFINED RAJA_HIP_ALIAS_BLOCK_${_suffix})
    list(APPEND RAJA_DEVICE_COMMON_ALIAS_SUFFIXES "${_suffix}")
  endif()
endforeach()

# Render the common aliases once per backend, renamed to `device_<suffix>`.
raja_render_device_alias_blocks(
  cuda
  RAJA_CUDA_ALIAS
  RAJA_DEVICE_COMMON_ALIAS_SUFFIXES
  RAJA_DEVICE_CUDA_ALIAS_BLOCKS)
raja_render_device_alias_blocks(
  hip
  RAJA_HIP_ALIAS
  RAJA_DEVICE_COMMON_ALIAS_SUFFIXES
  RAJA_DEVICE_HIP_ALIAS_BLOCKS)

# `device_launch_t` is written out by hand, but only when it has not already
# been rendered from the policy headers above: declaring the alias template
# (and its default template argument) twice would not compile.
list(FIND RAJA_DEVICE_COMMON_ALIAS_SUFFIXES "launch_t" _raja_launch_t_index)
if(_raja_launch_t_index EQUAL -1)
  string(APPEND RAJA_DEVICE_CUDA_ALIAS_BLOCKS
    "template<bool Async, int num_threads = named_usage::unspecified>\n"
    "using device_launch_t = RAJA::cuda_launch_t<Async, num_threads>;\n\n")
  string(APPEND RAJA_DEVICE_HIP_ALIAS_BLOCKS
    "template<bool Async, int num_threads = named_usage::unspecified>\n"
    "using device_launch_t = RAJA::hip_launch_t<Async, num_threads>;\n\n")
endif()
unset(_raja_launch_t_index)

# The SYCL headers are inspected at configure time to find out which SYCL
# policies exist; re-run CMake when they change so the generated aliases do
# not go stale.
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
  "${PROJECT_SOURCE_DIR}/include/RAJA/policy/sycl/policy.hpp"
  "${PROJECT_SOURCE_DIR}/include/RAJA/policy/sycl/launch.hpp")

file(READ "${PROJECT_SOURCE_DIR}/include/RAJA/policy/sycl/policy.hpp"
     RAJA_SYCL_POLICY_CONTENT)
file(READ "${PROJECT_SOURCE_DIR}/include/RAJA/policy/sycl/launch.hpp"
     RAJA_SYCL_LAUNCH_CONTENT)
set(RAJA_SYCL_PUBLIC_ALIAS_CONTENT
    "${RAJA_SYCL_POLICY_CONTENT}\n${RAJA_SYCL_LAUNCH_CONTENT}")

# Build the SYCL counterpart of every alias shared by CUDA and HIP.
# `exec`, `exec_async` and `launch_t` are written out directly. The index
# mapping policies are translated from x/y/z names to SYCL dimension numbers
# and emitted only if the SYCL headers declare the target as a `using` alias
# or a `struct`. Suffixes without a known SYCL counterpart are skipped.
set(RAJA_DEVICE_SYCL_ALIAS_BLOCKS "")
set(_raja_sycl_launch_t_emitted FALSE)
foreach(_suffix IN LISTS RAJA_DEVICE_COMMON_ALIAS_SUFFIXES)
  # Description of the SYCL target for the pattern-based cases below:
  #   _sycl_name      SYCL policy name (empty: nothing to emit here)
  #   _sycl_template  optional `template<...>` line placed before the alias
  #   _sycl_args      optional template argument list placed after the name
  set(_sycl_name "")
  set(_sycl_template "")
  set(_sycl_args "")

  if(_suffix STREQUAL "exec")
    string(APPEND RAJA_DEVICE_SYCL_ALIAS_BLOCKS
      "template<size_t WORK_GROUP_SIZE, bool Async = false>\n"
      "using device_exec = RAJA::sycl_exec<WORK_GROUP_SIZE, Async>;\n\n")
  elseif(_suffix STREQUAL "exec_async")
    string(APPEND RAJA_DEVICE_SYCL_ALIAS_BLOCKS
      "template<size_t WORK_GROUP_SIZE>\n"
      "using device_exec_async = device_exec<WORK_GROUP_SIZE, true>;\n\n")
  elseif(_suffix STREQUAL "launch_t")
    string(APPEND RAJA_DEVICE_SYCL_ALIAS_BLOCKS
      "template<bool Async, int num_threads = RAJA::named_usage::unspecified>\n"
      "using device_launch_t = RAJA::sycl_launch_t<Async, num_threads>;\n\n")
    set(_raja_sycl_launch_t_emitted TRUE)
  elseif(_suffix MATCHES "^global_size_([xyz])_direct$")
    set(_axis "${CMAKE_MATCH_1}")
    raja_map_xyz_to_sycl_dims("${_axis}" _sycl_dims)
    set(_sycl_name "sycl_global_${_sycl_dims}")
    set(_sycl_template "template<int n${_axis}_threads>\n")
    set(_sycl_args "<n${_axis}_threads>")
  elseif(_suffix MATCHES "^global_thread_([xyz]+)$")
    set(_axes "${CMAKE_MATCH_1}")
    raja_map_xyz_to_sycl_dims("${_axes}" _sycl_dims)
    set(_sycl_name "sycl_global_item_${_sycl_dims}")
  elseif(_suffix MATCHES "^(thread|block)_([xyz])_(direct|loop)$")
    # thread_* maps to sycl_local_*, block_* maps to sycl_group_*.
    set(_kind "${CMAKE_MATCH_1}")
    set(_axis "${CMAKE_MATCH_2}")
    set(_mode "${CMAKE_MATCH_3}")
    raja_map_xyz_to_sycl_dims("${_axis}" _sycl_dims)
    if(_kind STREQUAL "thread")
      set(_sycl_scope "local")
    else()
      set(_sycl_scope "group")
    endif()
    set(_sycl_name "sycl_${_sycl_scope}_${_sycl_dims}_${_mode}")
  endif()

  if(NOT _sycl_name STREQUAL "")
    if(RAJA_SYCL_PUBLIC_ALIAS_CONTENT MATCHES
       "(using|struct)[ \t]+${_sycl_name}([ \t=:{<])")
      string(APPEND RAJA_DEVICE_SYCL_ALIAS_BLOCKS
        "${_sycl_template}"
        "using device_${_suffix} = RAJA::${_sycl_name}${_sycl_args};\n\n")
    endif()
  endif()
endforeach()

# CUDA and HIP receive `device_launch_t` even when `launch_t` is not among the
# common suffixes; give SYCL the same alias in that case so all three backends
# expose it.
if(NOT _raja_sycl_launch_t_emitted)
  string(APPEND RAJA_DEVICE_SYCL_ALIAS_BLOCKS
    "template<bool Async, int num_threads = RAJA::named_usage::unspecified>\n"
    "using device_launch_t = RAJA::sycl_launch_t<Async, num_threads>;\n\n")
endif()
unset(_raja_sycl_launch_t_emitted)

# Generate the configured headers in the build tree from their templates.
# config.hpp uses the default substitution rules.
configure_file(
  "${PROJECT_SOURCE_DIR}/include/RAJA/config.hpp.in"
  "${PROJECT_BINARY_DIR}/include/RAJA/config.hpp")
# device.hpp substitutes only @VAR@ references (@ONLY), so `${...}` text in
# the template is left untouched.
configure_file(
  "${PROJECT_SOURCE_DIR}/include/RAJA/policy/device.hpp.in"
  "${PROJECT_BINARY_DIR}/include/RAJA/policy/device.hpp"
  @ONLY)

# Configure CMake config
include(CMakePackageConfigHelpers)
Expand Down
27 changes: 27 additions & 0 deletions docs/sphinx/user_guide/feature/policies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,33 @@ GPU Policies for SYCL
configuration. SYCL dimension 2 always exists and should be used as
one would use the x dimension in CUDA and HIP.

Device policy aliases
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

To simplify transitions between GPU backends (CUDA/HIP/SYCL) and reduce
downstream preprocessor conditionals, RAJA provides a small set of
``device_*`` policy aliases that resolve to the *active* GPU backend.

In particular, the following aliases are available when compiling for a GPU
device backend (i.e., when one of ``RAJA_CUDA_ACTIVE``, ``RAJA_HIP_ACTIVE``,
or ``RAJA_SYCL_ACTIVE`` is defined):

* ``device_exec<BLOCK_SIZE>`` (maps to ``cuda_exec`` / ``hip_exec`` /
``sycl_exec``)
* ``device_launch_t`` (maps to ``cuda_launch_t`` / ``hip_launch_t`` /
``sycl_launch_t``)
* ``device_global_size_{x,y,z}_direct<N>`` (maps to
``cuda_global_size_{x,y,z}_direct`` / ``hip_global_size_{x,y,z}_direct`` /
``sycl_global_{2,1,0}``)
* ``device_global_thread_{x,y,z}``, ``device_thread_{x,y,z}_{direct,loop}``,
and ``device_block_{x,y,z}_{direct,loop}`` (map to the corresponding
backend loop/index mapping policies)

For SYCL, these aliases use CUDA-like (x,y,z) naming with the standard RAJA
mapping described above: x corresponds to SYCL dimension 2, y to dimension 1,
and z to dimension 0.

See also the example ``examples/device-policy-aliases.cpp``.

======================================== ============= ==============================
SYCL Execution Policies Works with Brief description
======================================== ============= ==============================
Expand Down
12 changes: 12 additions & 0 deletions docs/sphinx/user_guide/tutorial/launch_basic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ then enclosed by a host/device lambda which takes a
``RAJA::LaunchContext`` object, which may be used to control the flow
within the kernel, for example by creating thread-team synchronization points.

.. note::
RAJA treats ``Teams(i,j,k)`` and ``Threads(i,j,k)`` as an (x,y,z) ordering.
For users who prefer SYCL's (dim0, dim1, dim2) ordering, RAJA provides
``Teams::sycl_order(dim0, dim1, dim2)`` and
``Threads::sycl_order(dim0, dim1, dim2)``, which map dim2 to x, dim1 to y, and dim0 to z in the RAJA (x,y,z)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this what is meant, or should a more explicit mapping be documented here?

Suggested change
``Threads::sycl_order(dim0, dim1, dim2)``, which map to the RAJA (x,y,z)
``Threads::sycl_order(dim0, dim1, dim2)``, which map to the RAJA (z,y,x)

ordering. For example::

RAJA::LaunchParams(RAJA::Teams::sycl_order(g0, g1, g2),
RAJA::Threads::sycl_order(l0, l1, l2))

See also the example ``examples/launch-device-policy-aliases.cpp``.

Inside the execution space, developers write a kernel using nested
``RAJA::loop`` methods. The manner in which each loop is executed
is determined by a template parameter type, which
Expand Down
8 changes: 8 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ raja_add_executable(
NAME dynamic-forall
SOURCES dynamic-forall.cpp)

# Examples for the device_* policy aliases; each executable is built from the
# source file of the same name.
foreach(_alias_example IN ITEMS
        device-policy-aliases
        launch-device-policy-aliases)
  raja_add_executable(
    NAME ${_alias_example}
    SOURCES ${_alias_example}.cpp)
endforeach()

if (RAJA_ENABLE_JIT)
raja_add_executable(
NAME forall-jit
Expand Down
Loading
Loading