-
Notifications
You must be signed in to change notification settings - Fork 723
Add samples for SYCL-Graph extension #2643
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
konradkusiak97
wants to merge
10
commits into
oneapi-src:development
Choose a base branch
from
konradkusiak97:addSyclGraphSample
base: development
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 2 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
d8cf20c
Added Sycl Graph samples
konradkusiak97 55560f5
Removed Samples dir and left only 2 samples working with limited grap…
konradkusiak97 68249ec
Update copyright in dotProduct.cpp
konradkusiak97 885254d
Fix comment: Free memory outside the graph only
konradkusiak97 2743331
include experimental graph.hpp header
konradkusiak97 27c06da
update description
konradkusiak97 0ad5c2a
Add verification code and print success
konradkusiak97 9ec8d86
Applied requested changes
konradkusiak97 f10b38c
Added icpx as a default compiler
konradkusiak97 ee2830c
Added sample.json for both samples
konradkusiak97 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
66 changes: 66 additions & 0 deletions
66
DirectProgramming/C++SYCL/SYCL-Graph/common/aspect_queries.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
//============================================================== | ||
// Copyright © 2025 Codeplay Software | ||
// | ||
// SPDX-License-Identifier: MIT | ||
// ============================================================= | ||
|
||
#pragma once | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
#include <iostream> | ||
#include <string> | ||
|
||
inline void ensure_full_aspects_support(const sycl::device &dev) { | ||
std::string error_msg; | ||
|
||
if (!dev.has(sycl::aspect::ext_oneapi_graph)) { | ||
error_msg += "Error: The device does NOT support ext_oneapi_graph. \n"; | ||
} | ||
if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) { | ||
error_msg += | ||
"Error: The device does NOT support ext_oneapi_limited_graph. \n"; | ||
} | ||
if (!dev.has(sycl::aspect::usm_shared_allocations)) { | ||
error_msg += | ||
"Error: The device does NOT support usm_shared_allocations. \n"; | ||
} | ||
|
||
if (!error_msg.empty()) { | ||
std::cerr << error_msg; | ||
std::exit(1); | ||
} | ||
}; | ||
|
||
inline void ensure_required_aspects_support(const sycl::device &dev) { | ||
std::string error_msg; | ||
|
||
if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) { | ||
error_msg += | ||
"Error: The device does NOT support ext_oneapi_limited_graph. \n"; | ||
} | ||
if (!dev.has(sycl::aspect::usm_shared_allocations)) { | ||
error_msg += | ||
"Error: The device does NOT support usm_shared_allocations. \n"; | ||
} | ||
|
||
if (!error_msg.empty()) { | ||
std::cerr << error_msg; | ||
std::exit(1); | ||
} | ||
}; | ||
|
||
inline void ensure_full_graph_support(const sycl::device &dev) { | ||
if (!dev.has(sycl::aspect::ext_oneapi_graph)) { | ||
std::cerr << "Error: The device does NOT support ext_oneapi_graph.\n"; | ||
std::exit(1); | ||
} | ||
}; | ||
|
||
inline void ensure_graph_support(const sycl::device &dev) { | ||
if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) { | ||
std::cerr | ||
<< "Error: The device does NOT support ext_oneapi_limited_graph.\n"; | ||
std::exit(1); | ||
} | ||
}; |
90 changes: 90 additions & 0 deletions
90
DirectProgramming/C++SYCL/SYCL-Graph/common/cmake/ConfigureSYCL.cmake
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
############################################################################ | ||
## Copyright © 2025 Codeplay Software | ||
## | ||
## SPDX-License-Identifier: MIT | ||
############################################################################ | ||
|
||
# ------------------------------------------------ | ||
# Detect available backends | ||
# ------------------------------------------------ | ||
execute_process( | ||
COMMAND bash -c "! sycl-ls | grep -q cuda" | ||
RESULT_VARIABLE CUDA_BACKEND_AVAILABLE) | ||
execute_process( | ||
COMMAND bash -c "! sycl-ls | grep -q hip" | ||
RESULT_VARIABLE HIP_BACKEND_AVAILABLE) | ||
execute_process( | ||
COMMAND bash -c "! sycl-ls | grep -q 'opencl\\|level_zero'" | ||
RESULT_VARIABLE SPIR_BACKEND_AVAILABLE) | ||
|
||
set(ENABLE_CUDA ${CUDA_BACKEND_AVAILABLE} CACHE BOOL "Build with CUDA target") | ||
set(ENABLE_HIP ${HIP_BACKEND_AVAILABLE} CACHE BOOL "Build with HIP target") | ||
set(ENABLE_SPIR ${SPIR_BACKEND_AVAILABLE} CACHE BOOL "Build with spir64 target") | ||
set(SYCL_TARGETS "") | ||
|
||
# ------------------------------------------------ | ||
# Configure CUDA target | ||
# ------------------------------------------------ | ||
if(${ENABLE_CUDA}) | ||
string(JOIN "," SYCL_TARGETS "${SYCL_TARGETS}" "nvptx64-nvidia-cuda") | ||
set(DEFAULT_CUDA_COMPUTE_CAPABILITY "50") | ||
set(CUDA_COMPUTE_CAPABILITY "" CACHE BOOL | ||
"CUDA architecture (compute capability), e.g. sm_80. Default value is auto-configured using nvidia-smi.") | ||
# Auto-configure if not specified by user | ||
if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "") | ||
execute_process( | ||
COMMAND bash -c "which nvidia-smi >/dev/null && nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | tr -d '.'" | ||
OUTPUT_VARIABLE CUDA_COMPUTE_CAPABILITY | ||
OUTPUT_STRIP_TRAILING_WHITESPACE) | ||
endif() | ||
# Warn if not specified and failed to auto-configure | ||
if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "") | ||
message(WARNING "Failed to autoconfigure CUDA_COMPUTE_CAPABILITY using nvidia-smi. Will default to sm_${DEFAULT_CUDA_COMPUTE_CAPABILITY}") | ||
set(CUDA_COMPUTE_CAPABILITY ${DEFAULT_CUDA_COMPUTE_CAPABILITY} CACHE STRING "CUDA Compute Capability") | ||
else() | ||
message(STATUS "Enabled SYCL target CUDA with Compute Capability sm_${CUDA_COMPUTE_CAPABILITY}") | ||
endif() | ||
endif() | ||
|
||
# ------------------------------------------------ | ||
# Configure HIP target | ||
# ------------------------------------------------ | ||
if(${ENABLE_HIP}) | ||
string(JOIN "," SYCL_TARGETS "${SYCL_TARGETS}" "amdgcn-amd-amdhsa") | ||
set(DEFAULT_HIP_GFX_ARCH "gfx906") | ||
set(HIP_GFX_ARCH "" CACHE BOOL | ||
"HIP architecture tag, e.g. gfx90a. Default value is auto-configured using rocminfo.") | ||
# Auto-configure if not specified by user | ||
if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "") | ||
execute_process( | ||
COMMAND bash -c "which rocminfo >/dev/null && rocminfo | grep -o 'gfx[0-9]*' | head -n 1" | ||
OUTPUT_VARIABLE HIP_GFX_ARCH | ||
OUTPUT_STRIP_TRAILING_WHITESPACE) | ||
endif() | ||
# Warn if not specified and failed to auto-configure | ||
if ("${HIP_GFX_ARCH}" STREQUAL "") | ||
message(WARNING "Failed to autoconfigure HIP_GFX_ARCH using rocminfo. Will default to ${DEFAULT_HIP_GFX_ARCH}") | ||
set(HIP_GFX_ARCH ${DEFAULT_HIP_GFX_ARCH} CACHE STRING "HIP gfx arch") | ||
else() | ||
message(STATUS "Enabled SYCL target HIP with gfx arch ${HIP_GFX_ARCH}") | ||
endif() | ||
endif() | ||
|
||
# ------------------------------------------------ | ||
# Configure spir64 target | ||
# ------------------------------------------------ | ||
if(${ENABLE_SPIR}) | ||
string(JOIN "," SYCL_TARGETS "${SYCL_TARGETS}" "spir64") | ||
message(STATUS "Enabled SYCL target spir64") | ||
endif() | ||
|
||
# ------------------------------------------------ | ||
# Configure the complete SYCL flags | ||
# ------------------------------------------------ | ||
set(SYCL_FLAGS -fsycl -fsycl-targets=${SYCL_TARGETS}) | ||
if(${ENABLE_CUDA}) | ||
list(APPEND SYCL_FLAGS -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_${CUDA_COMPUTE_CAPABILITY}) | ||
endif() | ||
if(${ENABLE_HIP}) | ||
list(APPEND SYCL_FLAGS -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${HIP_GFX_ARCH}) | ||
endif() |
33 changes: 33 additions & 0 deletions
33
DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/CMakeLists.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
############################################################################ | ||
## Copyright © 2025 Codeplay Software | ||
## | ||
## SPDX-License-Identifier: MIT | ||
############################################################################ | ||
|
||
cmake_minimum_required(VERSION 3.12) | ||
project(SYCL-Graph-Samples) | ||
|
||
# Set global flags | ||
set(CMAKE_CXX_STANDARD 17) | ||
|
||
# Configure SYCL | ||
include("${CMAKE_SOURCE_DIR}/../common/cmake/ConfigureSYCL.cmake") | ||
|
||
# Output directory for executables | ||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} CACHE PATH "" FORCE) | ||
|
||
# Find all .cpp files in the src/ directory | ||
file(GLOB SAMPLE_SOURCES "${CMAKE_SOURCE_DIR}/src/*.cpp") | ||
|
||
# Add executable for each .cpp file | ||
foreach(SOURCE_FILE ${SAMPLE_SOURCES}) | ||
# Extract the file name without the extension | ||
get_filename_component(EXE_NAME ${SOURCE_FILE} NAME_WE) | ||
|
||
# Create executable | ||
add_executable(${EXE_NAME} ${SOURCE_FILE}) | ||
|
||
# Add SYCL flags | ||
target_compile_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS}) | ||
target_link_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS}) | ||
endforeach() |
107 changes: 107 additions & 0 deletions
107
DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/README.md
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
# Diamond Dependency Sample | ||
|
||
Code example demonstrating the usage of [`sycl_ext_oneapi_graph`](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc) extension with queue recording API. | ||
|
||
| Area | Description | ||
|:--- |:--- | ||
| What you will learn | How to use SYCL-Graphs extension for optimizing kernel execution. | ||
| Time to complete | 15 minutes | ||
|
||
|
||
## Purpose | ||
|
||
This code example shows how a SYCL queue can be put into a recording state, which allows a `command_graph` object to be populated by the command-groups submitted to the queue. Once the graph is complete, recording finishes on the queue to put it back into the default executing state. The graph is then finalized so that no more nodes can be added. Lastly, the graph is submitted in its entirety for execution via `handler::ext_oneapi_graph(command_graph<graph_state::executable>)`. | ||
|
||
|
||
## Prerequisites | ||
| Optimized for | Description | ||
|:--- |:--- | ||
| OS | Linux* Ubuntu* <br>Windows* 10, 11 | ||
| Hardware | Intel GPU | ||
| Software | Intel® oneAPI DPC++/C++ Compiler | ||
|
||
|
||
## Key Implementation Details | ||
|
||
Key SYCL* concepts demonstrated in the code sample include using command graph extension with buffers and accessors. This sample is using queue recording API. | ||
|
||
>**Note**: For comprehensive information about oneAPI programming, see the *[Intel® oneAPI Programming Guide](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/overview.html)*. (Use search or the table of contents to find relevant information quickly.) | ||
|
||
|
||
## Set Environment Variables | ||
|
||
When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development. | ||
|
||
> **Note**: You can use [Modulefiles scripts](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-modulefiles-with-linux.html) to set up your development environment. The modulefiles scripts work with all Linux shells. | ||
|
||
> **Note**: If you want only specific components or versions of those components, use a [setvars config file](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-a-config-file-for-setvars-sh-on-linux-or-macos.html) to set up your development environment. | ||
|
||
|
||
## Build the `Diamond Dependency` Sample for CPU and GPU | ||
|
||
> **Note**: If you have not already done so, set up your CLI | ||
> environment by sourcing the `setvars` script in the root of your oneAPI installation. | ||
> | ||
> Linux*: | ||
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh` | ||
> - For private installations: ` . ~/intel/oneapi/setvars.sh` | ||
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'` | ||
> | ||
> Windows*: | ||
> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat` | ||
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` | ||
> | ||
> For more information on configuring environment variables or if you have a Unified Directory Layout, see | ||
*[Use the setvars and oneapi-vars Scripts with Linux*](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars and oneapi-vars Scripts with Windows*](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-the-setvars-script-with-windows.html)*. | ||
|
||
### On Linux* | ||
|
||
The project uses a standard CMake build configuration system. Ensure the SYCL compiler is used by the configuration either by setting the environment variable `CXX=<compiler>` or passing the configuration flag | ||
`-DCMAKE_CXX_COMPILER=<compiler>` where `<compiler>` is your SYCL compiler's | ||
executable (for example Intel `icpx` or LLVM `clang++`). | ||
|
||
1. Change to the sample directory. | ||
2. Build the program. | ||
``` | ||
mkdir -p build && cd build | ||
cmake .. -DCMAKE_CXX_COMPILER=<compiler> | ||
cmake --build . | ||
``` | ||
|
||
The CMake configuration automatically detects the available SYCL backends and | ||
enables the SPIR/CUDA/HIP targets for the device code, including the corresponding | ||
architecture flags. If desired, these auto-configured cmake options may be overridden | ||
with the following ones: | ||
|
||
| OPTION | VALUE | ||
|:--- |:--- | ||
| ENABLE_SPIR | ON or OFF | ||
| ENABLE_CUDA | ON or OFF | ||
| ENABLE_HIP | ON or OFF | ||
| CUDA_COMPUTE_CAPABILITY | Integer, e.g. `70` meaning capability 7.0 (arch `sm_70`) | ||
| HIP_GFX_ARCH | String, e.g. `gfx1030` | ||
|
||
#### Troubleshooting | ||
|
||
If an error occurs, you can get more details by running `make` with | ||
the `VERBOSE=1` argument: | ||
``` | ||
make VERBOSE=1 | ||
``` | ||
If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the *[Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/docs/oneapi/user-guide-diagnostic-utility/current/overview.html)* for more information on using the utility. | ||
|
||
|
||
## Run the `Diamond Dependency` Sample | ||
|
||
### On Linux | ||
|
||
1. Run the program. | ||
``` | ||
./diamondDependency | ||
``` | ||
|
||
## License | ||
|
||
Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. | ||
|
||
Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt). |
96 changes: 96 additions & 0 deletions
96
DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/src/diamondDependency.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
//============================================================== | ||
// Copyright © 2025 Codeplay Software | ||
// | ||
// SPDX-License-Identifier: MIT | ||
// ============================================================= | ||
|
||
#include "../../common/aspect_queries.hpp" | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
namespace sycl_ext = sycl::ext::oneapi::experimental; | ||
using namespace sycl; | ||
|
||
int main() { | ||
constexpr size_t Size = 1024; | ||
|
||
queue Queue{}; | ||
|
||
ensure_graph_support(Queue.get_device()); | ||
|
||
std::vector<int> DataA(Size), DataB(Size), DataC(Size); | ||
|
||
// Lifetime of buffers must exceed the lifetime of graphs they are used in. | ||
buffer<int> BufferA{DataA.data(), range<1>{Size}}; | ||
BufferA.set_write_back(false); | ||
buffer<int> BufferB{DataB.data(), range<1>{Size}}; | ||
BufferB.set_write_back(false); | ||
buffer<int> BufferC{DataC.data(), range<1>{Size}}; | ||
BufferC.set_write_back(false); | ||
|
||
{ | ||
// New object representing graph of command-groups | ||
sycl_ext::command_graph Graph( | ||
Queue.get_context(), Queue.get_device(), | ||
{sycl_ext::property::graph::assume_buffer_outlives_graph{}}); | ||
|
||
// `Queue` will be put in the recording state where commands are recorded to | ||
// `Graph` rather than submitted for execution immediately. | ||
Graph.begin_recording(Queue); | ||
|
||
// Record commands to `Graph` with the following topology. | ||
// | ||
// increment_kernel | ||
// / \ | ||
// A->/ A->\ | ||
// / \ | ||
// add_kernel subtract_kernel | ||
// \ / | ||
// B->\ C->/ | ||
// \ / | ||
// decrement_kernel | ||
|
||
Queue.submit([&](handler &CGH) { | ||
auto Pdata = BufferA.get_access<access::mode::read_write>(CGH); | ||
CGH.parallel_for<class Increment_kernel>( | ||
range<1>(Size), [=](item<1> Id) { Pdata[Id]++; }); | ||
}); | ||
|
||
Queue.submit([&](handler &CGH) { | ||
auto Pdata1 = BufferA.get_access<access::mode::read>(CGH); | ||
auto Pdata2 = BufferB.get_access<access::mode::read_write>(CGH); | ||
CGH.parallel_for<class Add_kernel>( | ||
range<1>(Size), [=](item<1> Id) { Pdata2[Id] += Pdata1[Id]; }); | ||
}); | ||
|
||
Queue.submit([&](handler &CGH) { | ||
auto Pdata1 = BufferA.get_access<access::mode::read>(CGH); | ||
auto Pdata2 = BufferC.get_access<access::mode::read_write>(CGH); | ||
CGH.parallel_for<class Subtract_kernel>( | ||
range<1>(Size), [=](item<1> Id) { Pdata2[Id] -= Pdata1[Id]; }); | ||
}); | ||
|
||
Queue.submit([&](handler &CGH) { | ||
auto Pdata1 = BufferB.get_access<access::mode::read_write>(CGH); | ||
auto Pdata2 = BufferC.get_access<access::mode::read_write>(CGH); | ||
CGH.parallel_for<class Decrement_kernel>(range<1>(Size), [=](item<1> Id) { | ||
Pdata1[Id]--; | ||
Pdata2[Id]--; | ||
}); | ||
}); | ||
|
||
// `Queue` will be returned to the executing state where commands are | ||
// submitted immediately for extension. | ||
Graph.end_recording(); | ||
|
||
// Finalize the modifiable graph to create an executable graph that can be | ||
// submitted for execution. | ||
auto Exec_graph = Graph.finalize(); | ||
|
||
// Execute graph | ||
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(Exec_graph); }) | ||
.wait(); | ||
} | ||
|
||
return 0; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.