diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/common/aspect_queries.hpp b/DirectProgramming/C++SYCL/SYCL-Graph/common/aspect_queries.hpp new file mode 100644 index 0000000000..5691f8f408 --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/common/aspect_queries.hpp @@ -0,0 +1,48 @@ +//============================================================== +// Copyright © 2025 Codeplay Software +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#pragma once + +#include + +#include +#include + +inline void ensure_full_aspects_support(const sycl::device &dev) { + std::string error_msg; + + if (!dev.has(sycl::aspect::ext_oneapi_graph)) { + error_msg += "Error: The device does NOT support ext_oneapi_graph. \n"; + } + if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) { + error_msg += + "Error: The device does NOT support ext_oneapi_limited_graph. \n"; + } + if (!dev.has(sycl::aspect::usm_shared_allocations)) { + error_msg += + "Error: The device does NOT support usm_shared_allocations. \n"; + } + + if (!error_msg.empty()) { + std::cerr << error_msg; + std::exit(1); + } +}; + +inline void ensure_full_graph_support(const sycl::device &dev) { + if (!dev.has(sycl::aspect::ext_oneapi_graph)) { + std::cerr << "Error: The device does NOT support ext_oneapi_graph.\n"; + std::exit(1); + } +}; + +inline void ensure_graph_support(const sycl::device &dev) { + if (!dev.has(sycl::aspect::ext_oneapi_limited_graph)) { + std::cerr + << "Error: The device does NOT support ext_oneapi_limited_graph.\n"; + std::exit(1); + } +}; diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/common/cmake/ConfigureSYCL.cmake b/DirectProgramming/C++SYCL/SYCL-Graph/common/cmake/ConfigureSYCL.cmake new file mode 100644 index 0000000000..30b07b1907 --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/common/cmake/ConfigureSYCL.cmake @@ -0,0 +1,90 @@ +############################################################################ +## Copyright © 2025 Codeplay Software +## +## SPDX-License-Identifier: MIT +############################################################################ + +# ------------------------------------------------ +# Detect available backends +# ------------------------------------------------ +execute_process( + COMMAND bash -c "! sycl-ls | grep -q cuda" + RESULT_VARIABLE CUDA_BACKEND_AVAILABLE) +execute_process( + COMMAND bash -c "! sycl-ls | grep -q hip" + RESULT_VARIABLE HIP_BACKEND_AVAILABLE) +execute_process( + COMMAND bash -c "! sycl-ls | grep -q 'opencl\\|level_zero'" + RESULT_VARIABLE SPIR_BACKEND_AVAILABLE) + +set(ENABLE_CUDA ${CUDA_BACKEND_AVAILABLE} CACHE BOOL "Build with CUDA target") +set(ENABLE_HIP ${HIP_BACKEND_AVAILABLE} CACHE BOOL "Build with HIP target") +set(ENABLE_SPIR ${SPIR_BACKEND_AVAILABLE} CACHE BOOL "Build with spir64 target") +set(SYCL_TARGETS "") + +# ------------------------------------------------ +# Configure CUDA target +# ------------------------------------------------ +if(${ENABLE_CUDA}) + string(JOIN "," SYCL_TARGETS "${SYCL_TARGETS}" "nvptx64-nvidia-cuda") + set(DEFAULT_CUDA_COMPUTE_CAPABILITY "50") + set(CUDA_COMPUTE_CAPABILITY "" CACHE BOOL + "CUDA architecture (compute capability), e.g. sm_80. Default value is auto-configured using nvidia-smi.") + # Auto-configure if not specified by user + if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "") + execute_process( + COMMAND bash -c "which nvidia-smi >/dev/null && nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | tr -d '.'" + OUTPUT_VARIABLE CUDA_COMPUTE_CAPABILITY + OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() + # Warn if not specified and failed to auto-configure + if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "") + message(WARNING "Failed to autoconfigure CUDA_COMPUTE_CAPABILITY using nvidia-smi. Will default to sm_${DEFAULT_CUDA_COMPUTE_CAPABILITY}") + set(CUDA_COMPUTE_CAPABILITY ${DEFAULT_CUDA_COMPUTE_CAPABILITY} CACHE STRING "CUDA Compute Capability") + else() + message(STATUS "Enabled SYCL target CUDA with Compute Capability sm_${CUDA_COMPUTE_CAPABILITY}") + endif() +endif() + +# ------------------------------------------------ +# Configure HIP target +# ------------------------------------------------ +if(${ENABLE_HIP}) + string(JOIN "," SYCL_TARGETS "${SYCL_TARGETS}" "amdgcn-amd-amdhsa") + set(DEFAULT_HIP_GFX_ARCH "gfx906") + set(HIP_GFX_ARCH "" CACHE BOOL + "HIP architecture tag, e.g. gfx90a. Default value is auto-configured using rocminfo.") + # Auto-configure if not specified by user + if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "") + execute_process( + COMMAND bash -c "which rocminfo >/dev/null && rocminfo | grep -o 'gfx[0-9]*' | head -n 1" + OUTPUT_VARIABLE HIP_GFX_ARCH + OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() + # Warn if not specified and failed to auto-configure + if ("${HIP_GFX_ARCH}" STREQUAL "") + message(WARNING "Failed to autoconfigure HIP_GFX_ARCH using rocminfo. Will default to ${DEFAULT_HIP_GFX_ARCH}") + set(HIP_GFX_ARCH ${DEFAULT_HIP_GFX_ARCH} CACHE STRING "HIP gfx arch") + else() + message(STATUS "Enabled SYCL target HIP with gfx arch ${HIP_GFX_ARCH}") + endif() +endif() + +# ------------------------------------------------ +# Configure spir64 target +# ------------------------------------------------ +if(${ENABLE_SPIR}) + string(JOIN "," SYCL_TARGETS "${SYCL_TARGETS}" "spir64") + message(STATUS "Enabled SYCL target spir64") +endif() + +# ------------------------------------------------ +# Configure the complete SYCL flags +# ------------------------------------------------ +set(SYCL_FLAGS -fsycl -fsycl-targets=${SYCL_TARGETS}) +if(${ENABLE_CUDA}) + list(APPEND SYCL_FLAGS -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_${CUDA_COMPUTE_CAPABILITY}) +endif() +if(${ENABLE_HIP}) + list(APPEND SYCL_FLAGS -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${HIP_GFX_ARCH}) +endif() diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/CMakeLists.txt b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/CMakeLists.txt new file mode 100644 index 0000000000..1dfdb79f07 --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/CMakeLists.txt @@ -0,0 +1,38 @@ +############################################################################ +## Copyright © 2025 Codeplay Software +## +## SPDX-License-Identifier: MIT +############################################################################ + +cmake_minimum_required(VERSION 3.12) + +if (NOT DEFINED CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER icpx) +endif() + +project(SYCL-Graph-Samples) + +# Set global flags +set(CMAKE_CXX_STANDARD 17) + +# Configure SYCL +include("${CMAKE_SOURCE_DIR}/../common/cmake/ConfigureSYCL.cmake") + +# Output directory for executables +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} CACHE PATH "" FORCE) + +# Find all .cpp files in the src/ directory +file(GLOB SAMPLE_SOURCES "${CMAKE_SOURCE_DIR}/src/*.cpp") + +# Add executable for each .cpp file +foreach(SOURCE_FILE ${SAMPLE_SOURCES}) + # Extract the file name without the extension + get_filename_component(EXE_NAME ${SOURCE_FILE} NAME_WE) + + # Create executable + add_executable(${EXE_NAME} ${SOURCE_FILE}) + + # Add SYCL flags + target_compile_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS}) + target_link_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS}) +endforeach() diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/README.md b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/README.md new file mode 100644 index 0000000000..d68581d596 --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/README.md @@ -0,0 +1,108 @@ +# Diamond Dependency Sample + +Code example demonstrating the usage of [`sycl_ext_oneapi_graph`](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc) extension with queue recording API. + +| Area | Description +|:--- |:--- +| What you will learn | How to use SYCL-Graphs extension for optimizing kernel execution. +| Time to complete | 15 minutes + + +## Purpose + +This code example shows how to record commands submitted to a SYCL `queue` into a `command_graph` object. Once the graph recording is complete, the graph is finalized which means a new `command_graph` object in `graph_state::executable` is created which is ready for submission. Lastly, the graph is submitted en bloc for execution to a queue with a new function `ext_oneapi_graph()`. And can be replayed as many times as needed. + + + +## Prerequisites +| Optimized for | Description +|:--- |:--- +| OS | Linux* Ubuntu*
Windows* 10, 11 +| Hardware | Intel GPU +| Software | Intel® oneAPI DPC++/C++ Compiler + + +## Key Implementation Details + +Key SYCL* concepts demonstrated in the code sample include using command graph extension with buffers and accessors. This sample is using queue recording API. + +>**Note**: For comprehensive information about oneAPI programming, see the *[Intel® oneAPI Programming Guide](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/overview.html)*. (Use search or the table of contents to find relevant information quickly.) + + +## Set Environment Variables + +When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development. + +> **Note**: You can use [Modulefiles scripts](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-modulefiles-with-linux.html) to set up your development environment. The modulefiles scripts work with all Linux shells. + +> **Note**: If you want only specific components or versions of those components, use a [setvars config file](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-a-config-file-for-setvars-sh-on-linux-or-macos.html) to set up your development environment. + + +## Build the `Diamond Dependency` Sample for CPU and GPU + +> **Note**: If you have not already done so, set up your CLI +> environment by sourcing the `setvars` script in the root of your oneAPI installation. +> +> Linux*: +> - For system wide installations: `. /opt/intel/oneapi/setvars.sh` +> - For private installations: ` . ~/intel/oneapi/setvars.sh` +> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'` +> +> Windows*: +> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat` +> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` +> +> For more information on configuring environment variables or if you have a Unified Directory Layout, see +*[Use the setvars and oneapi-vars Scripts with Linux*](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars and oneapi-vars Scripts with Windows*](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-the-setvars-script-with-windows.html)*. + +### On Linux* + +The project uses a standard CMake build configuration system. Ensure the SYCL compiler is used by the configuration either by setting the environment variable `CXX=` or passing the configuration flag +`-DCMAKE_CXX_COMPILER=` where `` is your SYCL compiler's +executable (for example Intel `icpx` or LLVM `clang++`). + +1. Change to the sample directory. +2. Build the program. + ``` + mkdir -p build && cd build + cmake .. -DCMAKE_CXX_COMPILER= + cmake --build . + ``` + +The CMake configuration automatically detects the available SYCL backends and +enables the SPIR/CUDA/HIP targets for the device code, including the corresponding +architecture flags. If desired, these auto-configured cmake options may be overridden +with the following ones: + +| OPTION | VALUE +|:--- |:--- +| ENABLE_SPIR | ON or OFF +| ENABLE_CUDA | ON or OFF +| ENABLE_HIP | ON or OFF +| CUDA_COMPUTE_CAPABILITY | Integer, e.g. `70` meaning capability 7.0 (arch `sm_70`) +| HIP_GFX_ARCH | String, e.g. `gfx1030` + +#### Troubleshooting + +If an error occurs, you can get more details by running `make` with +the `VERBOSE=1` argument: +``` +make VERBOSE=1 +``` +If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the *[Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/docs/oneapi/user-guide-diagnostic-utility/current/overview.html)* for more information on using the utility. + + +## Run the `Diamond Dependency` Sample + +### On Linux + +1. Run the program. + ``` + ./diamondDependency + ``` + +## License + +Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. + +Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt). diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/sample.json b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/sample.json new file mode 100644 index 0000000000..fd3d6d18eb --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/sample.json @@ -0,0 +1,40 @@ +{ + "guid": "3AB46B31-4A82-491D-AFF5-6532720E76B8", + "name": "Diamond Dependency", + "categories": [ + "Toolkit/oneAPI Direct Programming/C++SYCL/SYCL-Graph" + ], + "description": "Diamond Dependency kernel structure using SYCL-Graph", + "toolchain": [ + "dpcpp" + ], + "targetDevice": [ + "CPU", + "GPU" + ], + "languages": [ + { + "cpp": {} + } + ], + "os": [ + "linux", + ], + "builder": [ + "cmake" + ], + "ciTests": { + "linux": [ + { + "id": "test", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "cmake --build . -j$(nproc)", + ] + } + ], + }, + "expertise": "Concepts and Functionality" +} diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/src/diamondDependency.cpp b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/src/diamondDependency.cpp new file mode 100644 index 0000000000..91a52b0643 --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/diamond-dependency/src/diamondDependency.cpp @@ -0,0 +1,115 @@ +//============================================================== +// Copyright © 2025 Codeplay Software +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include "../../common/aspect_queries.hpp" + +#include +#include +#include + +namespace sycl_ext = sycl::ext::oneapi::experimental; +using namespace sycl; + +int main() { + constexpr size_t Size = 1024; + + queue Queue{}; + + ensure_graph_support(Queue.get_device()); + + std::vector DataA(Size, 1), DataB(Size, 1), DataC(Size, 1); + + // Lifetime of buffers must exceed the lifetime of graphs they are used in. + buffer BufferA{DataA.data(), range<1>{Size}}; + BufferA.set_write_back(false); + buffer BufferB{DataB.data(), range<1>{Size}}; + BufferB.set_write_back(false); + buffer BufferC{DataC.data(), range<1>{Size}}; + BufferC.set_write_back(false); + + { + // New object representing graph of command-groups + sycl_ext::command_graph Graph( + Queue.get_context(), Queue.get_device(), + {sycl_ext::property::graph::assume_buffer_outlives_graph{}}); + + // `Queue` will be put in the recording state where commands are recorded to + // `Graph` rather than submitted for execution immediately. + Graph.begin_recording(Queue); + + // Record commands to `Graph` with the following topology. + // + // increment_kernel + // / \ + // A->/ A->\ + // / \ + // add_kernel subtract_kernel + // \ / + // B->\ C->/ + // \ / + // decrement_kernel + + Queue.submit([&](handler &CGH) { + auto PdataA = BufferA.get_access(CGH); + CGH.parallel_for( + range<1>(Size), [=](item<1> Id) { PdataA[Id]++; }); + }); + + Queue.submit([&](handler &CGH) { + auto PdataA = BufferA.get_access(CGH); + auto PdataB = BufferB.get_access(CGH); + CGH.parallel_for( + range<1>(Size), [=](item<1> Id) { PdataB[Id] += PdataA[Id]; }); + }); + + Queue.submit([&](handler &CGH) { + auto PdataA = BufferA.get_access(CGH); + auto PdataC = BufferC.get_access(CGH); + CGH.parallel_for( + range<1>(Size), [=](item<1> Id) { PdataC[Id] -= PdataA[Id]; }); + }); + + Queue.submit([&](handler &CGH) { + auto PdataB = BufferB.get_access(CGH); + auto PdataC = BufferC.get_access(CGH); + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { + PdataB[Id]--; + PdataC[Id]--; + }); + }); + + // `Queue` will be returned to the executing state where commands are + // submitted immediately for extension. + Graph.end_recording(); + + // Finalize the modifiable graph to create an executable graph that can be + // submitted for execution. + auto GraphExec = Graph.finalize(); + + // Execute graph. + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } + + // Access the data back on the host. + host_accessor HostDataA(BufferA); + host_accessor HostDataB(BufferB); + host_accessor HostDataC(BufferC); + + // Verify the results. + int dataAResult{2}; + int dataBResult{2}; + int dataCResult{-2}; + + for (size_t i = 0; i < Size; ++i) { + assert(HostDataA[i] == dataAResult); + assert(HostDataB[i] == dataBResult); + assert(HostDataC[i] == dataCResult); + } + + std::cout << "Success!" << std::endl; + + return 0; +} diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/CMakeLists.txt b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/CMakeLists.txt new file mode 100644 index 0000000000..34bae18f83 --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/CMakeLists.txt @@ -0,0 +1,37 @@ +############################################################################ +## Copyright © 2025 Codeplay Software +## +## SPDX-License-Identifier: MIT +############################################################################ + +cmake_minimum_required(VERSION 3.12) +project(SYCL-Graph-Samples) + +if (NOT DEFINED CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER icpx) +endif() + +# Set global flags +set(CMAKE_CXX_STANDARD 17) + +# Configure SYCL +include("${CMAKE_SOURCE_DIR}/../common/cmake/ConfigureSYCL.cmake") + +# Output directory for executables +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} CACHE PATH "" FORCE) + +# Find all .cpp files in the src/ directory +file(GLOB SAMPLE_SOURCES "${CMAKE_SOURCE_DIR}/src/*.cpp") + +# Add executable for each .cpp file +foreach(SOURCE_FILE ${SAMPLE_SOURCES}) + # Extract the file name without the extension + get_filename_component(EXE_NAME ${SOURCE_FILE} NAME_WE) + + # Create executable + add_executable(${EXE_NAME} ${SOURCE_FILE}) + + # Add SYCL flags + target_compile_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS}) + target_link_options(${EXE_NAME} PUBLIC ${SYCL_FLAGS}) +endforeach() diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/README.md b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/README.md new file mode 100644 index 0000000000..4dc47284f4 --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/README.md @@ -0,0 +1,106 @@ +# Dot Product Sample + +Code example demonstrating the usage of [`sycl_ext_oneapi_graph`](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc) extension with explicit graph building API. + +| Area | Description +|:--- |:--- +| What you will learn | How to use SYCL-Graphs extension for optimizing kernel execution. +| Time to complete | 15 minutes + + +## Purpose + +This example uses the explicit graph creation API to perform a dot product operation. + +## Prerequisites +| Optimized for | Description +|:--- |:--- +| OS | Linux* Ubuntu*
Windows* 10, 11 +| Hardware | Intel GPU +| Software | Intel® oneAPI DPC++/C++ Compiler + + +## Key Implementation Details + +Key SYCL* concepts demonstrated in the code sample include using command graph extension with unified shared memory and explicit graph building API. + +>**Note**: For comprehensive information about oneAPI programming, see the *[Intel® oneAPI Programming Guide](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/overview.html)*. (Use search or the table of contents to find relevant information quickly.) + + +## Set Environment Variables + +When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development. + +> **Note**: You can use [Modulefiles scripts](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-modulefiles-with-linux.html) to set up your development environment. The modulefiles scripts work with all Linux shells. + +> **Note**: If you want only specific components or versions of those components, use a [setvars config file](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-a-config-file-for-setvars-sh-on-linux-or-macos.html) to set up your development environment. + + +## Build the `Dot Product` Sample for CPU and GPU + +> **Note**: If you have not already done so, set up your CLI +> environment by sourcing the `setvars` script in the root of your oneAPI installation. +> +> Linux*: +> - For system wide installations: `. /opt/intel/oneapi/setvars.sh` +> - For private installations: ` . ~/intel/oneapi/setvars.sh` +> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'` +> +> Windows*: +> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat` +> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` +> +> For more information on configuring environment variables or if you have a Unified Directory Layout, see +*[Use the setvars and oneapi-vars Scripts with Linux*](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars and oneapi-vars Scripts with Windows*](https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/current/use-the-setvars-script-with-windows.html)*. + +### On Linux* + +The project uses a standard CMake build configuration system. Ensure the SYCL compiler is used by the configuration either by setting the environment variable `CXX=` or passing the configuration flag +`-DCMAKE_CXX_COMPILER=` where `` is your SYCL compiler's +executable (for example Intel `icpx` or LLVM `clang++`). + +1. Change to the sample directory. +2. Build the program. + ``` + mkdir -p build && cd build + cmake .. -DCMAKE_CXX_COMPILER= + cmake --build . + ``` + +The CMake configuration automatically detects the available SYCL backends and +enables the SPIR/CUDA/HIP targets for the device code, including the corresponding +architecture flags. If desired, these auto-configured cmake options may be overridden +with the following ones: + +| OPTION | VALUE +|:--- |:--- +| ENABLE_SPIR | ON or OFF +| ENABLE_CUDA | ON or OFF +| ENABLE_HIP | ON or OFF +| CUDA_COMPUTE_CAPABILITY | Integer, e.g. `70` meaning capability 7.0 (arch `sm_70`) +| HIP_GFX_ARCH | String, e.g. `gfx1030` + +#### Troubleshooting + +If an error occurs, you can get more details by running `make` with +the `VERBOSE=1` argument: +``` +make VERBOSE=1 +``` +If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the *[Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/docs/oneapi/user-guide-diagnostic-utility/current/overview.html)* for more information on using the utility. + + +## Run the `Dot Product` Sample + +### On Linux + +1. Run the program. + ``` + ./dotProduct + ``` + +## License + +Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. + +Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt). diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/sample.json b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/sample.json new file mode 100644 index 0000000000..cf18c7b22c --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/sample.json @@ -0,0 +1,40 @@ +{ + "guid": "D9C73D3A-9E23-4AD6-B3DF-FECB3786BAC8", + "name": "Dot Product", + "categories": [ + "Toolkit/oneAPI Direct Programming/C++SYCL/SYCL-Graph" + ], + "description": "Dot Product operation using SYCL-Graph", + "toolchain": [ + "dpcpp" + ], + "targetDevice": [ + "CPU", + "GPU" + ], + "languages": [ + { + "cpp": {} + } + ], + "os": [ + "linux", + ], + "builder": [ + "cmake" + ], + "ciTests": { + "linux": [ + { + "id": "test", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "cmake --build . -j$(nproc)", + ] + } + ], + }, + "expertise": "Concepts and Functionality" +} diff --git a/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/src/dotProduct.cpp b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/src/dotProduct.cpp new file mode 100644 index 0000000000..c03e699cbe --- /dev/null +++ b/DirectProgramming/C++SYCL/SYCL-Graph/dot-product/src/dotProduct.cpp @@ -0,0 +1,114 @@ +//============================================================== +// Copyright © 2022 - 2023 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include "../../common/aspect_queries.hpp" + +#include +#include +#include +#include + +namespace sycl_ext = sycl::ext::oneapi::experimental; +using namespace sycl; + +int main() { + constexpr size_t Size = 10; + constexpr size_t Iter = 5; + + float Alpha = 1.0f; + float Beta = 2.0f; + float Gamma = 3.0f; + + queue Queue{}; + + ensure_graph_support(Queue.get_device()); + + sycl_ext::command_graph Graph(Queue.get_context(), Queue.get_device()); + + float *Dotp = malloc_device(1, Queue); + float *X = malloc_device(Size, Queue); + float *Y = malloc_device(Size, Queue); + float *Z = malloc_device(Size, Queue); + + // Add commands to the graph to create the following topology. + // + // i + // / \ + // a b + // \ / + // c + + // Init data on the device. + auto Node_i = Graph.add([&](handler &CGH) { + CGH.parallel_for(Size, [=](id<1> Id) { + const size_t i = Id[0]; + X[i] = 1.0f; + Y[i] = 3.0f; + Z[i] = 2.0f; + *Dotp = 0.; + }); + }); + + auto Node_a = Graph.add( + [&](handler &CGH) { + CGH.parallel_for(range<1>{Size}, [=](id<1> Id) { + const size_t i = Id[0]; + X[i] = Alpha * X[i] + Beta * Y[i]; + }); + }, + {sycl_ext::property::node::depends_on(Node_i)}); + + auto Node_b = Graph.add( + [&](handler &CGH) { + CGH.parallel_for(range<1>{Size}, [=](id<1> Id) { + const size_t i = Id[0]; + Z[i] = Gamma * Z[i] + Beta * Y[i]; + }); + }, + {sycl_ext::property::node::depends_on(Node_i)}); + + auto Node_c = Graph.add( + [&](handler &CGH) { + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + *Dotp += X[i] * Z[i]; + } + }); + }, + {sycl_ext::property::node::depends_on(Node_a, Node_b)}); + + auto GraphExec = Graph.finalize(); + + // Use queue shortcut for graph submission. + for (int i = 0; i < Iter; ++i) { + Queue.ext_oneapi_graph(GraphExec).wait(); + } + + // Copy the result back to the host. + float ResultDotp; + Queue.copy(Dotp, &ResultDotp, 1).wait(); + + // Verify the results. + float HostDotp{0.0f}; + std::vector HostX(Size, 1.0f), HostY(Size, 3.0f), HostZ(Size, 2.0f); + for (size_t i = 0; i < Size; ++i) { + HostX[i] = Alpha * HostX[i] + Beta * HostY[i]; + HostZ[i] = Gamma * HostZ[i] + Beta * HostY[i]; + HostDotp += HostX[i] * HostZ[i]; + } + + assert(HostDotp == ResultDotp); + + std::cout << "Success!" << std::endl; + + // Memory is freed outside the graph. + free(X, Queue); + free(Y, Queue); + free(Z, Queue); + free(Dotp, Queue); + + return 0; +}