diff --git a/layer_gpu_performance/CMakeLists.txt b/layer_gpu_performance/CMakeLists.txt
new file mode 100644
index 0000000..625064e
--- /dev/null
+++ b/layer_gpu_performance/CMakeLists.txt
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: MIT
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Arm Limited
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# -----------------------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.17)
+
+set(CMAKE_CXX_STANDARD 20)
+
+project(VkLayerGPUPerformance VERSION 1.0.0)
+
+# Common configuration
+set(LGL_LOG_TAG "VkLayerGPUPerformance")
+set(LGL_CONFIG_TRACE 0)
+set(LGL_CONFIG_LOG 1)
+
+include(../source_common/compiler_helper.cmake)
+
+# Build steps
+add_subdirectory(source)
+add_subdirectory(../source_common/comms source_common/comms)
+add_subdirectory(../source_common/framework source_common/framework)
+add_subdirectory(../source_common/trackers source_common/trackers)
diff --git a/layer_gpu_performance/README_LAYER.md b/layer_gpu_performance/README_LAYER.md
new file mode 100644
index 0000000..a103878
--- /dev/null
+++ b/layer_gpu_performance/README_LAYER.md
@@ -0,0 +1,126 @@
+# Layer: GPU Performance
+
+This is a standalone performance analysis layer that can be used to analyze
+the workloads that make up a single frame.
+
+This layer supports two modes:
+
+* Per workload time, read via Vulkan API queries
+* Per workload performance counters, read via a non-API mechanism
+
+## What devices are supported?
+
+The per workload timing uses Vulkan API timer queries, and should work on any
+GPU that supports the required Vulkan features.
+
+The per workload performance counters use the Arm libGPUCounters library, and
+require an Arm GPU.
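+
+As a rough illustration of what "the required Vulkan features" means for the
+timing mode, the sketch below checks that a queue family can write timestamps
+and that timestamp ticks can be converted to nanoseconds. This is not part of
+the layer; the helper name and structure are assumptions made for the example.
+
+```
+#include <vector>
+#include <vulkan/vulkan.h>
+
+// Illustrative only: check the device limits needed for per workload timing.
+static bool supportsWorkloadTiming(VkPhysicalDevice gpu, uint32_t family)
+{
+    VkPhysicalDeviceProperties props {};
+    vkGetPhysicalDeviceProperties(gpu, &props);
+
+    uint32_t count = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, nullptr);
+    std::vector<VkQueueFamilyProperties> families(count);
+    vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, families.data());
+
+    // timestampValidBits == 0 means the family cannot write timestamps, and
+    // timestampPeriod is the length of one timestamp tick in nanoseconds
+    return family < count
+        && families[family].timestampValidBits > 0
+        && props.limits.timestampPeriod > 0.0f;
+}
+```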
+
+## Is this layer non-invasive?
+
+The goal of this layer is to cost the major workloads submitted via the API,
+in a way that is compatible with how a tile-based renderer schedules render
+passes.
+
+Under normal scheduling, tile-based renderers split render passes into two
+pieces that are independently scheduled and can overlap with other work
+running on the GPU. Blindly timing render passes using timer queries can give
+confusing results, because the reported time might include time spent
+processing unrelated workloads that happen to be running in parallel.
+
+The timing diagram below shows one possible arrangement of workloads scheduled
+on the GPU hardware queues for an Arm 5th Generation architecture GPU. We are
+trying to time render pass `1`, indicated by the `1` characters in the
+diagram, starting a timer query when this render pass starts (`S`) in the
+binning phase queue, and stopping when it ends (`E`) in the main phase queue.
+
+```
+    Compute:                      222
+    Binning phase:   S 11111           3333
+    Main phase:      00000000     111111111111 E
+```
+
+In this scenario the timer query correctly reflects the elapsed time of the
+render pass, but does not give an accurate measure of its cost. The elapsed
+time includes time where other workloads are running in parallel, indicated by
+the `0`, `2`, and `3` characters. It also includes time between the two phases
+where workload `1` is not running at all, because the binning phase work has
+completed and the main phase work is stuck waiting for an earlier workload to
+finish to free up the hardware.
+
+To accurately cost workloads on a tile-based renderer, which will overlap and
+run workloads in parallel if it is allowed to, the layer must inject additional
+synchronization to serialize all workloads within a queue and across queues.
+This ensures that timer query values reflect the cost of individual workloads;
+however, it also means that overall frame performance will be reduced due to
+the loss of workload parallelization.
+
+# Design notes
+
+## Dependencies
+
+This layer uses timeline semaphores, so it requires either Vulkan 1.2 or
+the `VK_KHR_timeline_semaphore` extension.
+
+## Implementing serialization
+
+Cross-queue serialization is implemented using an injected timeline semaphore.
+Each submit is assigned an incrementing `ID`, waits for `ID - 1` in the
+timeline before starting, and signals `ID` in the timeline when it completes.
+This allows us to implement serialization using a single sync primitive.
+
+Serialization within a queue is implemented by injecting a full pipeline
+barrier before the pre-workload timer query, ensuring that all prior work has
+completed before the time is sampled. Similarly, we put a full pipeline barrier
+after the post-workload timer query, ensuring that no following work starts
+before the time is sampled.
+
+## Implementing query lifetime tracking
+
+Timer queries are implemented using query pools. The timer write commands are
+recorded into each command buffer alongside the user commands. Each timer write
+command targets specific counter slots in a specific query pool, so the query
+pool and slot usage must be assigned when the command buffer is recorded.
+
+Query pools in the layer are a managed resource. We allocate query pools on
+demand, and maintain a free-list of query pools that have been freed and are
+ready for reuse.
+
+Query pools are allocated with enough space for 64 query results, which is, in
+the best case, enough for 63 workloads (N+1 counters). This capacity can be
+reduced for render passes using multi-view rendering, which allocate one
+counter slot per view.
+
+Query pools are assigned to a command buffer when recording, and multiple
+query pools can be assigned to a single command buffer if more query result
+space is needed. Query pools are fully reset on first use in the command
+buffer. Query pools are returned to the layer free-list when the command buffer
+is reset or destroyed.
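+
+The sketch below illustrates this pattern for a single workload: a full
+pipeline barrier, the pre-workload timestamp, the user workload, the
+post-workload timestamp, and a trailing full barrier. This is an illustration
+rather than the layer's implementation; the helper name and slot parameters
+are assumptions, and the query pool is assumed to have been reset earlier in
+the command buffer.
+
+```
+#include <vulkan/vulkan.h>
+
+// Illustrative only: wrap one workload with the barrier + timestamp pattern.
+static void wrapWorkloadWithTimestamps(
+    VkCommandBuffer commandBuffer,
+    VkQueryPool queryPool,
+    uint32_t startSlot,   // Timestamp slot sampled before the workload
+    uint32_t endSlot)     // Timestamp slot sampled after the workload
+{
+    VkMemoryBarrier fullBarrier {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT
+    };
+
+    // Ensure all prior work has completed before the start time is sampled
+    vkCmdPipelineBarrier(
+        commandBuffer,
+        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        0, 1, &fullBarrier, 0, nullptr, 0, nullptr);
+
+    vkCmdWriteTimestamp(
+        commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        queryPool, startSlot);
+
+    // ... the user workload is recorded here ...
+
+    vkCmdWriteTimestamp(
+        commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+        queryPool, endSlot);
+
+    // Ensure no following work starts before the end time is sampled
+    vkCmdPipelineBarrier(
+        commandBuffer,
+        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        0, 1, &fullBarrier, 0, nullptr, 0, nullptr);
+}
+```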
+
+### Multi-submit command buffers
+
+Reusable command buffers that are not one-time submit can be problematic for
+this type of instrumentation.
+
+A single primary command buffer could be submitted multiple times. This can be
+managed by serializing the workloads and ensuring that the query results are
+consumed between executions. This may impact performance due to the additional
+serialization, but it can be made to work.
+
+**NOTE:** The impact of this case could be mitigated by having the layer
+inject a command buffer after the user command buffer, which inserts a command
+to copy the query results to a buffer. This buffer is owned by the layer and
+can be N-buffered to avoid stalls.
+
+The more problematic case is where a single secondary command buffer is
+executed multiple times from within the same primary. In this case there is
+no place to resolve the collision with CPU-side synchronization, and relying
+only on CPU-side recording will capture only the last copy of the results.
+
+### Split command buffers
+
+Vulkan 1.3 allows a dynamic render pass to be split over multiple command
+buffers, although all parts must be part of the same queue submit call. The
+layer will only emit timestamps for the final part of the render pass, and
+will ignore the intermediate suspends and resumes.
diff --git a/layer_gpu_performance/android_build.sh b/layer_gpu_performance/android_build.sh
new file mode 100644
index 0000000..960b2b0
--- /dev/null
+++ b/layer_gpu_performance/android_build.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: MIT
+# ----------------------------------------------------------------------------
+# Copyright (c) 2024 Arm Limited
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Configuration + +# Exit immediately if any component command errors +set -e + +BUILD_DIR_64=build_arm64 +BUILD_DIR_PACK=build_package + +# ---------------------------------------------------------------------------- +# Process command line options +if [ "$#" -lt 1 ]; then + BUILD_TYPE=Release +else + BUILD_TYPE=$1 +fi + +# Process command line options +if [ "$#" -lt 2 ]; then + PACKAGE=0 +else + PACKAGE=$2 +fi + +if [ "${PACKAGE}" -gt "0" ]; then + echo "Building a ${BUILD_TYPE} build with packaging" +else + echo "Building a ${BUILD_TYPE} build without packaging" +fi + +# ---------------------------------------------------------------------------- +# Build the 64-bit layer +mkdir -p ${BUILD_DIR_64} +pushd ${BUILD_DIR_64} + +cmake \ + -DCMAKE_SYSTEM_NAME=Android \ + -DANDROID_PLATFORM=29 \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_TOOLCHAIN=clang \ + -DANDROID_STL=c++_static \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ + .. + +make -j1 + +popd + +# ---------------------------------------------------------------------------- +# Build the release package +if [ "${PACKAGE}" -gt "0" ]; then + # Setup the package directories + mkdir -p ${BUILD_DIR_PACK}/bin/android/arm64 + + # Install the 64-bit layer + cp ${BUILD_DIR_64}/source/*.so ${BUILD_DIR_PACK}/bin/android/arm64 +fi diff --git a/layer_gpu_performance/android_install.json b/layer_gpu_performance/android_install.json new file mode 100644 index 0000000..9d933c0 --- /dev/null +++ b/layer_gpu_performance/android_install.json @@ -0,0 +1,4 @@ +{ + "layer_name": "VK_LAYER_LGL_GPUPERFORMANCE", + "layer_binary": "libVkLayerGPUPerformance.so" +} diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt new file mode 100644 index 0000000..2975722 --- /dev/null +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ----------------------------------------------------------------------------- + +# Set output file names +if (CMAKE_BUILD_TYPE STREQUAL "Release") + set(VK_LAYER VkLayerGPUPerformance_sym) + set(VK_LAYER_STRIP libVkLayerGPUPerformance.so) +else() + set(VK_LAYER VkLayerGPUPerformance) +endif() + +# Set strings used by configure +set(LGL_LAYER_NAME_STR "VK_LAYER_LGL_GPUPERFORMANCE") +set(LGL_LAYER_DESC_STR "VkLayerGPUPerformance by LGL") + +# Vulkan layer library +configure_file( + version.hpp.in + version.hpp + ESCAPE_QUOTES @ONLY) + +add_library( + ${VK_LAYER} SHARED + ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp + device.cpp + instance.cpp + layer_device_functions_command_buffer.cpp + layer_device_functions_command_pool.cpp + layer_device_functions_debug.cpp + layer_device_functions_dispatch.cpp + layer_device_functions_draw_call.cpp + layer_device_functions_queue.cpp + layer_device_functions_render_pass.cpp + layer_device_functions_trace_rays.cpp + layer_device_functions_transfer.cpp + layer_instance_functions_device.cpp + performance_comms.cpp) + +target_include_directories( + ${VK_LAYER} PRIVATE + ${PROJECT_SOURCE_DIR}/../source_common + ${PROJECT_SOURCE_DIR}/../source_third_party + ${CMAKE_CURRENT_BINARY_DIR} + .) + +target_include_directories( + ${VK_LAYER} SYSTEM PRIVATE + ../../khronos/vulkan/include) + +lgl_set_build_options(${VK_LAYER}) + +target_link_libraries( + ${VK_LAYER} + lib_layer_comms + lib_layer_framework + lib_layer_trackers + $<$:log>) + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_command( + TARGET "${VK_LAYER}" POST_BUILD + DEPENDS "${VK_LAYER}" + COMMAND ${CMAKE_STRIP} + ARGS --strip-all -o ${VK_LAYER_STRIP} $ + COMMENT "Stripped lib${VK_LAYER}.so to ${VK_LAYER_STRIP}") +endif() diff --git a/layer_gpu_performance/source/device.cpp b/layer_gpu_performance/source/device.cpp new file mode 100644 index 0000000..571b2e4 --- /dev/null +++ b/layer_gpu_performance/source/device.cpp @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include + +#include "comms/comms_module.hpp" +#include "framework/utils.hpp" + +#include "device.hpp" +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan devices. + */ +static std::unordered_map> g_devices; + +/* See header for documentation. */ +std::unique_ptr Device::commsModule; + +/* See header for documentation. */ +std::unique_ptr Device::commsWrapper; + +/* See header for documentation. */ +void Device::store( + VkDevice handle, + std::unique_ptr device +) { + void* key = getDispatchKey(handle); + g_devices.insert({ key, std::move(device) }); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkQueue handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkCommandBuffer handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +void Device::destroy( + Device* device +) { + g_devices.erase(getDispatchKey(device)); +} + +/* See header for documentation. */ +Device::Device( + Instance* _instance, + VkPhysicalDevice _physicalDevice, + VkDevice _device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress +): + instance(_instance), + physicalDevice(_physicalDevice), + device(_device) +{ + initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); + + // Init the shared comms module for the first device built + if (!commsModule) + { + commsModule = std::make_unique("lglcomms"); + commsWrapper = std::make_unique(*commsModule); + } +} diff --git a/layer_gpu_performance/source/device.hpp b/layer_gpu_performance/source/device.hpp new file mode 100644 index 0000000..acfee7e --- /dev/null +++ b/layer_gpu_performance/source/device.hpp @@ -0,0 +1,203 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares the root class for layer management of VkDevice objects. + * + * Role summary + * ============ + * + * Devices represent the core context used by the application to connect to the + * underlying graphics driver. A device object is the dispatch root for the + * Vulkan driver, so device commands all take some form of dispatchable handle + * that can be resolved into a unique per-device key. For the driver this key + * would simply be a pointer directly to the driver-internal device object, but + * for our layer we use a device dispatch key as an index in to the map to find + * the layer's driver object. + * + * Key properties + * ============== + * + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include + +#include "comms/comms_module.hpp" +#include "framework/device_dispatch_table.hpp" +#include "trackers/device.hpp" + +#include "instance.hpp" +#include "performance_comms.hpp" + +/** + * @brief This class implements the layer state tracker for a single device. + */ +class Device +{ +public: + /** + * @brief Store a new device into the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect key. + * @param device The @c Device object to store. + */ + static void store( + VkDevice handle, + std::unique_ptr device); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkDevice handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkQueue handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkCommandBuffer handle); + + /** + * @brief Drop a device from the global store of dispatchable devices. + * + * @param device The device to drop. + */ + static void destroy( + Device* device); + + /** + * @brief Create a new layer device object. + * + * @param instance The layer instance object this device is created with. + * @param physicalDevice The physical device this logical device is for. + * @param device The device handle this device is created with. + * @param nlayerGetProcAddress The vkGetDeviceProcAddress function for the driver. + */ + Device( + Instance* instance, + VkPhysicalDevice physicalDevice, + VkDevice device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress); + + /** + * @brief Destroy this layer device object. + */ + ~Device() = default; + + /** + * @brief Callback for sending messages on frame boundary. + * + * @param message The message to send. 
+ */ + void onFrame( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Callback for sending messages on workload submit to a queue. + * + * @param message The message to send. + */ + void onWorkloadSubmit( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Get the cumulative stats for this device. + */ + Tracker::Device& getStateTracker() + { + return stateTracker; + } + +public: + /** + * @brief The driver function dispatch table. + */ + DeviceDispatchTable driver {}; + +private: + /** + * @brief The instance this device is created with. + */ + const Instance* instance; + + /** + * @brief The physical device this device is created with. + */ + const VkPhysicalDevice physicalDevice; + + /** + * @brief The device handle this device is created with. + */ + const VkDevice device; + + /** + * @brief State tracker for this device. + */ + Tracker::Device stateTracker; + + /** + * @brief Shared network communications module. + */ + static std::unique_ptr commsModule; + + /** + * @brief Shared network communications message encoder. + */ + static std::unique_ptr commsWrapper; +}; diff --git a/layer_gpu_performance/source/device_utils.hpp b/layer_gpu_performance/source/device_utils.hpp new file mode 100644 index 0000000..eddf193 --- /dev/null +++ b/layer_gpu_performance/source/device_utils.hpp @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +#include "device.hpp" + +/** + * @brief Emit a start tag via a driver debug utils label. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param tagID The tagID to emit into the label. 
+ */ +[[maybe_unused]] static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID +) { + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); +} diff --git a/layer_gpu_performance/source/instance.cpp b/layer_gpu_performance/source/instance.cpp new file mode 100644 index 0000000..0b62857 --- /dev/null +++ b/layer_gpu_performance/source/instance.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" + +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ +static std::unordered_map> g_instances; + +/* See header for documentation. */ +void Instance::store( + VkInstance handle, + std::unique_ptr& instance +) { + void* key = getDispatchKey(handle); + g_instances.insert({ key, std::move(instance) }); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkInstance handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkPhysicalDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +void Instance::destroy( + Instance* instance +) { + g_instances.erase(getDispatchKey(instance->instance)); +} + +/* See header for documentation. 
*/ +Instance::Instance( + VkInstance _instance, + PFN_vkGetInstanceProcAddr _nlayerGetProcAddress +) : + instance(_instance), + nlayerGetProcAddress(_nlayerGetProcAddress) +{ + initDriverInstanceDispatchTable(instance, nlayerGetProcAddress, driver); +} diff --git a/layer_gpu_performance/source/instance.hpp b/layer_gpu_performance/source/instance.hpp new file mode 100644 index 0000000..fc6af6b --- /dev/null +++ b/layer_gpu_performance/source/instance.hpp @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file + * Declares the root class for layer management of VkInstance objects. + * + * Role summary + * ============ + * + * Instances represent the core context used by the application to connect to + * the OS graphics subsystem prior to connection to a specific device instance. + * An instance object is the dispatch root for the Vulkan subsystem, so + * instance commands all take some form of dispatchable handle that can be + * resolved into a unique per-instance key. For the driver this key would + * simply be a pointer directly to the driver-internal instance object, but for + * our layer we use a instance dispatch key as an index in to the map to find + * the layer's instance object. + * + * Key properties + * ============== + * + * Vulkan instances are designed to be used concurrently by multiple + * application threads. An application can have multiple concurrent instances, + * and use each instance from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include +#include + +#include +#include + +#include "framework/instance_dispatch_table.hpp" + +/** + * @brief This class implements the layer state tracker for a single instance. + */ +class Instance +{ +public: + /** + * @brief Store a new instance into the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect key. + * @param instance The @c Instance object to store. 
+ */ + static void store( + VkInstance handle, + std::unique_ptr& instance); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkInstance handle); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkPhysicalDevice handle); + + /** + * @brief Drop an instance from the global store of dispatchable instances. + * + * @param instance The instance to drop. + */ + static void destroy( + Instance* instance); + + /** + * @brief Create a new layer instance object. + * + * @param instance The instance handle this instance is created with. + * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + */ + Instance( + VkInstance instance, + PFN_vkGetInstanceProcAddr nlayerGetProcAddress); + +public: + /** + * @brief The instance handle this instance is created with. + */ + VkInstance instance; + + /** + * @brief The next layer's \c vkGetInstanceProcAddr() function pointer. + */ + PFN_vkGetInstanceProcAddr nlayerGetProcAddress; + + /** + * @brief The driver function dispatch table. + */ + InstanceDispatchTable driver {}; +}; diff --git a/layer_gpu_performance/source/layer_device_functions.hpp b/layer_gpu_performance/source/layer_device_functions.hpp new file mode 100644 index 0000000..660502a --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions.hpp @@ -0,0 +1,510 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +// Functions for command pools + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); + +// Functions for command buffers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +// Functions for render passes + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer); + +// Functions for draw calls + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +// Functions for compute dispatches + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset); + +// Commands for trace rays + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth); + + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +// Functions for debug + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer); + +// Functions for queues + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); diff --git a/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp new file mode 100644 index 0000000..ef8e920 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkAllocateCommandBuffers( + device, pAllocateInfo, pCommandBuffers); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) + { + tracker.allocateCommandBuffer( + pAllocateInfo->commandPool, pCommandBuffers[i]); + } + + return result; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo +) { + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + cmdBuffer.begin(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkBeginCommandBuffer(commandBuffer, pBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandBuffer(commandBuffer, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < commandBufferCount; i++) + { + tracker.freeCommandBuffer(commandPool, pCommandBuffers[i]); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkFreeCommandBuffers( + device, commandPool, commandBufferCount, pCommandBuffers); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store and device-wide data + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& primary = tracker.getCommandBuffer(commandBuffer); + + for (uint32_t i = 0; i < commandBufferCount; i++) + { + auto& secondary = tracker.getCommandBuffer(pCommandBuffers[i]); + primary.executeCommands(secondary); + } + + // Release the lock to call into the main driver + lock.unlock(); + layer->driver.vkCmdExecuteCommands( + commandBuffer, commandBufferCount, pCommandBuffers); +} diff --git a/layer_gpu_performance/source/layer_device_functions_command_pool.cpp b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp new file mode 100644 index 0000000..a640a90 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkCreateCommandPool( + device, pCreateInfo, pAllocator, pCommandPool); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createCommandPool(*pCommandPool); + return result; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.getCommandPool(commandPool).reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandPool(device, commandPool, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyCommandPool(commandPool); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyCommandPool(device, commandPool, pAllocator); +} diff --git a/layer_gpu_performance/source/layer_device_functions_debug.cpp b/layer_gpu_performance/source/layer_device_functions_debug.cpp new file mode 100644 index 0000000..664d2b8 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_debug.cpp @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pMarkerInfo->pMarkerName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pLabelInfo->pLabelName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} diff --git a/layer_gpu_performance/source/layer_device_functions_dispatch.cpp b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp new file mode 100644 index 0000000..de5ee10 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a compute dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param groupX The X size of the dispatch in groups. + * @param groupY The Y size of the dispatch in groups. + * @param groupZ The Z size of the dispatch in groups. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerDispatch( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t groupX, + int64_t groupY, + int64_t groupZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.dispatch(groupX, groupY, groupZ); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast<int64_t>(groupCountX), + static_cast<int64_t>(groupCountY), + static_cast<int64_t>(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast<int64_t>(groupCountX), + static_cast<int64_t>(groupCountY), + static_cast<int64_t>(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast<int64_t>(groupCountX), + static_cast<int64_t>(groupCountY), + static_cast<int64_t>(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +}
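The dispatch hooks above share one pattern: register the workload with the tracker while holding the lock to obtain a `tagID`, then bracket the real driver call between `emitStartTag()` and `vkCmdEndDebugUtilsLabelEXT()`. `emitStartTag()` is declared in `device_utils.hpp`, which is not part of this diff, so the sketch below is only an illustration of the idea, assuming the tag is carried as a debug label whose name encodes the `tagID`; the real helper and its label format may differ.

```
// Sketch only: a hypothetical equivalent of emitStartTag(), assuming the
// tagID is encoded in a vkCmdBeginDebugUtilsLabelEXT() label name. The
// vkCmdEndDebugUtilsLabelEXT() call emitted after the driver call then
// closes a per-workload bracket that tooling can associate with the tag.
static void emitStartTagSketch(Device* layer, VkCommandBuffer commandBuffer, uint64_t tagID)
{
    std::string name = "t" + std::to_string(tagID);  // Label format is an assumption

    VkDebugUtilsLabelEXT label {};
    label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
    label.pLabelName = name.c_str();

    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &label);
}
```

+ +/* See Vulkan API for documentation.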
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/layer_device_functions_draw_call.cpp b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp new file mode 100644 index 0000000..42350d0 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp @@ -0,0 +1,257 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a draw call with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + */ +static void registerDrawCall( + Device* layer, + VkCommandBuffer commandBuffer +) { + auto& state = layer->getStateTracker(); + auto& stats = state.getCommandBuffer(commandBuffer).getStats(); + stats.incDrawCallCount(); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectByteCountEXT(commandBuffer, instanceCount, firstInstance, counterBuffer, counterBufferOffset, counterOffset, vertexStride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} diff --git a/layer_gpu_performance/source/layer_device_functions_queue.cpp b/layer_gpu_performance/source/layer_device_functions_queue.cpp new file mode 100644 index 0000000..6f435ba --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_queue.cpp @@ -0,0 +1,178 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include + +#include "utils/misc.hpp" + +#include "device.hpp" +#include "layer_device_functions.hpp" + +using json = nlohmann::json; + +using namespace std::placeholders; + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto& tracker = layer->getStateTracker(); + tracker.queuePresent(); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + json frame { + { "type", "frame" }, + { "fid", tracker.totalStats.getFrameCount() } + }; + + layer->onFrame(frame.dump()); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueuePresentKHR(queue, pPresentInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence); +} diff --git a/layer_gpu_performance/source/layer_device_functions_render_pass.cpp b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp new file mode 100644 index 0000000..5d16880 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp @@ -0,0 +1,376 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" +#include "trackers/render_pass.hpp" + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2KHR(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyRenderPass(renderPass); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyRenderPass(device, renderPass, pAllocator); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... + Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... + Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderPass(commandBuffer); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRendering(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderingKHR(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} diff --git a/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp new file mode 100644 index 0000000..e6df5e3 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a trace rays dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param itemsX The X size of the dispatch in work items. + * @param itemsY The Y size of the dispatch in work items. + * @param itemsZ The Z size of the dispatch in work items. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerTraceRays( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t itemsX, + int64_t itemsY, + int64_t itemsZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.traceRays(itemsX, itemsY, itemsZ); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, width, height, depth); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} \ No newline at end of file diff --git a/layer_gpu_performance/source/layer_device_functions_transfer.cpp b/layer_gpu_performance/source/layer_device_functions_transfer.cpp new file mode 100644 index 0000000..ecfaa65 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_transfer.cpp @@ -0,0 +1,619 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a transfer to a buffer with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + * + * @return The assigned tagID for the workload. 
+ */ +static uint64_t registerBufferTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t byteCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.bufferTransfer(transferType, byteCount); +} + +/** + * @brief Register a transfer to an image with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param pixelCount The number of pixels transferred. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerImageTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t pixelCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.imageTransfer(transferType, pixelCount); +} + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add buffer tracking so we can turn VK_WHOLE_SIZE into bytes + int64_t byteCount = static_cast<int64_t>(size); + if (size == VK_WHOLE_SIZE) + { + byteCount = -2; + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Fill buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearColorImage(commandBuffer, image, imageLayout, pColor, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +}
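The transfer handlers pass a signed size into `registerBufferTransfer()` / `registerImageTransfer()` so that cases the layer cannot yet resolve can be reported as negative sentinels: `-1` where the size is unknown until resource tracking is added, and `-2` where the application passed `VK_WHOLE_SIZE`. As a sketch of the TODO in `layer_vkCmdFillBuffer()` above, resolving `VK_WHOLE_SIZE` would need a tracked buffer size; `trackedBufferSize` below is a hypothetical lookup that this patch does not provide.

```
// Sketch only: once buffer tracking exists, VK_WHOLE_SIZE could be turned
// into a real byte count rather than the -2 sentinel, since a whole-size
// fill covers the buffer from dstOffset to the end of the buffer.
static int64_t resolveFillSize(VkDeviceSize trackedBufferSize, VkDeviceSize dstOffset, VkDeviceSize size)
{
    if (size == VK_WHOLE_SIZE)
    {
        return static_cast<int64_t>(trackedBufferSize - dstOffset);
    }

    return static_cast<int64_t>(size);
}
```

+ +/* See Vulkan API for documentation.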
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearDepthStencilImage(commandBuffer, image, imageLayout, pDepthStencil, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + byteCount += static_cast<int64_t>(pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast<int64_t>(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +}
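The buffer-to-image and image copy hooks later in this file all repeat the same extent-product loop to turn a region list into a pixel count. As an illustration only, a helper along the following lines could factor that loop out; `sumRegionPixels` is hypothetical and not part of this patch.

```
// Hypothetical helper, not part of this patch: sum the pixel count of a span
// of copy regions, given an accessor that returns each region's VkExtent3D.
template <typename RegionT, typename GetExtent>
static int64_t sumRegionPixels(uint32_t regionCount, const RegionT* pRegions, GetExtent getExtent)
{
    int64_t pixelCount = 0;
    for (uint32_t i = 0; i < regionCount; i++)
    {
        const VkExtent3D& extent = getExtent(pRegions[i]);
        pixelCount += static_cast<int64_t>(extent.width)
                    * static_cast<int64_t>(extent.height)
                    * static_cast<int64_t>(extent.depth);
    }

    return pixelCount;
}

// Example use for VkBufferImageCopy regions:
//   int64_t pixels = sumRegionPixels(regionCount, pRegions,
//       [](const VkBufferImageCopy& r) -> const VkExtent3D& { return r.imageExtent; });
```

+ +/* See Vulkan API for documentation.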
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2KHR(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2KHR(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].extent.width) + * static_cast(pRegions[i].extent.height) + * static_cast(pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2KHR(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // as "Copy image" and report size in pixels. + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // as "Copy image" and report size in pixels. 
+ uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.width) + * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.height) + * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // this as an image transfer and report the size in pixels. + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/layer_instance_functions.hpp b/layer_gpu_performance/source/layer_instance_functions.hpp new file mode 100644 index 0000000..00f93a9 --- /dev/null +++ b/layer_gpu_performance/source/layer_instance_functions.hpp @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include <vulkan/vulkan.h> + +#include "framework/utils.hpp" + +/* See Vulkan API for documentation.
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice); diff --git a/layer_gpu_performance/source/layer_instance_functions_device.cpp b/layer_gpu_performance/source/layer_instance_functions_device.cpp new file mode 100644 index 0000000..f31143b --- /dev/null +++ b/layer_gpu_performance/source/layer_instance_functions_device.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include <mutex> + +#include "framework/manual_functions.hpp" + +#include "device.hpp" +#include "layer_instance_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation.
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Instance::retrieve(physicalDevice); + + // Release the lock to call into the driver + lock.unlock(); + + auto* chainInfo = getChainInfo(pCreateInfo); + auto fpGetInstanceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr; + auto fpGetDeviceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr; + + auto extensions = getDeviceExtensionList( + layer->instance, physicalDevice, pCreateInfo); + + auto fpCreateDevice = reinterpret_cast<PFN_vkCreateDevice>( + fpGetInstanceProcAddr(layer->instance, "vkCreateDevice")); + if (!fpCreateDevice) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + // Advance the link info for the next element on the chain + chainInfo->u.pLayerInfo = chainInfo->u.pLayerInfo->pNext; + auto res = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice); + if (res != VK_SUCCESS) + { + return res; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto device = std::make_unique<Device>(layer, physicalDevice, *pDevice, fpGetDeviceProcAddr); + Device::store(*pDevice, std::move(device)); + + return VK_SUCCESS; +} diff --git a/layer_gpu_performance/source/performance_comms.cpp b/layer_gpu_performance/source/performance_comms.cpp new file mode 100644 index 0000000..bf04114 --- /dev/null +++ b/layer_gpu_performance/source/performance_comms.cpp @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include <memory> + +#include "performance_comms.hpp" + +/* See header for documentation. */ +PerformanceComms::PerformanceComms( + Comms::CommsInterface& _comms +): + comms(_comms) +{ + if (comms.isConnected()) + { + endpoint = comms.getEndpointID("GPUTimeline"); + } +} + +/* See header for documentation.
*/ +void PerformanceComms::txMessage( + const std::string& message) +{ + // Message endpoint is not available + if (endpoint == 0) + { + return; + } + + auto data = std::make_unique<Comms::MessageData>(message.begin(), message.end()); + comms.txAsync(endpoint, std::move(data)); +} diff --git a/layer_gpu_performance/source/performance_comms.hpp b/layer_gpu_performance/source/performance_comms.hpp new file mode 100644 index 0000000..d9f3916 --- /dev/null +++ b/layer_gpu_performance/source/performance_comms.hpp @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares a simple comms encoder for the timeline layer. + */ + +#pragma once + +#include "comms/comms_interface.hpp" + +/** + * @brief A simple message encoder for the timeline comms endpoint. + * + * TODO: This is currently a very simple implementation because we are simply + * passing JSON strings around. This is not the most efficient way of doing + * this and in future this module will be used to implement binary encoders + * for each specific message type that needs sending. + */ +class PerformanceComms +{ +public: + /** + * @brief Construct a new encoder. + * + * @param comms The common comms module used by all services. + */ + PerformanceComms( + Comms::CommsInterface& comms); + + /** + * @brief Send a message to the GPU timeline endpoint service. + * + * @param message The message to send. + */ + void txMessage( + const std::string& message); + +private: + /** + * @brief The endpoint ID of the service, or 0 if not found. + */ + Comms::EndpointID endpoint { 0 }; + + /** + * @brief The common module for network messaging.
+ */ + Comms::CommsInterface& comms; +}; diff --git a/layer_gpu_performance/source/version.hpp.in b/layer_gpu_performance/source/version.hpp.in new file mode 100644 index 0000000..5fcb9c3 --- /dev/null +++ b/layer_gpu_performance/source/version.hpp.in @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Placeholder templates that are populated by CMake during configure. + */ + +#pragma once + +#define LGL_VER_MAJOR @PROJECT_VERSION_MAJOR@ +#define LGL_VER_MINOR @PROJECT_VERSION_MINOR@ +#define LGL_VER_PATCH @PROJECT_VERSION_PATCH@ +#define LGL_LAYER_NAME "@LGL_LAYER_NAME_STR@" +#define LGL_LAYER_DESC "@LGL_LAYER_DESC_STR@"
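For reference, the `@PROJECT_VERSION_*@` and `@LGL_*@` placeholders in `version.hpp.in` are substituted by CMake's `configure_file()` at configure time. The actual rule presumably lives in the layer's `source/CMakeLists.txt`, which is not shown in this patch, so the snippet below is only an illustrative sketch: the layer name string, description string, and `${VK_LAYER}` target name are assumptions rather than values taken from this change.

```
# Illustrative sketch only; the real rule lives in source/CMakeLists.txt.
# The string values and the ${VK_LAYER} target name are placeholders.
set(LGL_LAYER_NAME_STR "VK_LAYER_LGL_gpu_performance")
set(LGL_LAYER_DESC_STR "Arm GPU performance analysis layer")

# Substitute the @...@ placeholders in version.hpp.in to generate version.hpp
# in the build tree.
configure_file(version.hpp.in version.hpp @ONLY)

# Make the generated header visible to the layer sources.
target_include_directories(${VK_LAYER} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
```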