From 0e8c4ba654e39b07a4c56489acda224f4a1a5f7a Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 23 Dec 2024 22:00:40 +0000 Subject: [PATCH 1/4] Commit empty performance layer --- layer_gpu_performance/CMakeLists.txt | 39 +++ layer_gpu_performance/android_build.sh | 82 +++++++ layer_gpu_performance/android_install.py | 254 ++++++++++++++++++++ layer_gpu_performance/source/CMakeLists.txt | 72 ++++++ layer_gpu_performance/source/device.cpp | 97 ++++++++ layer_gpu_performance/source/device.hpp | 154 ++++++++++++ layer_gpu_performance/source/instance.cpp | 80 ++++++ layer_gpu_performance/source/instance.hpp | 134 +++++++++++ layer_gpu_performance/source/version.hpp.in | 36 +++ 9 files changed, 948 insertions(+) create mode 100644 layer_gpu_performance/CMakeLists.txt create mode 100644 layer_gpu_performance/android_build.sh create mode 100644 layer_gpu_performance/android_install.py create mode 100644 layer_gpu_performance/source/CMakeLists.txt create mode 100644 layer_gpu_performance/source/device.cpp create mode 100644 layer_gpu_performance/source/device.hpp create mode 100644 layer_gpu_performance/source/instance.cpp create mode 100644 layer_gpu_performance/source/instance.hpp create mode 100644 layer_gpu_performance/source/version.hpp.in diff --git a/layer_gpu_performance/CMakeLists.txt b/layer_gpu_performance/CMakeLists.txt new file mode 100644 index 0000000..34d05e0 --- /dev/null +++ b/layer_gpu_performance/CMakeLists.txt @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the 
Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- + +cmake_minimum_required(VERSION 3.17) + +set(CMAKE_CXX_STANDARD 20) + +project(VkLayerGPUPerformance VERSION 1.0.0) + +# Common configuration +set(LGL_LOG_TAG "VkLayerGPUPerformance") +set(LGL_CONFIG_TRACE 0) +set(LGL_CONFIG_LOG 1) + +include(../source_common/compiler_helper.cmake) + +# Build steps +add_subdirectory(source) +add_subdirectory(../source_common/framework source_common/framework) diff --git a/layer_gpu_performance/android_build.sh b/layer_gpu_performance/android_build.sh new file mode 100644 index 0000000..960b2b0 --- /dev/null +++ b/layer_gpu_performance/android_build.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MIT +# ---------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following 
conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Configuration + +# Exit immediately if any component command errors +set -e + +BUILD_DIR_64=build_arm64 +BUILD_DIR_PACK=build_package + +# ---------------------------------------------------------------------------- +# Process command line options +if [ "$#" -lt 1 ]; then + BUILD_TYPE=Release +else + BUILD_TYPE=$1 +fi + +# Process command line options +if [ "$#" -lt 2 ]; then + PACKAGE=0 +else + PACKAGE=$2 +fi + +if [ "${PACKAGE}" -gt "0" ]; then + echo "Building a ${BUILD_TYPE} build with packaging" +else + echo "Building a ${BUILD_TYPE} build without packaging" +fi + +# ---------------------------------------------------------------------------- +# Build the 64-bit layer +mkdir -p ${BUILD_DIR_64} +pushd ${BUILD_DIR_64} + +cmake \ + -DCMAKE_SYSTEM_NAME=Android \ + -DANDROID_PLATFORM=29 \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_TOOLCHAIN=clang \ + -DANDROID_STL=c++_static \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ + .. 
+ +make -j1 + +popd + +# ---------------------------------------------------------------------------- +# Build the release package +if [ "${PACKAGE}" -gt "0" ]; then + # Setup the package directories + mkdir -p ${BUILD_DIR_PACK}/bin/android/arm64 + + # Install the 64-bit layer + cp ${BUILD_DIR_64}/source/*.so ${BUILD_DIR_PACK}/bin/android/arm64 +fi diff --git a/layer_gpu_performance/android_install.py b/layer_gpu_performance/android_install.py new file mode 100644 index 0000000..35780ea --- /dev/null +++ b/layer_gpu_performance/android_install.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the 'Software'), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- +''' +A simple installer for Android Vulkan layers. 
+''' + +import argparse +import os +import shlex +import subprocess as sp +import sys +from typing import Any, Optional + +# Android temp directory +ANDROID_TMP_DIR = '/data/local/tmp/' + +# Expected layer names +EXPECTED_VULKAN_LAYER_NAME = 'VK_LAYER_LGL_GPUPERFORMANCE' +EXPECTED_VULKAN_LAYER_FILE = 'libVkLayerGPUPerformance.so' + + +class Device: + ''' + A basic wrapper around adb, allowing a specific device to be registered. + + Attributes: + device: The name of the device to call, or None for non-specific use. + ''' + + def adb_quiet(self, *args: str) -> None: + ''' + Call `adb` to run a command, but ignore output and errors. + + Args: + *args : List of command line parameters. + ''' + commands = ['adb'] + commands.extend(args) + sp.run(commands, stdout=sp.DEVNULL, stderr=sp.DEVNULL, check=False) + + def adb(self, *args: str, **kwargs: Any) -> str: + ''' + Call `adb` to run command, and capture output and results. + + Args: + *args: List of command line parameters. + **kwargs: text: Is output is text, or binary? + shell: Use the host shell? + quote: Quote arguments before forwarding + + Returns: + The contents of stdout. + + Raises: + CalledProcessError: The subprocess was not successfully executed. 
+ ''' + commands = ['adb'] # type: Any + commands.extend(args) + + text = kwargs.get('text', True) + shell = kwargs.get('shell', False) + quote = kwargs.get('quote', False) + + # Run on the host shell + if shell: + # Unix shells need a flattened command for shell commands + if os.name != 'nt': + quoted_commands = [] + for command in commands: + if command != '>': + command = shlex.quote(command) + quoted_commands.append(command) + commands = ' '.join(quoted_commands) + + # Run on the device but with shell argument quoting + if quote: + for i, command in enumerate(commands): + commands[i] = shlex.quote(command) + + rep = sp.run(commands, check=True, shell=shell, stdout=sp.PIPE, + stderr=sp.PIPE, universal_newlines=text) + + return rep.stdout + + def adb_run_as(self, package: str, + *args: str, quiet: bool = False) -> Optional[str]: + ''' + Call `adb` to run command as a package using `run-as` or as root, + if root is accessible. If command will be run as root, this function + will change CWD to the package data directory before executing the + command. + + Args: + package: Package name to run-as or change CWD to. + *args: List of command line parameters. + quiet: If True, ignores output from adb. + + Returns: + The contents of stdout or None if quiet=True. + + Raises: + CalledProcessError: The subprocess was not successfully executed. + ''' + command = ['shell', 'run-as', package] + command.extend(args) + + if quiet: + self.adb_quiet(*command) + return None + + return self.adb(*command) + + +def enable_vulkan_debug_layer( + device: Device, package: str, layer: str) -> None: + ''' + Args: + device: The device instance. + package: The Android package name. + layer: The layer file path name. 
+ ''' + + print('\nInstalling Vulkan debug layer') + + layer = os.path.normpath(layer) + layer_base = os.path.basename(os.path.normpath(layer)) + + device.adb('push', layer, ANDROID_TMP_DIR) + + device.adb_run_as(package, 'cp', ANDROID_TMP_DIR + layer_base, '.') + + device.adb('shell', 'settings', 'put', 'global', + 'enable_gpu_debug_layers', '1') + + device.adb('shell', 'settings', 'put', 'global', + 'gpu_debug_app', package) + + device.adb('shell', 'settings', 'put', 'global', + 'gpu_debug_layers', EXPECTED_VULKAN_LAYER_NAME) + + +def disable_vulkan_debug_layer( + device: Device, package: str, layer: str) -> None: + ''' + Clean up the Vulkan layer installation. + + Args: + device: The device instance. + args: The command arguments. + ''' + print('\nRemoving Vulkan debug layer') + + layer_base = os.path.basename(os.path.normpath(layer)) + + device.adb('shell', 'settings', 'delete', 'global', + 'enable_gpu_debug_layers') + + device.adb('shell', 'settings', 'delete', 'global', + 'gpu_debug_app') + + device.adb('shell', 'settings', 'delete', 'global', + 'gpu_debug_layers') + + device.adb_run_as(package, 'rm', layer_base, quiet=True) + + +def get_layer() -> Optional[str]: + ''' + Find the debug layer to use in the build directory. + + Returns: + The part to the library to use. + ''' + + base_dir = './build_arm64/source/' + + # TODO: If we want to use symbolized layer we need to rename it + lib = None + + for path in os.listdir(base_dir): + # Match symbolized library first so we don't use it + if path.endswith('_sym.so'): + _ = os.path.join(base_dir, path) + elif path.endswith('.so'): + lib = os.path.join(base_dir, path) + + return lib + + +def parse_command_line() -> argparse.Namespace: + ''' + Parse the command line. + + Returns: + The parsed command line container. 
+ ''' + parser = argparse.ArgumentParser() + + parser.add_argument('--package', required=True, + help='Android package name') + + return parser.parse_args() + + +def main() -> int: + ''' + Script main function. + + Returns: + Process return code. + ''' + args = parse_command_line() + + device = Device() + layer = get_layer() + if not layer: + print('ERROR: Layer binary not found') + return 1 + + enable_vulkan_debug_layer(device, args.package, layer) + + input('Press Enter to disable layers') + + disable_vulkan_debug_layer(device, args.package, layer) + + return 0 + + +if __name__ == '__main__': + try: + sys.exit(main()) + except KeyboardInterrupt: + print('\n\nERROR: User interrupted execution') diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt new file mode 100644 index 0000000..1ee65f4 --- /dev/null +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- + +# Set output file names +if (CMAKE_BUILD_TYPE STREQUAL "Release") + set(VK_LAYER VkLayerGPUPerformance_sym) + set(VK_LAYER_STRIP libVkLayerGPUPerformance.so) +else() + set(VK_LAYER VkLayerGPUPerformance) +endif() + +# Set strings used by configure +set(LGL_LAYER_NAME_STR "VK_LAYER_LGL_GPUPERFORMANCE") +set(LGL_LAYER_DESC_STR "VkLayerGPUPerformance by LGL") + +# Vulkan layer library +configure_file( + version.hpp.in + version.hpp + ESCAPE_QUOTES @ONLY) + +add_library( + ${VK_LAYER} SHARED + ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp + device.cpp + instance.cpp) + +target_include_directories( + ${VK_LAYER} PRIVATE + ${PROJECT_SOURCE_DIR}/../source_common + ${CMAKE_CURRENT_BINARY_DIR} + .) 
+ +target_include_directories( + ${VK_LAYER} SYSTEM PRIVATE + ../../khronos/vulkan/include) + +lgl_set_build_options(${VK_LAYER}) + +target_link_libraries( + ${VK_LAYER} + lib_layer_framework + $<$:log>) + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_command( + TARGET "${VK_LAYER}" POST_BUILD + DEPENDS "${VK_LAYER}" + COMMAND ${CMAKE_STRIP} + ARGS --strip-all -o ${VK_LAYER_STRIP} $ + COMMENT "Stripped lib${VK_LAYER}.so to ${VK_LAYER_STRIP}") +endif() diff --git a/layer_gpu_performance/source/device.cpp b/layer_gpu_performance/source/device.cpp new file mode 100644 index 0000000..3371cff --- /dev/null +++ b/layer_gpu_performance/source/device.cpp @@ -0,0 +1,97 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include + +#include "framework/utils.hpp" + +#include "device.hpp" +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ +static std::unordered_map> g_devices; + +/* See header for documentation. */ +void Device::store( + VkDevice handle, + std::unique_ptr device +) { + void* key = getDispatchKey(handle); + g_devices.insert({ key, std::move(device) }); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkQueue handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkCommandBuffer handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +void Device::destroy( + Device* device +) { + g_devices.erase(getDispatchKey(device)); +} + +/* See header for documentation. 
*/ +Device::Device( + Instance* _instance, + VkPhysicalDevice _physicalDevice, + VkDevice _device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress +): + instance(_instance), + physicalDevice(_physicalDevice), + device(_device) +{ + initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); +} diff --git a/layer_gpu_performance/source/device.hpp b/layer_gpu_performance/source/device.hpp new file mode 100644 index 0000000..c0e1f0a --- /dev/null +++ b/layer_gpu_performance/source/device.hpp @@ -0,0 +1,154 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares the root class for layer management of VkDevice objects. 
+ * + * Role summary + * ============ + * + * Devices represent the core context used by the application to connect to the + * underlying graphics driver. A device object is the dispatch root for the + * Vulkan driver, so device commands all take some form of dispatchable handle + * that can be resolved into a unique per-device key. For the driver this key + * would simply be a pointer directly to the driver-internal device object, but + * for our layer we use a device dispatch key as an index in to the map to find + * the layer's driver object. + * + * Key properties + * ============== + * + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include + +#include "framework/device_dispatch_table.hpp" + +#include "instance.hpp" + +/** + * @brief This class implements the layer state tracker for a single device. + */ +class Device +{ +public: + /** + * @brief Store a new device into the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect key. + * @param device The @c Device object to store. + */ + static void store( + VkDevice handle, + std::unique_ptr device); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkDevice handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. 
+ * + * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkQueue handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkCommandBuffer handle); + + /** + * @brief Drop a device from the global store of dispatchable devices. + * + * @param device The device to drop. + */ + static void destroy( + Device* device); + + /** + * @brief Create a new layer device object. + * + * @param instance The layer instance object this device is created with. + * @param physicalDevice The physical device this logical device is for. + * @param device The device handle this device is created with. + * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + */ + Device( + Instance* instance, + VkPhysicalDevice physicalDevice, + VkDevice device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress); + + /** + * @brief Destroy this layer device object. + */ + ~Device() = default; + +public: + /** + * @brief The instance this device is created with. + */ + const Instance* instance; + + /** + * @brief The physical device this device is created with. + */ + const VkPhysicalDevice physicalDevice; + + /** + * @brief The device handle this device is created with. + */ + const VkDevice device; + + /** + * @brief The driver function dispatch table. 
+ */ + DeviceDispatchTable driver {}; +}; diff --git a/layer_gpu_performance/source/instance.cpp b/layer_gpu_performance/source/instance.cpp new file mode 100644 index 0000000..0b62857 --- /dev/null +++ b/layer_gpu_performance/source/instance.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" + +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ +static std::unordered_map> g_instances; + +/* See header for documentation. 
*/ +void Instance::store( + VkInstance handle, + std::unique_ptr& instance +) { + void* key = getDispatchKey(handle); + g_instances.insert({ key, std::move(instance) }); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkInstance handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkPhysicalDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +void Instance::destroy( + Instance* instance +) { + g_instances.erase(getDispatchKey(instance->instance)); +} + +/* See header for documentation. */ +Instance::Instance( + VkInstance _instance, + PFN_vkGetInstanceProcAddr _nlayerGetProcAddress +) : + instance(_instance), + nlayerGetProcAddress(_nlayerGetProcAddress) +{ + initDriverInstanceDispatchTable(instance, nlayerGetProcAddress, driver); +} diff --git a/layer_gpu_performance/source/instance.hpp b/layer_gpu_performance/source/instance.hpp new file mode 100644 index 0000000..fc6af6b --- /dev/null +++ b/layer_gpu_performance/source/instance.hpp @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file + * Declares the root class for layer management of VkInstance objects. + * + * Role summary + * ============ + * + * Instances represent the core context used by the application to connect to + * the OS graphics subsystem prior to connection to a specific device instance. + * An instance object is the dispatch root for the Vulkan subsystem, so + * instance commands all take some form of dispatchable handle that can be + * resolved into a unique per-instance key. For the driver this key would + * simply be a pointer directly to the driver-internal instance object, but for + * our layer we use a instance dispatch key as an index in to the map to find + * the layer's instance object. + * + * Key properties + * ============== + * + * Vulkan instances are designed to be used concurrently by multiple + * application threads. An application can have multiple concurrent instances, + * and use each instance from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "framework/instance_dispatch_table.hpp" + +/** + * @brief This class implements the layer state tracker for a single instance. + */ +class Instance +{ +public: + /** + * @brief Store a new instance into the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect key. + * @param instance The @c Instance object to store. + */ + static void store( + VkInstance handle, + std::unique_ptr& instance); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkInstance handle); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkPhysicalDevice handle); + + /** + * @brief Drop an instance from the global store of dispatchable instances. + * + * @param instance The instance to drop. + */ + static void destroy( + Instance* instance); + + /** + * @brief Create a new layer instance object. + * + * @param instance The instance handle this instance is created with. + * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + */ + Instance( + VkInstance instance, + PFN_vkGetInstanceProcAddr nlayerGetProcAddress); + +public: + /** + * @brief The instance handle this instance is created with. + */ + VkInstance instance; + + /** + * @brief The next layer's \c vkGetInstanceProcAddr() function pointer. + */ + PFN_vkGetInstanceProcAddr nlayerGetProcAddress; + + /** + * @brief The driver function dispatch table. 
+ */ + InstanceDispatchTable driver {}; +}; diff --git a/layer_gpu_performance/source/version.hpp.in b/layer_gpu_performance/source/version.hpp.in new file mode 100644 index 0000000..5fcb9c3 --- /dev/null +++ b/layer_gpu_performance/source/version.hpp.in @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Placeholder templates that are populated by CMake during configure. 
+ */ + +#pragma once + +#define LGL_VER_MAJOR @PROJECT_VERSION_MAJOR@ +#define LGL_VER_MINOR @PROJECT_VERSION_MINOR@ +#define LGL_VER_PATCH @PROJECT_VERSION_PATCH@ +#define LGL_LAYER_NAME "@LGL_LAYER_NAME_STR@" +#define LGL_LAYER_DESC "@LGL_LAYER_DESC_STR@" From 6e3953e1c7249cc851f3789609058c99d0e47262 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 23 Dec 2024 22:33:40 +0000 Subject: [PATCH 2/4] Start with the timeline layer as a starting point --- layer_gpu_performance/CMakeLists.txt | 2 + layer_gpu_performance/README_LAYER.md | 56 ++ layer_gpu_performance/source/CMakeLists.txt | 15 +- layer_gpu_performance/source/device.cpp | 16 +- layer_gpu_performance/source/device.hpp | 55 +- layer_gpu_performance/source/device_utils.hpp | 56 ++ .../source/layer_device_functions.hpp | 510 +++++++++++++++ .../layer_device_functions_command_buffer.cpp | 160 +++++ .../layer_device_functions_command_pool.cpp | 103 +++ .../source/layer_device_functions_debug.cpp | 121 ++++ .../layer_device_functions_dispatch.cpp | 167 +++++ .../layer_device_functions_draw_call.cpp | 257 ++++++++ .../source/layer_device_functions_queue.cpp | 178 +++++ .../layer_device_functions_render_pass.cpp | 376 +++++++++++ .../layer_device_functions_trace_rays.cpp | 130 ++++ .../layer_device_functions_transfer.cpp | 619 ++++++++++++++++++ .../source/performance_comms.cpp | 54 ++ .../source/performance_comms.hpp | 71 ++ 18 files changed, 2941 insertions(+), 5 deletions(-) create mode 100644 layer_gpu_performance/README_LAYER.md create mode 100644 layer_gpu_performance/source/device_utils.hpp create mode 100644 layer_gpu_performance/source/layer_device_functions.hpp create mode 100644 layer_gpu_performance/source/layer_device_functions_command_buffer.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_command_pool.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_debug.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_dispatch.cpp 
create mode 100644 layer_gpu_performance/source/layer_device_functions_draw_call.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_queue.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_render_pass.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_trace_rays.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_transfer.cpp create mode 100644 layer_gpu_performance/source/performance_comms.cpp create mode 100644 layer_gpu_performance/source/performance_comms.hpp diff --git a/layer_gpu_performance/CMakeLists.txt b/layer_gpu_performance/CMakeLists.txt index 34d05e0..625064e 100644 --- a/layer_gpu_performance/CMakeLists.txt +++ b/layer_gpu_performance/CMakeLists.txt @@ -36,4 +36,6 @@ include(../source_common/compiler_helper.cmake) # Build steps add_subdirectory(source) +add_subdirectory(../source_common/comms source_common/comms) add_subdirectory(../source_common/framework source_common/framework) +add_subdirectory(../source_common/trackers source_common/trackers) diff --git a/layer_gpu_performance/README_LAYER.md b/layer_gpu_performance/README_LAYER.md new file mode 100644 index 0000000..f046cd4 --- /dev/null +++ b/layer_gpu_performance/README_LAYER.md @@ -0,0 +1,56 @@ +# Layer: GPU Performance + +This layer is a standalone performance analysis layer that can be used to +analyze the workloads that make up a single frame. + +This layer supports two modes: + +* Per workload time, read via queries +* Per workload performance counters, read via a non-API mechanism + +## What devices are supported? + +The per workload timing uses Vulkan API timer queries, and should work on any +GPU that supports the required Vulkan features. + +The per workload performance counters uses the Arm libGPUCounters library, +and requires an Arm GPU. + +## Is this layer non-invasive? 
+
+The goal of this layer is to cost the major workloads submitted via the API, in
+a way which is compatible with the way that a tile-based renderer schedules
+render passes.
+
+Under normal scheduling, tile-based renderers split render passes into two
+pieces which are independently scheduled and can overlap with other work that
+is running on the GPU. Blindly timing render passes using timer queries can
+result in confusing results because the time includes time spent processing
+unrelated workloads running in parallel.
+
+The diagram shows one possible arrangement of workloads scheduled on the GPU
+hardware queues for an Arm 5th Generation architecture GPU. We're trying to
+time render pass 1, indicated by the `1` characters in the diagram, starting a
+timer query when this render pass starts (`S`) in the binning phase queue, and
+stopping when it ends (`E`) in the main phase queue.
+
+```
+   Compute:                    222
+   Binning phase:     S 11111    3333
+   Main phase:     00000000   111111111111 E
+```
+
+In this scenario the timer query correctly reflects the elapsed time of the
+render pass, but is not an accurate measure of the cost of this workload. The
+elapsed time includes time where other workloads are running in parallel,
+indicated by the `0`, `2`, and `3` characters. It also includes time between
+the two phases where workload `1` is not running at all, because the binning
+phase work has completed, but is waiting for the main phase queue to finish an
+earlier workload.
+
+To accurately cost workloads on a tile-based renderer, which will overlap and
+run workloads in parallel if it is allowed to, the layer must inject additional
+synchronization primitives to serialize all workloads within a queue and across
+queues. This ensures that timer query values reflect the cost of individual
+workloads; however, it also means that overall frame performance will be reduced
+due to loss of workload parallelization.
diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt index 1ee65f4..bdd3091 100644 --- a/layer_gpu_performance/source/CMakeLists.txt +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -43,11 +43,22 @@ add_library( ${VK_LAYER} SHARED ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp device.cpp - instance.cpp) + instance.cpp + layer_device_functions_command_buffer.cpp + layer_device_functions_command_pool.cpp + layer_device_functions_debug.cpp + layer_device_functions_dispatch.cpp + layer_device_functions_draw_call.cpp + layer_device_functions_queue.cpp + layer_device_functions_render_pass.cpp + layer_device_functions_trace_rays.cpp + layer_device_functions_transfer.cpp + performance_comms.cpp) target_include_directories( ${VK_LAYER} PRIVATE ${PROJECT_SOURCE_DIR}/../source_common + ${PROJECT_SOURCE_DIR}/../source_third_party ${CMAKE_CURRENT_BINARY_DIR} .) @@ -59,7 +70,9 @@ lgl_set_build_options(${VK_LAYER}) target_link_libraries( ${VK_LAYER} + lib_layer_comms lib_layer_framework + lib_layer_trackers $<$:log>) if (CMAKE_BUILD_TYPE STREQUAL "Release") diff --git a/layer_gpu_performance/source/device.cpp b/layer_gpu_performance/source/device.cpp index 3371cff..571b2e4 100644 --- a/layer_gpu_performance/source/device.cpp +++ b/layer_gpu_performance/source/device.cpp @@ -29,16 +29,23 @@ #include #include +#include "comms/comms_module.hpp" #include "framework/utils.hpp" #include "device.hpp" #include "instance.hpp" /** - * @brief The dispatch lookup for all of the created Vulkan instances. + * @brief The dispatch lookup for all of the created Vulkan devices. */ static std::unordered_map> g_devices; +/* See header for documentation. */ +std::unique_ptr Device::commsModule; + +/* See header for documentation. */ +std::unique_ptr Device::commsWrapper; + /* See header for documentation. 
*/ void Device::store( VkDevice handle, @@ -94,4 +101,11 @@ Device::Device( device(_device) { initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); + + // Init the shared comms module for the first device built + if (!commsModule) + { + commsModule = std::make_unique("lglcomms"); + commsWrapper = std::make_unique(*commsModule); + } } diff --git a/layer_gpu_performance/source/device.hpp b/layer_gpu_performance/source/device.hpp index c0e1f0a..acfee7e 100644 --- a/layer_gpu_performance/source/device.hpp +++ b/layer_gpu_performance/source/device.hpp @@ -54,9 +54,12 @@ #include +#include "comms/comms_module.hpp" #include "framework/device_dispatch_table.hpp" +#include "trackers/device.hpp" #include "instance.hpp" +#include "performance_comms.hpp" /** * @brief This class implements the layer state tracker for a single device. @@ -118,7 +121,7 @@ class Device * @param instance The layer instance object this device is created with. * @param physicalDevice The physical device this logical device is for. * @param device The device handle this device is created with. - * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + * @param nlayerGetProcAddress The vkGetDeviceProcAddress function for the driver. */ Device( Instance* instance, @@ -131,7 +134,43 @@ class Device */ ~Device() = default; + /** + * @brief Callback for sending messages on frame boundary. + * + * @param message The message to send. + */ + void onFrame( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Callback for sending messages on workload submit to a queue. + * + * @param message The message to send. + */ + void onWorkloadSubmit( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Get the cumulative stats for this device. + */ + Tracker::Device& getStateTracker() + { + return stateTracker; + } + public: + /** + * @brief The driver function dispatch table. 
+ */ + DeviceDispatchTable driver {}; + +private: /** * @brief The instance this device is created with. */ @@ -148,7 +187,17 @@ class Device const VkDevice device; /** - * @brief The driver function dispatch table. + * @brief State tracker for this device. */ - DeviceDispatchTable driver {}; + Tracker::Device stateTracker; + + /** + * @brief Shared network communications module. + */ + static std::unique_ptr commsModule; + + /** + * @brief Shared network communications message encoder. + */ + static std::unique_ptr commsWrapper; }; diff --git a/layer_gpu_performance/source/device_utils.hpp b/layer_gpu_performance/source/device_utils.hpp new file mode 100644 index 0000000..eddf193 --- /dev/null +++ b/layer_gpu_performance/source/device_utils.hpp @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +#include "device.hpp" + +/** + * @brief Emit a start tag via a driver debug utils label. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param tagID The tagID to emit into the label. + */ +[[maybe_unused]] static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID +) { + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); +} diff --git a/layer_gpu_performance/source/layer_device_functions.hpp b/layer_gpu_performance/source/layer_device_functions.hpp new file mode 100644 index 0000000..8c2f8b5 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions.hpp @@ -0,0 +1,510 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + 
* furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +// Functions for command pools + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); + +// Functions for command buffers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +// Functions for render passes + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer); + +// Functions for draw calls + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +// Functions for compute dispatches + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset); + +// Commands for trace rays + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth); + + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +// Functions for debug + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer); + +// Functions for queues + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); diff --git a/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp new file mode 100644 index 0000000..ef8e920 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkAllocateCommandBuffers( + device, pAllocateInfo, pCommandBuffers); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) + { + tracker.allocateCommandBuffer( + pAllocateInfo->commandPool, pCommandBuffers[i]); + } + + return result; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo +) { + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + cmdBuffer.begin(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkBeginCommandBuffer(commandBuffer, pBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandBuffer(commandBuffer, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < commandBufferCount; i++) + { + tracker.freeCommandBuffer(commandPool, pCommandBuffers[i]); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkFreeCommandBuffers( + device, commandPool, commandBufferCount, pCommandBuffers); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store and device-wide data + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& primary = tracker.getCommandBuffer(commandBuffer); + + for (uint32_t i = 0; i < commandBufferCount; i++) + { + auto& secondary = tracker.getCommandBuffer(pCommandBuffers[i]); + primary.executeCommands(secondary); + } + + // Release the lock to call into the main driver + lock.unlock(); + layer->driver.vkCmdExecuteCommands( + commandBuffer, commandBufferCount, pCommandBuffers); +} diff --git a/layer_gpu_performance/source/layer_device_functions_command_pool.cpp b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp new file mode 100644 index 0000000..a640a90 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkCreateCommandPool( + device, pCreateInfo, pAllocator, pCommandPool); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createCommandPool(*pCommandPool); + return result; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.getCommandPool(commandPool).reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandPool(device, commandPool, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyCommandPool(commandPool); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyCommandPool(device, commandPool, pAllocator); +} diff --git a/layer_gpu_performance/source/layer_device_functions_debug.cpp b/layer_gpu_performance/source/layer_device_functions_debug.cpp new file mode 100644 index 0000000..1905193 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_debug.cpp @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pMarkerInfo->pMarkerName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pLabelInfo->pLabelName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} diff --git a/layer_gpu_performance/source/layer_device_functions_dispatch.cpp b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp new file mode 100644 index 0000000..de5ee10 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a compute dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param groupX The X size of the dispatch in groups. + * @param groupY The Y size of the dispatch in groups. + * @param groupZ The Z size of the dispatch in groups. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerDispatch( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t groupX, + int64_t groupY, + int64_t groupZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.dispatch(groupX, groupY, groupZ); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/layer_device_functions_draw_call.cpp b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp new file mode 100644 index 0000000..42350d0 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp @@ -0,0 +1,257 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + 
* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a draw call with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + */ +static void registerDrawCall( + Device* layer, + VkCommandBuffer commandBuffer +) { + auto& state = layer->getStateTracker(); + auto& stats = state.getCommandBuffer(commandBuffer).getStats(); + stats.incDrawCallCount(); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectByteCountEXT(commandBuffer, instanceCount, firstInstance, counterBuffer, counterBufferOffset, counterOffset, vertexStride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} diff --git a/layer_gpu_performance/source/layer_device_functions_queue.cpp b/layer_gpu_performance/source/layer_device_functions_queue.cpp new file mode 100644 index 0000000..a5c92e2 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_queue.cpp @@ -0,0 +1,178 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software 
without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include + +#include "utils/misc.hpp" + +#include "device.hpp" +#include "layer_device_functions.hpp" + +using json = nlohmann::json; + +using namespace std::placeholders; + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto& tracker = layer->getStateTracker(); + tracker.queuePresent(); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + json frame { + { "type", "frame" }, + { "fid", tracker.totalStats.getFrameCount() } + }; + + layer->onFrame(frame.dump()); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueuePresentKHR(queue, pPresentInfo); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence); +} diff --git a/layer_gpu_performance/source/layer_device_functions_render_pass.cpp b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp new file mode 100644 index 0000000..5d16880 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp @@ -0,0 +1,376 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" +#include "trackers/render_pass.hpp" + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2KHR(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyRenderPass(renderPass); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyRenderPass(device, renderPass, pAllocator); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... 
+ Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... + Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderPass(commandBuffer); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRendering(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderingKHR(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} diff --git a/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp new file mode 100644 index 0000000..e6df5e3 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a trace rays dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param itemsX The X size of the dispatch in work items. + * @param itemsY The Y size of the dispatch in work items. + * @param itemsZ The Z size of the dispatch in work items. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerTraceRays( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t itemsX, + int64_t itemsY, + int64_t itemsZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.traceRays(itemsX, itemsY, itemsZ); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, width, height, depth); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} \ No newline at end of file diff --git a/layer_gpu_performance/source/layer_device_functions_transfer.cpp b/layer_gpu_performance/source/layer_device_functions_transfer.cpp new file mode 100644 index 0000000..ecfaa65 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_transfer.cpp @@ -0,0 +1,619 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a transfer to a buffer with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerBufferTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t byteCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.bufferTransfer(transferType, byteCount); +} + +/** + * @brief Register a transfer to an image with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param pixelCount The number of pixels transferred. + * + * @return The assigned tagID for the workload. 
+ */ +static uint64_t registerImageTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t pixelCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.imageTransfer(transferType, pixelCount); +} + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add buffer tracking so we can turn VK_WHOLE_SIZE into bytes + int64_t byteCount = static_cast(size); + if (size == VK_WHOLE_SIZE) + { + byteCount = -2; + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Fill buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearColorImage(commandBuffer, image, imageLayout, pColor, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearDepthStencilImage(commandBuffer, image, imageLayout, pDepthStencil, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for 
documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + byteCount += static_cast(pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2KHR(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions); + 
layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2KHR(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].extent.width) + * static_cast(pRegions[i].extent.height) + * static_cast(pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageInfo2* pCopyImageInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pCopyImageInfo->pRegions[i].extent.width)
+                            * static_cast<int64_t>(pCopyImageInfo->pRegions[i].extent.height)
+                            * static_cast<int64_t>(pCopyImageInfo->pRegions[i].extent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        pixelCount);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImage2KHR(commandBuffer, pCopyImageInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer(
+    VkCommandBuffer commandBuffer,
+    VkImage srcImage,
+    VkImageLayout srcImageLayout,
+    VkBuffer dstBuffer,
+    uint32_t regionCount,
+    const VkBufferImageCopy* pRegions
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pRegions[i].imageExtent.width)
+                            * static_cast<int64_t>(pRegions[i].imageExtent.height)
+                            * static_cast<int64_t>(pRegions[i].imageExtent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    // TODO: Our usual convention is to mark the transfer using the destination
+    // type, which means this should be a bufferTransfer reporting size in
+    // bytes. Without image tracking we only have pixels, so for now we report
+    // as "Copy image" and report size in pixels.
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image to buffer",
+        pixelCount);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, dstBuffer, regionCount, pRegions);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.width)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.height)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    // TODO: Our usual convention is to mark the transfer using the destination
+    // type, which means this should be a bufferTransfer reporting size in
+    // bytes. Without image tracking we only have pixels, so for now we report
+    // as "Copy image" and report size in pixels.
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image to buffer",
+        pixelCount);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImageToBuffer2(commandBuffer, pCopyImageToBufferInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.width)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.height)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    // TODO: Our usual convention is to mark the transfer using the destination
+    // type, which means this should be a bufferTransfer reporting size in
+    // bytes. Without image tracking we only have pixels, so for now we report
+    // as "Copy image" and report size in pixels.
+ uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/performance_comms.cpp b/layer_gpu_performance/source/performance_comms.cpp new file mode 100644 index 0000000..bf04114 --- /dev/null +++ b/layer_gpu_performance/source/performance_comms.cpp @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+
+#include "performance_comms.hpp"
+
+/* See header for documentation. */
+PerformanceComms::PerformanceComms(
+    Comms::CommsInterface& _comms
+):
+    comms(_comms)
+{
+    if (comms.isConnected())
+    {
+        endpoint = comms.getEndpointID("GPUTimeline");
+    }
+}
+
+/* See header for documentation. */
+void PerformanceComms::txMessage(
+    const std::string& message)
+{
+    // Message endpoint is not available
+    if (endpoint == 0)
+    {
+        return;
+    }
+
+    auto data = std::make_unique<Comms::MessageData>(message.begin(), message.end());
+    comms.txAsync(endpoint, std::move(data));
+}
diff --git a/layer_gpu_performance/source/performance_comms.hpp b/layer_gpu_performance/source/performance_comms.hpp
new file mode 100644
index 0000000..d9f3916
--- /dev/null
+++ b/layer_gpu_performance/source/performance_comms.hpp
@@ -0,0 +1,71 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * @file Declares a simple comms encoder for the timeline layer.
+ */
+
+#pragma once
+
+#include "comms/comms_interface.hpp"
+
+/**
+ * @brief A simple message encoder for the timeline comms endpoint.
+ *
+ * TODO: This is currently a very simple implementation because we are simply
+ * passing JSON strings around. This is not the most efficient way of doing
+ * this and in future this module will be used to implement binary encoders
+ * for each specific message type that needs sending.
+ */
+class PerformanceComms
+{
+public:
+    /**
+     * @brief Construct a new encoder.
+     *
+     * @param comms The common comms module used by all services.
+     */
+    PerformanceComms(
+        Comms::CommsInterface& comms);
+
+    /**
+     * @brief Send a message to the GPU timeline endpoint service.
+     *
+     * @param message The message to send.
+     */
+    void txMessage(
+        const std::string& message);
+
+private:
+    /**
+     * @brief The endpoint ID of the service, or 0 if not found.
+     */
+    Comms::EndpointID endpoint { 0 };
+
+    /**
+     * @brief The common module for network messaging.
+ */ + Comms::CommsInterface& comms; +}; From 5c1171e1f4e1dc02fbbbab86f658b7f84bd2ce74 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 30 Dec 2024 22:40:11 +0000 Subject: [PATCH 3/4] Add layer implementation of vkCreateDevice --- layer_gpu_performance/README_LAYER.md | 110 ++++++++++++++---- layer_gpu_performance/source/CMakeLists.txt | 1 + .../source/layer_device_functions.hpp | 14 +-- .../source/layer_device_functions_debug.cpp | 8 +- .../source/layer_device_functions_queue.cpp | 8 +- .../source/layer_instance_functions.hpp | 38 ++++++ .../layer_instance_functions_device.cpp | 80 +++++++++++++ 7 files changed, 224 insertions(+), 35 deletions(-) create mode 100644 layer_gpu_performance/source/layer_instance_functions.hpp create mode 100644 layer_gpu_performance/source/layer_instance_functions_device.cpp diff --git a/layer_gpu_performance/README_LAYER.md b/layer_gpu_performance/README_LAYER.md index f046cd4..a103878 100644 --- a/layer_gpu_performance/README_LAYER.md +++ b/layer_gpu_performance/README_LAYER.md @@ -5,7 +5,7 @@ analyze the workloads that make up a single frame. This layer supports two modes: -* Per workload time, read via queries +* Per workload time, read via Vulkan API queries * Per workload performance counters, read via a non-API mechanism ## What devices are supported? @@ -23,16 +23,16 @@ a way which is compatible with the way that a tile-based renderer schedules render passes. Under normal scheduling, tile-based renderers split render passes into two -pieces which are independently scheduled and can overlap with other work that -is running on the GPU. Blindly timing render passes using timer queries can -result in confusing results because the time includes time spend processing -unrelated workloads running in parallel. +pieces which are independently scheduled and that can overlap with other work +that is running on the GPU. 
Blindly timing render passes using timer queries +can result in confusing results because the reported time might include time +spent processing unrelated workloads that happen to be running in parallel. -The diagram shows one possible arrangement of workloads scheduled on the GPU -hardware queues for an Arm 5th Generation architecture GPU. We're trying to -time render pass 1 indicated by the `1` characters in the diagram, starting a -timer query when this render pass starts (`S`) in the binning phase queue, and -stopping when it ends (`E`) in the main phase queue. +The timing diagram below shows one possible arrangement of workloads scheduled +on the GPU hardware queues for an Arm 5th Generation architecture GPU. We are +trying to time render pass 1 indicated by the `1` characters in the diagram, +starting a timer query when this render pass starts (`S`) in the binning phase +queue, and stopping when it ends (`E`) in the main phase queue. ``` Compute: 222 @@ -41,16 +41,86 @@ stopping when it ends (`E`) in the main phase queue. ``` In this scenario the timer query correctly reflects the elapsed time of the -render pass, but is not an accurate measure of cost of this workload. The -elapsed time includes time where other workloads are running in parallel, -indicated by the `0`, `2`, and `3` characters. It also includes time between -the two phases where workload `1` is not running at all, because the binning -phase work has completed, but is waiting for the main phase queue to finish an -earlier workload. +render pass, but does not give an accurate measure of its cost. The elapsed +time includes time where other workloads are running in parallel, indicated by +the `0`, `2`, and `3` characters. It also includes time between the two phases +where workload `1` is not running at all, because the binning phase work has +completed and the main phase work is stuck waiting for an earlier workload to +finish to free up the hardware. 
To accurately cost workloads on a tile-based renderer, which will overlap and run workloads in parallel if it is allowed to, the layer must inject additional -synchronization primitives to serialize all workloads within a queue and across -queues. This ensures that timer query values reflect the cost of individual -workloads, however it also means that overall frame performance will be reduced -due to loss of workload parallelization. +synchronization to serialize all workloads within a queue and across queues. +This ensures that timer query values reflect the cost of individual workloads, +however it also means that overall frame performance will be reduced due to +loss of workload parallelization. + +# Design notes + +## Dependencies + +This layer uses timeline semaphores, so requires either Vulkan 1.1 or +the `VK_KHR_timeline_semaphore` extension. + +## Implementing serialization + +Cross-queue serialization is implemented using an injected timeline semaphore. +Each submit is assigned an incrementing `ID`, and will wait for `ID - 1` in the +timeline before starting, and set `ID` in the timeline when completing. This +allows us to implement serialization using a single sync primitive. + +Serialization within a queue is implemented by injecting a full pipeline +barrier before the pre-workload timer query, ensuring that all prior work has +completed before the time is sampled. Similarly we put a full pipeline barrier +after the post-workload timer query, ensuring that no following work starts +before the time is sampled. + +## Implementing query lifetime tracking + +Timer queries are implemented using query pools. The timer write commands are +recorded into each command buffer alongside the user commands. Each timer write +command specifies the specific counter slots used in a specific query pool, so +the query pool and slot usage must be assigned when the command buffer is +recorded. + +Query pools in the layer are a managed resource. 
We allocate query pools on +demand, and maintain a free-list of query pools that have been freed and are +ready for reuse. + +Query pools are allocated with enough space for 64 query results which is, in +the best case, enough for 63 workloads (N+1 counters). This can reduce for +render passes using multi-view rendering, which allocate 1 counter slot per +view. + +Query pools are assigned to a command buffer when recording, and multiple +query pools can be assigned to a single command buffer if more query result +space is needed. Query pools are fully reset on first use in the command +buffer. Query pools are returned to the layer free-list when the command buffer +is reset or destroyed. + +### Multi-submit command buffers + +Reusable command buffers that are not one-time submit can be problematic for +this type of instrumentation. + +A single primary command buffer could be submitted multiple times. This can be +managed by serializing the workloads and ensuring that the query results are +consumed between executions. This may impact performance due to additional +serialization, but it can be made to work. + +**NOTE:** This impact of this case could be mitigated by having the layer +inject a command buffer after the user command buffer, which inserts a copy +command to copy the query results to a buffer. This buffer is owned by the +layer and can be N-buffered to avoid stalls. + +The more problematic case is the case where a single secondary command buffer +is executed multiple times from within the same primary. In this case there +is no place to solve the collision with CPU-side synchronization, and relying +on only CPU-side recording will only capture the last copy. + +### Split command buffers + +Vulkan 1.3 can split dynamic render passes over multiple command buffers, +although all parts must be part of the same queue submit call. The layer will +only emit timestamps for the final part of the render pass, and will ignore +suspend/resumes. 
diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt index bdd3091..2975722 100644 --- a/layer_gpu_performance/source/CMakeLists.txt +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -53,6 +53,7 @@ add_library( layer_device_functions_render_pass.cpp layer_device_functions_trace_rays.cpp layer_device_functions_transfer.cpp + layer_instance_functions_device.cpp performance_comms.cpp) target_include_directories( diff --git a/layer_gpu_performance/source/layer_device_functions.hpp b/layer_gpu_performance/source/layer_device_functions.hpp index 8c2f8b5..660502a 100644 --- a/layer_gpu_performance/source/layer_device_functions.hpp +++ b/layer_gpu_performance/source/layer_device_functions.hpp @@ -456,18 +456,18 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( // Functions for debug /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( VkCommandBuffer commandBuffer); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT* pLabelInfo); @@ -480,13 +480,13 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( // Functions for queues /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( VkQueue queue, uint32_t submitCount, @@ -494,7 +494,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( VkFence fence); /* See Vulkan API for documentation. 
*/ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( VkQueue queue, uint32_t submitCount, @@ -502,7 +502,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( VkFence fence); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( VkQueue queue, uint32_t submitCount, diff --git a/layer_gpu_performance/source/layer_device_functions_debug.cpp b/layer_gpu_performance/source/layer_device_functions_debug.cpp index 1905193..664d2b8 100644 --- a/layer_gpu_performance/source/layer_device_functions_debug.cpp +++ b/layer_gpu_performance/source/layer_device_functions_debug.cpp @@ -31,7 +31,7 @@ extern std::mutex g_vulkanLock; /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT* pMarkerInfo @@ -54,7 +54,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( VkCommandBuffer commandBuffer ) { @@ -76,7 +76,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT* pLabelInfo @@ -99,7 +99,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( } /* See Vulkan API for documentation. 
*/ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( VkCommandBuffer commandBuffer ) { diff --git a/layer_gpu_performance/source/layer_device_functions_queue.cpp b/layer_gpu_performance/source/layer_device_functions_queue.cpp index a5c92e2..6f435ba 100644 --- a/layer_gpu_performance/source/layer_device_functions_queue.cpp +++ b/layer_gpu_performance/source/layer_device_functions_queue.cpp @@ -38,7 +38,7 @@ using namespace std::placeholders; extern std::mutex g_vulkanLock; /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo @@ -67,7 +67,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( VkQueue queue, uint32_t submitCount, @@ -104,7 +104,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( VkQueue queue, uint32_t submitCount, @@ -141,7 +141,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( } /* See Vulkan API for documentation. 
*/
-template<>
+template <>
 VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR(
     VkQueue queue,
     uint32_t submitCount,
diff --git a/layer_gpu_performance/source/layer_instance_functions.hpp b/layer_gpu_performance/source/layer_instance_functions.hpp
new file mode 100644
index 0000000..00f93a9
--- /dev/null
+++ b/layer_gpu_performance/source/layer_instance_functions.hpp
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include <vulkan/vulkan.h>
+
+#include "framework/utils.hpp"
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice(
+    VkPhysicalDevice physicalDevice,
+    const VkDeviceCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkDevice* pDevice);
diff --git a/layer_gpu_performance/source/layer_instance_functions_device.cpp b/layer_gpu_performance/source/layer_instance_functions_device.cpp
new file mode 100644
index 0000000..f31143b
--- /dev/null
+++ b/layer_gpu_performance/source/layer_instance_functions_device.cpp
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <mutex>
+
+#include "framework/manual_functions.hpp"
+
+#include "device.hpp"
+#include "layer_instance_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice(
+    VkPhysicalDevice physicalDevice,
+    const VkDeviceCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkDevice* pDevice
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Instance::retrieve(physicalDevice);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+
+    auto* chainInfo = getChainInfo(pCreateInfo);
+    auto fpGetInstanceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+    auto fpGetDeviceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr;
+
+    auto extensions = getDeviceExtensionList(
+        layer->instance, physicalDevice, pCreateInfo);
+
+    auto fpCreateDevice = reinterpret_cast<PFN_vkCreateDevice>(
+        fpGetInstanceProcAddr(layer->instance, "vkCreateDevice"));
+    if (!fpCreateDevice)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    // Advance the link info for the next element on the chain
+    chainInfo->u.pLayerInfo = chainInfo->u.pLayerInfo->pNext;
+    auto res = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
+    if (res != VK_SUCCESS)
+    {
+        return res;
+    }
+
+    // Retake the lock to access layer-wide global store
+    lock.lock();
+    auto device = std::make_unique<Device>(layer, physicalDevice, *pDevice, fpGetDeviceProcAddr);
+    Device::store(*pDevice, std::move(device));
+
+    return VK_SUCCESS;
+}
From f928a2d4282eb6a06d42f5d044507200c6d58f3c Mon Sep 17 00:00:00 2001
From: Peter Harris
Date: Tue, 7 Jan 2025 13:37:00 +0000
Subject: [PATCH 4/4] Merge Python changes

---
 layer_gpu_performance/android_install.json |   4 +
 layer_gpu_performance/android_install.py   | 254 ---------------------
 2
files changed, 4 insertions(+), 254 deletions(-) create mode 100644 layer_gpu_performance/android_install.json delete mode 100644 layer_gpu_performance/android_install.py diff --git a/layer_gpu_performance/android_install.json b/layer_gpu_performance/android_install.json new file mode 100644 index 0000000..9d933c0 --- /dev/null +++ b/layer_gpu_performance/android_install.json @@ -0,0 +1,4 @@ +{ + "layer_name": "VK_LAYER_LGL_GPUPERFORMANCE", + "layer_binary": "libVkLayerGPUPerformance.so" +} diff --git a/layer_gpu_performance/android_install.py b/layer_gpu_performance/android_install.py deleted file mode 100644 index 35780ea..0000000 --- a/layer_gpu_performance/android_install.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# ----------------------------------------------------------------------------- -# Copyright (c) 2024 Arm Limited -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the 'Software'), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-# ----------------------------------------------------------------------------- -''' -A simple installer for Android Vulkan layers. -''' - -import argparse -import os -import shlex -import subprocess as sp -import sys -from typing import Any, Optional - -# Android temp directory -ANDROID_TMP_DIR = '/data/local/tmp/' - -# Expected layer names -EXPECTED_VULKAN_LAYER_NAME = 'VK_LAYER_LGL_GPUPERFORMANCE' -EXPECTED_VULKAN_LAYER_FILE = 'libVkLayerGPUPerformance.so' - - -class Device: - ''' - A basic wrapper around adb, allowing a specific device to be registered. - - Attributes: - device: The name of the device to call, or None for non-specific use. - ''' - - def adb_quiet(self, *args: str) -> None: - ''' - Call `adb` to run a command, but ignore output and errors. - - Args: - *args : List of command line parameters. - ''' - commands = ['adb'] - commands.extend(args) - sp.run(commands, stdout=sp.DEVNULL, stderr=sp.DEVNULL, check=False) - - def adb(self, *args: str, **kwargs: Any) -> str: - ''' - Call `adb` to run command, and capture output and results. - - Args: - *args: List of command line parameters. - **kwargs: text: Is output is text, or binary? - shell: Use the host shell? - quote: Quote arguments before forwarding - - Returns: - The contents of stdout. - - Raises: - CalledProcessError: The subprocess was not successfully executed. 
- ''' - commands = ['adb'] # type: Any - commands.extend(args) - - text = kwargs.get('text', True) - shell = kwargs.get('shell', False) - quote = kwargs.get('quote', False) - - # Run on the host shell - if shell: - # Unix shells need a flattened command for shell commands - if os.name != 'nt': - quoted_commands = [] - for command in commands: - if command != '>': - command = shlex.quote(command) - quoted_commands.append(command) - commands = ' '.join(quoted_commands) - - # Run on the device but with shell argument quoting - if quote: - for i, command in enumerate(commands): - commands[i] = shlex.quote(command) - - rep = sp.run(commands, check=True, shell=shell, stdout=sp.PIPE, - stderr=sp.PIPE, universal_newlines=text) - - return rep.stdout - - def adb_run_as(self, package: str, - *args: str, quiet: bool = False) -> Optional[str]: - ''' - Call `adb` to run command as a package using `run-as` or as root, - if root is accessible. If command will be run as root, this function - will change CWD to the package data directory before executing the - command. - - Args: - package: Package name to run-as or change CWD to. - *args: List of command line parameters. - quiet: If True, ignores output from adb. - - Returns: - The contents of stdout or None if quiet=True. - - Raises: - CalledProcessError: The subprocess was not successfully executed. - ''' - command = ['shell', 'run-as', package] - command.extend(args) - - if quiet: - self.adb_quiet(*command) - return None - - return self.adb(*command) - - -def enable_vulkan_debug_layer( - device: Device, package: str, layer: str) -> None: - ''' - Args: - device: The device instance. - package: The Android package name. - layer: The layer file path name. 
- ''' - - print('\nInstalling Vulkan debug layer') - - layer = os.path.normpath(layer) - layer_base = os.path.basename(os.path.normpath(layer)) - - device.adb('push', layer, ANDROID_TMP_DIR) - - device.adb_run_as(package, 'cp', ANDROID_TMP_DIR + layer_base, '.') - - device.adb('shell', 'settings', 'put', 'global', - 'enable_gpu_debug_layers', '1') - - device.adb('shell', 'settings', 'put', 'global', - 'gpu_debug_app', package) - - device.adb('shell', 'settings', 'put', 'global', - 'gpu_debug_layers', EXPECTED_VULKAN_LAYER_NAME) - - -def disable_vulkan_debug_layer( - device: Device, package: str, layer: str) -> None: - ''' - Clean up the Vulkan layer installation. - - Args: - device: The device instance. - args: The command arguments. - ''' - print('\nRemoving Vulkan debug layer') - - layer_base = os.path.basename(os.path.normpath(layer)) - - device.adb('shell', 'settings', 'delete', 'global', - 'enable_gpu_debug_layers') - - device.adb('shell', 'settings', 'delete', 'global', - 'gpu_debug_app') - - device.adb('shell', 'settings', 'delete', 'global', - 'gpu_debug_layers') - - device.adb_run_as(package, 'rm', layer_base, quiet=True) - - -def get_layer() -> Optional[str]: - ''' - Find the debug layer to use in the build directory. - - Returns: - The part to the library to use. - ''' - - base_dir = './build_arm64/source/' - - # TODO: If we want to use symbolized layer we need to rename it - lib = None - - for path in os.listdir(base_dir): - # Match symbolized library first so we don't use it - if path.endswith('_sym.so'): - _ = os.path.join(base_dir, path) - elif path.endswith('.so'): - lib = os.path.join(base_dir, path) - - return lib - - -def parse_command_line() -> argparse.Namespace: - ''' - Parse the command line. - - Returns: - The parsed command line container. 
- ''' - parser = argparse.ArgumentParser() - - parser.add_argument('--package', required=True, - help='Android package name') - - return parser.parse_args() - - -def main() -> int: - ''' - Script main function. - - Returns: - Process return code. - ''' - args = parse_command_line() - - device = Device() - layer = get_layer() - if not layer: - print('ERROR: Layer binary not found') - return 1 - - enable_vulkan_debug_layer(device, args.package, layer) - - input('Press Enter to disable layers') - - disable_vulkan_debug_layer(device, args.package, layer) - - return 0 - - -if __name__ == '__main__': - try: - sys.exit(main()) - except KeyboardInterrupt: - print('\n\nERROR: User interrupted execution')