From 0e8c4ba654e39b07a4c56489acda224f4a1a5f7a Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 23 Dec 2024 22:00:40 +0000 Subject: [PATCH 1/4] Commit empty performance layer --- layer_gpu_performance/CMakeLists.txt | 39 +++ layer_gpu_performance/android_build.sh | 82 +++++++ layer_gpu_performance/android_install.py | 254 ++++++++++++++++++++ layer_gpu_performance/source/CMakeLists.txt | 72 ++++++ layer_gpu_performance/source/device.cpp | 97 ++++++++ layer_gpu_performance/source/device.hpp | 154 ++++++++++++ layer_gpu_performance/source/instance.cpp | 80 ++++++ layer_gpu_performance/source/instance.hpp | 134 +++++++++++ layer_gpu_performance/source/version.hpp.in | 36 +++ 9 files changed, 948 insertions(+) create mode 100644 layer_gpu_performance/CMakeLists.txt create mode 100644 layer_gpu_performance/android_build.sh create mode 100644 layer_gpu_performance/android_install.py create mode 100644 layer_gpu_performance/source/CMakeLists.txt create mode 100644 layer_gpu_performance/source/device.cpp create mode 100644 layer_gpu_performance/source/device.hpp create mode 100644 layer_gpu_performance/source/instance.cpp create mode 100644 layer_gpu_performance/source/instance.hpp create mode 100644 layer_gpu_performance/source/version.hpp.in diff --git a/layer_gpu_performance/CMakeLists.txt b/layer_gpu_performance/CMakeLists.txt new file mode 100644 index 0000000..34d05e0 --- /dev/null +++ b/layer_gpu_performance/CMakeLists.txt @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the 
Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- + +cmake_minimum_required(VERSION 3.17) + +set(CMAKE_CXX_STANDARD 20) + +project(VkLayerGPUPerformance VERSION 1.0.0) + +# Common configuration +set(LGL_LOG_TAG "VkLayerGPUPerformance") +set(LGL_CONFIG_TRACE 0) +set(LGL_CONFIG_LOG 1) + +include(../source_common/compiler_helper.cmake) + +# Build steps +add_subdirectory(source) +add_subdirectory(../source_common/framework source_common/framework) diff --git a/layer_gpu_performance/android_build.sh b/layer_gpu_performance/android_build.sh new file mode 100644 index 0000000..960b2b0 --- /dev/null +++ b/layer_gpu_performance/android_build.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MIT +# ---------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following 
conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Configuration + +# Exit immediately if any component command errors +set -e + +BUILD_DIR_64=build_arm64 +BUILD_DIR_PACK=build_package + +# ---------------------------------------------------------------------------- +# Process command line options +if [ "$#" -lt 1 ]; then + BUILD_TYPE=Release +else + BUILD_TYPE=$1 +fi + +# Process command line options +if [ "$#" -lt 2 ]; then + PACKAGE=0 +else + PACKAGE=$2 +fi + +if [ "${PACKAGE}" -gt "0" ]; then + echo "Building a ${BUILD_TYPE} build with packaging" +else + echo "Building a ${BUILD_TYPE} build without packaging" +fi + +# ---------------------------------------------------------------------------- +# Build the 64-bit layer +mkdir -p ${BUILD_DIR_64} +pushd ${BUILD_DIR_64} + +cmake \ + -DCMAKE_SYSTEM_NAME=Android \ + -DANDROID_PLATFORM=29 \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_TOOLCHAIN=clang \ + -DANDROID_STL=c++_static \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ + .. 
+ +make -j1 + +popd + +# ---------------------------------------------------------------------------- +# Build the release package +if [ "${PACKAGE}" -gt "0" ]; then + # Setup the package directories + mkdir -p ${BUILD_DIR_PACK}/bin/android/arm64 + + # Install the 64-bit layer + cp ${BUILD_DIR_64}/source/*.so ${BUILD_DIR_PACK}/bin/android/arm64 +fi diff --git a/layer_gpu_performance/android_install.py b/layer_gpu_performance/android_install.py new file mode 100644 index 0000000..35780ea --- /dev/null +++ b/layer_gpu_performance/android_install.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the 'Software'), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- +''' +A simple installer for Android Vulkan layers. 
+''' + +import argparse +import os +import shlex +import subprocess as sp +import sys +from typing import Any, Optional + +# Android temp directory +ANDROID_TMP_DIR = '/data/local/tmp/' + +# Expected layer names +EXPECTED_VULKAN_LAYER_NAME = 'VK_LAYER_LGL_GPUPERFORMANCE' +EXPECTED_VULKAN_LAYER_FILE = 'libVkLayerGPUPerformance.so' + + +class Device: + ''' + A basic wrapper around adb, allowing a specific device to be registered. + + Attributes: + device: The name of the device to call, or None for non-specific use. + ''' + + def adb_quiet(self, *args: str) -> None: + ''' + Call `adb` to run a command, but ignore output and errors. + + Args: + *args : List of command line parameters. + ''' + commands = ['adb'] + commands.extend(args) + sp.run(commands, stdout=sp.DEVNULL, stderr=sp.DEVNULL, check=False) + + def adb(self, *args: str, **kwargs: Any) -> str: + ''' + Call `adb` to run command, and capture output and results. + + Args: + *args: List of command line parameters. + **kwargs: text: Is output is text, or binary? + shell: Use the host shell? + quote: Quote arguments before forwarding + + Returns: + The contents of stdout. + + Raises: + CalledProcessError: The subprocess was not successfully executed. 
+ ''' + commands = ['adb'] # type: Any + commands.extend(args) + + text = kwargs.get('text', True) + shell = kwargs.get('shell', False) + quote = kwargs.get('quote', False) + + # Run on the host shell + if shell: + # Unix shells need a flattened command for shell commands + if os.name != 'nt': + quoted_commands = [] + for command in commands: + if command != '>': + command = shlex.quote(command) + quoted_commands.append(command) + commands = ' '.join(quoted_commands) + + # Run on the device but with shell argument quoting + if quote: + for i, command in enumerate(commands): + commands[i] = shlex.quote(command) + + rep = sp.run(commands, check=True, shell=shell, stdout=sp.PIPE, + stderr=sp.PIPE, universal_newlines=text) + + return rep.stdout + + def adb_run_as(self, package: str, + *args: str, quiet: bool = False) -> Optional[str]: + ''' + Call `adb` to run command as a package using `run-as` or as root, + if root is accessible. If command will be run as root, this function + will change CWD to the package data directory before executing the + command. + + Args: + package: Package name to run-as or change CWD to. + *args: List of command line parameters. + quiet: If True, ignores output from adb. + + Returns: + The contents of stdout or None if quiet=True. + + Raises: + CalledProcessError: The subprocess was not successfully executed. + ''' + command = ['shell', 'run-as', package] + command.extend(args) + + if quiet: + self.adb_quiet(*command) + return None + + return self.adb(*command) + + +def enable_vulkan_debug_layer( + device: Device, package: str, layer: str) -> None: + ''' + Args: + device: The device instance. + package: The Android package name. + layer: The layer file path name. 
+ ''' + + print('\nInstalling Vulkan debug layer') + + layer = os.path.normpath(layer) + layer_base = os.path.basename(os.path.normpath(layer)) + + device.adb('push', layer, ANDROID_TMP_DIR) + + device.adb_run_as(package, 'cp', ANDROID_TMP_DIR + layer_base, '.') + + device.adb('shell', 'settings', 'put', 'global', + 'enable_gpu_debug_layers', '1') + + device.adb('shell', 'settings', 'put', 'global', + 'gpu_debug_app', package) + + device.adb('shell', 'settings', 'put', 'global', + 'gpu_debug_layers', EXPECTED_VULKAN_LAYER_NAME) + + +def disable_vulkan_debug_layer( + device: Device, package: str, layer: str) -> None: + ''' + Clean up the Vulkan layer installation. + + Args: + device: The device instance. + args: The command arguments. + ''' + print('\nRemoving Vulkan debug layer') + + layer_base = os.path.basename(os.path.normpath(layer)) + + device.adb('shell', 'settings', 'delete', 'global', + 'enable_gpu_debug_layers') + + device.adb('shell', 'settings', 'delete', 'global', + 'gpu_debug_app') + + device.adb('shell', 'settings', 'delete', 'global', + 'gpu_debug_layers') + + device.adb_run_as(package, 'rm', layer_base, quiet=True) + + +def get_layer() -> Optional[str]: + ''' + Find the debug layer to use in the build directory. + + Returns: + The part to the library to use. + ''' + + base_dir = './build_arm64/source/' + + # TODO: If we want to use symbolized layer we need to rename it + lib = None + + for path in os.listdir(base_dir): + # Match symbolized library first so we don't use it + if path.endswith('_sym.so'): + _ = os.path.join(base_dir, path) + elif path.endswith('.so'): + lib = os.path.join(base_dir, path) + + return lib + + +def parse_command_line() -> argparse.Namespace: + ''' + Parse the command line. + + Returns: + The parsed command line container. 
+ ''' + parser = argparse.ArgumentParser() + + parser.add_argument('--package', required=True, + help='Android package name') + + return parser.parse_args() + + +def main() -> int: + ''' + Script main function. + + Returns: + Process return code. + ''' + args = parse_command_line() + + device = Device() + layer = get_layer() + if not layer: + print('ERROR: Layer binary not found') + return 1 + + enable_vulkan_debug_layer(device, args.package, layer) + + input('Press Enter to disable layers') + + disable_vulkan_debug_layer(device, args.package, layer) + + return 0 + + +if __name__ == '__main__': + try: + sys.exit(main()) + except KeyboardInterrupt: + print('\n\nERROR: User interrupted execution') diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt new file mode 100644 index 0000000..1ee65f4 --- /dev/null +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- + +# Set output file names +if (CMAKE_BUILD_TYPE STREQUAL "Release") + set(VK_LAYER VkLayerGPUPerformance_sym) + set(VK_LAYER_STRIP libVkLayerGPUPerformance.so) +else() + set(VK_LAYER VkLayerGPUPerformance) +endif() + +# Set strings used by configure +set(LGL_LAYER_NAME_STR "VK_LAYER_LGL_GPUPERFORMANCE") +set(LGL_LAYER_DESC_STR "VkLayerGPUPerformance by LGL") + +# Vulkan layer library +configure_file( + version.hpp.in + version.hpp + ESCAPE_QUOTES @ONLY) + +add_library( + ${VK_LAYER} SHARED + ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp + device.cpp + instance.cpp) + +target_include_directories( + ${VK_LAYER} PRIVATE + ${PROJECT_SOURCE_DIR}/../source_common + ${CMAKE_CURRENT_BINARY_DIR} + .) 
+ +target_include_directories( + ${VK_LAYER} SYSTEM PRIVATE + ../../khronos/vulkan/include) + +lgl_set_build_options(${VK_LAYER}) + +target_link_libraries( + ${VK_LAYER} + lib_layer_framework + $<$:log>) + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_command( + TARGET "${VK_LAYER}" POST_BUILD + DEPENDS "${VK_LAYER}" + COMMAND ${CMAKE_STRIP} + ARGS --strip-all -o ${VK_LAYER_STRIP} $ + COMMENT "Stripped lib${VK_LAYER}.so to ${VK_LAYER_STRIP}") +endif() diff --git a/layer_gpu_performance/source/device.cpp b/layer_gpu_performance/source/device.cpp new file mode 100644 index 0000000..3371cff --- /dev/null +++ b/layer_gpu_performance/source/device.cpp @@ -0,0 +1,97 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include + +#include "framework/utils.hpp" + +#include "device.hpp" +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ +static std::unordered_map> g_devices; + +/* See header for documentation. */ +void Device::store( + VkDevice handle, + std::unique_ptr device +) { + void* key = getDispatchKey(handle); + g_devices.insert({ key, std::move(device) }); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkQueue handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkCommandBuffer handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +void Device::destroy( + Device* device +) { + g_devices.erase(getDispatchKey(device)); +} + +/* See header for documentation. 
*/ +Device::Device( + Instance* _instance, + VkPhysicalDevice _physicalDevice, + VkDevice _device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress +): + instance(_instance), + physicalDevice(_physicalDevice), + device(_device) +{ + initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); +} diff --git a/layer_gpu_performance/source/device.hpp b/layer_gpu_performance/source/device.hpp new file mode 100644 index 0000000..c0e1f0a --- /dev/null +++ b/layer_gpu_performance/source/device.hpp @@ -0,0 +1,154 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares the root class for layer management of VkDevice objects. 
+ * + * Role summary + * ============ + * + * Devices represent the core context used by the application to connect to the + * underlying graphics driver. A device object is the dispatch root for the + * Vulkan driver, so device commands all take some form of dispatchable handle + * that can be resolved into a unique per-device key. For the driver this key + * would simply be a pointer directly to the driver-internal device object, but + * for our layer we use a device dispatch key as an index in to the map to find + * the layer's driver object. + * + * Key properties + * ============== + * + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include + +#include "framework/device_dispatch_table.hpp" + +#include "instance.hpp" + +/** + * @brief This class implements the layer state tracker for a single device. + */ +class Device +{ +public: + /** + * @brief Store a new device into the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect key. + * @param device The @c Device object to store. + */ + static void store( + VkDevice handle, + std::unique_ptr device); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkDevice handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. 
+ * + * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkQueue handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkCommandBuffer handle); + + /** + * @brief Drop a device from the global store of dispatchable devices. + * + * @param device The device to drop. + */ + static void destroy( + Device* device); + + /** + * @brief Create a new layer device object. + * + * @param instance The layer instance object this device is created with. + * @param physicalDevice The physical device this logical device is for. + * @param device The device handle this device is created with. + * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + */ + Device( + Instance* instance, + VkPhysicalDevice physicalDevice, + VkDevice device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress); + + /** + * @brief Destroy this layer device object. + */ + ~Device() = default; + +public: + /** + * @brief The instance this device is created with. + */ + const Instance* instance; + + /** + * @brief The physical device this device is created with. + */ + const VkPhysicalDevice physicalDevice; + + /** + * @brief The device handle this device is created with. + */ + const VkDevice device; + + /** + * @brief The driver function dispatch table. 
+ */ + DeviceDispatchTable driver {}; +}; diff --git a/layer_gpu_performance/source/instance.cpp b/layer_gpu_performance/source/instance.cpp new file mode 100644 index 0000000..0b62857 --- /dev/null +++ b/layer_gpu_performance/source/instance.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" + +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ +static std::unordered_map> g_instances; + +/* See header for documentation. 
*/ +void Instance::store( + VkInstance handle, + std::unique_ptr& instance +) { + void* key = getDispatchKey(handle); + g_instances.insert({ key, std::move(instance) }); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkInstance handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkPhysicalDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +void Instance::destroy( + Instance* instance +) { + g_instances.erase(getDispatchKey(instance->instance)); +} + +/* See header for documentation. */ +Instance::Instance( + VkInstance _instance, + PFN_vkGetInstanceProcAddr _nlayerGetProcAddress +) : + instance(_instance), + nlayerGetProcAddress(_nlayerGetProcAddress) +{ + initDriverInstanceDispatchTable(instance, nlayerGetProcAddress, driver); +} diff --git a/layer_gpu_performance/source/instance.hpp b/layer_gpu_performance/source/instance.hpp new file mode 100644 index 0000000..fc6af6b --- /dev/null +++ b/layer_gpu_performance/source/instance.hpp @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file + * Declares the root class for layer management of VkInstance objects. + * + * Role summary + * ============ + * + * Instances represent the core context used by the application to connect to + * the OS graphics subsystem prior to connection to a specific device instance. + * An instance object is the dispatch root for the Vulkan subsystem, so + * instance commands all take some form of dispatchable handle that can be + * resolved into a unique per-instance key. For the driver this key would + * simply be a pointer directly to the driver-internal instance object, but for + * our layer we use a instance dispatch key as an index in to the map to find + * the layer's instance object. + * + * Key properties + * ============== + * + * Vulkan instances are designed to be used concurrently by multiple + * application threads. An application can have multiple concurrent instances, + * and use each instance from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "framework/instance_dispatch_table.hpp" + +/** + * @brief This class implements the layer state tracker for a single instance. + */ +class Instance +{ +public: + /** + * @brief Store a new instance into the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect key. + * @param instance The @c Instance object to store. + */ + static void store( + VkInstance handle, + std::unique_ptr& instance); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkInstance handle); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkPhysicalDevice handle); + + /** + * @brief Drop an instance from the global store of dispatchable instances. + * + * @param instance The instance to drop. + */ + static void destroy( + Instance* instance); + + /** + * @brief Create a new layer instance object. + * + * @param instance The instance handle this instance is created with. + * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + */ + Instance( + VkInstance instance, + PFN_vkGetInstanceProcAddr nlayerGetProcAddress); + +public: + /** + * @brief The instance handle this instance is created with. + */ + VkInstance instance; + + /** + * @brief The next layer's \c vkGetInstanceProcAddr() function pointer. + */ + PFN_vkGetInstanceProcAddr nlayerGetProcAddress; + + /** + * @brief The driver function dispatch table. 
+ */ + InstanceDispatchTable driver {}; +}; diff --git a/layer_gpu_performance/source/version.hpp.in b/layer_gpu_performance/source/version.hpp.in new file mode 100644 index 0000000..5fcb9c3 --- /dev/null +++ b/layer_gpu_performance/source/version.hpp.in @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Placeholder templates that are populated by CMake during configure. 
+ */ + +#pragma once + +#define LGL_VER_MAJOR @PROJECT_VERSION_MAJOR@ +#define LGL_VER_MINOR @PROJECT_VERSION_MINOR@ +#define LGL_VER_PATCH @PROJECT_VERSION_PATCH@ +#define LGL_LAYER_NAME "@LGL_LAYER_NAME_STR@" +#define LGL_LAYER_DESC "@LGL_LAYER_DESC_STR@" From 6e3953e1c7249cc851f3789609058c99d0e47262 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 23 Dec 2024 22:33:40 +0000 Subject: [PATCH 2/4] Start with the timeline layer as a starting point --- layer_gpu_performance/CMakeLists.txt | 2 + layer_gpu_performance/README_LAYER.md | 56 ++ layer_gpu_performance/source/CMakeLists.txt | 15 +- layer_gpu_performance/source/device.cpp | 16 +- layer_gpu_performance/source/device.hpp | 55 +- layer_gpu_performance/source/device_utils.hpp | 56 ++ .../source/layer_device_functions.hpp | 510 +++++++++++++++ .../layer_device_functions_command_buffer.cpp | 160 +++++ .../layer_device_functions_command_pool.cpp | 103 +++ .../source/layer_device_functions_debug.cpp | 121 ++++ .../layer_device_functions_dispatch.cpp | 167 +++++ .../layer_device_functions_draw_call.cpp | 257 ++++++++ .../source/layer_device_functions_queue.cpp | 178 +++++ .../layer_device_functions_render_pass.cpp | 376 +++++++++++ .../layer_device_functions_trace_rays.cpp | 130 ++++ .../layer_device_functions_transfer.cpp | 619 ++++++++++++++++++ .../source/performance_comms.cpp | 54 ++ .../source/performance_comms.hpp | 71 ++ 18 files changed, 2941 insertions(+), 5 deletions(-) create mode 100644 layer_gpu_performance/README_LAYER.md create mode 100644 layer_gpu_performance/source/device_utils.hpp create mode 100644 layer_gpu_performance/source/layer_device_functions.hpp create mode 100644 layer_gpu_performance/source/layer_device_functions_command_buffer.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_command_pool.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_debug.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_dispatch.cpp 
create mode 100644 layer_gpu_performance/source/layer_device_functions_draw_call.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_queue.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_render_pass.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_trace_rays.cpp create mode 100644 layer_gpu_performance/source/layer_device_functions_transfer.cpp create mode 100644 layer_gpu_performance/source/performance_comms.cpp create mode 100644 layer_gpu_performance/source/performance_comms.hpp diff --git a/layer_gpu_performance/CMakeLists.txt b/layer_gpu_performance/CMakeLists.txt index 34d05e0..625064e 100644 --- a/layer_gpu_performance/CMakeLists.txt +++ b/layer_gpu_performance/CMakeLists.txt @@ -36,4 +36,6 @@ include(../source_common/compiler_helper.cmake) # Build steps add_subdirectory(source) +add_subdirectory(../source_common/comms source_common/comms) add_subdirectory(../source_common/framework source_common/framework) +add_subdirectory(../source_common/trackers source_common/trackers) diff --git a/layer_gpu_performance/README_LAYER.md b/layer_gpu_performance/README_LAYER.md new file mode 100644 index 0000000..f046cd4 --- /dev/null +++ b/layer_gpu_performance/README_LAYER.md @@ -0,0 +1,56 @@ +# Layer: GPU Performance + +This layer is a standalone performance analysis layer that can be used to +analyze the workloads that make up a single frame. + +This layer supports two modes: + +* Per workload time, read via queries +* Per workload performance counters, read via a non-API mechanism + +## What devices are supported? + +The per workload timing uses Vulkan API timer queries, and should work on any +GPU that supports the required Vulkan features. + +The per workload performance counters uses the Arm libGPUCounters library, +and requires an Arm GPU. + +## Is this layer non-invasive? 
+
+The goal of this layer is to cost the major workloads submitted via the API, in
+a way which is compatible with the way that a tile-based renderer schedules
+render passes.
+
+Under normal scheduling, tile-based renderers split render passes into two
+pieces which are independently scheduled and can overlap with other work that
+is running on the GPU. Blindly timing render passes using timer queries can
+result in confusing results because the time includes time spent processing
+unrelated workloads running in parallel.
+
+The diagram shows one possible arrangement of workloads scheduled on the GPU
+hardware queues for an Arm 5th Generation architecture GPU. We're trying to
+time render pass 1, indicated by the `1` characters in the diagram, starting a
+timer query when this render pass starts (`S`) in the binning phase queue, and
+stopping when it ends (`E`) in the main phase queue.
+
+```
+   Compute:                    222
+   Binning phase:     S 11111    3333
+   Main phase:     00000000   111111111111 E
+```
+
+In this scenario the timer query correctly reflects the elapsed time of the
+render pass, but is not an accurate measure of the cost of this workload. The
+elapsed time includes time where other workloads are running in parallel,
+indicated by the `0`, `2`, and `3` characters. It also includes time between
+the two phases where workload `1` is not running at all, because the binning
+phase work has completed, but is waiting for the main phase queue to finish an
+earlier workload.
+
+To accurately cost workloads on a tile-based renderer, which will overlap and
+run workloads in parallel if it is allowed to, the layer must inject additional
+synchronization primitives to serialize all workloads within a queue and across
+queues. This ensures that timer query values reflect the cost of individual
+workloads; however, it also means that overall frame performance will be reduced
+due to loss of workload parallelization.
diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt index 1ee65f4..bdd3091 100644 --- a/layer_gpu_performance/source/CMakeLists.txt +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -43,11 +43,22 @@ add_library( ${VK_LAYER} SHARED ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp device.cpp - instance.cpp) + instance.cpp + layer_device_functions_command_buffer.cpp + layer_device_functions_command_pool.cpp + layer_device_functions_debug.cpp + layer_device_functions_dispatch.cpp + layer_device_functions_draw_call.cpp + layer_device_functions_queue.cpp + layer_device_functions_render_pass.cpp + layer_device_functions_trace_rays.cpp + layer_device_functions_transfer.cpp + performance_comms.cpp) target_include_directories( ${VK_LAYER} PRIVATE ${PROJECT_SOURCE_DIR}/../source_common + ${PROJECT_SOURCE_DIR}/../source_third_party ${CMAKE_CURRENT_BINARY_DIR} .) @@ -59,7 +70,9 @@ lgl_set_build_options(${VK_LAYER}) target_link_libraries( ${VK_LAYER} + lib_layer_comms lib_layer_framework + lib_layer_trackers $<$:log>) if (CMAKE_BUILD_TYPE STREQUAL "Release") diff --git a/layer_gpu_performance/source/device.cpp b/layer_gpu_performance/source/device.cpp index 3371cff..571b2e4 100644 --- a/layer_gpu_performance/source/device.cpp +++ b/layer_gpu_performance/source/device.cpp @@ -29,16 +29,23 @@ #include #include +#include "comms/comms_module.hpp" #include "framework/utils.hpp" #include "device.hpp" #include "instance.hpp" /** - * @brief The dispatch lookup for all of the created Vulkan instances. + * @brief The dispatch lookup for all of the created Vulkan devices. */ static std::unordered_map> g_devices; +/* See header for documentation. */ +std::unique_ptr Device::commsModule; + +/* See header for documentation. */ +std::unique_ptr Device::commsWrapper; + /* See header for documentation. 
*/ void Device::store( VkDevice handle, @@ -94,4 +101,11 @@ Device::Device( device(_device) { initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); + + // Init the shared comms module for the first device built + if (!commsModule) + { + commsModule = std::make_unique("lglcomms"); + commsWrapper = std::make_unique(*commsModule); + } } diff --git a/layer_gpu_performance/source/device.hpp b/layer_gpu_performance/source/device.hpp index c0e1f0a..acfee7e 100644 --- a/layer_gpu_performance/source/device.hpp +++ b/layer_gpu_performance/source/device.hpp @@ -54,9 +54,12 @@ #include +#include "comms/comms_module.hpp" #include "framework/device_dispatch_table.hpp" +#include "trackers/device.hpp" #include "instance.hpp" +#include "performance_comms.hpp" /** * @brief This class implements the layer state tracker for a single device. @@ -118,7 +121,7 @@ class Device * @param instance The layer instance object this device is created with. * @param physicalDevice The physical device this logical device is for. * @param device The device handle this device is created with. - * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + * @param nlayerGetProcAddress The vkGetDeviceProcAddress function for the driver. */ Device( Instance* instance, @@ -131,7 +134,43 @@ class Device */ ~Device() = default; + /** + * @brief Callback for sending messages on frame boundary. + * + * @param message The message to send. + */ + void onFrame( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Callback for sending messages on workload submit to a queue. + * + * @param message The message to send. + */ + void onWorkloadSubmit( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Get the cumulative stats for this device. + */ + Tracker::Device& getStateTracker() + { + return stateTracker; + } + public: + /** + * @brief The driver function dispatch table. 
+ */ + DeviceDispatchTable driver {}; + +private: /** * @brief The instance this device is created with. */ @@ -148,7 +187,17 @@ class Device const VkDevice device; /** - * @brief The driver function dispatch table. + * @brief State tracker for this device. */ - DeviceDispatchTable driver {}; + Tracker::Device stateTracker; + + /** + * @brief Shared network communications module. + */ + static std::unique_ptr commsModule; + + /** + * @brief Shared network communications message encoder. + */ + static std::unique_ptr commsWrapper; }; diff --git a/layer_gpu_performance/source/device_utils.hpp b/layer_gpu_performance/source/device_utils.hpp new file mode 100644 index 0000000..eddf193 --- /dev/null +++ b/layer_gpu_performance/source/device_utils.hpp @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +#include "device.hpp" + +/** + * @brief Emit a start tag via a driver debug utils label. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param tagID The tagID to emit into the label. + */ +[[maybe_unused]] static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID +) { + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); +} diff --git a/layer_gpu_performance/source/layer_device_functions.hpp b/layer_gpu_performance/source/layer_device_functions.hpp new file mode 100644 index 0000000..8c2f8b5 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions.hpp @@ -0,0 +1,510 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + 
* furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +// Functions for command pools + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); + +// Functions for command buffers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +// Functions for render passes + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer); + +// Functions for draw calls + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +// Functions for compute dispatches + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset); + +// Commands for trace rays + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth); + + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +// Functions for debug + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer); + +// Functions for queues + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); diff --git a/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp new file mode 100644 index 0000000..ef8e920 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkAllocateCommandBuffers( + device, pAllocateInfo, pCommandBuffers); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) + { + tracker.allocateCommandBuffer( + pAllocateInfo->commandPool, pCommandBuffers[i]); + } + + return result; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo +) { + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + cmdBuffer.begin(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkBeginCommandBuffer(commandBuffer, pBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandBuffer(commandBuffer, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < commandBufferCount; i++) + { + tracker.freeCommandBuffer(commandPool, pCommandBuffers[i]); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkFreeCommandBuffers( + device, commandPool, commandBufferCount, pCommandBuffers); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store and device-wide data + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& primary = tracker.getCommandBuffer(commandBuffer); + + for (uint32_t i = 0; i < commandBufferCount; i++) + { + auto& secondary = tracker.getCommandBuffer(pCommandBuffers[i]); + primary.executeCommands(secondary); + } + + // Release the lock to call into the main driver + lock.unlock(); + layer->driver.vkCmdExecuteCommands( + commandBuffer, commandBufferCount, pCommandBuffers); +} diff --git a/layer_gpu_performance/source/layer_device_functions_command_pool.cpp b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp new file mode 100644 index 0000000..a640a90 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkCreateCommandPool( + device, pCreateInfo, pAllocator, pCommandPool); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createCommandPool(*pCommandPool); + return result; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.getCommandPool(commandPool).reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandPool(device, commandPool, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyCommandPool(commandPool); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyCommandPool(device, commandPool, pAllocator); +} diff --git a/layer_gpu_performance/source/layer_device_functions_debug.cpp b/layer_gpu_performance/source/layer_device_functions_debug.cpp new file mode 100644 index 0000000..1905193 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_debug.cpp @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pMarkerInfo->pMarkerName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pLabelInfo->pLabelName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} diff --git a/layer_gpu_performance/source/layer_device_functions_dispatch.cpp b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp new file mode 100644 index 0000000..de5ee10 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a compute dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param groupX The X size of the dispatch in groups. + * @param groupY The Y size of the dispatch in groups. + * @param groupZ The Z size of the dispatch in groups. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerDispatch( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t groupX, + int64_t groupY, + int64_t groupZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.dispatch(groupX, groupY, groupZ); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/layer_device_functions_draw_call.cpp b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp new file mode 100644 index 0000000..42350d0 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp @@ -0,0 +1,257 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + 
* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a draw call with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + */ +static void registerDrawCall( + Device* layer, + VkCommandBuffer commandBuffer +) { + auto& state = layer->getStateTracker(); + auto& stats = state.getCommandBuffer(commandBuffer).getStats(); + stats.incDrawCallCount(); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectByteCountEXT(commandBuffer, instanceCount, firstInstance, counterBuffer, counterBufferOffset, counterOffset, vertexStride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} diff --git a/layer_gpu_performance/source/layer_device_functions_queue.cpp b/layer_gpu_performance/source/layer_device_functions_queue.cpp new file mode 100644 index 0000000..a5c92e2 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_queue.cpp @@ -0,0 +1,178 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software 
without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include + +#include "utils/misc.hpp" + +#include "device.hpp" +#include "layer_device_functions.hpp" + +using json = nlohmann::json; + +using namespace std::placeholders; + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto& tracker = layer->getStateTracker(); + tracker.queuePresent(); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + json frame { + { "type", "frame" }, + { "fid", tracker.totalStats.getFrameCount() } + }; + + layer->onFrame(frame.dump()); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueuePresentKHR(queue, pPresentInfo); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence); +} diff --git a/layer_gpu_performance/source/layer_device_functions_render_pass.cpp b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp new file mode 100644 index 0000000..5d16880 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp @@ -0,0 +1,376 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" +#include "trackers/render_pass.hpp" + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2KHR(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyRenderPass(renderPass); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyRenderPass(device, renderPass, pAllocator); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... 
+ Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... + Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderPass(commandBuffer); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRendering(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderingKHR(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} diff --git a/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp new file mode 100644 index 0000000..e6df5e3 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a trace rays dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param itemsX The X size of the dispatch in work items. + * @param itemsY The Y size of the dispatch in work items. + * @param itemsZ The Z size of the dispatch in work items. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerTraceRays( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t itemsX, + int64_t itemsY, + int64_t itemsZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.traceRays(itemsX, itemsY, itemsZ); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, width, height, depth); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} \ No newline at end of file diff --git a/layer_gpu_performance/source/layer_device_functions_transfer.cpp b/layer_gpu_performance/source/layer_device_functions_transfer.cpp new file mode 100644 index 0000000..ecfaa65 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_transfer.cpp @@ -0,0 +1,619 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a transfer to a buffer with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerBufferTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t byteCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.bufferTransfer(transferType, byteCount); +} + +/** + * @brief Register a transfer to an image with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param pixelCount The number of pixels transferred. + * + * @return The assigned tagID for the workload. 
+ */ +static uint64_t registerImageTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t pixelCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.imageTransfer(transferType, pixelCount); +} + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add buffer tracking so we can turn VK_WHOLE_SIZE into bytes + int64_t byteCount = static_cast(size); + if (size == VK_WHOLE_SIZE) + { + byteCount = -2; + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Fill buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearColorImage(commandBuffer, image, imageLayout, pColor, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearDepthStencilImage(commandBuffer, image, imageLayout, pDepthStencil, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for 
documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + byteCount += static_cast(pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2KHR(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions); + 
layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2KHR(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].extent.width) + * static_cast(pRegions[i].extent.height) + * static_cast(pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageInfo2* pCopyImageInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pCopyImageInfo->pRegions[i].extent.width)
+                            * static_cast<int64_t>(pCopyImageInfo->pRegions[i].extent.height)
+                            * static_cast<int64_t>(pCopyImageInfo->pRegions[i].extent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        pixelCount);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImage2KHR(commandBuffer, pCopyImageInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer(
+    VkCommandBuffer commandBuffer,
+    VkImage srcImage,
+    VkImageLayout srcImageLayout,
+    VkBuffer dstBuffer,
+    uint32_t regionCount,
+    const VkBufferImageCopy* pRegions
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pRegions[i].imageExtent.width)
+                            * static_cast<int64_t>(pRegions[i].imageExtent.height)
+                            * static_cast<int64_t>(pRegions[i].imageExtent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    // TODO: Our usual convention is to mark the transfer using the destination
+    // type, which means this should be a bufferTransfer reporting size in
+    // bytes. Without image tracking we only have pixels, so for now we report
+    // as "Copy image" and report size in pixels.
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image to buffer",
+        pixelCount);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, dstBuffer, regionCount, pRegions);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.width)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.height)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    // TODO: Our usual convention is to mark the transfer using the destination
+    // type, which means this should be a bufferTransfer reporting size in
+    // bytes. Without image tracking we only have pixels, so for now we report
+    // as "Copy image" and report size in pixels.
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image to buffer",
+        pixelCount);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImageToBuffer2(commandBuffer, pCopyImageToBufferInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Compute the size of the transfer
+    int64_t pixelCount = 0;
+    for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++)
+    {
+        int64_t rPixelCount = static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.width)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.height)
+                            * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth);
+        pixelCount += rPixelCount;
+    }
+
+    // TODO: Our usual convention is to mark the transfer using the destination
+    // type, which means this should be a bufferTransfer reporting size in
+    // bytes. Without image tracking we only have pixels, so for now we report
+    // as "Copy image" and report size in pixels.
+ uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/performance_comms.cpp b/layer_gpu_performance/source/performance_comms.cpp new file mode 100644 index 0000000..bf04114 --- /dev/null +++ b/layer_gpu_performance/source/performance_comms.cpp @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+
+#include "performance_comms.hpp"
+
+/* See header for documentation. */
+PerformanceComms::PerformanceComms(
+    Comms::CommsInterface& _comms
+):
+    comms(_comms)
+{
+    if (comms.isConnected())
+    {
+        endpoint = comms.getEndpointID("GPUTimeline");
+    }
+}
+
+/* See header for documentation. */
+void PerformanceComms::txMessage(
+    const std::string& message)
+{
+    // Message endpoint is not available
+    if (endpoint == 0)
+    {
+        return;
+    }
+
+    auto data = std::make_unique<Comms::MessageData>(message.begin(), message.end());
+    comms.txAsync(endpoint, std::move(data));
+}
diff --git a/layer_gpu_performance/source/performance_comms.hpp b/layer_gpu_performance/source/performance_comms.hpp
new file mode 100644
index 0000000..d9f3916
--- /dev/null
+++ b/layer_gpu_performance/source/performance_comms.hpp
@@ -0,0 +1,71 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * @file Declares a simple comms encoder for the timeline layer.
+ */
+
+#pragma once
+
+#include "comms/comms_interface.hpp"
+
+/**
+ * @brief A simple message encoder for the timeline comms endpoint.
+ *
+ * TODO: This is currently a very simple implementation because we are simply
+ * passing JSON strings around. This is not the most efficient way of doing
+ * this and in future this module will be used to implement binary encoders
+ * for each specific message type that needs sending.
+ */
+class PerformanceComms
+{
+public:
+    /**
+     * @brief Construct a new encoder.
+     *
+     * @param comms The common comms module used by all services.
+     */
+    PerformanceComms(
+        Comms::CommsInterface& comms);
+
+    /**
+     * @brief Send a message to the GPU timeline endpoint service.
+     *
+     * @param message The message to send.
+     */
+    void txMessage(
+        const std::string& message);
+
+private:
+    /**
+     * @brief The endpoint ID of the service, or 0 if not found.
+     */
+    Comms::EndpointID endpoint { 0 };
+
+    /**
+     * @brief The common module for network messaging.
+ */ + Comms::CommsInterface& comms; +}; From 5c1171e1f4e1dc02fbbbab86f658b7f84bd2ce74 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 30 Dec 2024 22:40:11 +0000 Subject: [PATCH 3/4] Add layer implementation of vkCreateDevice --- layer_gpu_performance/README_LAYER.md | 110 ++++++++++++++---- layer_gpu_performance/source/CMakeLists.txt | 1 + .../source/layer_device_functions.hpp | 14 +-- .../source/layer_device_functions_debug.cpp | 8 +- .../source/layer_device_functions_queue.cpp | 8 +- .../source/layer_instance_functions.hpp | 38 ++++++ .../layer_instance_functions_device.cpp | 80 +++++++++++++ 7 files changed, 224 insertions(+), 35 deletions(-) create mode 100644 layer_gpu_performance/source/layer_instance_functions.hpp create mode 100644 layer_gpu_performance/source/layer_instance_functions_device.cpp diff --git a/layer_gpu_performance/README_LAYER.md b/layer_gpu_performance/README_LAYER.md index f046cd4..a103878 100644 --- a/layer_gpu_performance/README_LAYER.md +++ b/layer_gpu_performance/README_LAYER.md @@ -5,7 +5,7 @@ analyze the workloads that make up a single frame. This layer supports two modes: -* Per workload time, read via queries +* Per workload time, read via Vulkan API queries * Per workload performance counters, read via a non-API mechanism ## What devices are supported? @@ -23,16 +23,16 @@ a way which is compatible with the way that a tile-based renderer schedules render passes. Under normal scheduling, tile-based renderers split render passes into two -pieces which are independently scheduled and can overlap with other work that -is running on the GPU. Blindly timing render passes using timer queries can -result in confusing results because the time includes time spend processing -unrelated workloads running in parallel. +pieces which are independently scheduled and that can overlap with other work +that is running on the GPU. 
Blindly timing render passes using timer queries +can result in confusing results because the reported time might include time +spent processing unrelated workloads that happen to be running in parallel. -The diagram shows one possible arrangement of workloads scheduled on the GPU -hardware queues for an Arm 5th Generation architecture GPU. We're trying to -time render pass 1 indicated by the `1` characters in the diagram, starting a -timer query when this render pass starts (`S`) in the binning phase queue, and -stopping when it ends (`E`) in the main phase queue. +The timing diagram below shows one possible arrangement of workloads scheduled +on the GPU hardware queues for an Arm 5th Generation architecture GPU. We are +trying to time render pass 1 indicated by the `1` characters in the diagram, +starting a timer query when this render pass starts (`S`) in the binning phase +queue, and stopping when it ends (`E`) in the main phase queue. ``` Compute: 222 @@ -41,16 +41,86 @@ stopping when it ends (`E`) in the main phase queue. ``` In this scenario the timer query correctly reflects the elapsed time of the -render pass, but is not an accurate measure of cost of this workload. The -elapsed time includes time where other workloads are running in parallel, -indicated by the `0`, `2`, and `3` characters. It also includes time between -the two phases where workload `1` is not running at all, because the binning -phase work has completed, but is waiting for the main phase queue to finish an -earlier workload. +render pass, but does not give an accurate measure of its cost. The elapsed +time includes time where other workloads are running in parallel, indicated by +the `0`, `2`, and `3` characters. It also includes time between the two phases +where workload `1` is not running at all, because the binning phase work has +completed and the main phase work is stuck waiting for an earlier workload to +finish to free up the hardware. 
To accurately cost workloads on a tile-based renderer, which will overlap and run workloads in parallel if it is allowed to, the layer must inject additional -synchronization primitives to serialize all workloads within a queue and across -queues. This ensures that timer query values reflect the cost of individual -workloads, however it also means that overall frame performance will be reduced -due to loss of workload parallelization. +synchronization to serialize all workloads within a queue and across queues. +This ensures that timer query values reflect the cost of individual workloads, +however it also means that overall frame performance will be reduced due to +loss of workload parallelization. + +# Design notes + +## Dependencies + +This layer uses timeline semaphores, so requires either Vulkan 1.1 or +the `VK_KHR_timeline_semaphore` extension. + +## Implementing serialization + +Cross-queue serialization is implemented using an injected timeline semaphore. +Each submit is assigned an incrementing `ID`, and will wait for `ID - 1` in the +timeline before starting, and set `ID` in the timeline when completing. This +allows us to implement serialization using a single sync primitive. + +Serialization within a queue is implemented by injecting a full pipeline +barrier before the pre-workload timer query, ensuring that all prior work has +completed before the time is sampled. Similarly we put a full pipeline barrier +after the post-workload timer query, ensuring that no following work starts +before the time is sampled. + +## Implementing query lifetime tracking + +Timer queries are implemented using query pools. The timer write commands are +recorded into each command buffer alongside the user commands. Each timer write +command specifies the specific counter slots used in a specific query pool, so +the query pool and slot usage must be assigned when the command buffer is +recorded. + +Query pools in the layer are a managed resource. 
We allocate query pools on +demand, and maintain a free-list of query pools that have been freed and are +ready for reuse. + +Query pools are allocated with enough space for 64 query results which is, in +the best case, enough for 63 workloads (N+1 counters). This can reduce for +render passes using multi-view rendering, which allocate 1 counter slot per +view. + +Query pools are assigned to a command buffer when recording, and multiple +query pools can be assigned to a single command buffer if more query result +space is needed. Query pools are fully reset on first use in the command +buffer. Query pools are returned to the layer free-list when the command buffer +is reset or destroyed. + +### Multi-submit command buffers + +Reusable command buffers that are not one-time submit can be problematic for +this type of instrumentation. + +A single primary command buffer could be submitted multiple times. This can be +managed by serializing the workloads and ensuring that the query results are +consumed between executions. This may impact performance due to additional +serialization, but it can be made to work. + +**NOTE:** This impact of this case could be mitigated by having the layer +inject a command buffer after the user command buffer, which inserts a copy +command to copy the query results to a buffer. This buffer is owned by the +layer and can be N-buffered to avoid stalls. + +The more problematic case is the case where a single secondary command buffer +is executed multiple times from within the same primary. In this case there +is no place to solve the collision with CPU-side synchronization, and relying +on only CPU-side recording will only capture the last copy. + +### Split command buffers + +Vulkan 1.3 can split dynamic render passes over multiple command buffers, +although all parts must be part of the same queue submit call. The layer will +only emit timestamps for the final part of the render pass, and will ignore +suspend/resumes. 
diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt index bdd3091..2975722 100644 --- a/layer_gpu_performance/source/CMakeLists.txt +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -53,6 +53,7 @@ add_library( layer_device_functions_render_pass.cpp layer_device_functions_trace_rays.cpp layer_device_functions_transfer.cpp + layer_instance_functions_device.cpp performance_comms.cpp) target_include_directories( diff --git a/layer_gpu_performance/source/layer_device_functions.hpp b/layer_gpu_performance/source/layer_device_functions.hpp index 8c2f8b5..660502a 100644 --- a/layer_gpu_performance/source/layer_device_functions.hpp +++ b/layer_gpu_performance/source/layer_device_functions.hpp @@ -456,18 +456,18 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( // Functions for debug /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( VkCommandBuffer commandBuffer); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT* pLabelInfo); @@ -480,13 +480,13 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( // Functions for queues /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( VkQueue queue, uint32_t submitCount, @@ -494,7 +494,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( VkFence fence); /* See Vulkan API for documentation. 
*/ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( VkQueue queue, uint32_t submitCount, @@ -502,7 +502,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( VkFence fence); /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( VkQueue queue, uint32_t submitCount, diff --git a/layer_gpu_performance/source/layer_device_functions_debug.cpp b/layer_gpu_performance/source/layer_device_functions_debug.cpp index 1905193..664d2b8 100644 --- a/layer_gpu_performance/source/layer_device_functions_debug.cpp +++ b/layer_gpu_performance/source/layer_device_functions_debug.cpp @@ -31,7 +31,7 @@ extern std::mutex g_vulkanLock; /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT* pMarkerInfo @@ -54,7 +54,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( VkCommandBuffer commandBuffer ) { @@ -76,7 +76,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT* pLabelInfo @@ -99,7 +99,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( } /* See Vulkan API for documentation. 
*/ -template<> +template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( VkCommandBuffer commandBuffer ) { diff --git a/layer_gpu_performance/source/layer_device_functions_queue.cpp b/layer_gpu_performance/source/layer_device_functions_queue.cpp index a5c92e2..6f435ba 100644 --- a/layer_gpu_performance/source/layer_device_functions_queue.cpp +++ b/layer_gpu_performance/source/layer_device_functions_queue.cpp @@ -38,7 +38,7 @@ using namespace std::placeholders; extern std::mutex g_vulkanLock; /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( VkQueue queue, const VkPresentInfoKHR* pPresentInfo @@ -67,7 +67,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( VkQueue queue, uint32_t submitCount, @@ -104,7 +104,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( } /* See Vulkan API for documentation. */ -template<> +template <> VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( VkQueue queue, uint32_t submitCount, @@ -141,7 +141,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( } /* See Vulkan API for documentation. 
*/
-template<>
+template <>
 VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR(
     VkQueue queue,
     uint32_t submitCount,
diff --git a/layer_gpu_performance/source/layer_instance_functions.hpp b/layer_gpu_performance/source/layer_instance_functions.hpp
new file mode 100644
index 0000000..00f93a9
--- /dev/null
+++ b/layer_gpu_performance/source/layer_instance_functions.hpp
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include <vulkan/vulkan.h>
+
+#include "framework/utils.hpp"
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice(
+    VkPhysicalDevice physicalDevice,
+    const VkDeviceCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkDevice* pDevice);
diff --git a/layer_gpu_performance/source/layer_instance_functions_device.cpp b/layer_gpu_performance/source/layer_instance_functions_device.cpp
new file mode 100644
index 0000000..f31143b
--- /dev/null
+++ b/layer_gpu_performance/source/layer_instance_functions_device.cpp
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <mutex>
+
+#include "framework/manual_functions.hpp"
+
+#include "device.hpp"
+#include "layer_instance_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice(
+    VkPhysicalDevice physicalDevice,
+    const VkDeviceCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkDevice* pDevice
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Instance::retrieve(physicalDevice);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+
+    auto* chainInfo = getChainInfo(pCreateInfo);
+    auto fpGetInstanceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+    auto fpGetDeviceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr;
+
+    auto extensions = getDeviceExtensionList(
+        layer->instance, physicalDevice, pCreateInfo);
+
+    auto fpCreateDevice = reinterpret_cast<PFN_vkCreateDevice>(
+        fpGetInstanceProcAddr(layer->instance, "vkCreateDevice"));
+    if (!fpCreateDevice)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    // Advance the link info for the next element on the chain
+    chainInfo->u.pLayerInfo = chainInfo->u.pLayerInfo->pNext;
+    auto res = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
+    if (res != VK_SUCCESS)
+    {
+        return res;
+    }
+
+    // Retake the lock to access layer-wide global store
+    lock.lock();
+    auto device = std::make_unique<Device>(layer, physicalDevice, *pDevice, fpGetDeviceProcAddr);
+    Device::store(*pDevice, std::move(device));
+
+    return VK_SUCCESS;
+}
From f928a2d4282eb6a06d42f5d044507200c6d58f3c Mon Sep 17 00:00:00 2001
From: Peter Harris
Date: Tue, 7 Jan 2025 13:37:00 +0000
Subject: [PATCH 4/4] Merge Python changes

---
 layer_gpu_performance/android_install.json |   4 +
 layer_gpu_performance/android_install.py   | 254 ---------------------
 2
files changed, 4 insertions(+), 254 deletions(-) create mode 100644 layer_gpu_performance/android_install.json delete mode 100644 layer_gpu_performance/android_install.py diff --git a/layer_gpu_performance/android_install.json b/layer_gpu_performance/android_install.json new file mode 100644 index 0000000..9d933c0 --- /dev/null +++ b/layer_gpu_performance/android_install.json @@ -0,0 +1,4 @@ +{ + "layer_name": "VK_LAYER_LGL_GPUPERFORMANCE", + "layer_binary": "libVkLayerGPUPerformance.so" +} diff --git a/layer_gpu_performance/android_install.py b/layer_gpu_performance/android_install.py deleted file mode 100644 index 35780ea..0000000 --- a/layer_gpu_performance/android_install.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# ----------------------------------------------------------------------------- -# Copyright (c) 2024 Arm Limited -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the 'Software'), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-# ----------------------------------------------------------------------------- -''' -A simple installer for Android Vulkan layers. -''' - -import argparse -import os -import shlex -import subprocess as sp -import sys -from typing import Any, Optional - -# Android temp directory -ANDROID_TMP_DIR = '/data/local/tmp/' - -# Expected layer names -EXPECTED_VULKAN_LAYER_NAME = 'VK_LAYER_LGL_GPUPERFORMANCE' -EXPECTED_VULKAN_LAYER_FILE = 'libVkLayerGPUPerformance.so' - - -class Device: - ''' - A basic wrapper around adb, allowing a specific device to be registered. - - Attributes: - device: The name of the device to call, or None for non-specific use. - ''' - - def adb_quiet(self, *args: str) -> None: - ''' - Call `adb` to run a command, but ignore output and errors. - - Args: - *args : List of command line parameters. - ''' - commands = ['adb'] - commands.extend(args) - sp.run(commands, stdout=sp.DEVNULL, stderr=sp.DEVNULL, check=False) - - def adb(self, *args: str, **kwargs: Any) -> str: - ''' - Call `adb` to run command, and capture output and results. - - Args: - *args: List of command line parameters. - **kwargs: text: Is output is text, or binary? - shell: Use the host shell? - quote: Quote arguments before forwarding - - Returns: - The contents of stdout. - - Raises: - CalledProcessError: The subprocess was not successfully executed. 
- ''' - commands = ['adb'] # type: Any - commands.extend(args) - - text = kwargs.get('text', True) - shell = kwargs.get('shell', False) - quote = kwargs.get('quote', False) - - # Run on the host shell - if shell: - # Unix shells need a flattened command for shell commands - if os.name != 'nt': - quoted_commands = [] - for command in commands: - if command != '>': - command = shlex.quote(command) - quoted_commands.append(command) - commands = ' '.join(quoted_commands) - - # Run on the device but with shell argument quoting - if quote: - for i, command in enumerate(commands): - commands[i] = shlex.quote(command) - - rep = sp.run(commands, check=True, shell=shell, stdout=sp.PIPE, - stderr=sp.PIPE, universal_newlines=text) - - return rep.stdout - - def adb_run_as(self, package: str, - *args: str, quiet: bool = False) -> Optional[str]: - ''' - Call `adb` to run command as a package using `run-as` or as root, - if root is accessible. If command will be run as root, this function - will change CWD to the package data directory before executing the - command. - - Args: - package: Package name to run-as or change CWD to. - *args: List of command line parameters. - quiet: If True, ignores output from adb. - - Returns: - The contents of stdout or None if quiet=True. - - Raises: - CalledProcessError: The subprocess was not successfully executed. - ''' - command = ['shell', 'run-as', package] - command.extend(args) - - if quiet: - self.adb_quiet(*command) - return None - - return self.adb(*command) - - -def enable_vulkan_debug_layer( - device: Device, package: str, layer: str) -> None: - ''' - Args: - device: The device instance. - package: The Android package name. - layer: The layer file path name. 
- ''' - - print('\nInstalling Vulkan debug layer') - - layer = os.path.normpath(layer) - layer_base = os.path.basename(os.path.normpath(layer)) - - device.adb('push', layer, ANDROID_TMP_DIR) - - device.adb_run_as(package, 'cp', ANDROID_TMP_DIR + layer_base, '.') - - device.adb('shell', 'settings', 'put', 'global', - 'enable_gpu_debug_layers', '1') - - device.adb('shell', 'settings', 'put', 'global', - 'gpu_debug_app', package) - - device.adb('shell', 'settings', 'put', 'global', - 'gpu_debug_layers', EXPECTED_VULKAN_LAYER_NAME) - - -def disable_vulkan_debug_layer( - device: Device, package: str, layer: str) -> None: - ''' - Clean up the Vulkan layer installation. - - Args: - device: The device instance. - args: The command arguments. - ''' - print('\nRemoving Vulkan debug layer') - - layer_base = os.path.basename(os.path.normpath(layer)) - - device.adb('shell', 'settings', 'delete', 'global', - 'enable_gpu_debug_layers') - - device.adb('shell', 'settings', 'delete', 'global', - 'gpu_debug_app') - - device.adb('shell', 'settings', 'delete', 'global', - 'gpu_debug_layers') - - device.adb_run_as(package, 'rm', layer_base, quiet=True) - - -def get_layer() -> Optional[str]: - ''' - Find the debug layer to use in the build directory. - - Returns: - The part to the library to use. - ''' - - base_dir = './build_arm64/source/' - - # TODO: If we want to use symbolized layer we need to rename it - lib = None - - for path in os.listdir(base_dir): - # Match symbolized library first so we don't use it - if path.endswith('_sym.so'): - _ = os.path.join(base_dir, path) - elif path.endswith('.so'): - lib = os.path.join(base_dir, path) - - return lib - - -def parse_command_line() -> argparse.Namespace: - ''' - Parse the command line. - - Returns: - The parsed command line container. 
- ''' - parser = argparse.ArgumentParser() - - parser.add_argument('--package', required=True, - help='Android package name') - - return parser.parse_args() - - -def main() -> int: - ''' - Script main function. - - Returns: - Process return code. - ''' - args = parse_command_line() - - device = Device() - layer = get_layer() - if not layer: - print('ERROR: Layer binary not found') - return 1 - - enable_vulkan_debug_layer(device, args.package, layer) - - input('Press Enter to disable layers') - - disable_vulkan_debug_layer(device, args.package, layer) - - return 0 - - -if __name__ == '__main__': - try: - sys.exit(main()) - except KeyboardInterrupt: - print('\n\nERROR: User interrupted execution')