diff --git a/layer_gpu_performance/CMakeLists.txt b/layer_gpu_performance/CMakeLists.txt
new file mode 100644
index 0000000..625064e
--- /dev/null
+++ b/layer_gpu_performance/CMakeLists.txt
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: MIT
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Arm Limited
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# -----------------------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.17)
+
+set(CMAKE_CXX_STANDARD 20)
+
+project(VkLayerGPUPerformance VERSION 1.0.0)
+
+# Common configuration
+set(LGL_LOG_TAG "VkLayerGPUPerformance")
+set(LGL_CONFIG_TRACE 0)
+set(LGL_CONFIG_LOG 1)
+
+include(../source_common/compiler_helper.cmake)
+
+# Build steps
+add_subdirectory(source)
+add_subdirectory(../source_common/comms source_common/comms)
+add_subdirectory(../source_common/framework source_common/framework)
+add_subdirectory(../source_common/trackers source_common/trackers)
diff --git a/layer_gpu_performance/README_LAYER.md b/layer_gpu_performance/README_LAYER.md
new file mode 100644
index 0000000..a103878
--- /dev/null
+++ b/layer_gpu_performance/README_LAYER.md
@@ -0,0 +1,126 @@
+# Layer: GPU Performance
+
+This is a standalone performance analysis layer that can be used to analyze
+the workloads that make up a single frame.
+
+This layer supports two modes:
+
+* Per workload time, read via Vulkan API queries
+* Per workload performance counters, read via a non-API mechanism
+
+## What devices are supported?
+
+The per workload timing uses Vulkan API timer queries, and should work on any
+GPU that supports the required Vulkan features.
+
+The per workload performance counters use the Arm libGPUCounters library, and
+require an Arm GPU.
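+
+As a rough illustration of what "the required Vulkan features" means for the
+timing mode, the sketch below checks that a queue family can write timestamps
+and that timestamp ticks can be converted to nanoseconds. This is not part of
+the layer; the helper name and structure are assumptions made for the example.
+
+```
+#include <vector>
+#include <vulkan/vulkan.h>
+
+// Illustrative only: check the device limits needed for per workload timing.
+static bool supportsWorkloadTiming(VkPhysicalDevice gpu, uint32_t family)
+{
+    VkPhysicalDeviceProperties props {};
+    vkGetPhysicalDeviceProperties(gpu, &props);
+
+    uint32_t count = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, nullptr);
+    std::vector<VkQueueFamilyProperties> families(count);
+    vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, families.data());
+
+    // timestampValidBits == 0 means the family cannot write timestamps, and
+    // timestampPeriod is the length of one timestamp tick in nanoseconds
+    return family < count
+        && families[family].timestampValidBits > 0
+        && props.limits.timestampPeriod > 0.0f;
+}
+```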
+
+## Is this layer non-invasive?
+
+The goal of this layer is to cost the major workloads submitted via the API,
+in a way that is compatible with how a tile-based renderer schedules render
+passes.
+
+Under normal scheduling, tile-based renderers split render passes into two
+pieces that are independently scheduled and can overlap with other work
+running on the GPU. Blindly timing render passes using timer queries can give
+confusing results, because the reported time might include time spent
+processing unrelated workloads that happen to be running in parallel.
+
+The timing diagram below shows one possible arrangement of workloads scheduled
+on the GPU hardware queues for an Arm 5th Generation architecture GPU. We are
+trying to time render pass `1`, indicated by the `1` characters in the
+diagram, starting a timer query when this render pass starts (`S`) in the
+binning phase queue, and stopping when it ends (`E`) in the main phase queue.
+
+```
+    Compute:                      222
+    Binning phase:   S 11111           3333
+    Main phase:      00000000     111111111111 E
+```
+
+In this scenario the timer query correctly reflects the elapsed time of the
+render pass, but does not give an accurate measure of its cost. The elapsed
+time includes time where other workloads are running in parallel, indicated by
+the `0`, `2`, and `3` characters. It also includes time between the two phases
+where workload `1` is not running at all, because the binning phase work has
+completed and the main phase work is stuck waiting for an earlier workload to
+finish to free up the hardware.
+
+To accurately cost workloads on a tile-based renderer, which will overlap and
+run workloads in parallel if it is allowed to, the layer must inject additional
+synchronization to serialize all workloads within a queue and across queues.
+This ensures that timer query values reflect the cost of individual workloads;
+however, it also means that overall frame performance will be reduced due to
+the loss of workload parallelization.
+
+# Design notes
+
+## Dependencies
+
+This layer uses timeline semaphores, so it requires either Vulkan 1.2 or
+the `VK_KHR_timeline_semaphore` extension.
+
+## Implementing serialization
+
+Cross-queue serialization is implemented using an injected timeline semaphore.
+Each submit is assigned an incrementing `ID`, waits for `ID - 1` in the
+timeline before starting, and signals `ID` in the timeline when it completes.
+This allows us to implement serialization using a single sync primitive.
+
+Serialization within a queue is implemented by injecting a full pipeline
+barrier before the pre-workload timer query, ensuring that all prior work has
+completed before the time is sampled. Similarly, we put a full pipeline barrier
+after the post-workload timer query, ensuring that no following work starts
+before the time is sampled.
+
+## Implementing query lifetime tracking
+
+Timer queries are implemented using query pools. The timer write commands are
+recorded into each command buffer alongside the user commands. Each timer write
+command targets specific counter slots in a specific query pool, so the query
+pool and slot usage must be assigned when the command buffer is recorded.
+
+Query pools in the layer are a managed resource. We allocate query pools on
+demand, and maintain a free-list of query pools that have been freed and are
+ready for reuse.
+
+Query pools are allocated with enough space for 64 query results, which is, in
+the best case, enough for 63 workloads (N+1 counters). This capacity can be
+reduced for render passes using multi-view rendering, which allocate one
+counter slot per view.
+
+Query pools are assigned to a command buffer when recording, and multiple
+query pools can be assigned to a single command buffer if more query result
+space is needed. Query pools are fully reset on first use in the command
+buffer. Query pools are returned to the layer free-list when the command buffer
+is reset or destroyed.
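+
+The sketch below illustrates this pattern for a single workload: a full
+pipeline barrier, the pre-workload timestamp, the user workload, the
+post-workload timestamp, and a trailing full barrier. This is an illustration
+rather than the layer's implementation; the helper name and slot parameters
+are assumptions, and the query pool is assumed to have been reset earlier in
+the command buffer.
+
+```
+#include <vulkan/vulkan.h>
+
+// Illustrative only: wrap one workload with the barrier + timestamp pattern.
+static void wrapWorkloadWithTimestamps(
+    VkCommandBuffer commandBuffer,
+    VkQueryPool queryPool,
+    uint32_t startSlot,   // Timestamp slot sampled before the workload
+    uint32_t endSlot)     // Timestamp slot sampled after the workload
+{
+    VkMemoryBarrier fullBarrier {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT
+    };
+
+    // Ensure all prior work has completed before the start time is sampled
+    vkCmdPipelineBarrier(
+        commandBuffer,
+        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        0, 1, &fullBarrier, 0, nullptr, 0, nullptr);
+
+    vkCmdWriteTimestamp(
+        commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        queryPool, startSlot);
+
+    // ... the user workload is recorded here ...
+
+    vkCmdWriteTimestamp(
+        commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+        queryPool, endSlot);
+
+    // Ensure no following work starts before the end time is sampled
+    vkCmdPipelineBarrier(
+        commandBuffer,
+        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        0, 1, &fullBarrier, 0, nullptr, 0, nullptr);
+}
+```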
+
+### Multi-submit command buffers
+
+Reusable command buffers that are not one-time submit can be problematic for
+this type of instrumentation.
+
+A single primary command buffer could be submitted multiple times. This can be
+managed by serializing the workloads and ensuring that the query results are
+consumed between executions. This may impact performance due to the additional
+serialization, but it can be made to work.
+
+**NOTE:** The impact of this case could be mitigated by having the layer
+inject a command buffer after the user command buffer, which inserts a command
+to copy the query results to a buffer. This buffer is owned by the layer and
+can be N-buffered to avoid stalls.
+
+The more problematic case is where a single secondary command buffer is
+executed multiple times from within the same primary. In this case there is
+no place to resolve the collision with CPU-side synchronization, and relying
+only on CPU-side recording will capture only the last copy of the results.
+
+### Split command buffers
+
+Vulkan 1.3 allows a dynamic render pass to be split over multiple command
+buffers, although all parts must be part of the same queue submit call. The
+layer will only emit timestamps for the final part of the render pass, and
+will ignore the intermediate suspends and resumes.
diff --git a/layer_gpu_performance/android_build.sh b/layer_gpu_performance/android_build.sh
new file mode 100644
index 0000000..960b2b0
--- /dev/null
+++ b/layer_gpu_performance/android_build.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: MIT
+# ----------------------------------------------------------------------------
+# Copyright (c) 2024 Arm Limited
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Configuration + +# Exit immediately if any component command errors +set -e + +BUILD_DIR_64=build_arm64 +BUILD_DIR_PACK=build_package + +# ---------------------------------------------------------------------------- +# Process command line options +if [ "$#" -lt 1 ]; then + BUILD_TYPE=Release +else + BUILD_TYPE=$1 +fi + +# Process command line options +if [ "$#" -lt 2 ]; then + PACKAGE=0 +else + PACKAGE=$2 +fi + +if [ "${PACKAGE}" -gt "0" ]; then + echo "Building a ${BUILD_TYPE} build with packaging" +else + echo "Building a ${BUILD_TYPE} build without packaging" +fi + +# ---------------------------------------------------------------------------- +# Build the 64-bit layer +mkdir -p ${BUILD_DIR_64} +pushd ${BUILD_DIR_64} + +cmake \ + -DCMAKE_SYSTEM_NAME=Android \ + -DANDROID_PLATFORM=29 \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_TOOLCHAIN=clang \ + -DANDROID_STL=c++_static \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ + .. + +make -j1 + +popd + +# ---------------------------------------------------------------------------- +# Build the release package +if [ "${PACKAGE}" -gt "0" ]; then + # Setup the package directories + mkdir -p ${BUILD_DIR_PACK}/bin/android/arm64 + + # Install the 64-bit layer + cp ${BUILD_DIR_64}/source/*.so ${BUILD_DIR_PACK}/bin/android/arm64 +fi diff --git a/layer_gpu_performance/android_install.json b/layer_gpu_performance/android_install.json new file mode 100644 index 0000000..9d933c0 --- /dev/null +++ b/layer_gpu_performance/android_install.json @@ -0,0 +1,4 @@ +{ + "layer_name": "VK_LAYER_LGL_GPUPERFORMANCE", + "layer_binary": "libVkLayerGPUPerformance.so" +} diff --git a/layer_gpu_performance/source/CMakeLists.txt b/layer_gpu_performance/source/CMakeLists.txt new file mode 100644 index 0000000..2975722 --- /dev/null +++ b/layer_gpu_performance/source/CMakeLists.txt @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ----------------------------------------------------------------------------- + +# Set output file names +if (CMAKE_BUILD_TYPE STREQUAL "Release") + set(VK_LAYER VkLayerGPUPerformance_sym) + set(VK_LAYER_STRIP libVkLayerGPUPerformance.so) +else() + set(VK_LAYER VkLayerGPUPerformance) +endif() + +# Set strings used by configure +set(LGL_LAYER_NAME_STR "VK_LAYER_LGL_GPUPERFORMANCE") +set(LGL_LAYER_DESC_STR "VkLayerGPUPerformance by LGL") + +# Vulkan layer library +configure_file( + version.hpp.in + version.hpp + ESCAPE_QUOTES @ONLY) + +add_library( + ${VK_LAYER} SHARED + ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp + device.cpp + instance.cpp + layer_device_functions_command_buffer.cpp + layer_device_functions_command_pool.cpp + layer_device_functions_debug.cpp + layer_device_functions_dispatch.cpp + layer_device_functions_draw_call.cpp + layer_device_functions_queue.cpp + layer_device_functions_render_pass.cpp + layer_device_functions_trace_rays.cpp + layer_device_functions_transfer.cpp + layer_instance_functions_device.cpp + performance_comms.cpp) + +target_include_directories( + ${VK_LAYER} PRIVATE + ${PROJECT_SOURCE_DIR}/../source_common + ${PROJECT_SOURCE_DIR}/../source_third_party + ${CMAKE_CURRENT_BINARY_DIR} + .) + +target_include_directories( + ${VK_LAYER} SYSTEM PRIVATE + ../../khronos/vulkan/include) + +lgl_set_build_options(${VK_LAYER}) + +target_link_libraries( + ${VK_LAYER} + lib_layer_comms + lib_layer_framework + lib_layer_trackers + $<$:log>) + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_command( + TARGET "${VK_LAYER}" POST_BUILD + DEPENDS "${VK_LAYER}" + COMMAND ${CMAKE_STRIP} + ARGS --strip-all -o ${VK_LAYER_STRIP} $ + COMMENT "Stripped lib${VK_LAYER}.so to ${VK_LAYER_STRIP}") +endif() diff --git a/layer_gpu_performance/source/device.cpp b/layer_gpu_performance/source/device.cpp new file mode 100644 index 0000000..571b2e4 --- /dev/null +++ b/layer_gpu_performance/source/device.cpp @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include + +#include "comms/comms_module.hpp" +#include "framework/utils.hpp" + +#include "device.hpp" +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan devices. + */ +static std::unordered_map> g_devices; + +/* See header for documentation. */ +std::unique_ptr Device::commsModule; + +/* See header for documentation. */ +std::unique_ptr Device::commsWrapper; + +/* See header for documentation. */ +void Device::store( + VkDevice handle, + std::unique_ptr device +) { + void* key = getDispatchKey(handle); + g_devices.insert({ key, std::move(device) }); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkQueue handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve( + VkCommandBuffer handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +void Device::destroy( + Device* device +) { + g_devices.erase(getDispatchKey(device)); +} + +/* See header for documentation. */ +Device::Device( + Instance* _instance, + VkPhysicalDevice _physicalDevice, + VkDevice _device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress +): + instance(_instance), + physicalDevice(_physicalDevice), + device(_device) +{ + initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); + + // Init the shared comms module for the first device built + if (!commsModule) + { + commsModule = std::make_unique("lglcomms"); + commsWrapper = std::make_unique(*commsModule); + } +} diff --git a/layer_gpu_performance/source/device.hpp b/layer_gpu_performance/source/device.hpp new file mode 100644 index 0000000..acfee7e --- /dev/null +++ b/layer_gpu_performance/source/device.hpp @@ -0,0 +1,203 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares the root class for layer management of VkDevice objects. + * + * Role summary + * ============ + * + * Devices represent the core context used by the application to connect to the + * underlying graphics driver. A device object is the dispatch root for the + * Vulkan driver, so device commands all take some form of dispatchable handle + * that can be resolved into a unique per-device key. For the driver this key + * would simply be a pointer directly to the driver-internal device object, but + * for our layer we use a device dispatch key as an index in to the map to find + * the layer's driver object. + * + * Key properties + * ============== + * + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include + +#include "comms/comms_module.hpp" +#include "framework/device_dispatch_table.hpp" +#include "trackers/device.hpp" + +#include "instance.hpp" +#include "performance_comms.hpp" + +/** + * @brief This class implements the layer state tracker for a single device. + */ +class Device +{ +public: + /** + * @brief Store a new device into the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect key. + * @param device The @c Device object to store. + */ + static void store( + VkDevice handle, + std::unique_ptr device); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkDevice handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkQueue handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve( + VkCommandBuffer handle); + + /** + * @brief Drop a device from the global store of dispatchable devices. + * + * @param device The device to drop. + */ + static void destroy( + Device* device); + + /** + * @brief Create a new layer device object. + * + * @param instance The layer instance object this device is created with. + * @param physicalDevice The physical device this logical device is for. + * @param device The device handle this device is created with. + * @param nlayerGetProcAddress The vkGetDeviceProcAddress function for the driver. + */ + Device( + Instance* instance, + VkPhysicalDevice physicalDevice, + VkDevice device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress); + + /** + * @brief Destroy this layer device object. + */ + ~Device() = default; + + /** + * @brief Callback for sending messages on frame boundary. + * + * @param message The message to send. 
+ */ + void onFrame( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Callback for sending messages on workload submit to a queue. + * + * @param message The message to send. + */ + void onWorkloadSubmit( + const std::string& message + ) { + commsWrapper->txMessage(message); + } + + /** + * @brief Get the cumulative stats for this device. + */ + Tracker::Device& getStateTracker() + { + return stateTracker; + } + +public: + /** + * @brief The driver function dispatch table. + */ + DeviceDispatchTable driver {}; + +private: + /** + * @brief The instance this device is created with. + */ + const Instance* instance; + + /** + * @brief The physical device this device is created with. + */ + const VkPhysicalDevice physicalDevice; + + /** + * @brief The device handle this device is created with. + */ + const VkDevice device; + + /** + * @brief State tracker for this device. + */ + Tracker::Device stateTracker; + + /** + * @brief Shared network communications module. + */ + static std::unique_ptr commsModule; + + /** + * @brief Shared network communications message encoder. + */ + static std::unique_ptr commsWrapper; +}; diff --git a/layer_gpu_performance/source/device_utils.hpp b/layer_gpu_performance/source/device_utils.hpp new file mode 100644 index 0000000..eddf193 --- /dev/null +++ b/layer_gpu_performance/source/device_utils.hpp @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +#include "device.hpp" + +/** + * @brief Emit a start tag via a driver debug utils label. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param tagID The tagID to emit into the label. 
+ */ +[[maybe_unused]] static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID +) { + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); +} diff --git a/layer_gpu_performance/source/instance.cpp b/layer_gpu_performance/source/instance.cpp new file mode 100644 index 0000000..0b62857 --- /dev/null +++ b/layer_gpu_performance/source/instance.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" + +#include "instance.hpp" + +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ +static std::unordered_map> g_instances; + +/* See header for documentation. */ +void Instance::store( + VkInstance handle, + std::unique_ptr& instance +) { + void* key = getDispatchKey(handle); + g_instances.insert({ key, std::move(instance) }); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkInstance handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +Instance* Instance::retrieve( + VkPhysicalDevice handle +) { + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +void Instance::destroy( + Instance* instance +) { + g_instances.erase(getDispatchKey(instance->instance)); +} + +/* See header for documentation. 
*/ +Instance::Instance( + VkInstance _instance, + PFN_vkGetInstanceProcAddr _nlayerGetProcAddress +) : + instance(_instance), + nlayerGetProcAddress(_nlayerGetProcAddress) +{ + initDriverInstanceDispatchTable(instance, nlayerGetProcAddress, driver); +} diff --git a/layer_gpu_performance/source/instance.hpp b/layer_gpu_performance/source/instance.hpp new file mode 100644 index 0000000..fc6af6b --- /dev/null +++ b/layer_gpu_performance/source/instance.hpp @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file + * Declares the root class for layer management of VkInstance objects. + * + * Role summary + * ============ + * + * Instances represent the core context used by the application to connect to + * the OS graphics subsystem prior to connection to a specific device instance. + * An instance object is the dispatch root for the Vulkan subsystem, so + * instance commands all take some form of dispatchable handle that can be + * resolved into a unique per-instance key. For the driver this key would + * simply be a pointer directly to the driver-internal instance object, but for + * our layer we use a instance dispatch key as an index in to the map to find + * the layer's instance object. + * + * Key properties + * ============== + * + * Vulkan instances are designed to be used concurrently by multiple + * application threads. An application can have multiple concurrent instances, + * and use each instance from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include +#include + +#include +#include + +#include "framework/instance_dispatch_table.hpp" + +/** + * @brief This class implements the layer state tracker for a single instance. + */ +class Instance +{ +public: + /** + * @brief Store a new instance into the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect key. + * @param instance The @c Instance object to store. 
+ */ + static void store( + VkInstance handle, + std::unique_ptr& instance); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkInstance handle); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve( + VkPhysicalDevice handle); + + /** + * @brief Drop an instance from the global store of dispatchable instances. + * + * @param instance The instance to drop. + */ + static void destroy( + Instance* instance); + + /** + * @brief Create a new layer instance object. + * + * @param instance The instance handle this instance is created with. + * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + */ + Instance( + VkInstance instance, + PFN_vkGetInstanceProcAddr nlayerGetProcAddress); + +public: + /** + * @brief The instance handle this instance is created with. + */ + VkInstance instance; + + /** + * @brief The next layer's \c vkGetInstanceProcAddr() function pointer. + */ + PFN_vkGetInstanceProcAddr nlayerGetProcAddress; + + /** + * @brief The driver function dispatch table. + */ + InstanceDispatchTable driver {}; +}; diff --git a/layer_gpu_performance/source/layer_device_functions.hpp b/layer_gpu_performance/source/layer_device_functions.hpp new file mode 100644 index 0000000..660502a --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions.hpp @@ -0,0 +1,510 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +// Functions for command pools + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); + +// Functions for command buffers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +// Functions for render passes + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer); + +// Functions for draw calls + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +// Functions for compute dispatches + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset); + +// Commands for trace rays + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth); + + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +// Functions for debug + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer); + +// Functions for queues + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); diff --git a/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp new file mode 100644 index 0000000..ef8e920 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_buffer.cpp @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkAllocateCommandBuffers( + device, pAllocateInfo, pCommandBuffers); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) + { + tracker.allocateCommandBuffer( + pAllocateInfo->commandPool, pCommandBuffers[i]); + } + + return result; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo +) { + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + cmdBuffer.begin(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkBeginCommandBuffer(commandBuffer, pBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandBuffer(commandBuffer, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < commandBufferCount; i++) + { + tracker.freeCommandBuffer(commandPool, pCommandBuffers[i]); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkFreeCommandBuffers( + device, commandPool, commandBufferCount, pCommandBuffers); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store and device-wide data + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& primary = tracker.getCommandBuffer(commandBuffer); + + for (uint32_t i = 0; i < commandBufferCount; i++) + { + auto& secondary = tracker.getCommandBuffer(pCommandBuffers[i]); + primary.executeCommands(secondary); + } + + // Release the lock to call into the main driver + lock.unlock(); + layer->driver.vkCmdExecuteCommands( + commandBuffer, commandBufferCount, pCommandBuffers); +} diff --git a/layer_gpu_performance/source/layer_device_functions_command_pool.cpp b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp new file mode 100644 index 0000000..a640a90 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_command_pool.cpp @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult result = layer->driver.vkCreateCommandPool( + device, pCreateInfo, pAllocator, pCommandPool); + if (result != VK_SUCCESS) + { + return result; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createCommandPool(*pCommandPool); + return result; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.getCommandPool(commandPool).reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandPool(device, commandPool, flags); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyCommandPool(commandPool); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyCommandPool(device, commandPool, pAllocator); +} diff --git a/layer_gpu_performance/source/layer_device_functions_debug.cpp b/layer_gpu_performance/source/layer_device_functions_debug.cpp new file mode 100644 index 0000000..664d2b8 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_debug.cpp @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT( + VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pMarkerInfo->pMarkerName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerBegin(pLabelInfo->pLabelName); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Increment the render pass counter in the tracker + cb.debugMarkerEnd(); + + // Note that we do not call the driver for user labels - they are + // emitted via the comms side-channel for each workload to avoid + // polluting the layer's use of the driver for tag labelling +} diff --git a/layer_gpu_performance/source/layer_device_functions_dispatch.cpp b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp new file mode 100644 index 0000000..de5ee10 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_dispatch.cpp @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a compute dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param groupX The X size of the dispatch in groups. + * @param groupY The Y size of the dispatch in groups. + * @param groupZ The Z size of the dispatch in groups. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerDispatch( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t groupX, + int64_t groupY, + int64_t groupZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.dispatch(groupX, groupY, groupZ); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast<int64_t>(groupCountX), + static_cast<int64_t>(groupCountY), + static_cast<int64_t>(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast<int64_t>(groupCountX), + static_cast<int64_t>(groupCountY), + static_cast<int64_t>(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast<int64_t>(groupCountX), + static_cast<int64_t>(groupCountY), + static_cast<int64_t>(groupCountZ)); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +}
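The dispatch hooks above share one pattern: register the workload with the tracker while holding the lock to obtain a `tagID`, then bracket the real driver call between `emitStartTag()` and `vkCmdEndDebugUtilsLabelEXT()`. `emitStartTag()` is declared in `device_utils.hpp`, which is not part of this diff, so the sketch below is only an illustration of the idea, assuming the tag is carried as a debug label whose name encodes the `tagID`; the real helper and its label format may differ.

```
// Sketch only: a hypothetical equivalent of emitStartTag(), assuming the
// tagID is encoded in a vkCmdBeginDebugUtilsLabelEXT() label name. The
// vkCmdEndDebugUtilsLabelEXT() call emitted after the driver call then
// closes a per-workload bracket that tooling can associate with the tag.
static void emitStartTagSketch(Device* layer, VkCommandBuffer commandBuffer, uint64_t tagID)
{
    std::string name = "t" + std::to_string(tagID);  // Label format is an assumption

    VkDebugUtilsLabelEXT label {};
    label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
    label.pLabelName = name.c_str();

    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &label);
}
```

+ +/* See Vulkan API for documentation.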
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/layer_device_functions_draw_call.cpp b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp new file mode 100644 index 0000000..42350d0 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_draw_call.cpp @@ -0,0 +1,257 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a draw call with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + */ +static void registerDrawCall( + Device* layer, + VkCommandBuffer commandBuffer +) { + auto& state = layer->getStateTracker(); + auto& stats = state.getCommandBuffer(commandBuffer).getStats(); + stats.incDrawCallCount(); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectByteCountEXT(commandBuffer, instanceCount, firstInstance, counterBuffer, counterBufferOffset, counterOffset, vertexStride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + registerDrawCall(layer, commandBuffer); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDrawIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} diff --git a/layer_gpu_performance/source/layer_device_functions_queue.cpp b/layer_gpu_performance/source/layer_device_functions_queue.cpp new file mode 100644 index 0000000..6f435ba --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_queue.cpp @@ -0,0 +1,178 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include + +#include "utils/misc.hpp" + +#include "device.hpp" +#include "layer_device_functions.hpp" + +using json = nlohmann::json; + +using namespace std::placeholders; + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto& tracker = layer->getStateTracker(); + tracker.queuePresent(); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + json frame { + { "type", "frame" }, + { "fid", tracker.totalStats.getFrameCount() } + }; + + layer->onFrame(frame.dump()); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueuePresentKHR(queue, pPresentInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(queue); + + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence); +} diff --git a/layer_gpu_performance/source/layer_device_functions_render_pass.cpp b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp new file mode 100644 index 0000000..5d16880 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_render_pass.cpp @@ -0,0 +1,376 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" +#include "trackers/render_pass.hpp" + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2KHR(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyRenderPass(renderPass); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyRenderPass(device, renderPass, pAllocator); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin(rp, width, height); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... + Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... + Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + uint64_t tagID = cb.renderPassBegin( + rp, width, height, resuming, suspending); + + // Release the lock to call into the driver + lock.unlock(); + // Emit the label only for new render passes + if (!resuming) + { + emitStartTag(layer, commandBuffer, tagID); + } + layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderPass(commandBuffer); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRendering(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + bool suspending = cb.renderPassEnd(); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderingKHR(commandBuffer); + if (!suspending) + { + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); + } +} diff --git a/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp new file mode 100644 index 0000000..e6df5e3 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_trace_rays.cpp @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a trace rays dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param itemsX The X size of the dispatch in work items. + * @param itemsY The Y size of the dispatch in work items. + * @param itemsZ The Z size of the dispatch in work items. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerTraceRays( + Device* layer, + VkCommandBuffer commandBuffer, + int64_t itemsX, + int64_t itemsY, + int64_t itemsZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.traceRays(itemsX, itemsY, itemsZ); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerTraceRays(layer, commandBuffer, width, height, depth); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} \ No newline at end of file diff --git a/layer_gpu_performance/source/layer_device_functions_transfer.cpp b/layer_gpu_performance/source/layer_device_functions_transfer.cpp new file mode 100644 index 0000000..ecfaa65 --- /dev/null +++ b/layer_gpu_performance/source/layer_device_functions_transfer.cpp @@ -0,0 +1,619 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a transfer to a buffer with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + * + * @return The assigned tagID for the workload. 
+ */ +static uint64_t registerBufferTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t byteCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.bufferTransfer(transferType, byteCount); +} + +/** + * @brief Register a transfer to an image with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param pixelCount The number of pixels transferred. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerImageTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t pixelCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.imageTransfer(transferType, pixelCount); +} + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add buffer tracking so we can turn VK_WHOLE_SIZE into bytes + int64_t byteCount = static_cast<int64_t>(size); + if (size == VK_WHOLE_SIZE) + { + byteCount = -2; + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Fill buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearColorImage(commandBuffer, image, imageLayout, pColor, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +}
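The transfer handlers pass a signed size into `registerBufferTransfer()` / `registerImageTransfer()` so that cases the layer cannot yet resolve can be reported as negative sentinels: `-1` where the size is unknown until resource tracking is added, and `-2` where the application passed `VK_WHOLE_SIZE`. As a sketch of the TODO in `layer_vkCmdFillBuffer()` above, resolving `VK_WHOLE_SIZE` would need a tracked buffer size; `trackedBufferSize` below is a hypothetical lookup that this patch does not provide.

```
// Sketch only: once buffer tracking exists, VK_WHOLE_SIZE could be turned
// into a real byte count rather than the -2 sentinel, since a whole-size
// fill covers the buffer from dstOffset to the end of the buffer.
static int64_t resolveFillSize(VkDeviceSize trackedBufferSize, VkDeviceSize dstOffset, VkDeviceSize size)
{
    if (size == VK_WHOLE_SIZE)
    {
        return static_cast<int64_t>(trackedBufferSize - dstOffset);
    }

    return static_cast<int64_t>(size);
}
```

+ +/* See Vulkan API for documentation.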
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearDepthStencilImage(commandBuffer, image, imageLayout, pDepthStencil, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + byteCount += static_cast<int64_t>(pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast<int64_t>(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +}
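The buffer-to-image and image copy hooks later in this file all repeat the same extent-product loop to turn a region list into a pixel count. As an illustration only, a helper along the following lines could factor that loop out; `sumRegionPixels` is hypothetical and not part of this patch.

```
// Hypothetical helper, not part of this patch: sum the pixel count of a span
// of copy regions, given an accessor that returns each region's VkExtent3D.
template <typename RegionT, typename GetExtent>
static int64_t sumRegionPixels(uint32_t regionCount, const RegionT* pRegions, GetExtent getExtent)
{
    int64_t pixelCount = 0;
    for (uint32_t i = 0; i < regionCount; i++)
    {
        const VkExtent3D& extent = getExtent(pRegions[i]);
        pixelCount += static_cast<int64_t>(extent.width)
                    * static_cast<int64_t>(extent.height)
                    * static_cast<int64_t>(extent.depth);
    }

    return pixelCount;
}

// Example use for VkBufferImageCopy regions:
//   int64_t pixels = sumRegionPixels(regionCount, pRegions,
//       [](const VkBufferImageCopy& r) -> const VkExtent3D& { return r.imageExtent; });
```

+ +/* See Vulkan API for documentation.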
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + byteCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2KHR(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy buffer to image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2KHR(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].extent.width) + * static_cast(pRegions[i].extent.height) + * static_cast(pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2KHR(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // as "Copy image" and report size in pixels. + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // as "Copy image" and report size in pixels. 
+ uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.width) + * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.height) + * static_cast<int64_t>(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // this as an image transfer and report the size in pixels. + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image to buffer", + pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_performance/source/layer_instance_functions.hpp b/layer_gpu_performance/source/layer_instance_functions.hpp new file mode 100644 index 0000000..00f93a9 --- /dev/null +++ b/layer_gpu_performance/source/layer_instance_functions.hpp @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include <vulkan/vulkan.h> + +#include "framework/utils.hpp" + +/* See Vulkan API for documentation.
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice); diff --git a/layer_gpu_performance/source/layer_instance_functions_device.cpp b/layer_gpu_performance/source/layer_instance_functions_device.cpp new file mode 100644 index 0000000..f31143b --- /dev/null +++ b/layer_gpu_performance/source/layer_instance_functions_device.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include <mutex> + +#include "framework/manual_functions.hpp" + +#include "device.hpp" +#include "layer_instance_functions.hpp" + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation.
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Instance::retrieve(physicalDevice); + + // Release the lock to call into the driver + lock.unlock(); + + auto* chainInfo = getChainInfo(pCreateInfo); + auto fpGetInstanceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr; + auto fpGetDeviceProcAddr = chainInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr; + + auto extensions = getDeviceExtensionList( + layer->instance, physicalDevice, pCreateInfo); + + auto fpCreateDevice = reinterpret_cast<PFN_vkCreateDevice>( + fpGetInstanceProcAddr(layer->instance, "vkCreateDevice")); + if (!fpCreateDevice) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + // Advance the link info for the next element on the chain + chainInfo->u.pLayerInfo = chainInfo->u.pLayerInfo->pNext; + auto res = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice); + if (res != VK_SUCCESS) + { + return res; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto device = std::make_unique<Device>(layer, physicalDevice, *pDevice, fpGetDeviceProcAddr); + Device::store(*pDevice, std::move(device)); + + return VK_SUCCESS; +} diff --git a/layer_gpu_performance/source/performance_comms.cpp b/layer_gpu_performance/source/performance_comms.cpp new file mode 100644 index 0000000..bf04114 --- /dev/null +++ b/layer_gpu_performance/source/performance_comms.cpp @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include <memory> + +#include "performance_comms.hpp" + +/* See header for documentation. */ +PerformanceComms::PerformanceComms( + Comms::CommsInterface& _comms +): + comms(_comms) +{ + if (comms.isConnected()) + { + endpoint = comms.getEndpointID("GPUTimeline"); + } +} + +/* See header for documentation.
*/ +void PerformanceComms::txMessage( + const std::string& message) +{ + // Message endpoint is not available + if (endpoint == 0) + { + return; + } + + auto data = std::make_unique<Comms::MessageData>(message.begin(), message.end()); + comms.txAsync(endpoint, std::move(data)); +} diff --git a/layer_gpu_performance/source/performance_comms.hpp b/layer_gpu_performance/source/performance_comms.hpp new file mode 100644 index 0000000..d9f3916 --- /dev/null +++ b/layer_gpu_performance/source/performance_comms.hpp @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares a simple comms encoder for the timeline layer. + */ + +#pragma once + +#include "comms/comms_interface.hpp" + +/** + * @brief A simple message encoder for the timeline comms endpoint. + * + * TODO: This is currently a very simple implementation because we are simply + * passing JSON strings around. This is not the most efficient way of doing + * this and in future this module will be used to implement binary encoders + * for each specific message type that needs sending. + */ +class PerformanceComms +{ +public: + /** + * @brief Construct a new encoder. + * + * @param comms The common comms module used by all services. + */ + PerformanceComms( + Comms::CommsInterface& comms); + + /** + * @brief Send a message to the GPU timeline endpoint service. + * + * @param message The message to send. + */ + void txMessage( + const std::string& message); + +private: + /** + * @brief The endpoint ID of the service, or 0 if not found. + */ + Comms::EndpointID endpoint { 0 }; + + /** + * @brief The common module for network messaging.
+ */ + Comms::CommsInterface& comms; +}; diff --git a/layer_gpu_performance/source/version.hpp.in b/layer_gpu_performance/source/version.hpp.in new file mode 100644 index 0000000..5fcb9c3 --- /dev/null +++ b/layer_gpu_performance/source/version.hpp.in @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Placeholder templates that are populated by CMake during configure. + */ + +#pragma once + +#define LGL_VER_MAJOR @PROJECT_VERSION_MAJOR@ +#define LGL_VER_MINOR @PROJECT_VERSION_MINOR@ +#define LGL_VER_PATCH @PROJECT_VERSION_PATCH@ +#define LGL_LAYER_NAME "@LGL_LAYER_NAME_STR@" +#define LGL_LAYER_DESC "@LGL_LAYER_DESC_STR@"
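For reference, the `@PROJECT_VERSION_*@` and `@LGL_*@` placeholders in `version.hpp.in` are substituted by CMake's `configure_file()` at configure time. The actual rule presumably lives in the layer's `source/CMakeLists.txt`, which is not shown in this patch, so the snippet below is only an illustrative sketch: the layer name string, description string, and `${VK_LAYER}` target name are assumptions rather than values taken from this change.

```
# Illustrative sketch only; the real rule lives in source/CMakeLists.txt.
# The string values and the ${VK_LAYER} target name are placeholders.
set(LGL_LAYER_NAME_STR "VK_LAYER_LGL_gpu_performance")
set(LGL_LAYER_DESC_STR "Arm GPU performance analysis layer")

# Substitute the @...@ placeholders in version.hpp.in to generate version.hpp
# in the build tree.
configure_file(version.hpp.in version.hpp @ONLY)

# Make the generated header visible to the layer sources.
target_include_directories(${VK_LAYER} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
```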