From c69a41835559c2ad900d2079b44f0b04d3ccd39d Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 13:13:09 -0600 Subject: [PATCH 01/19] Update headers to include aie-codegen for files under profile/device Signed-off-by: snigupta --- profile/device/aie_trace/client/aie_trace_offload_client.h | 5 +++++ profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp | 6 ++++++ profile/device/aie_trace/ve2/aie_trace_offload_ve2.h | 5 +++++ profile/device/common/aie_driver_common_util.cpp | 5 +++++ profile/device/common/client_transaction.cpp | 5 +++++ profile/device/common/transactions/op_init.hpp | 7 ++++++- 6 files changed, 32 insertions(+), 1 deletion(-) diff --git a/profile/device/aie_trace/client/aie_trace_offload_client.h b/profile/device/aie_trace/client/aie_trace_offload_client.h index 1d04223f..513eaaf1 100644 --- a/profile/device/aie_trace/client/aie_trace_offload_client.h +++ b/profile/device/aie_trace/client/aie_trace_offload_client.h @@ -28,8 +28,13 @@ extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { diff --git a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp index 83a514b5..f2c56981 100644 --- a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp +++ b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp @@ -17,6 +17,12 @@ #define XDP_PLUGIN_SOURCE +#ifdef XDP_USE_AIE_CODEGEN +extern "C" { +#include +} +#endif + #include #include "core/include/xrt.h" diff --git a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h index e6c996c3..618353c0 100644 --- a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h +++ b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h @@ -22,8 +22,13 @@ extern "C" { +#ifdef XDP_USE_AIE_CODEGEN + #include + #include +#else #include "xaiengine/xaiegbl.h" #include +#endif } namespace xdp { diff --git 
a/profile/device/common/aie_driver_common_util.cpp b/profile/device/common/aie_driver_common_util.cpp index 03beb487..35c0b7a1 100644 --- a/profile/device/common/aie_driver_common_util.cpp +++ b/profile/device/common/aie_driver_common_util.cpp @@ -28,8 +28,13 @@ #include extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } #include "core/common/message.h" diff --git a/profile/device/common/client_transaction.cpp b/profile/device/common/client_transaction.cpp index 9e649b35..5cef9e64 100644 --- a/profile/device/common/client_transaction.cpp +++ b/profile/device/common/client_transaction.cpp @@ -24,8 +24,13 @@ #include "transactions/op_buf.hpp" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } // *************************************************************** diff --git a/profile/device/common/transactions/op_init.hpp b/profile/device/common/transactions/op_init.hpp index 69e84010..c6ca79fa 100644 --- a/profile/device/common/transactions/op_init.hpp +++ b/profile/device/common/transactions/op_init.hpp @@ -3,8 +3,13 @@ #ifndef __OPINIT_HPP__ #define __OPINIT_HPP__ +extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#else #include - +#endif +} #include "op_types.h" #include From b3ff8fffeacb773334537fd02bf9913f7be4363f Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 13:16:34 -0600 Subject: [PATCH 02/19] Update headers and CMAKE for aie_base and aie_debug Signed-off-by: snigupta --- profile/plugin/aie_base/aie_base_util.h | 5 +++++ profile/plugin/aie_debug/CMakeLists.txt | 8 ++++---- profile/plugin/aie_debug/aie_debug_metadata.h | 5 +++++ profile/plugin/aie_debug/client/aie_debug.h | 5 +++++ profile/plugin/aie_debug/ve2/aie_debug.h | 5 +++++ 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/profile/plugin/aie_base/aie_base_util.h b/profile/plugin/aie_base/aie_base_util.h index baa70ab2..ede8c803 100755 --- a/profile/plugin/aie_base/aie_base_util.h +++ 
b/profile/plugin/aie_base/aie_base_util.h @@ -11,8 +11,13 @@ #include "xdp/profile/plugin/aie_base/generations/aie_generations.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie { diff --git a/profile/plugin/aie_debug/CMakeLists.txt b/profile/plugin/aie_debug/CMakeLists.txt index 67d71348..9b87a6fc 100644 --- a/profile/plugin/aie_debug/CMakeLists.txt +++ b/profile/plugin/aie_debug/CMakeLists.txt @@ -34,8 +34,8 @@ file(GLOB AIE_DRIVER_COMMON_UTIL_FILES if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_debug_plugin SHARED ${AIE_DEBUG_PLUGIN_FILES}) add_dependencies(xdp_aie_debug_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_debug_plugin PRIVATE FAL_LINUX="on" XDP_VE2_BUILD=1) + target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_debug_plugin PRIVATE FAL_LINUX="on" XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1) target_include_directories(xdp_aie_debug_plugin PRIVATE ${CMAKE_SOURCE_DIR}/src) install (TARGETS xdp_aie_debug_plugin @@ -46,8 +46,8 @@ if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") elseif (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_debug_plugin SHARED ${AIE_DEBUG_PLUGIN_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES}) add_dependencies(xdp_aie_debug_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_debug_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_debug_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_debug_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_debug_plugin PROPERTIES VERSION 
${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_debug/aie_debug_metadata.h b/profile/plugin/aie_debug/aie_debug_metadata.h index 1b397227..23d83926 100644 --- a/profile/plugin/aie_debug/aie_debug_metadata.h +++ b/profile/plugin/aie_debug/aie_debug_metadata.h @@ -24,8 +24,13 @@ #include "xdp/profile/plugin/vp_base/vp_base_plugin.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_debug/client/aie_debug.h b/profile/plugin/aie_debug/client/aie_debug.h index b27cc418..652bf2e1 100755 --- a/profile/plugin/aie_debug/client/aie_debug.h +++ b/profile/plugin/aie_debug/client/aie_debug.h @@ -17,8 +17,13 @@ #include "core/include/xrt/xrt_hw_context.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN + #include + #include +#else #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_debug/ve2/aie_debug.h b/profile/plugin/aie_debug/ve2/aie_debug.h index fe7967a7..1b2a20e2 100755 --- a/profile/plugin/aie_debug/ve2/aie_debug.h +++ b/profile/plugin/aie_debug/ve2/aie_debug.h @@ -18,8 +18,13 @@ #include "xaiefal/xaiefal.hpp" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { From 5c58ab804a9933967301988c038d271fff508e26 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 13:20:25 -0600 Subject: [PATCH 03/19] Update headers and CMAKE for aie_halt and aie_pc Signed-off-by: snigupta --- profile/plugin/aie_halt/CMakeLists.txt | 8 ++++---- profile/plugin/aie_halt/clientDev/aie_halt.cpp | 5 +++++ profile/plugin/aie_pc/CMakeLists.txt | 4 ++-- profile/plugin/aie_pc/clientDev/aie_pc.cpp | 6 ++++++ profile/plugin/aie_pc/clientDev/aie_pc.h | 5 +++++ 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/profile/plugin/aie_halt/CMakeLists.txt b/profile/plugin/aie_halt/CMakeLists.txt index a67d422b..8bb4cf17 100644 --- a/profile/plugin/aie_halt/CMakeLists.txt +++ 
b/profile/plugin/aie_halt/CMakeLists.txt @@ -31,8 +31,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") xrt_configure_version_file(xdp_aie_halt_plugin SHARED) add_library(xdp_aie_halt_plugin SHARED xdp_aie_halt_plugin-version.rc ${XDP_AIE_HALT_PLUGIN_FILES} ${XDP_DEVICE_COMMON_FILES}) add_dependencies(xdp_aie_halt_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_halt_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_halt_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) @@ -46,9 +46,9 @@ elseif (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_dependencies(xdp_aie_halt_plugin xdp_core xrt_coreutil) #target_include_directories(xdp_aie_halt_plugin PRIVATE ${AIERT_DIR}/include) - #target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil xaiengine) + #target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil) - target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_VE2_BUILD=1) + target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1) set_target_properties(xdp_aie_halt_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_halt/clientDev/aie_halt.cpp b/profile/plugin/aie_halt/clientDev/aie_halt.cpp index bced7a34..bbedcd00 100644 --- a/profile/plugin/aie_halt/clientDev/aie_halt.cpp +++ b/profile/plugin/aie_halt/clientDev/aie_halt.cpp @@ -34,8 +34,13 @@ #include "core/include/xclbin.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include 
+#include +#else #include #include +#endif } #ifdef _WIN32 diff --git a/profile/plugin/aie_pc/CMakeLists.txt b/profile/plugin/aie_pc/CMakeLists.txt index 0ca7ffb7..2a88c97e 100644 --- a/profile/plugin/aie_pc/CMakeLists.txt +++ b/profile/plugin/aie_pc/CMakeLists.txt @@ -30,8 +30,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_pc_plugin SHARED xdp_aie_pc_plugin-version.rc ${XDP_AIE_PC_PLUGIN_FILES} ${XDP_DEVICE_COMMON_FILES}) add_dependencies(xdp_aie_pc_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_pc_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_pc_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_pc_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_pc_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_pc_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_pc_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_pc/clientDev/aie_pc.cpp b/profile/plugin/aie_pc/clientDev/aie_pc.cpp index f18389f5..d7674479 100644 --- a/profile/plugin/aie_pc/clientDev/aie_pc.cpp +++ b/profile/plugin/aie_pc/clientDev/aie_pc.cpp @@ -38,9 +38,15 @@ #include "core/include/xclbin.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN + #include + #include + #include +#else #include #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_pc/clientDev/aie_pc.h b/profile/plugin/aie_pc/clientDev/aie_pc.h index 67647bb4..e917fc98 100644 --- a/profile/plugin/aie_pc/clientDev/aie_pc.h +++ b/profile/plugin/aie_pc/clientDev/aie_pc.h @@ -21,8 +21,13 @@ #include "xdp/profile/plugin/aie_pc/aie_pc_impl.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN + #include + #include +#else #include #include +#endif } #include From 0631ba2de4787e18a6e09a9aca7392db3d30f639 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 
13:26:13 -0600 Subject: [PATCH 04/19] Update headers and CMAKE for aie_profile Signed-off-by: snigupta --- profile/plugin/aie_profile/CMakeLists.txt | 12 ++++++++---- profile/plugin/aie_profile/client/aie_profile.h | 5 +++++ profile/plugin/aie_profile/util/aie_profile_config.h | 5 +++++ profile/plugin/aie_profile/util/aie_profile_util.h | 5 +++++ profile/plugin/aie_profile/ve2/aie_profile.h | 5 +++++ 5 files changed, 28 insertions(+), 4 deletions(-) diff --git a/profile/plugin/aie_profile/CMakeLists.txt b/profile/plugin/aie_profile/CMakeLists.txt index 7ba9968a..70a57bc2 100644 --- a/profile/plugin/aie_profile/CMakeLists.txt +++ b/profile/plugin/aie_profile/CMakeLists.txt @@ -43,8 +43,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_profile_plugin SHARED xdp_aie_profile_plugin-version.rc ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_JSON_PARSER_FILES}) add_dependencies(xdp_aie_profile_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_profile_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_profile_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_profile_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_profile_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_profile_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_profile_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) @@ -85,10 +85,14 @@ else() ) add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES}) add_dependencies(xdp_aie_profile_plugin_xdna xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_profile_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) - 
target_compile_definitions(xdp_aie_profile_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") + target_link_libraries(xdp_aie_profile_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen aiebu_library_objects) + target_link_options(xdp_aie_profile_plugin_xdna PRIVATE -Wl,-Bsymbolic) + target_compile_definitions(xdp_aie_profile_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") target_include_directories(xdp_aie_profile_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src + ${AIEFAL_DIR} + ${AIEBU_SOURCE_DIR}/src/cpp/include + ${AIERT_DIR}/include ${XRT_SOURCE_DIR}/runtime_src/core/common/elf ) set_target_properties(xdp_aie_profile_plugin_xdna PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_profile/client/aie_profile.h b/profile/plugin/aie_profile/client/aie_profile.h index 863f1fb6..0c0775ff 100644 --- a/profile/plugin/aie_profile/client/aie_profile.h +++ b/profile/plugin/aie_profile/client/aie_profile.h @@ -13,8 +13,13 @@ #include "xdp/profile/device/common/client_transaction.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_profile/util/aie_profile_config.h b/profile/plugin/aie_profile/util/aie_profile_config.h index 4a8643a4..5ba86338 100644 --- a/profile/plugin/aie_profile/util/aie_profile_config.h +++ b/profile/plugin/aie_profile/util/aie_profile_config.h @@ -11,8 +11,13 @@ #include "xdp/profile/database/static_info/aie_constructs.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie::profile { diff --git a/profile/plugin/aie_profile/util/aie_profile_util.h b/profile/plugin/aie_profile/util/aie_profile_util.h index 3cf62984..fc56e369 100644 --- a/profile/plugin/aie_profile/util/aie_profile_util.h +++ b/profile/plugin/aie_profile/util/aie_profile_util.h @@ -11,8 +11,13 @@ #include "xdp/profile/database/static_info/aie_constructs.h" 
extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie::profile { diff --git a/profile/plugin/aie_profile/ve2/aie_profile.h b/profile/plugin/aie_profile/ve2/aie_profile.h index e5671b08..fbd20fe8 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.h +++ b/profile/plugin/aie_profile/ve2/aie_profile.h @@ -15,8 +15,13 @@ #include "xaiefal/xaiefal.hpp" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { From b461625d0791ffd3f0fe5895a23bf92c75a4dde5 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 13:27:41 -0600 Subject: [PATCH 05/19] Update headers and CMAKE for aie_status Signed-off-by: snigupta --- profile/plugin/aie_status/CMakeLists.txt | 4 ++-- profile/plugin/aie_status/aie_status_plugin.h | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/profile/plugin/aie_status/CMakeLists.txt b/profile/plugin/aie_status/CMakeLists.txt index b116ef29..18fd8530 100644 --- a/profile/plugin/aie_status/CMakeLists.txt +++ b/profile/plugin/aie_status/CMakeLists.txt @@ -17,8 +17,8 @@ file(GLOB AIE_STATUS_PLUGIN_FILES if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_status_plugin SHARED ${AIE_STATUS_PLUGIN_FILES}) add_dependencies(xdp_aie_status_plugin xdp_core) - target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core xaiengine) - target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") + target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core aie_codegen) + target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") target_include_directories(xdp_aie_status_plugin PRIVATE ${CMAKE_SOURCE_DIR}/src) set_target_properties(xdp_aie_status_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_status/aie_status_plugin.h b/profile/plugin/aie_status/aie_status_plugin.h index 
e502d6a2..8cc03466 100644 --- a/profile/plugin/aie_status/aie_status_plugin.h +++ b/profile/plugin/aie_status/aie_status_plugin.h @@ -33,8 +33,13 @@ #include "xdp/profile/plugin/vp_base/vp_base_plugin.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include -#include "xaiengine/xaie_helper.h" +#include +#endif } namespace xdp { From f91d2ac97cfc37650c3d8a6e0a4c32290ec551e5 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 13:30:14 -0600 Subject: [PATCH 06/19] Update headers and CMAKE for aie_trace Signed-off-by: snigupta --- profile/plugin/aie_trace/CMakeLists.txt | 15 +++++++++++++-- .../plugin/aie_trace/aie_trace_offload_manager.h | 5 +++++ profile/plugin/aie_trace/client/aie_trace.h | 5 +++++ profile/plugin/aie_trace/util/aie_trace_util.h | 5 +++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/profile/plugin/aie_trace/CMakeLists.txt b/profile/plugin/aie_trace/CMakeLists.txt index 249a344d..fe02abb1 100644 --- a/profile/plugin/aie_trace/CMakeLists.txt +++ b/profile/plugin/aie_trace/CMakeLists.txt @@ -52,8 +52,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_trace_plugin SHARED xdp_aie_trace_plugin-version.rc ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES} ${AIE_JSON_PARSER_FILES}) add_dependencies(xdp_aie_trace_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_trace_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_trace_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) @@ -106,6 +106,17 
@@ else() add_library(xdp_aie_trace_plugin_xdna SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_TRACE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES}) add_dependencies(xdp_aie_trace_plugin_xdna xdp_core xrt_coreutil) + # TODO: add aie_codegen to aie_trace once ASM->ELF support is there + # target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen) + # target_link_options(xdp_aie_trace_plugin_xdna PRIVATE -Wl,-Bsymbolic) + # target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") + # target_include_directories(xdp_aie_profile_plugin_xdna PRIVATE + # ${CMAKE_SOURCE_DIR}/src + # ${AIEFAL_DIR} + # ${AIEBU_SOURCE_DIR}/src/cpp/include + # ${AIERT_DIR}/include + # ${XRT_SOURCE_DIR}/runtime_src/core/common/elf + # ) target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src) diff --git a/profile/plugin/aie_trace/aie_trace_offload_manager.h b/profile/plugin/aie_trace/aie_trace_offload_manager.h index 638e74c0..2594ba58 100644 --- a/profile/plugin/aie_trace/aie_trace_offload_manager.h +++ b/profile/plugin/aie_trace/aie_trace_offload_manager.h @@ -25,8 +25,13 @@ #endif extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } diff --git a/profile/plugin/aie_trace/client/aie_trace.h b/profile/plugin/aie_trace/client/aie_trace.h index dde6da85..ba548303 100644 --- a/profile/plugin/aie_trace/client/aie_trace.h +++ b/profile/plugin/aie_trace/client/aie_trace.h @@ -12,8 +12,13 @@ #include "xdp/profile/device/common/client_transaction.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_trace/util/aie_trace_util.h 
b/profile/plugin/aie_trace/util/aie_trace_util.h index 036e2ee7..adf86e93 100755 --- a/profile/plugin/aie_trace/util/aie_trace_util.h +++ b/profile/plugin/aie_trace/util/aie_trace_util.h @@ -9,8 +9,13 @@ #include "xdp/profile/plugin/aie_trace/aie_trace_metadata.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie::trace { From a45a0ba825adb388d8519b3ee04948b7532fb000 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 13:31:54 -0600 Subject: [PATCH 07/19] Update headers for aie_dtrace Signed-off-by: snigupta --- profile/plugin/aie_dtrace/util/aie_dtrace_util.h | 4 ++++ profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/profile/plugin/aie_dtrace/util/aie_dtrace_util.h b/profile/plugin/aie_dtrace/util/aie_dtrace_util.h index 29ac73f0..9fe75466 100644 --- a/profile/plugin/aie_dtrace/util/aie_dtrace_util.h +++ b/profile/plugin/aie_dtrace/util/aie_dtrace_util.h @@ -9,7 +9,11 @@ #include extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#else #include +#endif } namespace xdp::aie::dtrace { diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h index 2a4acbfa..bbf36f04 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h @@ -17,8 +17,13 @@ #include "xaiefal/xaiefal.hpp" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { From 089e17309b4e9bc0612b3f1ea140ee5c1b06e60e Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 13:35:01 -0600 Subject: [PATCH 08/19] Add TransactionHandler for ASM transactions on VE2 Signed-off-by: snigupta --- profile/device/common/ve2/ve2_transaction.cpp | 196 ++++++++++++++++++ profile/device/common/ve2/ve2_transaction.h | 54 +++++ 2 files changed, 250 insertions(+) create mode 100644 profile/device/common/ve2/ve2_transaction.cpp create 
mode 100644 profile/device/common/ve2/ve2_transaction.h diff --git a/profile/device/common/ve2/ve2_transaction.cpp b/profile/device/common/ve2/ve2_transaction.cpp new file mode 100644 index 00000000..a3f57f68 --- /dev/null +++ b/profile/device/common/ve2/ve2_transaction.cpp @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2026 Advanced Micro Devices, Inc. All rights reserved + +#include + +#include "ve2_transaction.h" +#include "core/common/message.h" +#include "xrt/experimental/xrt_elf.h" +#include "xrt/experimental/xrt_ext.h" +#include "xrt/experimental/xrt_module.h" +#include "xrt/xrt_hw_context.h" +#include "xrt/xrt_kernel.h" + +#include "core/common/aiebu/src/cpp/include/aiebu/aiebu_assembler.h" +#include "core/common/aiebu/src/cpp/include/aiebu/aiebu_error.h" + +#include +#include +#include +#include + +extern "C" { + #include + #include +} + +namespace xdp::aie { + using severity_level = xrt_core::message::severity_level; + + bool VE2Transaction::initializeTransaction(XAie_DevInst* aieDevInst, std::string tName) + { + setTransactionName(tName); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Writing to New Control Code ASM file: " + getAsmFileName()); + + try { + // On VE2 Linux, the default IO backend is Linux IO which tries to open + // the AIE character device — this fails on XDNA (PCIe NPU) since the + // AIE is managed by the XDNA driver, not the Linux AIE driver. + // Explicitly switch to control-code backend before opening the ASM file. 
+ XAie_SetIOBackend(aieDevInst, XAIE_IO_BACKEND_CONTROLCODE); + XAie_OpenControlCodeFile(aieDevInst, getAsmFileName().c_str(), 8192); + XAie_StartNewJob(aieDevInst, XAIE_START_JOB); + return true; + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating asm File: " + getAsmFileName() + "\n" + e.what()); + } + xrt_core::message::send(severity_level::warning, "XRT", "AIE Transaction Initialization Failed."); + return false; + } + + bool VE2Transaction::completeASM(XAie_DevInst* aieDevInst) + { + // + // 1. End generation of ASM file + // + try { + XAie_EndJob(aieDevInst); + XAie_EndPage(aieDevInst); + XAie_CloseControlCodeFile(aieDevInst); + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating ASM file: " + getAsmFileName() + "\n" + e.what()); + return false; + } + return true; + } + + bool VE2Transaction::generateELF() + { + // + // 2. 
Convert ASM to ELF + // + // Fill this vector with ASM content + std::vector control_code_buf; + std::vector libpaths; + libpaths.push_back("./"); + + try { +#if 1 + //Read ASM file + std::string asmFileName = getAsmFileName(); + if (!std::filesystem::exists(asmFileName)) + throw std::runtime_error("file:" + asmFileName + " not found\n"); + + std::ifstream inAsm(asmFileName, std::ios::in | std::ios::binary); + std::cout << "Open file " << asmFileName << std::endl; + + auto file_size = std::filesystem::file_size(asmFileName); + control_code_buf.resize(file_size); + + inAsm.read(control_code_buf.data(), file_size); + std::streamsize bytesRead = inAsm.gcount(); + if (static_cast(bytesRead) != static_cast(file_size)) { + std::cerr << "Read " << bytesRead << " bytes but expected " << file_size + << " for file " << asmFileName << '\n'; + control_code_buf.resize(static_cast(bytesRead)); // keep only read bytes + } else { + std::cout << "ASM file read (" << file_size << " bytes): " << asmFileName << '\n'; + } + + //Convert ASM to ELF data. 
+ auto as = aiebu::aiebu_assembler(aiebu::aiebu_assembler::buffer_type::asm_aie2ps, + control_code_buf, std::vector{}, libpaths); + + //Write elf data to a file + auto e = as.get_elf(); + std::cout << "Elf size:" << e.size() << std::endl; + std::ofstream outElf(getElfFileName(), std::ios_base::binary); + outElf.write(e.data(), e.size()); +#else + auto check1 = std::getenv("AIEBU_REPO"); + auto check2 = std::getenv("PYTHONPATH"); + if ((check1 == nullptr) || (check2 == nullptr)) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "Please define AIEBU_REPO and PYTHONPATH so elf generation can work."); + return false; + } + + std::stringstream command; + command << "${AIEBU_REPO}/src/python/aiebu/control_asm_disasm.py -t aie4 " + << getAsmFileName() << " -o " << getElfFileName(); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Generating ELF using: " + command.str()); + if (system(command.str().c_str())) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Elf generation failed"); + return false; + } +#endif + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating Elf file: " + getElfFileName() + "\n" + e.what()); + return false; + } + return true; + } + + bool VE2Transaction::submitELF(xrt::hw_context hwContext) + { + // + // 3. Submit ELF to microcontroller + // + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Start New Control Code Elf"); + xrt::elf profileElf; + try { + profileElf = xrt::elf(getElfFileName()); + } + catch (...) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "Failed to load " + getElfFileName() + ". 
Cannot configure AIE to profile."); + return false; + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Elf Object Created"); + xrt::module mod{profileElf}; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Module Created"); + xrt::kernel kernel; + try { + kernel = xrt::ext::kernel{hwContext, mod, "XDP_KERNEL:{IPUV1CNN}"}; + } catch (...) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "XDP_KERNEL not found in HW Context. Unable to run " + getElfFileName()); + return false; + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "XDP_KERNEL created"); + xrt::run run{kernel}; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Kernel run created"); + run.start(); + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Run started"); + run.wait2(); + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Wait done!"); + return true; + } + + bool VE2Transaction::submitTransaction(XAie_DevInst* aieDevInst, xrt::hw_context hwContext) + { + if (!completeASM(aieDevInst)) + return false; + if (!generateELF()) + return false; + if (!submitELF(hwContext)) + return false; + return true; + } +} \ No newline at end of file diff --git a/profile/device/common/ve2/ve2_transaction.h b/profile/device/common/ve2/ve2_transaction.h new file mode 100644 index 00000000..7758d239 --- /dev/null +++ b/profile/device/common/ve2/ve2_transaction.h @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2026 Advanced Micro Devices, Inc. 
All rights reserved + +#ifndef VE2_TRANSACTION_DOT_H +#define VE2_TRANSACTION_DOT_H + +#include +#include +#include + +#include "xrt/xrt_hw_context.h" +#include "xrt/xrt_kernel.h" + +extern "C" { +#include +#include +} + +/** + * @brief VE2Transaction class for generating and submitting VE2 XDNA transactions + * + * This class is used to generate and submit VE2 transactions. It is used to generate the ASM file, the ELF file, and submit the transaction. + * + */ + +namespace xdp::aie { + class VE2Transaction { + public: + VE2Transaction() {}; + bool initializeTransaction(XAie_DevInst* aieDevInst, std::string tName); + bool submitTransaction(XAie_DevInst* aieDevInst, xrt::hw_context hwContext); + bool completeASM(XAie_DevInst* aieDevInst); + bool generateELF(); + bool submitELF(xrt::hw_context hwContext); + + void setTransactionName(std::string newTransactionName) {m_transactionName = newTransactionName;} + std::string getAsmFileName() { return m_transactionName + ".asm"; } + std::string getElfFileName() { return m_transactionName + ".elf"; } + int getGroupID(int id, xrt::hw_context hwContext) { + xrt::kernel kernel = xrt::kernel(hwContext, "XDP_KERNEL"); + return kernel.group_id(id); + } + + private: + std::string m_transactionName; + std::vector m_columns; + std::vector m_rows; + std::vector m_offsets; + std::vector m_values; + }; + +} // namespace xdp::aie + +#endif \ No newline at end of file From c3dd7f5d46c3af647c18668ec59788faa361d7a3 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 14:08:22 -0600 Subject: [PATCH 09/19] Update aie_profile VE2 XDNA flow to use ASM->ELF Signed-off-by: snigupta --- .../plugin/aie_profile/aie_profile_plugin.cpp | 5 + .../plugin/aie_profile/ve2/aie_profile.cpp | 1760 +++++++++++------ profile/plugin/aie_profile/ve2/aie_profile.h | 32 +- 3 files changed, 1169 insertions(+), 628 deletions(-) diff --git a/profile/plugin/aie_profile/aie_profile_plugin.cpp b/profile/plugin/aie_profile/aie_profile_plugin.cpp index 
789393ed..0fb51221 100644 --- a/profile/plugin/aie_profile/aie_profile_plugin.cpp +++ b/profile/plugin/aie_profile/aie_profile_plugin.cpp @@ -168,6 +168,8 @@ namespace xdp { #elif defined(XRT_X86_BUILD) implementation = std::make_unique(db, metadata, deviceID); #elif XDP_VE2_BUILD + xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(handle); + metadata->setHwContext(context); implementation = std::make_unique(db, metadata, deviceID); #else implementation = std::make_unique(db, metadata, deviceID); @@ -257,6 +259,9 @@ auto time = std::time(nullptr); #ifdef XDP_CLIENT_BUILD auto& implementation = handleToAIEProfileImpl.begin()->second; implementation->poll(0); + #elif XDP_VE2_BUILD + auto& implementation = handleToAIEProfileImpl.begin()->second; + implementation->poll(implementation->getDeviceID()); #endif // Ask all threads to end for (auto& p : handleToAIEProfileImpl) { diff --git a/profile/plugin/aie_profile/ve2/aie_profile.cpp b/profile/plugin/aie_profile/ve2/aie_profile.cpp index a0874065..333ad004 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.cpp +++ b/profile/plugin/aie_profile/ve2/aie_profile.cpp @@ -31,720 +31,1230 @@ #include "core/common/api/hw_context_int.h" #include "shim_ve2/xdna_hwctx.h" -namespace { - static void* fetchAieDevInst(void* devHandle) - { - xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(devHandle); - auto hwctx_hdl = static_cast(context); - auto hwctx_obj = dynamic_cast(hwctx_hdl); - auto aieArray = hwctx_obj->get_aie_array(); - return aieArray->get_dev() ; - } - - static void* allocateAieDevice(void* devHandle) - { - auto aieDevInst = static_cast(fetchAieDevInst(devHandle)) ; - if (!aieDevInst) - return nullptr; - return new xaiefal::XAieDev(aieDevInst, false) ; - } +#include "core/common/api/bo_int.h" +#include "xrt/xrt_bo.h" + +// ZOCL flow +#ifdef XDP_VE2_ZOCL_BUILD + namespace { + static void* fetchAieDevInst(void* devHandle) + { + 
xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(devHandle); + auto hwctx_hdl = static_cast(context); + auto hwctx_obj = dynamic_cast(hwctx_hdl); + auto aieArray = hwctx_obj->get_aie_array(); + return aieArray->get_dev() ; + } - static void deallocateAieDevice(void* aieDevice) - { - auto object = static_cast(aieDevice) ; - if (object != nullptr) - delete object ; - } -} // end anonymous namespace + static void* allocateAieDevice(void* devHandle) + { + auto aieDevInst = static_cast(fetchAieDevInst(devHandle)) ; + if (!aieDevInst) + return nullptr; + return new xaiefal::XAieDev(aieDevInst, false) ; + } -namespace xdp { - using tile_type = xdp::tile_type; - using module_type = xdp::module_type; - using severity_level = xrt_core::message::severity_level; + static void deallocateAieDevice(void* aieDevice) + { + auto object = static_cast(aieDevice) ; + if (object != nullptr) + delete object ; + } + } // end anonymous namespace - AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr metadata, uint64_t deviceID) - : AieProfileImpl(database, metadata, deviceID) - { - auto hwGen = metadata->getHardwareGen(); + namespace xdp { + using tile_type = xdp::tile_type; + using module_type = xdp::module_type; + using severity_level = xrt_core::message::severity_level; - coreStartEvents = aie::profile::getCoreEventSets(hwGen); - coreEndEvents = coreStartEvents; + AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr metadata, uint64_t deviceID) + : AieProfileImpl(database, metadata, deviceID) + { + auto hwGen = metadata->getHardwareGen(); - memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); - memoryEndEvents = memoryStartEvents; + coreStartEvents = aie::profile::getCoreEventSets(hwGen); + coreEndEvents = coreStartEvents; - shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); - shimEndEvents = shimStartEvents; - shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, 
XAIE_EVENT_PERF_CNT_0_PL}; + memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); + memoryEndEvents = memoryStartEvents; - memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); - memTileEndEvents = memTileStartEvents; - - microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); - } + shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); + shimEndEvents = shimStartEvents; + shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PERF_CNT_0_PL}; - bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) - { - aieDevInst = static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, handle, deviceId)) ; - aieDevice = static_cast(db->getStaticInfo().getAieDevice(allocateAieDevice, deallocateAieDevice, handle, deviceId)) ; - if (!aieDevInst || !aieDevice) { - xrt_core::message::send(severity_level::warning, "XRT", - "Unable to get AIE device. There will be no AIE profiling."); - return false; + memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); + memTileEndEvents = memTileStartEvents; + + microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); } - return true; - } - - void AieProfile_VE2Impl::updateDevice() { - - if(!checkAieDevice(deviceID, metadata->getHandle())) - return; - // Submit nop.elf before configuring profile - if (!aie::submitNopElf(metadata->getHandle())) { - xrt_core::message::send(severity_level::warning, "XRT", - "Failed to submit nop.elf. AIE profile configuration will not proceed."); - return; + bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) + { + aieDevInst = static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, handle, deviceId)) ; + aieDevice = static_cast(db->getStaticInfo().getAieDevice(allocateAieDevice, deallocateAieDevice, handle, deviceId)) ; + if (!aieDevInst || !aieDevice) { + xrt_core::message::send(severity_level::warning, "XRT", + "Unable to get AIE device. 
There will be no AIE profiling."); + return false; } + return true; + } - bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); + void AieProfile_VE2Impl::updateDevice() { - if (!runtimeCounters) { - void* h = metadata->getHandle(); - std::shared_ptr device = xrt_core::get_userpf_device(h); - if (!device) - device = xdp::util::convertToCoreDevice(h, true); - if (!device) { + if(!checkAieDevice(deviceID, metadata->getHandle())) + return; + + // Submit nop.elf before configuring profile + if (!aie::submitNopElf(metadata->getHandle())) { xrt_core::message::send(severity_level::warning, "XRT", - "AIE Profile: could not resolve core device for xclbin profile counters."); - (db->getStaticInfo()).setIsAIECounterRead(deviceID, true); - return; - } - auto counters = xrt_core::edge::aie::get_profile_counters(device.get()); - - if (counters.empty()) { - xrt_core::message::send(severity_level::warning, "XRT", - "AIE Profile Counters were not found for this design. Please specify " - "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " - "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " - "under \"AIE_profile_settings\" section in your xrt.ini."); - (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); + "Failed to submit nop.elf. 
AIE profile configuration will not proceed."); return; } - else { - XAie_DevInst* aieDevInst = - static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, metadata->getHandle())); - - if (!aieDevInst) { - xrt_core::message::send(severity_level::warning, "XRT", - "Failed to get AIE device instance for profile counters."); + + bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); + + if (!runtimeCounters) { + void* h = metadata->getHandle(); + std::shared_ptr device = xrt_core::get_userpf_device(h); + if (!device) + device = xdp::util::convertToCoreDevice(h, true); + if (!device) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile: could not resolve core device for xclbin profile counters."); + (db->getStaticInfo()).setIsAIECounterRead(deviceID, true); return; } + auto counters = xrt_core::edge::aie::get_profile_counters(device.get()); - xrt_core::message::send(severity_level::debug, "XRT", "Processing " + std::to_string(counters.size()) + " counters"); - for (auto& counter : counters) { - std::stringstream msg; - msg << "Adding counter " << counter.id << " at (" - << +counter.column << "," << +counter.row << ") module: " << counter.module; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - - // For pre-configured counters from xclbin metadata, the hardware is already configured - // Payload is used for reporting metadata (channel/stream IDs), set to 0 for these counters - // as we don't have full tile information (stream_ids, is_master_vec) to safely compute it - uint64_t payload = 0; + if (counters.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile Counters were not found for this design. 
Please specify " + "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " + "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " + "under \"AIE_profile_settings\" section in your xrt.ini."); + (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); + return; + } + else { + XAie_DevInst* aieDevInst = + static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, metadata->getHandle())); - (db->getStaticInfo()).addAIECounter(deviceID, counter.id, counter.column, - counter.row, counter.counterNumber, counter.startEvent, counter.endEvent, - counter.resetEvent, payload, counter.clockFreqMhz, counter.module, counter.name); + if (!aieDevInst) { + xrt_core::message::send(severity_level::warning, "XRT", + "Failed to get AIE device instance for profile counters."); + return; + } + + xrt_core::message::send(severity_level::debug, "XRT", "Processing " + std::to_string(counters.size()) + " counters"); + for (auto& counter : counters) { + std::stringstream msg; + msg << "Adding counter " << counter.id << " at (" + << +counter.column << "," << +counter.row << ") module: " << counter.module; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + + // For pre-configured counters from xclbin metadata, the hardware is already configured + // Payload is used for reporting metadata (channel/stream IDs), set to 0 for these counters + // as we don't have full tile information (stream_ids, is_master_vec) to safely compute it + uint64_t payload = 0; + + (db->getStaticInfo()).addAIECounter(deviceID, counter.id, counter.column, + counter.row, counter.counterNumber, counter.startEvent, counter.endEvent, + counter.resetEvent, payload, counter.clockFreqMhz, counter.module, counter.name); + } + xrt_core::message::send(severity_level::debug, "XRT", "Finished processing counters"); } - xrt_core::message::send(severity_level::debug, "XRT", "Finished processing counters"); } + } + + // Get reportable payload specific for this tile 
and/or counter + uint64_t + AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, + const tile_type& tile, + const module_type type, + uint8_t column, + uint8_t row, + uint16_t startEvent, + const std::string metricSet, + const uint8_t channel, + uint8_t logicalPortIndex) + { + // 1. Profile API specific values + if (aie::profile::profileAPIMetricSet(metricSet)) + return getAdfProfileAPIPayload(tile, metricSet); + + // 2. Channel/stream IDs for interface tiles + if (type == module_type::shim) { + // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel/stream ID) + // portnum = physical stream-switch port (0-7) from event; stream_ids/is_master_vec + // are indexed by logical port (size = number of configured ports). When portnum is + // out of range (e.g. physical ports 4-7 when only 4 logical ports), use + // logicalPortIndex. + auto portnum = xdp::aie::getPortNumberFromEvent(static_cast(startEvent)); + uint8_t streamPortId = (portnum >= tile.stream_ids.size()) ? + 0 : static_cast(tile.stream_ids.at(portnum)); + uint8_t idToReport = (tile.subtype == io_type::GMIO) ? channel : streamPortId; + uint8_t isChannel = (tile.subtype == io_type::GMIO) ? 1 : 0; + uint8_t isMaster = aie::isInputSet(type, metricSet) ? 0 : 1; + + return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) + | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | idToReport); } - } - // Get reportable payload specific for this tile and/or counter - uint64_t - AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, - const tile_type& tile, - const module_type type, - uint8_t column, - uint8_t row, - uint16_t startEvent, - const std::string metricSet, - const uint8_t channel, - uint8_t logicalPortIndex) - { - // 1. Profile API specific values - if (aie::profile::profileAPIMetricSet(metricSet)) - return getAdfProfileAPIPayload(tile, metricSet); - - // 2. 
Channel/stream IDs for interface tiles - if (type == module_type::shim) { - // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel/stream ID) - // portnum = physical stream-switch port (0-7) from event; stream_ids/is_master_vec - // are indexed by logical port (size = number of configured ports). When portnum is - // out of range (e.g. physical ports 4-7 when only 4 logical ports), use - // logicalPortIndex. - auto portnum = xdp::aie::getPortNumberFromEvent(static_cast(startEvent)); - uint8_t streamPortId = (portnum >= tile.stream_ids.size()) ? - 0 : static_cast(tile.stream_ids.at(portnum)); - uint8_t idToReport = (tile.subtype == io_type::GMIO) ? channel : streamPortId; - uint8_t isChannel = (tile.subtype == io_type::GMIO) ? 1 : 0; - uint8_t isMaster = aie::isInputSet(type, metricSet) ? 0 : 1; - - return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) - | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | idToReport); - } + // 3. Channel IDs for memory tiles + if (type == module_type::mem_tile) { + // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel ID) + uint8_t isChannel = 1; + uint8_t isMaster = aie::isInputSet(type, metricSet) ? 1 : 0; + return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) + | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); + } - // 3. Channel IDs for memory tiles - if (type == module_type::mem_tile) { - // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel ID) + // 4. DMA BD sizes for AIE tiles + // NOTE: value = ((max BD size) << 16) & ((isMaster) << 8) & (isChannel << 7) & (channel ID) uint8_t isChannel = 1; - uint8_t isMaster = aie::isInputSet(type, metricSet) ? 1 : 0; - return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) - | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); - } + uint8_t isMaster = aie::isInputSet(type, metricSet) ? 
1 : 0; + uint32_t payloadValue = ((isMaster << PAYLOAD_IS_MASTER_SHIFT) + | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); + + if ((metadata->getHardwareGen() != 1) + || ((startEvent != XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_MEM) + && (startEvent != XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_MEM) + && (startEvent != XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_MEM) + && (startEvent != XAIE_EVENT_DMA_MM2S_1_FINISHED_BD_MEM))) + return payloadValue; + + // Get average BD size for throughput calculations (AIE1 only) + constexpr int NUM_BDS = 8; + constexpr uint32_t BYTES_PER_WORD = 4; + constexpr uint32_t ACTUAL_OFFSET = 1; + uint64_t offsets[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL, XAIEGBL_MEM_DMABD1CTRL, + XAIEGBL_MEM_DMABD2CTRL, XAIEGBL_MEM_DMABD3CTRL, + XAIEGBL_MEM_DMABD4CTRL, XAIEGBL_MEM_DMABD5CTRL, + XAIEGBL_MEM_DMABD6CTRL, XAIEGBL_MEM_DMABD7CTRL}; + uint32_t lsbs[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_LSB, XAIEGBL_MEM_DMABD1CTRL_LEN_LSB, + XAIEGBL_MEM_DMABD2CTRL_LEN_LSB, XAIEGBL_MEM_DMABD3CTRL_LEN_LSB, + XAIEGBL_MEM_DMABD4CTRL_LEN_LSB, XAIEGBL_MEM_DMABD5CTRL_LEN_LSB, + XAIEGBL_MEM_DMABD6CTRL_LEN_LSB, XAIEGBL_MEM_DMABD7CTRL_LEN_LSB}; + uint32_t masks[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_MASK, XAIEGBL_MEM_DMABD1CTRL_LEN_MASK, + XAIEGBL_MEM_DMABD2CTRL_LEN_MASK, XAIEGBL_MEM_DMABD3CTRL_LEN_MASK, + XAIEGBL_MEM_DMABD4CTRL_LEN_MASK, XAIEGBL_MEM_DMABD5CTRL_LEN_MASK, + XAIEGBL_MEM_DMABD6CTRL_LEN_MASK, XAIEGBL_MEM_DMABD7CTRL_LEN_MASK}; + uint32_t valids[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD1CTRL_VALBD_MASK, + XAIEGBL_MEM_DMABD2CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD3CTRL_VALBD_MASK, + XAIEGBL_MEM_DMABD4CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD5CTRL_VALBD_MASK, + XAIEGBL_MEM_DMABD6CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD7CTRL_VALBD_MASK}; + + uint32_t maxBDSize = 0; + auto tileOffset = XAie_GetTileAddr(aieDevInst, row, column); + for (int bd = 0; bd < NUM_BDS; ++bd) { + uint32_t regValue = 0; + XAie_Read32(aieDevInst, tileOffset + offsets[bd], &regValue); + + if (regValue & valids[bd]) { +
uint32_t bdBytes = BYTES_PER_WORD * (((regValue >> lsbs[bd]) & masks[bd]) + ACTUAL_OFFSET); + maxBDSize = std::max(bdBytes, maxBDSize); + } + } - // 4. DMA BD sizes for AIE tiles - // NOTE: value = ((max BD size) << 16) & ((isMaster) << 8) & (isChannel << 7) & (channel ID) - uint8_t isChannel = 1; - uint8_t isMaster = aie::isInputSet(type, metricSet) ? 1 : 0; - uint32_t payloadValue = ((isMaster << PAYLOAD_IS_MASTER_SHIFT) - | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); - - if ((metadata->getHardwareGen() != 1) - || ((startEvent != XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_MEM) - && (startEvent != XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_MEM) - && (startEvent != XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_MEM) - && (startEvent != XAIE_EVENT_DMA_MM2S_1_FINISHED_BD_MEM))) + payloadValue |= (maxBDSize << PAYLOAD_BD_SIZE_SHIFT); return payloadValue; - - // Get average BD size for throughput calculations (AIE1 only) - constexpr int NUM_BDS = 8; - constexpr uint32_t BYTES_PER_WORD = 4; - constexpr uint32_t ACTUAL_OFFSET = 1; - uint64_t offsets[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL, XAIEGBL_MEM_DMABD1CTRL, - XAIEGBL_MEM_DMABD2CTRL, XAIEGBL_MEM_DMABD3CTRL, - XAIEGBL_MEM_DMABD4CTRL, XAIEGBL_MEM_DMABD5CTRL, - XAIEGBL_MEM_DMABD6CTRL, XAIEGBL_MEM_DMABD7CTRL}; - uint32_t lsbs[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_LSB, XAIEGBL_MEM_DMABD1CTRL_LEN_LSB, - XAIEGBL_MEM_DMABD2CTRL_LEN_LSB, XAIEGBL_MEM_DMABD3CTRL_LEN_LSB, - XAIEGBL_MEM_DMABD4CTRL_LEN_LSB, XAIEGBL_MEM_DMABD5CTRL_LEN_LSB, - XAIEGBL_MEM_DMABD6CTRL_LEN_LSB, XAIEGBL_MEM_DMABD7CTRL_LEN_LSB}; - uint32_t masks[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_MASK, XAIEGBL_MEM_DMABD1CTRL_LEN_MASK, - XAIEGBL_MEM_DMABD2CTRL_LEN_MASK, XAIEGBL_MEM_DMABD3CTRL_LEN_MASK, - XAIEGBL_MEM_DMABD4CTRL_LEN_MASK, XAIEGBL_MEM_DMABD5CTRL_LEN_MASK, - XAIEGBL_MEM_DMABD6CTRL_LEN_MASK, XAIEGBL_MEM_DMABD7CTRL_LEN_MASK}; - uint32_t valids[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD1CTRL_VALBD_MASK, - XAIEGBL_MEM_DMABD2CTRL_VALBD_MASK, 
XAIEGBL_MEM_DMABD3CTRL_VALBD_MASK, - XAIEGBL_MEM_DMABD4CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD5CTRL_VALBD_MASK, - XAIEGBL_MEM_DMABD6CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD7CTRL_VALBD_MASK}; - - uint32_t maxBDSize = 0; - auto tileOffset = XAie_GetTileAddr(aieDevInst, row, column); - for (int bd = 0; bd < NUM_BDS; ++bd) { - uint32_t regValue = 0; - XAie_Read32(aieDevInst, tileOffset + offsets[bd], &regValue); - - if (regValue & valids[bd]) { - uint32_t bdBytes = BYTES_PER_WORD * (((regValue >> lsbs[bd]) & masks[bd]) + ACTUAL_OFFSET); - maxBDSize = std::max(bdBytes, maxBDSize); - } } + + uint64_t + AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) + { + if (metricSet == METRIC_LATENCY) + return metadata->getIntfLatencyPayload(tile); - payloadValue |= (maxBDSize << PAYLOAD_BD_SIZE_SHIFT); - return payloadValue; - } - - uint64_t - AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) - { - if (metricSet == METRIC_LATENCY) - return metadata->getIntfLatencyPayload(tile); - - return 0; - } - - void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, - const tile_type& tile, XAie_ModuleType mod) - { - auto col = tile.col; - auto row = tile.row; - auto loc = XAie_TileLoc(col, row); - std::string moduleName = (mod == XAIE_CORE_MOD) ? "aie" - : ((mod == XAIE_MEM_MOD) ?
"aie_memory" - : "interface_tile"); - const std::string groups[3] = { - XAIEDEV_DEFAULT_GROUP_GENERIC, - XAIEDEV_DEFAULT_GROUP_STATIC, - XAIEDEV_DEFAULT_GROUP_AVAIL - }; - - std::stringstream msg; - msg << "Resource usage stats for Tile : (" << +col << "," << +row - << ") Module : " << moduleName << std::endl; - for (auto&g : groups) { - auto stats = aieDevice->getRscStat(g); - auto pc = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); - auto ts = stats.getNumRsc(loc, mod, xaiefal::XAIE_TRACEEVENT); - auto bc = stats.getNumRsc(loc, mod, xaiefal::XAIE_BROADCAST); - msg << "Resource Group : " << std::left << std::setw(10) << g << " " - << "Performance Counters : " << pc << " " - << "Trace Slots : " << ts << " " - << "Broadcast Channels : " << bc << " " - << std::endl; + return 0; } - xrt_core::message::send(severity_level::info, "XRT", msg.str()); - } + void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, + const tile_type& tile, XAie_ModuleType mod) + { + auto col = tile.col; + auto row = tile.row; + auto loc = XAie_TileLoc(col, row); + std::string moduleName = (mod == XAIE_CORE_MOD) ? "aie" + : ((mod == XAIE_MEM_MOD) ? 
"aie_memory" + : "interface_tile"); + const std::string groups[3] = { + XAIEDEV_DEFAULT_GROUP_GENERIC, + XAIEDEV_DEFAULT_GROUP_STATIC, + XAIEDEV_DEFAULT_GROUP_AVAIL + }; + + std::stringstream msg; + msg << "Resource usage stats for Tile : (" << +col << "," << +row + << ") Module : " << moduleName << std::endl; + for (auto&g : groups) { + auto stats = aieDevice->getRscStat(g); + auto pc = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); + auto ts = stats.getNumRsc(loc, mod, xaiefal::XAIE_TRACEEVENT); + auto bc = stats.getNumRsc(loc, mod, xaiefal::XAIE_BROADCAST); + msg << "Resource Group : " << std::left << std::setw(10) << g << " " + << "Performance Counters : " << pc << " " + << "Trace Slots : " << ts << " " + << "Broadcast Channels : " << bc << " " + << std::endl; + } - // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings - bool - AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) - { - int counterId = 0; - bool runtimeCounters = false; - - auto stats = aieDevice->getRscStat(XAIEDEV_DEFAULT_GROUP_AVAIL); - auto hwGen = metadata->getHardwareGen(); - auto configChannel0 = metadata->getConfigChannel0(); - auto configChannel1 = metadata->getConfigChannel1(); - uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); - aie::displayColShiftInfo(startColShift); - - for (int module = 0; module < metadata->getNumModules(); ++module) { - auto configMetrics = metadata->getConfigMetricsVec(module); - if (configMetrics.empty()) - continue; - - int numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; - XAie_ModuleType mod = aie::profile::getFalModuleType(module); - - // Iterate over tiles and metrics to configure all desired counters - for (auto& tileMetric : configMetrics) { - auto& metricSet = tileMetric.second; - auto tile = tileMetric.first; - auto col = tile.col + startColShift; - auto row = tile.row; - auto subtype = tile.subtype; - auto type = 
aie::getModuleType(row, metadata->getAIETileRowOffset()); - if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) - type = module_type::dma; - - // Catch microcontroller event sets for MDM - if (module == static_cast(module_type::uc)) { - // Configure - auto events = microcontrollerEvents[metricSet]; - aie::profile::configMDMCounters(aieDevInst, hwGen, col, row, events); - // Record - tile_type recordTile; - recordTile.col = col; - recordTile.row = row; - microcontrollerTileEvents[recordTile] = events; - runtimeCounters = true; - continue; - } + xrt_core::message::send(severity_level::info, "XRT", msg.str()); + } - // Ignore invalid types and inactive modules - // NOTE: Inactive core modules are configured when utilizing - // stream switch monitor ports to profile DMA channels - if (!aie::profile::isValidType(type, mod)) - continue; - if ((type == module_type::dma) && !tile.active_memory) + // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings + bool + AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) + { + int counterId = 0; + bool runtimeCounters = false; + + auto stats = aieDevice->getRscStat(XAIEDEV_DEFAULT_GROUP_AVAIL); + auto hwGen = metadata->getHardwareGen(); + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + aie::displayColShiftInfo(startColShift); + + for (int module = 0; module < metadata->getNumModules(); ++module) { + auto configMetrics = metadata->getConfigMetricsVec(module); + if (configMetrics.empty()) continue; - if ((type == module_type::core) && !tile.active_core) { - if (metadata->getPairModuleIndex(metricSet, type) < 0) + + int numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; + XAie_ModuleType mod = aie::profile::getFalModuleType(module); + + // Iterate over tiles and metrics to configure all desired counters + for 
(auto& tileMetric : configMetrics) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col + startColShift; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); + if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) + type = module_type::dma; + + // Catch microcontroller event sets for MDM + if (module == static_cast(module_type::uc)) { + // Configure + auto events = microcontrollerEvents[metricSet]; + aie::profile::configMDMCounters(aieDevInst, hwGen, col, row, events); + // Record + tile_type recordTile; + recordTile.col = col; + recordTile.row = row; + microcontrollerTileEvents[recordTile] = events; + runtimeCounters = true; continue; - } + } - auto loc = XAie_TileLoc(col, row); - auto& xaieTile = aieDevice->tile(col, row); - auto xaieModule = (mod == XAIE_CORE_MOD) ? xaieTile.core() - : ((mod == XAIE_MEM_MOD) ? xaieTile.mem() - : xaieTile.pl()); - - auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] - : ((type == module_type::dma) ? memoryStartEvents[metricSet] - : ((type == module_type::shim) ? shimStartEvents[metricSet] - : memTileStartEvents[metricSet])); - auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] - : ((type == module_type::dma) ? memoryEndEvents[metricSet] - : ((type == module_type::shim) ? shimEndEvents[metricSet] - : memTileEndEvents[metricSet])); - std::vector resetEvents = {}; - - int numCounters = 0; - auto numFreeCtr = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); - numFreeCtr = (startEvents.size() < numFreeCtr) ? 
startEvents.size() : numFreeCtr; - - int numFreeCtrSS = numFreeCtr; - if (aie::profile::profileAPIMetricSet(metricSet)) { - if (numFreeCtr < 2) { + // Ignore invalid types and inactive modules + // NOTE: Inactive core modules are configured when utilizing + // stream switch monitor ports to profile DMA channels + if (!aie::profile::isValidType(type, mod)) continue; + if ((type == module_type::dma) && !tile.active_memory) + continue; + if ((type == module_type::core) && !tile.active_core) { + if (metadata->getPairModuleIndex(metricSet, type) < 0) + continue; } - // We need to monitor single stream switch monitor port - // numFreeCtrSS = 1 ; - } - // Specify Sel0/Sel1 for memory tile events 21-44 - auto iter0 = configChannel0.find(tile); - auto iter1 = configChannel1.find(tile); - uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; - uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; - - // Modify events as needed - aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); - endEvents = startEvents; - - // TBD : Placeholder to configure AIE core with required profile counters. - aie::profile::configEventSelections(aieDevInst, loc, type, metricSet, channel0); - // TBD : Placeholder to configure shim tile with required profile counters. 
- - aie::profile::configStreamSwitchPorts(tileMetric.first, xaieTile, loc, type, - numFreeCtrSS, metricSet, channel0, channel1, startEvents, endEvents, streamPorts); - - // Identify the profiling API metric sets and configure graph events - if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { - XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; - bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, - metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); - if (status) { - graphIteratorBrodcastChannelEvent = bcEvent; - graphItrBroadcastConfigDone = true; + auto loc = XAie_TileLoc(col, row); + auto& xaieTile = aieDevice->tile(col, row); + auto xaieModule = (mod == XAIE_CORE_MOD) ? xaieTile.core() + : ((mod == XAIE_MEM_MOD) ? xaieTile.mem() + : xaieTile.pl()); + + auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] + : ((type == module_type::dma) ? memoryStartEvents[metricSet] + : ((type == module_type::shim) ? shimStartEvents[metricSet] + : memTileStartEvents[metricSet])); + auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] + : ((type == module_type::dma) ? memoryEndEvents[metricSet] + : ((type == module_type::shim) ? shimEndEvents[metricSet] + : memTileEndEvents[metricSet])); + std::vector resetEvents = {}; + + int numCounters = 0; + auto numFreeCtr = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); + numFreeCtr = (startEvents.size() < numFreeCtr) ? 
startEvents.size() : numFreeCtr; + + int numFreeCtrSS = numFreeCtr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + if (numFreeCtr < 2) { + continue; + } + // We need to monitor single stream switch monitor port + // numFreeCtrSS = 1 ; } - } - if (aie::profile::profileAPIMetricSet(metricSet)) { - // Re-use the existing port running event for both the counters - startEvents[startEvents.size()-1] = startEvents[0]; + // Specify Sel0/Sel1 for memory tile events 21-44 + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; - // Use start events as End events for profile counters if threshold is not provided - endEvents[endEvents.size()-1] = endEvents[0]; - - // Use the set values broadcast events for the reset of counter - resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; - if (type == module_type::shim) { - if (metadata->getUseGraphIterator()) - resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; - else - resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; + // Modify events as needed + aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); + endEvents = startEvents; + + // TBD : Placeholder to configure AIE core with required profile counters. + aie::profile::configEventSelections(aieDevInst, loc, type, metricSet, channel0); + // TBD : Placeholder to configure shim tile with required profile counters. 
+ + aie::profile::configStreamSwitchPorts(tileMetric.first, xaieTile, loc, type, + numFreeCtrSS, metricSet, channel0, channel1, startEvents, endEvents, streamPorts); + + // Identify the profiling API metric sets and configure graph events + if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { + XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; + bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, + metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); + if (status) { + graphIteratorBrodcastChannelEvent = bcEvent; + graphItrBroadcastConfigDone = true; + } } - } - uint32_t threshold = 0; - // Request and configure all available counters for this tile - for (int i=0; i < numFreeCtr; ++i) { - auto startEvent = startEvents.at(i); - auto endEvent = endEvents.at(i); - auto resetEvent = XAIE_EVENT_NONE_CORE; - auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); - // For metric sets with multiple stream-switch ports, use modulo for channel mapping - uint8_t channelNum = portnum % 2; - uint8_t channel = (channelNum == 0) ? channel0 : channel1; - - // Configure group event before reserving and starting counter - aie::profile::configGroupEvents(aieDevInst, loc, mod, type, metricSet, startEvent, channel); - - // Configure the profile counters for profile APIs metric sets. 
- std::shared_ptr perfCounter = nullptr; if (aie::profile::profileAPIMetricSet(metricSet)) { - resetEvent = resetEvents.at(i); - threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); - threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); + // Re-use the existing port running event for both the counters + startEvents[startEvents.size()-1] = startEvents[0]; + + // Use start events as End events for profile counters if threshold is not provided + endEvents[endEvents.size()-1] = endEvents[0]; + + // Use the set values broadcast events for the reset of counter + resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; + if (type == module_type::shim) { + if (metadata->getUseGraphIterator()) + resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; + else + resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; + } + } - if (i==0 && threshold>0) - endEvent = XAIE_EVENT_PERF_CNT_1_PL; + uint32_t threshold = 0; + // Request and configure all available counters for this tile + for (int i=0; i < numFreeCtr; ++i) { + auto startEvent = startEvents.at(i); + auto endEvent = endEvents.at(i); + auto resetEvent = XAIE_EVENT_NONE_CORE; + auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); + // For metric sets with multiple stream-switch ports, use modulo for channel mapping + uint8_t channelNum = portnum % 2; + uint8_t channel = (channelNum == 0) ? channel0 : channel1; + + // Configure group event before reserving and starting counter + aie::profile::configGroupEvents(aieDevInst, loc, mod, type, metricSet, startEvent, channel); + + // Configure the profile counters for profile APIs metric sets. 
+ std::shared_ptr perfCounter = nullptr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + resetEvent = resetEvents.at(i); + threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); + threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); + + if (i==0 && threshold>0) + endEvent = XAIE_EVENT_PERF_CNT_1_PL; + + if (i==1 && threshold == 0) + continue; - if (i==1 && threshold == 0) + XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; + perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, + mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), + threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); + } + else { + // Request counter from resource manager + perfCounter = xaieModule.perfCounter(); + auto ret = perfCounter->initialize(mod, startEvent, mod, endEvent); + if (ret != XAIE_OK) break; + ret = perfCounter->reserve(); + if (ret != XAIE_OK) break; + + // Start the counter + ret = perfCounter->start(); + if (ret != XAIE_OK) break; + } + if (!perfCounter) continue; - - XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; - perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, - mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), - threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); + perfCounters.push_back(perfCounter); + + // Generate user_event_1 for byte count metric set after configuration + if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { + XAie_LocType tileloc = XAie_TileLoc(tile.col, tile.row); + //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to + // to transition the FSM from Idle->State0->State1. + // eventC = Port Running and eventD = stop event (counter event). 
+ XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + } + + // Convert enums to physical event IDs for reporting purposes + auto physicalEventIds = aie::profile::getEventPhysicalId(aieDevInst, loc, mod, type, metricSet, + startEvent, endEvent); + uint16_t phyStartEvent = physicalEventIds.first; + uint16_t phyEndEvent = physicalEventIds.second; + + // Get payload for reporting purposes + uint64_t payload = getCounterPayload(aieDevInst, tileMetric.first, type, col, row, + startEvent, metricSet, channel, static_cast(i)); + // Store counter info in database + std::string counterName = "AIE Counter " + std::to_string(counterId); + (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, + phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), + metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 0 : tile.stream_ids[0])); + counterId++; + numCounters++; + } // numFreeCtr + + std::stringstream msg; + msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col + << "," << +row << ") using metric set " << metricSet << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + numTileCounters[numCounters]++; + } // configMetrics + + // Report counters reserved per tile + { + std::stringstream msg; + msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; + for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { + if (numTileCounters[n] == 0) + continue; + msg << n << ": " << numTileCounters[n] << " tiles, "; + (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); } - else { - // Request counter from resource manager - perfCounter = xaieModule.perfCounter(); - auto ret = perfCounter->initialize(mod, startEvent, mod, endEvent); - if (ret != XAIE_OK) break; - ret = perfCounter->reserve(); - if (ret != XAIE_OK) 
break; - - // Start the counter - ret = perfCounter->start(); - if (ret != XAIE_OK) break; + xrt_core::message::send(severity_level::info, "XRT", msg.str().substr(0, msg.str().size()-2)); + } + + runtimeCounters = true; + } // modules + + return runtimeCounters; + } + + void AieProfile_VE2Impl::startPoll(const uint64_t id) + { + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll."); + threadCtrl = true; + thread = std::make_unique(&AieProfile_VE2Impl::continuePoll, this, id); + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll, after creating thread instance."); + } + + void AieProfile_VE2Impl::continuePoll(const uint64_t id) + { + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::continuePoll"); + + while (threadCtrl) { + poll(id); + std::this_thread::sleep_for(std::chrono::microseconds(metadata->getPollingIntervalVal())); + } + //Final Polling Operation + poll(id); + } + + void AieProfile_VE2Impl::poll(const uint64_t id) + { + // Wait until xclbin has been loaded and device has been updated in database + if (!(db->getStaticInfo().isDeviceReady(id))) + return; + + if (!aieDevInst) + return; + + uint32_t prevColumn = 0; + uint32_t prevRow = 0; + uint64_t timerValue = 0; + auto hwGen = metadata->getHardwareGen(); + + // Iterate over all AIE Counters & Timers + auto numCounters = db->getStaticInfo().getNumAIECounter(id); + for (uint64_t c=0; c < numCounters; c++) { + auto aie = db->getStaticInfo().getAIECounter(id, c); + if (!aie) + continue; + + std::vector values; + values.push_back(aie->column); + values.push_back(aie::getRelativeRow(aie->row, metadata->getAIETileRowOffset())); + values.push_back(aie->startEvent); + values.push_back(aie->endEvent); + values.push_back(aie->resetEvent); + + // Read counter value from device + uint32_t counterValue; + if (perfCounters.empty()) { + // Compiler-defined counters + XAie_LocType tileLocation = 
XAie_TileLoc(aie->column, aie->row); + XAie_PerfCounterGet(aieDevInst, tileLocation, XAIE_CORE_MOD, aie->counterNumber, &counterValue); + } + else { + // Runtime-defined counters + if (aie::profile::adfAPILatencyConfigEvent(aie->startEvent)) + { + uint32_t srcCounterValue = 0; + uint32_t destCounterValue = 0; + try { + std::string srcDestPairKey = metadata->getSrcDestPairKey(aie->column, aie->row, aie->streamId); + uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).srcPcIdx; + uint64_t destPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).destPcIdx; + auto srcPerfCount = perfCounters.at(srcPcIdx); + auto destPerfCount = perfCounters.at(destPcIdx); + srcPerfCount->readResult(srcCounterValue); + destPerfCount->readResult(destCounterValue); + counterValue = (destCounterValue > srcCounterValue) ? (destCounterValue-srcCounterValue) : (srcCounterValue-destCounterValue); + uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult; + if (counterValue != storedValue) + adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult = counterValue; + } catch(...) { + continue; + } } - if (!perfCounter) - continue; - perfCounters.push_back(perfCounter); - - // Generate user_event_1 for byte count metric set after configuration - if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { - XAie_LocType tileloc = XAie_TileLoc(tile.col, tile.row); - //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to - // to transition the FSM from Idle->State0->State1. - // eventC = Port Running and eventD = stop event (counter event). 
- XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); - XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + else if (aie::profile::adfAPIStartToTransferredConfigEvent(aie->startEvent)) + { + try { + std::string srcKey = "(" + aie::uint8ToStr(aie->column) + "," + aie::uint8ToStr(aie->row) + ")"; + uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED).at(srcKey).srcPcIdx; + auto perfCounter = perfCounters.at(srcPcIdx); + perfCounter->readResult(counterValue); + uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult; + if (counterValue != storedValue) + adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult = counterValue; + } catch(...) { + continue; + } + } + else { + auto perfCounter = perfCounters.at(c); + perfCounter->readResult(counterValue); } + } + values.push_back(counterValue); + + // Read tile timer (once per tile to minimize overhead) + if ((aie->column != prevColumn) || (aie->row != prevRow)) { + prevColumn = aie->column; + prevRow = aie->row; + auto moduleType = aie::getModuleType(aie->row, metadata->getAIETileRowOffset()); + auto falModuleType = (moduleType == module_type::core) ? XAIE_CORE_MOD + : ((moduleType == module_type::shim) ? 
XAIE_PL_MOD + : XAIE_MEM_MOD); + XAie_LocType tileLocation = XAie_TileLoc(aie->column, aie->row); + XAie_ReadTimer(aieDevInst, tileLocation, falModuleType, &timerValue); + } + values.push_back(timerValue); + values.push_back(aie->payload); - // Convert enums to physical event IDs for reporting purposes - auto physicalEventIds = aie::profile::getEventPhysicalId(aieDevInst, loc, mod, type, metricSet, - startEvent, endEvent); - uint16_t phyStartEvent = physicalEventIds.first; - uint16_t phyEndEvent = physicalEventIds.second; - - // Get payload for reporting purposes - uint64_t payload = getCounterPayload(aieDevInst, tileMetric.first, type, col, row, - startEvent, metricSet, channel, static_cast(i)); - // Store counter info in database - std::string counterName = "AIE Counter " + std::to_string(counterId); - (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, - phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), - metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 
0 : tile.stream_ids[0])); - counterId++; - numCounters++; - } // numFreeCtr + // Get timestamp in milliseconds + double timestamp = xrt_core::time_ns() / 1.0e6; + db->getDynamicInfo().addAIESample(id, timestamp, values); + } - std::stringstream msg; - msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col - << "," << +row << ") using metric set " << metricSet << "."; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - numTileCounters[numCounters]++; - } // configMetrics - - // Report counters reserved per tile - { - std::stringstream msg; - msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; - for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { - if (numTileCounters[n] == 0) - continue; - msg << n << ": " << numTileCounters[n] << " tiles, "; - (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); + // Read and record MDM counters (if available) + // NOTE: all MDM counters in a given tile are sampled in same read sequence + for (auto& ucTile : microcontrollerTileEvents) { + auto tile = ucTile.first; + auto events = ucTile.second; + + std::vector counterValues; + aie::profile::readMDMCounters(aieDevInst, hwGen, tile.col, tile.row, counterValues); + + double timestamp = xrt_core::time_ns() / 1.0e6; + + for (uint64_t c=0; c < counterValues.size(); c++) { + std::vector values; + values.push_back(tile.col); + values.push_back(0); + values.push_back(events.at(c)); + values.push_back(events.at(c)); + values.push_back(0); + values.push_back(counterValues.at(c)); + + db->getDynamicInfo().addAIESample(id, timestamp, values); } - xrt_core::message::send(severity_level::info, "XRT", msg.str().substr(0, msg.str().size()-2)); } + } - runtimeCounters = true; - } // modules + void AieProfile_VE2Impl::endPoll() + { + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::endPoll"); + if (!threadCtrl) + return; - return runtimeCounters; 
- } + threadCtrl = false; + if (thread && thread->joinable()) + thread->join(); - void AieProfile_VE2Impl::startPoll(const uint64_t id) - { - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll."); - threadCtrl = true; - thread = std::make_unique(&AieProfile_VE2Impl::continuePoll, this, id); - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll, after creating thread instance."); - } + freeResources(); + } + + void AieProfile_VE2Impl::freeResources() + { + displayAdfAPIResults(); + for (auto& c : perfCounters){ + c->stop(); + c->release(); + } - void AieProfile_VE2Impl::continuePoll(const uint64_t id) - { - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::continuePoll"); + for (auto& c : streamPorts){ + c->stop(); + c->release(); + } - while (threadCtrl) { - poll(id); - std::this_thread::sleep_for(std::chrono::microseconds(metadata->getPollingIntervalVal())); - } - //Final Polling Operation - poll(id); - } + for (auto &bc : bcResourcesBytesTx) { + bc->stop(); + bc->release(); + } - void AieProfile_VE2Impl::poll(const uint64_t id) - { - // Wait until xclbin has been loaded and device has been updated in database - if (!(db->getStaticInfo().isDeviceReady(id))) - return; - - if (!aieDevInst) - return; - - uint32_t prevColumn = 0; - uint32_t prevRow = 0; - uint64_t timerValue = 0; - auto hwGen = metadata->getHardwareGen(); - - // Iterate over all AIE Counters & Timers - auto numCounters = db->getStaticInfo().getNumAIECounter(id); - for (uint64_t c=0; c < numCounters; c++) { - auto aie = db->getStaticInfo().getAIECounter(id, c); - if (!aie) - continue; - - std::vector values; - values.push_back(aie->column); - values.push_back(aie::getRelativeRow(aie->row, metadata->getAIETileRowOffset())); - values.push_back(aie->startEvent); - values.push_back(aie->endEvent); - values.push_back(aie->resetEvent); - - // Read counter value from device - uint32_t counterValue; - if 
(perfCounters.empty()) { - // Compiler-defined counters - XAie_LocType tileLocation = XAie_TileLoc(aie->column, aie->row); - XAie_PerfCounterGet(aieDevInst, tileLocation, XAIE_CORE_MOD, aie->counterNumber, &counterValue); + for (auto &bc : bcResourcesLatency) { + bc->stop(); + bc->release(); } - else { - // Runtime-defined counters - if (aie::profile::adfAPILatencyConfigEvent(aie->startEvent)) - { - uint32_t srcCounterValue = 0; - uint32_t destCounterValue = 0; - try { - std::string srcDestPairKey = metadata->getSrcDestPairKey(aie->column, aie->row, aie->streamId); - uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).srcPcIdx; - uint64_t destPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).destPcIdx; - auto srcPerfCount = perfCounters.at(srcPcIdx); - auto destPerfCount = perfCounters.at(destPcIdx); - srcPerfCount->readResult(srcCounterValue); - destPerfCount->readResult(destCounterValue); - counterValue = (destCounterValue > srcCounterValue) ? (destCounterValue-srcCounterValue) : (srcCounterValue-destCounterValue); - uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult; - if (counterValue != storedValue) - adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult = counterValue; - } catch(...) 
{ - continue; + } + + /**************************************************************************** + * Display start to bytes or latency results to output transcript + ***************************************************************************/ + void AieProfile_VE2Impl::displayAdfAPIResults() + { + for (auto &adfAPIType : adfAPIResourceInfoMap) { + if (adfAPIType.first == aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED) { + for (auto &adfApiResource : adfAPIType.second) { + std::stringstream msg; + msg << "Total start to bytes transferred for tile " << adfApiResource.first << " is " + << +adfApiResource.second.profileResult << " clock cycles for specified bytes."; + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); } } - else if (aie::profile::adfAPIStartToTransferredConfigEvent(aie->startEvent)) - { - try { - std::string srcKey = "(" + aie::uint8ToStr(aie->column) + "," + aie::uint8ToStr(aie->row) + ")"; - uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED).at(srcKey).srcPcIdx; - auto perfCounter = perfCounters.at(srcPcIdx); - perfCounter->readResult(counterValue); - uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult; - if (counterValue != storedValue) - adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult = counterValue; - } catch(...) { - continue; + else if (adfAPIType.first == aie::profile::adfAPI::INTF_TILE_LATENCY) { + for(auto &adfApiResource : adfAPIType.second) { + GraphPortPair graphPortPair; + try { + graphPortPair = metadata->getSrcDestGraphPair(adfApiResource.first); + } + catch (...) 
{ + continue; + } + + std::stringstream msg; + msg << "Total latency between " << graphPortPair.srcGraphName + << ":" << graphPortPair.srcGraphPort << " and " + << graphPortPair.destGraphName << ":" << graphPortPair.destGraphPort + << " is " << +adfApiResource.second.profileResult << " clock cycles."; + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); } - } - else { - auto perfCounter = perfCounters.at(c); - perfCounter->readResult(counterValue); } } - values.push_back(counterValue); - - // Read tile timer (once per tile to minimize overhead) - if ((aie->column != prevColumn) || (aie->row != prevRow)) { - prevColumn = aie->column; - prevRow = aie->row; - auto moduleType = aie::getModuleType(aie->row, metadata->getAIETileRowOffset()); - auto falModuleType = (moduleType == module_type::core) ? XAIE_CORE_MOD - : ((moduleType == module_type::shim) ? XAIE_PL_MOD - : XAIE_MEM_MOD); - XAie_LocType tileLocation = XAie_TileLoc(aie->column, aie->row); - XAie_ReadTimer(aieDevInst, tileLocation, falModuleType, &timerValue); - } - values.push_back(timerValue); - values.push_back(aie->payload); - - // Get timestamp in milliseconds - double timestamp = xrt_core::time_ns() / 1.0e6; - db->getDynamicInfo().addAIESample(id, timestamp, values); } + } - // Read and record MDM counters (if available) - // NOTE: all MDM counters in a given tile are sampled in same read sequence - for (auto& ucTile : microcontrollerTileEvents) { - auto tile = ucTile.first; - auto events = ucTile.second; +// End ZOCL flow - std::vector counterValues; - aie::profile::readMDMCounters(aieDevInst, hwGen, tile.col, tile.row, counterValues); +// XDNA flow +#else + namespace xdp { + using tile_type = xdp::tile_type; + using module_type = xdp::module_type; + using severity_level = xrt_core::message::severity_level; - double timestamp = xrt_core::time_ns() / 1.0e6; + AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr metadata, uint64_t deviceID) + : 
AieProfileImpl(database, metadata, deviceID) + { + auto hwGen = metadata->getHardwareGen(); - for (uint64_t c=0; c < counterValues.size(); c++) { - std::vector values; - values.push_back(tile.col); - values.push_back(0); - values.push_back(events.at(c)); - values.push_back(events.at(c)); - values.push_back(0); - values.push_back(counterValues.at(c)); + coreStartEvents = aie::profile::getCoreEventSets(hwGen); + coreEndEvents = coreStartEvents; + + memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); + memoryEndEvents = memoryStartEvents; + + shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); + shimEndEvents = shimStartEvents; + shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PERF_CNT_0_PL}; + + memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); + memTileEndEvents = memTileStartEvents; - db->getDynamicInfo().addAIESample(id, timestamp, values); + microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); + + tranxHandler = std::make_unique(); + + // Create debug buffer for AIE Profile results + auto context = metadata->getHwContext(); + uint32_t* output = nullptr; + std::map activeUCsegmentMap; + activeUCsegmentMap[0] = 0x20000; + try { + resultBO = xrt_core::bo_int::create_bo(context, 0x20000, xrt_core::bo_int::use_type::uc_debug); + xrt_core::bo_int::config_bo(resultBO, activeUCsegmentMap); + output = resultBO.map(); + memset(output, 0, 0x20000); + } catch (std::exception& e) { + std::stringstream msg; + msg << "Unable to create 128KB buffer for AIE Profile results. Cannot get AIE Profile info. 
" << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); } } - } - - void AieProfile_VE2Impl::endPoll() - { - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::endPoll"); - if (!threadCtrl) - return; - - threadCtrl = false; - if (thread && thread->joinable()) - thread->join(); - - freeResources(); - } - - void AieProfile_VE2Impl::freeResources() - { - displayAdfAPIResults(); - for (auto& c : perfCounters){ - c->stop(); - c->release(); - } - for (auto& c : streamPorts){ - c->stop(); - c->release(); - } + void AieProfile_VE2Impl::updateDevice() { + bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); + if (!runtimeCounters) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile Counters were not found for this design. Please specify " + "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " + "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " + "under \"AIE_profile_settings\" section in your xrt.ini."); + (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); + return; + } - for (auto &bc : bcResourcesBytesTx) { - bc->stop(); - bc->release(); + // Build poll ASM/ELF after metrics are configured; submit is deferred to endPoll() (see plugin). 
+ generatePollElf(); } - for (auto &bc : bcResourcesLatency) { - bc->stop(); - bc->release(); - } - } + // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings + bool + AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) + { + int counterId = 0; + bool runtimeCounters = false; + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + return false; + } - /**************************************************************************** - * Display start to bytes or latency results to output transcript - ***************************************************************************/ - void AieProfile_VE2Impl::displayAdfAPIResults() - { - for (auto &adfAPIType : adfAPIResourceInfoMap) { - if (adfAPIType.first == aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED) { - for (auto &adfApiResource : adfAPIType.second) { - std::stringstream msg; - msg << "Total start to bytes transferred for tile " << adfApiResource.first << " is " - << +adfApiResource.second.profileResult << " clock cycles for specified bytes."; - xrt_core::message::send(severity_level::warning, "XRT", msg.str()); - } + const std::string tranxName = "AieProfileMetrics"; + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(severity_level::warning, "XRT", "Transaction Initialization Failed."); + return false; } - else if (adfAPIType.first == 
aie::profile::adfAPI::INTF_TILE_LATENCY) { - for(auto &adfApiResource : adfAPIType.second) { - GraphPortPair graphPortPair; - try { - graphPortPair = metadata->getSrcDestGraphPair(adfApiResource.first); + + auto hwGen = metadata->getHardwareGen(); + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + aie::displayColShiftInfo(startColShift); + + for (int module = 0; module < metadata->getNumModules(); ++module) { + auto configMetrics = metadata->getConfigMetricsVec(module); + if (configMetrics.empty()) + continue; + + int numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; + XAie_ModuleType mod = aie::profile::getFalModuleType(module); + + // Iterate over tiles and metrics to configure all desired counters + for (auto& tileMetric : configMetrics) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col + startColShift; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); + if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) + type = module_type::dma; + + // Catch microcontroller event sets for MDM + if (module == static_cast(module_type::uc)) { + // Configure + auto events = microcontrollerEvents[metricSet]; + aie::profile::configMDMCounters(&aieDevInst, hwGen, col, row, events); + // Record + tile_type recordTile; + recordTile.col = col; + recordTile.row = row; + microcontrollerTileEvents[recordTile] = events; + runtimeCounters = true; + continue; } - catch (...) 
{ + + // Ignore invalid types and inactive modules + // NOTE: Inactive core modules are configured when utilizing + // stream switch monitor ports to profile DMA channels + if (!aie::profile::isValidType(type, mod)) + continue; + if ((type == module_type::dma) && !tile.active_memory) continue; + if ((type == module_type::core) && !tile.active_core) { + if (metadata->getPairModuleIndex(metricSet, type) < 0) + continue; + } + + auto loc = XAie_TileLoc(col, row); + auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] + : ((type == module_type::dma) ? memoryStartEvents[metricSet] + : ((type == module_type::shim) ? shimStartEvents[metricSet] + : memTileStartEvents[metricSet])); + auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] + : ((type == module_type::dma) ? memoryEndEvents[metricSet] + : ((type == module_type::shim) ? shimEndEvents[metricSet] + : memTileEndEvents[metricSet])); + std::vector resetEvents = {}; + + int numCounters = 0; + auto numFreeCtr = static_cast(startEvents.size()); + + int numFreeCtrSS = numFreeCtr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + if (numFreeCtr < 2) { + continue; + } + // We need to monitor single stream switch monitor port + // numFreeCtrSS = 1 ; + } + + // Specify Sel0/Sel1 for memory tile events 21-44 + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + std::vector channels = {channel0, channel1}; // TODO: do we also add channel 2 & 3 here? + + // Modify events as needed + aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); + endEvents = startEvents; + + // TBD : Placeholder to configure AIE core with required profile counters. 
+ configEventSelections(loc, type, metricSet, channels); + // TBD : Placeholder to configure shim tile with required profile counters. + + // TODO: support for VE2 XDNA for profile API metric sets + { + // // Identify the profiling API metric sets and configure graph events + // if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { + // XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; + // bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, + // metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); + // if (status) { + // graphIteratorBrodcastChannelEvent = bcEvent; + // graphItrBroadcastConfigDone = true; + // } + // } + + // if (aie::profile::profileAPIMetricSet(metricSet)) { + // // Re-use the existing port running event for both the counters + // startEvents[startEvents.size()-1] = startEvents[0]; + + // // Use start events as End events for profile counters if threshold is not provided + // endEvents[endEvents.size()-1] = endEvents[0]; + + // // Use the set values broadcast events for the reset of counter + // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; + // if (type == module_type::shim) { + // if (metadata->getUseGraphIterator()) + // resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; + // else + // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; + // } + // } } + // Request and configure all available counters for this tile + for (int i=0; i < numFreeCtr; ++i) { + auto startEvent = startEvents.at(i); + auto endEvent = endEvents.at(i); + auto resetEvent = XAIE_EVENT_NONE_CORE; + auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); + // For metric sets with multiple stream-switch ports, use modulo for channel mapping + uint8_t channelNum = portnum % 2; + uint8_t channel = (channelNum == 0) ? 
channel0 : channel1; + + // Configure group event before reserving and starting counter + aie::profile::configGroupEvents(&aieDevInst, loc, mod, type, metricSet, startEvent, channel); + + // TODO: support for VE2 XDNA for profile API metric sets + // // Configure the profile counters for profile APIs metric sets. + // std::shared_ptr perfCounter = nullptr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + // resetEvent = resetEvents.at(i); + // threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); + // threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); + + // if (i==0 && threshold>0) + // endEvent = XAIE_EVENT_PERF_CNT_1_PL; + + // if (i==1 && threshold == 0) + // continue; + + // XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; + // perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, + // mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), + // threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); + // if (!perfCounter) + // continue; + // perfCounters.push_back(perfCounter); + } else { + // No resource manager, so manually manage the counters + RC = XAie_PerfCounterReset(&aieDevInst, loc, mod, i); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Reset Failed."); + break; + } + RC = XAie_PerfCounterControlSet(&aieDevInst, loc, mod, i, startEvent, endEvent); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Set Failed."); + break; + } + } + + if (aie::isStreamSwitchPortEvent(startEvent)) + configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel, startEvent); + + // Generate user_event_1 for byte count metric set after configuration + if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { + XAie_LocType 
tileloc = XAie_TileLoc(tile.col, tile.row); + //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to + // to transition the FSM from Idle->State0->State1. + // eventC = Port Running and eventD = stop event (counter event). + XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + } + + // Convert enums to physical event IDs for reporting purposes + uint16_t tmpStart; + uint16_t tmpEnd; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, startEvent, &tmpStart); + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, endEvent, &tmpEnd); + uint16_t phyStartEvent = tmpStart + aie::profile::getCounterBase(type); + uint16_t phyEndEvent = tmpEnd + aie::profile::getCounterBase(type); + auto payload = channel0; + + // Store counter info in database + std::string counterName = "AIE Counter " + std::to_string(counterId); + (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, + phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), + metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 
0 : tile.stream_ids[0])); + + auto tileOffset = XAie_GetTileAddr(&aieDevInst, row, col); + std::vector Regs = regValues.at(type); + op_profile_data.emplace_back((u32)(Regs[i] + tileOffset)); + + std::vector values; + values.insert(values.end(), {col, row, phyStartEvent, phyEndEvent, resetEvent, 0, 0, payload}); + outputValues.push_back(values); + + counterId++; + numCounters++; + } // numFreeCtr + std::stringstream msg; - msg << "Total latency between " << graphPortPair.srcGraphName - << ":" << graphPortPair.srcGraphPort << " and " - << graphPortPair.destGraphName << ":" << graphPortPair.destGraphPort - << " is " << +adfApiResource.second.profileResult << " clock cycles."; - xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col + << "," << +row << ") using metric set " << metricSet << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + numTileCounters[numCounters]++; + } // configMetrics + + // Report counters reserved per tile + { + std::stringstream msg; + msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; + for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { + if (numTileCounters[n] == 0) + continue; + msg << n << ": " << numTileCounters[n] << " tiles, "; + (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); + } + xrt_core::message::send(severity_level::info, "XRT", msg.str().substr(0, msg.str().size()-2)); } + + runtimeCounters = true; + } // modules + + // Submit transaction if we were able to configure + if (runtimeCounters) { + auto hwCtx = metadata->getHwContext(); + tranxHandler->submitTransaction(&aieDevInst, hwCtx); + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Profiling."); + } + + return runtimeCounters; + } + + /**************************************************************************** + * Configure selection 
index to monitor channel numbers + * NOTE: In NPU3, this is required in memory and interface tiles + ***************************************************************************/ + void + AieProfile_VE2Impl::configEventSelections(const XAie_LocType loc, const module_type type, + const std::string metricSet, std::vector& channels) + { + if ((type != module_type::mem_tile) && (type != module_type::shim)) + return; + + XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; + uint8_t numChannels = NUM_CHANNEL_SELECTS; + + if (aie::isDebugVerbosity()) { + std::string tileType = (type == module_type::shim) ? "interface" : "memory"; + std::string dmaType = (dmaDir == DMA_S2MM) ? "S2MM" : "MM2S"; + std::stringstream channelsStr; + std::copy(channels.begin(), channels.end(), std::ostream_iterator(channelsStr, ", ")); + + std::string msg = "Configuring event selections for " + tileType + " tile DMA " + + dmaType + " channels " + channelsStr.str(); + xrt_core::message::send(severity_level::debug, "XRT", msg); } + + for (uint8_t c = 0; c < numChannels; ++c) + XAie_EventSelectDmaChannel(&aieDevInst, loc, c, dmaDir, channels.at(c)); } - } + /**************************************************************************** + * Configure stream switch ports for monitoring purposes + * NOTE: Used to monitor streams: trace, interfaces, and memory tiles + ***************************************************************************/ + void + AieProfile_VE2Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, + const module_type& type, const std::string& metricSet, + const uint8_t channel, const XAie_Events startEvent) + { + // Hardcoded + uint8_t rscId = 0; + uint8_t portnum = aie::getPortNumberFromEvent(startEvent); + // AIE Tiles (e.g., trace streams) + if (type == module_type::core) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? 
+ XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured core tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") + << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } + + // Interface tiles (e.g., PLIO, GMIO) + if (type == module_type::shim) { + // NOTE: skip configuration of extra ports for tile if stream_ids are not available. + if (portnum >= tile.stream_ids.size()) + return; + // Grab slave/master and stream ID + // NOTE: stored in getTilesForProfiling() above + auto slaveOrMaster = (tile.is_master_vec.at(portnum) == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + uint8_t streamPortId = static_cast(tile.stream_ids.at(portnum)); + + // auto streamPortId = tile.stream_id; + // Define stream switch port to monitor interface + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, SOUTH, streamPortId); + std::stringstream msg; + msg << "Configured shim tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and stream port id " << (int)streamPortId << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } + + if (type == module_type::mem_tile) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? + XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured mem tile " << (aie::isInputSet(type,metricSet) ? 
"S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + } + + void AieProfile_NPU3Impl::generatePollElf() + { + auto context = metadata->getHwContext(); + + std::string tranxName = "AieProfilePoll"; + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Unable to initialize transaction for AIE profile polling."); + return; + } + for (u32 i = 0; i < op_profile_data.size(); i++) { + XAie_SaveRegister(&aieDevInst, op_profile_data[i], i); + } + if (!tranxHandler->completeASM(&aieDevInst)) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile: Failed to finalize poll ASM."); + return; + } + if (!tranxHandler->generateELF()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile: Failed to generate poll ELF."); + return; + } + finishedPoll = false; + xrt_core::message::send(severity_level::debug, "XRT", + "AIE Profile: Poll ASM/ELF ready (submit deferred to teardown)."); + } + + + void AieProfile_VE2Impl::poll(const uint64_t id) + { + // Wait until xclbin has been loaded and device has been updated in database + if (!(db->getStaticInfo().isDeviceReady(id))) + return; + + if (finishedPoll) + return; + + if (db->infoAvailable(xdp::info::ml_timeline)) { + db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + } + + if (!tranxHandler->submitTransaction(&aieDevInst, context)) + return; + + resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + uint32_t* output = resultBO.map(); + + // Get timestamp in milliseconds + double timestamp = xrt_core::time_ns() / 1.0e6; + + //**************************TODO: Remove this after testing *************************** + for (u32 i = 0; i < op_profile_data.size() + 12 * 3; i++) 
{ + std::stringstream msg; + msg << "Counter address/values: " << output[2 * i] << " - " << output[2 * i + 1]; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + } + + // Process counter values and add to database + for (u32 i = 0; i < op_profile_data.size(); i++) { + // Update counter value in outputValues and add to database + std::vector values = outputValues[i]; + values[5] = static_cast(output[2 * i + 1]); // Write counter value + db->getDynamicInfo().addAIESample(id, timestamp, values); + } + + finishedPoll = true; + } + + bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) {} + uint64_t AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, const tile_type& tile, const module_type type, uint8_t column, uint8_t row, uint16_t startEvent, const std::string metricSet, const uint8_t channel, uint8_t logicalPortIndex) {} + uint64_t AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) {} + void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, const tile_type& tile, XAie_ModuleType mod) {} + void AieProfile_VE2Impl::startPoll(const uint64_t id) {} + void AieProfile_VE2Impl::continuePoll(const uint64_t id) {} + void AieProfile_VE2Impl::endPoll() {} + void AieProfile_VE2Impl::freeResources() {} + void AieProfile_VE2Impl::displayAdfAPIResults() {} } +// END XDNA flow + +#endif diff --git a/profile/plugin/aie_profile/ve2/aie_profile.h b/profile/plugin/aie_profile/ve2/aie_profile.h index fbd20fe8..81790c14 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.h +++ b/profile/plugin/aie_profile/ve2/aie_profile.h @@ -12,6 +12,8 @@ #include "core/edge/common/aie_parser.h" #include "xdp/profile/plugin/aie_profile/aie_profile_impl.h" #include "xdp/profile/plugin/aie_profile/util/aie_profile_util.h" +#include "xdp/profile/device/common/ve2/ve2_transaction.h" +#include "xdp/profile/plugin/aie_base/generations/aie2ps_registers.h" #include 
"xaiefal/xaiefal.hpp" extern "C" { @@ -90,13 +92,37 @@ namespace xdp { std::pair getShimBroadcastChannel(const tile_type& srcTile); - void - displayAdfAPIResults(); + void displayAdfAPIResults(); private: + #ifdef XDP_VE2_ZOCL_BUILD XAie_DevInst* aieDevInst = nullptr; xaiefal::XAieDev* aieDevice = nullptr; - + #else + void configEventSelections(const XAie_LocType loc, const module_type type, const std::string metricSet, std::vector& channels); + void configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, const module_type& type, const std::string& metricSet, const uint8_t channel, const XAie_Events startEvent); + void generatePollElf(); + + std::unique_ptr tranxHandler; + xrt::bo resultBO; + XAie_DevInst aieDevInst = {0}; + bool finishedPoll = false; + std::vector op_profile_data; + std::vector> outputValues; + + // Register offsets per tile type for VE2 (AIE2PS) — used to build the poll ELF. + const std::map> regValues { + {module_type::core, {aie2ps::cm_performance_counter0, aie2ps::cm_performance_counter1, + aie2ps::cm_performance_counter2, aie2ps::cm_performance_counter3}}, + {module_type::dma, {aie2ps::mm_performance_counter0, aie2ps::mm_performance_counter1, + aie2ps::mm_performance_counter2, aie2ps::mm_performance_counter3}}, + {module_type::shim, {aie2ps::shim_performance_counter0, aie2ps::shim_performance_counter1, + aie2ps::shim_performance_counter2, aie2ps::shim_performance_counter3}}, + {module_type::mem_tile, {aie2ps::mem_performance_counter0, aie2ps::mem_performance_counter1, + aie2ps::mem_performance_counter2, aie2ps::mem_performance_counter3}} + }; + #endif + std::map> coreStartEvents; std::map> coreEndEvents; std::map> memoryStartEvents; From f238c4e377a4ec9b6d30a2d402fb6f2bb5ed0071 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 22 Apr 2026 15:55:17 -0600 Subject: [PATCH 10/19] Add missing header file Signed-off-by: snigupta --- profile/plugin/aie_profile/ve2/aie_profile.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/profile/plugin/aie_profile/ve2/aie_profile.h b/profile/plugin/aie_profile/ve2/aie_profile.h index 81790c14..f2ab51ef 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.h +++ b/profile/plugin/aie_profile/ve2/aie_profile.h @@ -15,6 +15,7 @@ #include "xdp/profile/device/common/ve2/ve2_transaction.h" #include "xdp/profile/plugin/aie_base/generations/aie2ps_registers.h" #include "xaiefal/xaiefal.hpp" +#include "xrt/xrt_bo.h" extern "C" { #ifdef XDP_USE_AIE_CODEGEN From bd1ed43d8a24b1fd5e4ddde18b2a99b76f8c8238 Mon Sep 17 00:00:00 2001 From: snigupta Date: Thu, 23 Apr 2026 14:02:08 -0600 Subject: [PATCH 11/19] Compilation fixes Signed-off-by: snigupta --- profile/plugin/aie_profile/CMakeLists.txt | 2 +- profile/plugin/aie_profile/ve2/aie_profile.cpp | 2 +- profile/plugin/aie_status/CMakeLists.txt | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/profile/plugin/aie_profile/CMakeLists.txt b/profile/plugin/aie_profile/CMakeLists.txt index 70a57bc2..4be2bb55 100644 --- a/profile/plugin/aie_profile/CMakeLists.txt +++ b/profile/plugin/aie_profile/CMakeLists.txt @@ -83,7 +83,7 @@ else() "${IMPL_DIR}/*.h" "${IMPL_DIR}/*.cpp" ) - add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES}) + add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES} "${PROFILE_DIR}/device/common/ve2/ve2_transaction.cpp") add_dependencies(xdp_aie_profile_plugin_xdna xdp_core xrt_coreutil) target_link_libraries(xdp_aie_profile_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen aiebu_library_objects) target_link_options(xdp_aie_profile_plugin_xdna PRIVATE -Wl,-Bsymbolic) diff --git a/profile/plugin/aie_profile/ve2/aie_profile.cpp b/profile/plugin/aie_profile/ve2/aie_profile.cpp index 333ad004..e04211a4 100644 --- 
a/profile/plugin/aie_profile/ve2/aie_profile.cpp +++ b/profile/plugin/aie_profile/ve2/aie_profile.cpp @@ -1175,7 +1175,7 @@ } } - void AieProfile_NPU3Impl::generatePollElf() + void AieProfile_VE2Impl::generatePollElf() { auto context = metadata->getHwContext(); diff --git a/profile/plugin/aie_status/CMakeLists.txt b/profile/plugin/aie_status/CMakeLists.txt index 18fd8530..aa9938f1 100644 --- a/profile/plugin/aie_status/CMakeLists.txt +++ b/profile/plugin/aie_status/CMakeLists.txt @@ -19,7 +19,10 @@ if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_dependencies(xdp_aie_status_plugin xdp_core) target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core aie_codegen) target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") - target_include_directories(xdp_aie_status_plugin PRIVATE ${CMAKE_SOURCE_DIR}/src) + target_include_directories(xdp_aie_status_plugin PRIVATE + ${CMAKE_SOURCE_DIR}/src + ${AIEFAL_DIR} + ) set_target_properties(xdp_aie_status_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_status_plugin From f0bbd621be9a0634f5258e374e0247ae61a9cf26 Mon Sep 17 00:00:00 2001 From: snigupta Date: Thu, 23 Apr 2026 15:10:04 -0600 Subject: [PATCH 12/19] Final fixes for aie_profile Signed-off-by: snigupta --- profile/plugin/aie_profile/aie_profile_plugin.cpp | 12 +++++++++--- profile/plugin/aie_profile/ve2/aie_profile.cpp | 3 ++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/profile/plugin/aie_profile/aie_profile_plugin.cpp b/profile/plugin/aie_profile/aie_profile_plugin.cpp index 0fb51221..45017c99 100644 --- a/profile/plugin/aie_profile/aie_profile_plugin.cpp +++ b/profile/plugin/aie_profile/aie_profile_plugin.cpp @@ -246,6 +246,8 @@ auto time = std::time(nullptr); #ifdef XDP_CLIENT_BUILD implementation->poll(0); + #elif defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + implementation->poll(implementation->getDeviceID()); #endif 
implementation->endPoll(); @@ -259,9 +261,13 @@ auto time = std::time(nullptr); #ifdef XDP_CLIENT_BUILD auto& implementation = handleToAIEProfileImpl.begin()->second; implementation->poll(0); - #elif XDP_VE2_BUILD - auto& implementation = handleToAIEProfileImpl.begin()->second; - implementation->poll(implementation->getDeviceID()); + #elif defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + for (auto& p : handleToAIEProfileImpl) { + if (!p.second) + continue; + p.second->endPoll(); + p.second->poll(p.second->getDeviceID()); + } #endif // Ask all threads to end for (auto& p : handleToAIEProfileImpl) { diff --git a/profile/plugin/aie_profile/ve2/aie_profile.cpp b/profile/plugin/aie_profile/ve2/aie_profile.cpp index e04211a4..277f3d9f 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.cpp +++ b/profile/plugin/aie_profile/ve2/aie_profile.cpp @@ -1218,7 +1218,8 @@ xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); } - if (!tranxHandler->submitTransaction(&aieDevInst, context)) + auto context = metadata->getHwContext(); + if (!tranxHandler->submitELF(context)) return; resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); From 8eb35952804dab20f8835addb88ce80e7c2f1168 Mon Sep 17 00:00:00 2001 From: snigupta Date: Thu, 23 Apr 2026 15:23:08 -0600 Subject: [PATCH 13/19] Final fixes for aie_profile Signed-off-by: snigupta --- .../plugin/aie_profile/ve2/aie_profile.cpp | 2156 ++++++++--------- 1 file changed, 1076 insertions(+), 1080 deletions(-) diff --git a/profile/plugin/aie_profile/ve2/aie_profile.cpp b/profile/plugin/aie_profile/ve2/aie_profile.cpp index 277f3d9f..a6f7e92c 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.cpp +++ b/profile/plugin/aie_profile/ve2/aie_profile.cpp @@ -31,1231 +31,1227 @@ #include "core/common/api/hw_context_int.h" #include "shim_ve2/xdna_hwctx.h" -#include "core/common/api/bo_int.h" -#include "xrt/xrt_bo.h" - -// ZOCL flow #ifdef XDP_VE2_ZOCL_BUILD - namespace { - static void* 
fetchAieDevInst(void* devHandle) - { - xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(devHandle); - auto hwctx_hdl = static_cast(context); - auto hwctx_obj = dynamic_cast(hwctx_hdl); - auto aieArray = hwctx_obj->get_aie_array(); - return aieArray->get_dev() ; - } +namespace { + static void* fetchAieDevInst(void* devHandle) + { + xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(devHandle); + auto hwctx_hdl = static_cast(context); + auto hwctx_obj = dynamic_cast(hwctx_hdl); + auto aieArray = hwctx_obj->get_aie_array(); + return aieArray->get_dev() ; + } - static void* allocateAieDevice(void* devHandle) - { - auto aieDevInst = static_cast(fetchAieDevInst(devHandle)) ; - if (!aieDevInst) - return nullptr; - return new xaiefal::XAieDev(aieDevInst, false) ; - } + static void* allocateAieDevice(void* devHandle) + { + auto aieDevInst = static_cast(fetchAieDevInst(devHandle)) ; + if (!aieDevInst) + return nullptr; + return new xaiefal::XAieDev(aieDevInst, false) ; + } - static void deallocateAieDevice(void* aieDevice) - { - auto object = static_cast(aieDevice) ; - if (object != nullptr) - delete object ; - } - } // end anonymous namespace + static void deallocateAieDevice(void* aieDevice) + { + auto object = static_cast(aieDevice) ; + if (object != nullptr) + delete object ; + } +} // end anonymous namespace - namespace xdp { - using tile_type = xdp::tile_type; - using module_type = xdp::module_type; - using severity_level = xrt_core::message::severity_level; +namespace xdp { + using tile_type = xdp::tile_type; + using module_type = xdp::module_type; + using severity_level = xrt_core::message::severity_level; - AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr metadata, uint64_t deviceID) - : AieProfileImpl(database, metadata, deviceID) - { - auto hwGen = metadata->getHardwareGen(); + AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr 
metadata, uint64_t deviceID) + : AieProfileImpl(database, metadata, deviceID) + { + auto hwGen = metadata->getHardwareGen(); - coreStartEvents = aie::profile::getCoreEventSets(hwGen); - coreEndEvents = coreStartEvents; + coreStartEvents = aie::profile::getCoreEventSets(hwGen); + coreEndEvents = coreStartEvents; - memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); - memoryEndEvents = memoryStartEvents; + memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); + memoryEndEvents = memoryStartEvents; - shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); - shimEndEvents = shimStartEvents; - shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PERF_CNT_0_PL}; + shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); + shimEndEvents = shimStartEvents; + shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PERF_CNT_0_PL}; - memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); - memTileEndEvents = memTileStartEvents; - - microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); - } + memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); + memTileEndEvents = memTileStartEvents; + + microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); + } - bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) - { - aieDevInst = static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, handle, deviceId)) ; - aieDevice = static_cast(db->getStaticInfo().getAieDevice(allocateAieDevice, deallocateAieDevice, handle, deviceId)) ; - if (!aieDevInst || !aieDevice) { - xrt_core::message::send(severity_level::warning, "XRT", - "Unable to get AIE device. 
There will be no AIE profiling."); - return false; - } - return true; + bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) + { + aieDevInst = static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, handle, deviceId)) ; + aieDevice = static_cast(db->getStaticInfo().getAieDevice(allocateAieDevice, deallocateAieDevice, handle, deviceId)) ; + if (!aieDevInst || !aieDevice) { + xrt_core::message::send(severity_level::warning, "XRT", + "Unable to get AIE device. There will be no AIE profiling."); + return false; } + return true; + } - void AieProfile_VE2Impl::updateDevice() { + void AieProfile_VE2Impl::updateDevice() { + + if(!checkAieDevice(deviceID, metadata->getHandle())) + return; + + // Submit nop.elf before configuring profile + if (!aie::submitNopElf(metadata->getHandle())) { + xrt_core::message::send(severity_level::warning, "XRT", + "Failed to submit nop.elf. AIE profile configuration will not proceed."); + return; + } - if(!checkAieDevice(deviceID, metadata->getHandle())) - return; + bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); - // Submit nop.elf before configuring profile - if (!aie::submitNopElf(metadata->getHandle())) { + if (!runtimeCounters) { + void* h = metadata->getHandle(); + std::shared_ptr device = xrt_core::get_userpf_device(h); + if (!device) + device = xdp::util::convertToCoreDevice(h, true); + if (!device) { xrt_core::message::send(severity_level::warning, "XRT", - "Failed to submit nop.elf. 
AIE profile configuration will not proceed."); + "AIE Profile: could not resolve core device for xclbin profile counters."); + (db->getStaticInfo()).setIsAIECounterRead(deviceID, true); return; } - - bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); - - if (!runtimeCounters) { - void* h = metadata->getHandle(); - std::shared_ptr device = xrt_core::get_userpf_device(h); - if (!device) - device = xdp::util::convertToCoreDevice(h, true); - if (!device) { - xrt_core::message::send(severity_level::warning, "XRT", - "AIE Profile: could not resolve core device for xclbin profile counters."); - (db->getStaticInfo()).setIsAIECounterRead(deviceID, true); - return; - } - auto counters = xrt_core::edge::aie::get_profile_counters(device.get()); - - if (counters.empty()) { + auto counters = xrt_core::edge::aie::get_profile_counters(device.get()); + + if (counters.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile Counters were not found for this design. Please specify " + "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " + "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " + "under \"AIE_profile_settings\" section in your xrt.ini."); + (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); + return; + } + else { + XAie_DevInst* aieDevInst = + static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, metadata->getHandle())); + + if (!aieDevInst) { xrt_core::message::send(severity_level::warning, "XRT", - "AIE Profile Counters were not found for this design. 
Please specify " - "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " - "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " - "under \"AIE_profile_settings\" section in your xrt.ini."); - (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); + "Failed to get AIE device instance for profile counters."); return; } - else { - XAie_DevInst* aieDevInst = - static_cast(db->getStaticInfo().getAieDevInst(fetchAieDevInst, metadata->getHandle())); - - if (!aieDevInst) { - xrt_core::message::send(severity_level::warning, "XRT", - "Failed to get AIE device instance for profile counters."); - return; - } - xrt_core::message::send(severity_level::debug, "XRT", "Processing " + std::to_string(counters.size()) + " counters"); - for (auto& counter : counters) { - std::stringstream msg; - msg << "Adding counter " << counter.id << " at (" - << +counter.column << "," << +counter.row << ") module: " << counter.module; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - - // For pre-configured counters from xclbin metadata, the hardware is already configured - // Payload is used for reporting metadata (channel/stream IDs), set to 0 for these counters - // as we don't have full tile information (stream_ids, is_master_vec) to safely compute it - uint64_t payload = 0; - - (db->getStaticInfo()).addAIECounter(deviceID, counter.id, counter.column, - counter.row, counter.counterNumber, counter.startEvent, counter.endEvent, - counter.resetEvent, payload, counter.clockFreqMhz, counter.module, counter.name); - } - xrt_core::message::send(severity_level::debug, "XRT", "Finished processing counters"); + xrt_core::message::send(severity_level::debug, "XRT", "Processing " + std::to_string(counters.size()) + " counters"); + for (auto& counter : counters) { + std::stringstream msg; + msg << "Adding counter " << counter.id << " at (" + << +counter.column << "," << +counter.row << ") module: " << counter.module; + 
xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + + // For pre-configured counters from xclbin metadata, the hardware is already configured + // Payload is used for reporting metadata (channel/stream IDs), set to 0 for these counters + // as we don't have full tile information (stream_ids, is_master_vec) to safely compute it + uint64_t payload = 0; + + (db->getStaticInfo()).addAIECounter(deviceID, counter.id, counter.column, + counter.row, counter.counterNumber, counter.startEvent, counter.endEvent, + counter.resetEvent, payload, counter.clockFreqMhz, counter.module, counter.name); } + xrt_core::message::send(severity_level::debug, "XRT", "Finished processing counters"); } - } - - // Get reportable payload specific for this tile and/or counter - uint64_t - AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, - const tile_type& tile, - const module_type type, - uint8_t column, - uint8_t row, - uint16_t startEvent, - const std::string metricSet, - const uint8_t channel, - uint8_t logicalPortIndex) - { - // 1. Profile API specific values - if (aie::profile::profileAPIMetricSet(metricSet)) - return getAdfProfileAPIPayload(tile, metricSet); - - // 2. Channel/stream IDs for interface tiles - if (type == module_type::shim) { - // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel/stream ID) - // portnum = physical stream-switch port (0-7) from event; stream_ids/is_master_vec - // are indexed by logical port (size = number of configured ports). When portnum is - // out of range (e.g. physical ports 4-7 when only 4 logical ports), use - // logicalPortIndex. - auto portnum = xdp::aie::getPortNumberFromEvent(static_cast(startEvent)); - uint8_t streamPortId = (portnum >= tile.stream_ids.size()) ? - 0 : static_cast(tile.stream_ids.at(portnum)); - uint8_t idToReport = (tile.subtype == io_type::GMIO) ? channel : streamPortId; - uint8_t isChannel = (tile.subtype == io_type::GMIO) ? 
1 : 0; - uint8_t isMaster = aie::isInputSet(type, metricSet) ? 0 : 1; - - return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) - | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | idToReport); } + } - // 3. Channel IDs for memory tiles - if (type == module_type::mem_tile) { - // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel ID) - uint8_t isChannel = 1; - uint8_t isMaster = aie::isInputSet(type, metricSet) ? 1 : 0; - return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) - | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); - } + // Get reportable payload specific for this tile and/or counter + uint64_t + AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, + const tile_type& tile, + const module_type type, + uint8_t column, + uint8_t row, + uint16_t startEvent, + const std::string metricSet, + const uint8_t channel, + uint8_t logicalPortIndex) + { + // 1. Profile API specific values + if (aie::profile::profileAPIMetricSet(metricSet)) + return getAdfProfileAPIPayload(tile, metricSet); + + // 2. Channel/stream IDs for interface tiles + if (type == module_type::shim) { + // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel/stream ID) + // portnum = physical stream-switch port (0-7) from event; stream_ids/is_master_vec + // are indexed by logical port (size = number of configured ports). When portnum is + // out of range (e.g. physical ports 4-7 when only 4 logical ports), use + // logicalPortIndex. + auto portnum = xdp::aie::getPortNumberFromEvent(static_cast(startEvent)); + uint8_t streamPortId = (portnum >= tile.stream_ids.size()) ? + 0 : static_cast(tile.stream_ids.at(portnum)); + uint8_t idToReport = (tile.subtype == io_type::GMIO) ? channel : streamPortId; + uint8_t isChannel = (tile.subtype == io_type::GMIO) ? 1 : 0; + uint8_t isMaster = aie::isInputSet(type, metricSet) ? 0 : 1; + + return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) + | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | idToReport); + } - // 4. 
DMA BD sizes for AIE tiles - // NOTE: value = ((max BD size) << 16) & ((isMaster) << 8) & (isChannel << 7) & (channel ID) + // 3. Channel IDs for memory tiles + if (type == module_type::mem_tile) { + // NOTE: value = ((isMaster) << 8) & (isChannel << 7) & (channel ID) uint8_t isChannel = 1; - uint8_t isMaster = aie::isInputSet(type, metricSet) ? 1 : 0; - uint32_t payloadValue = ((isMaster << PAYLOAD_IS_MASTER_SHIFT) - | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); - - if ((metadata->getHardwareGen() != 1) - || ((startEvent != XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_MEM) - && (startEvent != XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_MEM) - && (startEvent != XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_MEM) - && (startEvent != XAIE_EVENT_DMA_MM2S_1_FINISHED_BD_MEM))) - return payloadValue; - - // Get average BD size for throughput calculations (AIE1 only) - constexpr int NUM_BDS = 8; - constexpr uint32_t BYTES_PER_WORD = 4; - constexpr uint32_t ACTUAL_OFFSET = 1; - uint64_t offsets[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL, XAIEGBL_MEM_DMABD1CTRL, - XAIEGBL_MEM_DMABD2CTRL, XAIEGBL_MEM_DMABD3CTRL, - XAIEGBL_MEM_DMABD4CTRL, XAIEGBL_MEM_DMABD5CTRL, - XAIEGBL_MEM_DMABD6CTRL, XAIEGBL_MEM_DMABD7CTRL}; - uint32_t lsbs[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_LSB, XAIEGBL_MEM_DMABD1CTRL_LEN_LSB, - XAIEGBL_MEM_DMABD2CTRL_LEN_LSB, XAIEGBL_MEM_DMABD3CTRL_LEN_LSB, - XAIEGBL_MEM_DMABD4CTRL_LEN_LSB, XAIEGBL_MEM_DMABD5CTRL_LEN_LSB, - XAIEGBL_MEM_DMABD6CTRL_LEN_LSB, XAIEGBL_MEM_DMABD7CTRL_LEN_LSB}; - uint32_t masks[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_MASK, XAIEGBL_MEM_DMABD1CTRL_LEN_MASK, - XAIEGBL_MEM_DMABD2CTRL_LEN_MASK, XAIEGBL_MEM_DMABD3CTRL_LEN_MASK, - XAIEGBL_MEM_DMABD4CTRL_LEN_MASK, XAIEGBL_MEM_DMABD5CTRL_LEN_MASK, - XAIEGBL_MEM_DMABD6CTRL_LEN_MASK, XAIEGBL_MEM_DMABD7CTRL_LEN_MASK}; - uint32_t valids[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD1CTRL_VALBD_MASK, - XAIEGBL_MEM_DMABD2CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD3CTRL_VALBD_MASK, - XAIEGBL_MEM_DMABD4CTRL_VALBD_MASK, 
XAIEGBL_MEM_DMABD5CTRL_VALBD_MASK, - XAIEGBL_MEM_DMABD6CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD7CTRL_VALBD_MASK}; - - uint32_t maxBDSize = 0; - auto tileOffset = XAie_GetTileAddr(aieDevInst, row, column); - for (int bd = 0; bd < NUM_BDS; ++bd) { - uint32_t regValue = 0; - XAie_Read32(aieDevInst, tileOffset + offsets[bd], ®Value); - - if (regValue & valids[bd]) { - uint32_t bdBytes = BYTES_PER_WORD * (((regValue >> lsbs[bd]) & masks[bd]) + ACTUAL_OFFSET); - maxBDSize = std::max(bdBytes, maxBDSize); - } - } + uint8_t isMaster = aie::isInputSet(type, metricSet) ? 1 : 0; + return ((isMaster << PAYLOAD_IS_MASTER_SHIFT) + | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); + } - payloadValue |= (maxBDSize << PAYLOAD_BD_SIZE_SHIFT); + // 4. DMA BD sizes for AIE tiles + // NOTE: value = ((max BD size) << 16) & ((isMaster) << 8) & (isChannel << 7) & (channel ID) + uint8_t isChannel = 1; + uint8_t isMaster = aie::isInputSet(type, metricSet) ? 1 : 0; + uint32_t payloadValue = ((isMaster << PAYLOAD_IS_MASTER_SHIFT) + | (isChannel << PAYLOAD_IS_CHANNEL_SHIFT) | channel); + + if ((metadata->getHardwareGen() != 1) + || ((startEvent != XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_MEM) + && (startEvent != XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_MEM) + && (startEvent != XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_MEM) + && (startEvent != XAIE_EVENT_DMA_MM2S_1_FINISHED_BD_MEM))) return payloadValue; - } - - uint64_t - AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) - { - if (metricSet == METRIC_LATENCY) - return metadata->getIntfLatencyPayload(tile); - return 0; + // Get average BD size for throughput calculations (AIE1 only) + constexpr int NUM_BDS = 8; + constexpr uint32_t BYTES_PER_WORD = 4; + constexpr uint32_t ACTUAL_OFFSET = 1; + uint64_t offsets[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL, XAIEGBL_MEM_DMABD1CTRL, + XAIEGBL_MEM_DMABD2CTRL, XAIEGBL_MEM_DMABD3CTRL, + XAIEGBL_MEM_DMABD4CTRL, XAIEGBL_MEM_DMABD5CTRL, + XAIEGBL_MEM_DMABD6CTRL, XAIEGBL_MEM_DMABD7CTRL}; + 
uint32_t lsbs[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_LSB, XAIEGBL_MEM_DMABD1CTRL_LEN_LSB, + XAIEGBL_MEM_DMABD2CTRL_LEN_LSB, XAIEGBL_MEM_DMABD3CTRL_LEN_LSB, + XAIEGBL_MEM_DMABD4CTRL_LEN_LSB, XAIEGBL_MEM_DMABD5CTRL_LEN_LSB, + XAIEGBL_MEM_DMABD6CTRL_LEN_LSB, XAIEGBL_MEM_DMABD7CTRL_LEN_LSB}; + uint32_t masks[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_LEN_MASK, XAIEGBL_MEM_DMABD1CTRL_LEN_MASK, + XAIEGBL_MEM_DMABD2CTRL_LEN_MASK, XAIEGBL_MEM_DMABD3CTRL_LEN_MASK, + XAIEGBL_MEM_DMABD4CTRL_LEN_MASK, XAIEGBL_MEM_DMABD5CTRL_LEN_MASK, + XAIEGBL_MEM_DMABD6CTRL_LEN_MASK, XAIEGBL_MEM_DMABD7CTRL_LEN_MASK}; + uint32_t valids[NUM_BDS] = {XAIEGBL_MEM_DMABD0CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD1CTRL_VALBD_MASK, + XAIEGBL_MEM_DMABD2CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD3CTRL_VALBD_MASK, + XAIEGBL_MEM_DMABD4CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD5CTRL_VALBD_MASK, + XAIEGBL_MEM_DMABD6CTRL_VALBD_MASK, XAIEGBL_MEM_DMABD7CTRL_VALBD_MASK}; + + uint32_t maxBDSize = 0; + auto tileOffset = XAie_GetTileAddr(aieDevInst, row, column); + for (int bd = 0; bd < NUM_BDS; ++bd) { + uint32_t regValue = 0; + XAie_Read32(aieDevInst, tileOffset + offsets[bd], ®Value); + + if (regValue & valids[bd]) { + uint32_t bdBytes = BYTES_PER_WORD * (((regValue >> lsbs[bd]) & masks[bd]) + ACTUAL_OFFSET); + maxBDSize = std::max(bdBytes, maxBDSize); + } } - void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, - const tile_type& tile, XAie_ModuleType mod) - { - auto col = tile.col; - auto row = tile.row; - auto loc = XAie_TileLoc(col, row); - std::string moduleName = (mod == XAIE_CORE_MOD) ? "aie" - : ((mod == XAIE_MEM_MOD) ? 
"aie_memory" - : "interface_tile"); - const std::string groups[3] = { - XAIEDEV_DEFAULT_GROUP_GENERIC, - XAIEDEV_DEFAULT_GROUP_STATIC, - XAIEDEV_DEFAULT_GROUP_AVAIL - }; + payloadValue |= (maxBDSize << PAYLOAD_BD_SIZE_SHIFT); + return payloadValue; + } + + uint64_t + AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) + { + if (metricSet == METRIC_LATENCY) + return metadata->getIntfLatencyPayload(tile); - std::stringstream msg; - msg << "Resource usage stats for Tile : (" << +col << "," << +row - << ") Module : " << moduleName << std::endl; - for (auto&g : groups) { - auto stats = aieDevice->getRscStat(g); - auto pc = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); - auto ts = stats.getNumRsc(loc, mod, xaiefal::XAIE_TRACEEVENT); - auto bc = stats.getNumRsc(loc, mod, xaiefal::XAIE_BROADCAST); - msg << "Resource Group : " << std::left << std::setw(10) << g << " " - << "Performance Counters : " << pc << " " - << "Trace Slots : " << ts << " " - << "Broadcast Channels : " << bc << " " - << std::endl; - } + return 0; + } - xrt_core::message::send(severity_level::info, "XRT", msg.str()); + void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, + const tile_type& tile, XAie_ModuleType mod) + { + auto col = tile.col; + auto row = tile.row; + auto loc = XAie_TileLoc(col, row); + std::string moduleName = (mod == XAIE_CORE_MOD) ? "aie" + : ((mod == XAIE_MEM_MOD) ? 
"aie_memory" + : "interface_tile"); + const std::string groups[3] = { + XAIEDEV_DEFAULT_GROUP_GENERIC, + XAIEDEV_DEFAULT_GROUP_STATIC, + XAIEDEV_DEFAULT_GROUP_AVAIL + }; + + std::stringstream msg; + msg << "Resource usage stats for Tile : (" << +col << "," << +row + << ") Module : " << moduleName << std::endl; + for (auto&g : groups) { + auto stats = aieDevice->getRscStat(g); + auto pc = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); + auto ts = stats.getNumRsc(loc, mod, xaiefal::XAIE_TRACEEVENT); + auto bc = stats.getNumRsc(loc, mod, xaiefal::XAIE_BROADCAST); + msg << "Resource Group : " << std::left << std::setw(10) << g << " " + << "Performance Counters : " << pc << " " + << "Trace Slots : " << ts << " " + << "Broadcast Channels : " << bc << " " + << std::endl; } - // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings - bool - AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) - { - int counterId = 0; - bool runtimeCounters = false; - - auto stats = aieDevice->getRscStat(XAIEDEV_DEFAULT_GROUP_AVAIL); - auto hwGen = metadata->getHardwareGen(); - auto configChannel0 = metadata->getConfigChannel0(); - auto configChannel1 = metadata->getConfigChannel1(); - uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); - aie::displayColShiftInfo(startColShift); - - for (int module = 0; module < metadata->getNumModules(); ++module) { - auto configMetrics = metadata->getConfigMetricsVec(module); - if (configMetrics.empty()) - continue; - - int numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; - XAie_ModuleType mod = aie::profile::getFalModuleType(module); + xrt_core::message::send(severity_level::info, "XRT", msg.str()); + } + + // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings + bool + AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) + { + int counterId = 0; + bool runtimeCounters = 
false; + + auto stats = aieDevice->getRscStat(XAIEDEV_DEFAULT_GROUP_AVAIL); + auto hwGen = metadata->getHardwareGen(); + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + aie::displayColShiftInfo(startColShift); + + for (int module = 0; module < metadata->getNumModules(); ++module) { + auto configMetrics = metadata->getConfigMetricsVec(module); + if (configMetrics.empty()) + continue; + + int numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; + XAie_ModuleType mod = aie::profile::getFalModuleType(module); + + // Iterate over tiles and metrics to configure all desired counters + for (auto& tileMetric : configMetrics) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col + startColShift; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); + if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) + type = module_type::dma; - // Iterate over tiles and metrics to configure all desired counters - for (auto& tileMetric : configMetrics) { - auto& metricSet = tileMetric.second; - auto tile = tileMetric.first; - auto col = tile.col + startColShift; - auto row = tile.row; - auto subtype = tile.subtype; - auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); - if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) - type = module_type::dma; - - // Catch microcontroller event sets for MDM - if (module == static_cast(module_type::uc)) { - // Configure - auto events = microcontrollerEvents[metricSet]; - aie::profile::configMDMCounters(aieDevInst, hwGen, col, row, events); - // Record - tile_type recordTile; - recordTile.col = col; - recordTile.row = row; - microcontrollerTileEvents[recordTile] = events; - runtimeCounters = true; - continue; - } + // Catch microcontroller event sets for MDM + if (module 
== static_cast(module_type::uc)) { + // Configure + auto events = microcontrollerEvents[metricSet]; + aie::profile::configMDMCounters(aieDevInst, hwGen, col, row, events); + // Record + tile_type recordTile; + recordTile.col = col; + recordTile.row = row; + microcontrollerTileEvents[recordTile] = events; + runtimeCounters = true; + continue; + } - // Ignore invalid types and inactive modules - // NOTE: Inactive core modules are configured when utilizing - // stream switch monitor ports to profile DMA channels - if (!aie::profile::isValidType(type, mod)) + // Ignore invalid types and inactive modules + // NOTE: Inactive core modules are configured when utilizing + // stream switch monitor ports to profile DMA channels + if (!aie::profile::isValidType(type, mod)) + continue; + if ((type == module_type::dma) && !tile.active_memory) + continue; + if ((type == module_type::core) && !tile.active_core) { + if (metadata->getPairModuleIndex(metricSet, type) < 0) continue; - if ((type == module_type::dma) && !tile.active_memory) + } + + auto loc = XAie_TileLoc(col, row); + auto& xaieTile = aieDevice->tile(col, row); + auto xaieModule = (mod == XAIE_CORE_MOD) ? xaieTile.core() + : ((mod == XAIE_MEM_MOD) ? xaieTile.mem() + : xaieTile.pl()); + + auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] + : ((type == module_type::dma) ? memoryStartEvents[metricSet] + : ((type == module_type::shim) ? shimStartEvents[metricSet] + : memTileStartEvents[metricSet])); + auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] + : ((type == module_type::dma) ? memoryEndEvents[metricSet] + : ((type == module_type::shim) ? shimEndEvents[metricSet] + : memTileEndEvents[metricSet])); + std::vector resetEvents = {}; + + int numCounters = 0; + auto numFreeCtr = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); + numFreeCtr = (startEvents.size() < numFreeCtr) ? 
startEvents.size() : numFreeCtr; + + int numFreeCtrSS = numFreeCtr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + if (numFreeCtr < 2) { continue; - if ((type == module_type::core) && !tile.active_core) { - if (metadata->getPairModuleIndex(metricSet, type) < 0) - continue; } + // We need to monitor single stream switch monitor port + // numFreeCtrSS = 1 ; + } - auto loc = XAie_TileLoc(col, row); - auto& xaieTile = aieDevice->tile(col, row); - auto xaieModule = (mod == XAIE_CORE_MOD) ? xaieTile.core() - : ((mod == XAIE_MEM_MOD) ? xaieTile.mem() - : xaieTile.pl()); - - auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] - : ((type == module_type::dma) ? memoryStartEvents[metricSet] - : ((type == module_type::shim) ? shimStartEvents[metricSet] - : memTileStartEvents[metricSet])); - auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] - : ((type == module_type::dma) ? memoryEndEvents[metricSet] - : ((type == module_type::shim) ? shimEndEvents[metricSet] - : memTileEndEvents[metricSet])); - std::vector resetEvents = {}; - - int numCounters = 0; - auto numFreeCtr = stats.getNumRsc(loc, mod, xaiefal::XAIE_PERFCOUNT); - numFreeCtr = (startEvents.size() < numFreeCtr) ? startEvents.size() : numFreeCtr; - - int numFreeCtrSS = numFreeCtr; - if (aie::profile::profileAPIMetricSet(metricSet)) { - if (numFreeCtr < 2) { - continue; - } - // We need to monitor single stream switch monitor port - // numFreeCtrSS = 1 ; + // Specify Sel0/Sel1 for memory tile events 21-44 + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + + // Modify events as needed + aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); + endEvents = startEvents; + + // TBD : Placeholder to configure AIE core with required profile counters. 
+ aie::profile::configEventSelections(aieDevInst, loc, type, metricSet, channel0); + // TBD : Placeholder to configure shim tile with required profile counters. + + aie::profile::configStreamSwitchPorts(tileMetric.first, xaieTile, loc, type, + numFreeCtrSS, metricSet, channel0, channel1, startEvents, endEvents, streamPorts); + + // Identify the profiling API metric sets and configure graph events + if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { + XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; + bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, + metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); + if (status) { + graphIteratorBrodcastChannelEvent = bcEvent; + graphItrBroadcastConfigDone = true; } + } - // Specify Sel0/Sel1 for memory tile events 21-44 - auto iter0 = configChannel0.find(tile); - auto iter1 = configChannel1.find(tile); - uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; - uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + if (aie::profile::profileAPIMetricSet(metricSet)) { + // Re-use the existing port running event for both the counters + startEvents[startEvents.size()-1] = startEvents[0]; - // Modify events as needed - aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); - endEvents = startEvents; - - // TBD : Placeholder to configure AIE core with required profile counters. - aie::profile::configEventSelections(aieDevInst, loc, type, metricSet, channel0); - // TBD : Placeholder to configure shim tile with required profile counters. 
- - aie::profile::configStreamSwitchPorts(tileMetric.first, xaieTile, loc, type, - numFreeCtrSS, metricSet, channel0, channel1, startEvents, endEvents, streamPorts); - - // Identify the profiling API metric sets and configure graph events - if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { - XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; - bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, - metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); - if (status) { - graphIteratorBrodcastChannelEvent = bcEvent; - graphItrBroadcastConfigDone = true; - } + // Use start events as End events for profile counters if threshold is not provided + endEvents[endEvents.size()-1] = endEvents[0]; + + // Use the set values broadcast events for the reset of counter + resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; + if (type == module_type::shim) { + if (metadata->getUseGraphIterator()) + resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; + else + resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; } + } + uint32_t threshold = 0; + // Request and configure all available counters for this tile + for (int i=0; i < numFreeCtr; ++i) { + auto startEvent = startEvents.at(i); + auto endEvent = endEvents.at(i); + auto resetEvent = XAIE_EVENT_NONE_CORE; + auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); + // For metric sets with multiple stream-switch ports, use modulo for channel mapping + uint8_t channelNum = portnum % 2; + uint8_t channel = (channelNum == 0) ? channel0 : channel1; + + // Configure group event before reserving and starting counter + aie::profile::configGroupEvents(aieDevInst, loc, mod, type, metricSet, startEvent, channel); + + // Configure the profile counters for profile APIs metric sets. 
+ std::shared_ptr perfCounter = nullptr; if (aie::profile::profileAPIMetricSet(metricSet)) { - // Re-use the existing port running event for both the counters - startEvents[startEvents.size()-1] = startEvents[0]; - - // Use start events as End events for profile counters if threshold is not provided - endEvents[endEvents.size()-1] = endEvents[0]; - - // Use the set values broadcast events for the reset of counter - resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; - if (type == module_type::shim) { - if (metadata->getUseGraphIterator()) - resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; - else - resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; - } - } + resetEvent = resetEvents.at(i); + threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); + threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); - uint32_t threshold = 0; - // Request and configure all available counters for this tile - for (int i=0; i < numFreeCtr; ++i) { - auto startEvent = startEvents.at(i); - auto endEvent = endEvents.at(i); - auto resetEvent = XAIE_EVENT_NONE_CORE; - auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); - // For metric sets with multiple stream-switch ports, use modulo for channel mapping - uint8_t channelNum = portnum % 2; - uint8_t channel = (channelNum == 0) ? channel0 : channel1; - - // Configure group event before reserving and starting counter - aie::profile::configGroupEvents(aieDevInst, loc, mod, type, metricSet, startEvent, channel); - - // Configure the profile counters for profile APIs metric sets. 
- std::shared_ptr perfCounter = nullptr; - if (aie::profile::profileAPIMetricSet(metricSet)) { - resetEvent = resetEvents.at(i); - threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); - threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); - - if (i==0 && threshold>0) - endEvent = XAIE_EVENT_PERF_CNT_1_PL; - - if (i==1 && threshold == 0) - continue; + if (i==0 && threshold>0) + endEvent = XAIE_EVENT_PERF_CNT_1_PL; - XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; - perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, - mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), - threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); - } - else { - // Request counter from resource manager - perfCounter = xaieModule.perfCounter(); - auto ret = perfCounter->initialize(mod, startEvent, mod, endEvent); - if (ret != XAIE_OK) break; - ret = perfCounter->reserve(); - if (ret != XAIE_OK) break; - - // Start the counter - ret = perfCounter->start(); - if (ret != XAIE_OK) break; - } - if (!perfCounter) + if (i==1 && threshold == 0) continue; - perfCounters.push_back(perfCounter); - - // Generate user_event_1 for byte count metric set after configuration - if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { - XAie_LocType tileloc = XAie_TileLoc(tile.col, tile.row); - //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to - // to transition the FSM from Idle->State0->State1. - // eventC = Port Running and eventD = stop event (counter event). 
- XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); - XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); - } + + XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; + perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, + mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), + threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); + } + else { + // Request counter from resource manager + perfCounter = xaieModule.perfCounter(); + auto ret = perfCounter->initialize(mod, startEvent, mod, endEvent); + if (ret != XAIE_OK) break; + ret = perfCounter->reserve(); + if (ret != XAIE_OK) break; + + // Start the counter + ret = perfCounter->start(); + if (ret != XAIE_OK) break; + } + if (!perfCounter) + continue; + perfCounters.push_back(perfCounter); + + // Generate user_event_1 for byte count metric set after configuration + if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { + XAie_LocType tileloc = XAie_TileLoc(tile.col, tile.row); + //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to + // to transition the FSM from Idle->State0->State1. + // eventC = Port Running and eventD = stop event (counter event). 
+ XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + XAie_EventGenerate(aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + } - // Convert enums to physical event IDs for reporting purposes - auto physicalEventIds = aie::profile::getEventPhysicalId(aieDevInst, loc, mod, type, metricSet, - startEvent, endEvent); - uint16_t phyStartEvent = physicalEventIds.first; - uint16_t phyEndEvent = physicalEventIds.second; - - // Get payload for reporting purposes - uint64_t payload = getCounterPayload(aieDevInst, tileMetric.first, type, col, row, - startEvent, metricSet, channel, static_cast(i)); - // Store counter info in database - std::string counterName = "AIE Counter " + std::to_string(counterId); - (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, - phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), - metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 0 : tile.stream_ids[0])); - counterId++; - numCounters++; - } // numFreeCtr + // Convert enums to physical event IDs for reporting purposes + auto physicalEventIds = aie::profile::getEventPhysicalId(aieDevInst, loc, mod, type, metricSet, + startEvent, endEvent); + uint16_t phyStartEvent = physicalEventIds.first; + uint16_t phyEndEvent = physicalEventIds.second; + + // Get payload for reporting purposes + uint64_t payload = getCounterPayload(aieDevInst, tileMetric.first, type, col, row, + startEvent, metricSet, channel, static_cast(i)); + // Store counter info in database + std::string counterName = "AIE Counter " + std::to_string(counterId); + (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, + phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), + metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 
0 : tile.stream_ids[0])); + counterId++; + numCounters++; + } // numFreeCtr - std::stringstream msg; - msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col - << "," << +row << ") using metric set " << metricSet << "."; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - numTileCounters[numCounters]++; - } // configMetrics - - // Report counters reserved per tile - { - std::stringstream msg; - msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; - for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { - if (numTileCounters[n] == 0) - continue; - msg << n << ": " << numTileCounters[n] << " tiles, "; - (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); - } - xrt_core::message::send(severity_level::info, "XRT", msg.str().substr(0, msg.str().size()-2)); + std::stringstream msg; + msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col + << "," << +row << ") using metric set " << metricSet << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + numTileCounters[numCounters]++; + } // configMetrics + + // Report counters reserved per tile + { + std::stringstream msg; + msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; + for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { + if (numTileCounters[n] == 0) + continue; + msg << n << ": " << numTileCounters[n] << " tiles, "; + (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); } + xrt_core::message::send(severity_level::info, "XRT", msg.str().substr(0, msg.str().size()-2)); + } - runtimeCounters = true; - } // modules + runtimeCounters = true; + } // modules - return runtimeCounters; - } + return runtimeCounters; + } - void AieProfile_VE2Impl::startPoll(const uint64_t id) - { - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll."); - 
threadCtrl = true; - thread = std::make_unique(&AieProfile_VE2Impl::continuePoll, this, id); - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll, after creating thread instance."); - } + void AieProfile_VE2Impl::startPoll(const uint64_t id) + { + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll."); + threadCtrl = true; + thread = std::make_unique(&AieProfile_VE2Impl::continuePoll, this, id); + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::startPoll, after creating thread instance."); + } - void AieProfile_VE2Impl::continuePoll(const uint64_t id) - { - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::continuePoll"); + void AieProfile_VE2Impl::continuePoll(const uint64_t id) + { + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::continuePoll"); - while (threadCtrl) { - poll(id); - std::this_thread::sleep_for(std::chrono::microseconds(metadata->getPollingIntervalVal())); - } - //Final Polling Operation + while (threadCtrl) { poll(id); + std::this_thread::sleep_for(std::chrono::microseconds(metadata->getPollingIntervalVal())); } + //Final Polling Operation + poll(id); + } - void AieProfile_VE2Impl::poll(const uint64_t id) - { - // Wait until xclbin has been loaded and device has been updated in database - if (!(db->getStaticInfo().isDeviceReady(id))) - return; - - if (!aieDevInst) - return; - - uint32_t prevColumn = 0; - uint32_t prevRow = 0; - uint64_t timerValue = 0; - auto hwGen = metadata->getHardwareGen(); - - // Iterate over all AIE Counters & Timers - auto numCounters = db->getStaticInfo().getNumAIECounter(id); - for (uint64_t c=0; c < numCounters; c++) { - auto aie = db->getStaticInfo().getAIECounter(id, c); - if (!aie) - continue; - - std::vector values; - values.push_back(aie->column); - values.push_back(aie::getRelativeRow(aie->row, metadata->getAIETileRowOffset())); - 
values.push_back(aie->startEvent); - values.push_back(aie->endEvent); - values.push_back(aie->resetEvent); - - // Read counter value from device - uint32_t counterValue; - if (perfCounters.empty()) { - // Compiler-defined counters - XAie_LocType tileLocation = XAie_TileLoc(aie->column, aie->row); - XAie_PerfCounterGet(aieDevInst, tileLocation, XAIE_CORE_MOD, aie->counterNumber, &counterValue); - } - else { - // Runtime-defined counters - if (aie::profile::adfAPILatencyConfigEvent(aie->startEvent)) - { - uint32_t srcCounterValue = 0; - uint32_t destCounterValue = 0; - try { - std::string srcDestPairKey = metadata->getSrcDestPairKey(aie->column, aie->row, aie->streamId); - uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).srcPcIdx; - uint64_t destPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).destPcIdx; - auto srcPerfCount = perfCounters.at(srcPcIdx); - auto destPerfCount = perfCounters.at(destPcIdx); - srcPerfCount->readResult(srcCounterValue); - destPerfCount->readResult(destCounterValue); - counterValue = (destCounterValue > srcCounterValue) ? (destCounterValue-srcCounterValue) : (srcCounterValue-destCounterValue); - uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult; - if (counterValue != storedValue) - adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult = counterValue; - } catch(...) 
{ - continue; - } + void AieProfile_VE2Impl::poll(const uint64_t id) + { + // Wait until xclbin has been loaded and device has been updated in database + if (!(db->getStaticInfo().isDeviceReady(id))) + return; + + if (!aieDevInst) + return; + + uint32_t prevColumn = 0; + uint32_t prevRow = 0; + uint64_t timerValue = 0; + auto hwGen = metadata->getHardwareGen(); + + // Iterate over all AIE Counters & Timers + auto numCounters = db->getStaticInfo().getNumAIECounter(id); + for (uint64_t c=0; c < numCounters; c++) { + auto aie = db->getStaticInfo().getAIECounter(id, c); + if (!aie) + continue; + + std::vector values; + values.push_back(aie->column); + values.push_back(aie::getRelativeRow(aie->row, metadata->getAIETileRowOffset())); + values.push_back(aie->startEvent); + values.push_back(aie->endEvent); + values.push_back(aie->resetEvent); + + // Read counter value from device + uint32_t counterValue; + if (perfCounters.empty()) { + // Compiler-defined counters + XAie_LocType tileLocation = XAie_TileLoc(aie->column, aie->row); + XAie_PerfCounterGet(aieDevInst, tileLocation, XAIE_CORE_MOD, aie->counterNumber, &counterValue); + } + else { + // Runtime-defined counters + if (aie::profile::adfAPILatencyConfigEvent(aie->startEvent)) + { + uint32_t srcCounterValue = 0; + uint32_t destCounterValue = 0; + try { + std::string srcDestPairKey = metadata->getSrcDestPairKey(aie->column, aie->row, aie->streamId); + uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).srcPcIdx; + uint64_t destPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::INTF_TILE_LATENCY).at(srcDestPairKey).destPcIdx; + auto srcPerfCount = perfCounters.at(srcPcIdx); + auto destPerfCount = perfCounters.at(destPcIdx); + srcPerfCount->readResult(srcCounterValue); + destPerfCount->readResult(destCounterValue); + counterValue = (destCounterValue > srcCounterValue) ? 
(destCounterValue-srcCounterValue) : (srcCounterValue-destCounterValue); + uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult; + if (counterValue != storedValue) + adfAPIResourceInfoMap[aie::profile::adfAPI::INTF_TILE_LATENCY][srcDestPairKey].profileResult = counterValue; + } catch(...) { + continue; } - else if (aie::profile::adfAPIStartToTransferredConfigEvent(aie->startEvent)) - { - try { - std::string srcKey = "(" + aie::uint8ToStr(aie->column) + "," + aie::uint8ToStr(aie->row) + ")"; - uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED).at(srcKey).srcPcIdx; - auto perfCounter = perfCounters.at(srcPcIdx); - perfCounter->readResult(counterValue); - uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult; - if (counterValue != storedValue) - adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult = counterValue; - } catch(...) { - continue; - } - } - else { - auto perfCounter = perfCounters.at(c); + } + else if (aie::profile::adfAPIStartToTransferredConfigEvent(aie->startEvent)) + { + try { + std::string srcKey = "(" + aie::uint8ToStr(aie->column) + "," + aie::uint8ToStr(aie->row) + ")"; + uint64_t srcPcIdx = adfAPIResourceInfoMap.at(aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED).at(srcKey).srcPcIdx; + auto perfCounter = perfCounters.at(srcPcIdx); perfCounter->readResult(counterValue); + uint64_t storedValue = adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult; + if (counterValue != storedValue) + adfAPIResourceInfoMap[aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED][srcKey].profileResult = counterValue; + } catch(...) 
{ + continue; } + } + else { + auto perfCounter = perfCounters.at(c); + perfCounter->readResult(counterValue); } - values.push_back(counterValue); - - // Read tile timer (once per tile to minimize overhead) - if ((aie->column != prevColumn) || (aie->row != prevRow)) { - prevColumn = aie->column; - prevRow = aie->row; - auto moduleType = aie::getModuleType(aie->row, metadata->getAIETileRowOffset()); - auto falModuleType = (moduleType == module_type::core) ? XAIE_CORE_MOD - : ((moduleType == module_type::shim) ? XAIE_PL_MOD - : XAIE_MEM_MOD); - XAie_LocType tileLocation = XAie_TileLoc(aie->column, aie->row); - XAie_ReadTimer(aieDevInst, tileLocation, falModuleType, &timerValue); - } - values.push_back(timerValue); - values.push_back(aie->payload); - - // Get timestamp in milliseconds - double timestamp = xrt_core::time_ns() / 1.0e6; - db->getDynamicInfo().addAIESample(id, timestamp, values); } - - // Read and record MDM counters (if available) - // NOTE: all MDM counters in a given tile are sampled in same read sequence - for (auto& ucTile : microcontrollerTileEvents) { - auto tile = ucTile.first; - auto events = ucTile.second; - - std::vector counterValues; - aie::profile::readMDMCounters(aieDevInst, hwGen, tile.col, tile.row, counterValues); - - double timestamp = xrt_core::time_ns() / 1.0e6; - - for (uint64_t c=0; c < counterValues.size(); c++) { - std::vector values; - values.push_back(tile.col); - values.push_back(0); - values.push_back(events.at(c)); - values.push_back(events.at(c)); - values.push_back(0); - values.push_back(counterValues.at(c)); - - db->getDynamicInfo().addAIESample(id, timestamp, values); - } + values.push_back(counterValue); + + // Read tile timer (once per tile to minimize overhead) + if ((aie->column != prevColumn) || (aie->row != prevRow)) { + prevColumn = aie->column; + prevRow = aie->row; + auto moduleType = aie::getModuleType(aie->row, metadata->getAIETileRowOffset()); + auto falModuleType = (moduleType == module_type::core) ? 
XAIE_CORE_MOD + : ((moduleType == module_type::shim) ? XAIE_PL_MOD + : XAIE_MEM_MOD); + XAie_LocType tileLocation = XAie_TileLoc(aie->column, aie->row); + XAie_ReadTimer(aieDevInst, tileLocation, falModuleType, &timerValue); } + values.push_back(timerValue); + values.push_back(aie->payload); + + // Get timestamp in milliseconds + double timestamp = xrt_core::time_ns() / 1.0e6; + db->getDynamicInfo().addAIESample(id, timestamp, values); } - void AieProfile_VE2Impl::endPoll() - { - xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::endPoll"); - if (!threadCtrl) - return; + // Read and record MDM counters (if available) + // NOTE: all MDM counters in a given tile are sampled in same read sequence + for (auto& ucTile : microcontrollerTileEvents) { + auto tile = ucTile.first; + auto events = ucTile.second; - threadCtrl = false; - if (thread && thread->joinable()) - thread->join(); + std::vector counterValues; + aie::profile::readMDMCounters(aieDevInst, hwGen, tile.col, tile.row, counterValues); - freeResources(); - } + double timestamp = xrt_core::time_ns() / 1.0e6; - void AieProfile_VE2Impl::freeResources() - { - displayAdfAPIResults(); - for (auto& c : perfCounters){ - c->stop(); - c->release(); + for (uint64_t c=0; c < counterValues.size(); c++) { + std::vector values; + values.push_back(tile.col); + values.push_back(0); + values.push_back(events.at(c)); + values.push_back(events.at(c)); + values.push_back(0); + values.push_back(counterValues.at(c)); + + db->getDynamicInfo().addAIESample(id, timestamp, values); } + } + } - for (auto& c : streamPorts){ - c->stop(); - c->release(); - } + void AieProfile_VE2Impl::endPoll() + { + xrt_core::message::send(severity_level::debug, "XRT", " In AieProfile_VE2Impl::endPoll"); + if (!threadCtrl) + return; + + threadCtrl = false; + if (thread && thread->joinable()) + thread->join(); + + freeResources(); + } + + void AieProfile_VE2Impl::freeResources() + { + displayAdfAPIResults(); + for (auto& c : 
perfCounters){ + c->stop(); + c->release(); + } - for (auto &bc : bcResourcesBytesTx) { - bc->stop(); - bc->release(); - } + for (auto& c : streamPorts){ + c->stop(); + c->release(); + } - for (auto &bc : bcResourcesLatency) { - bc->stop(); - bc->release(); - } + for (auto &bc : bcResourcesBytesTx) { + bc->stop(); + bc->release(); } - /**************************************************************************** - * Display start to bytes or latency results to output transcript - ***************************************************************************/ - void AieProfile_VE2Impl::displayAdfAPIResults() - { - for (auto &adfAPIType : adfAPIResourceInfoMap) { - if (adfAPIType.first == aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED) { - for (auto &adfApiResource : adfAPIType.second) { - std::stringstream msg; - msg << "Total start to bytes transferred for tile " << adfApiResource.first << " is " - << +adfApiResource.second.profileResult << " clock cycles for specified bytes."; - xrt_core::message::send(severity_level::warning, "XRT", msg.str()); - } - } - else if (adfAPIType.first == aie::profile::adfAPI::INTF_TILE_LATENCY) { - for(auto &adfApiResource : adfAPIType.second) { - GraphPortPair graphPortPair; - try { - graphPortPair = metadata->getSrcDestGraphPair(adfApiResource.first); - } - catch (...) 
{ - continue; - } + for (auto &bc : bcResourcesLatency) { + bc->stop(); + bc->release(); + } + } - std::stringstream msg; - msg << "Total latency between " << graphPortPair.srcGraphName - << ":" << graphPortPair.srcGraphPort << " and " - << graphPortPair.destGraphName << ":" << graphPortPair.destGraphPort - << " is " << +adfApiResource.second.profileResult << " clock cycles."; - xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + /**************************************************************************** + * Display start to bytes or latency results to output transcript + ***************************************************************************/ + void AieProfile_VE2Impl::displayAdfAPIResults() + { + for (auto &adfAPIType : adfAPIResourceInfoMap) { + if (adfAPIType.first == aie::profile::adfAPI::START_TO_BYTES_TRANSFERRED) { + for (auto &adfApiResource : adfAPIType.second) { + std::stringstream msg; + msg << "Total start to bytes transferred for tile " << adfApiResource.first << " is " + << +adfApiResource.second.profileResult << " clock cycles for specified bytes."; + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + } + } + else if (adfAPIType.first == aie::profile::adfAPI::INTF_TILE_LATENCY) { + for(auto &adfApiResource : adfAPIType.second) { + GraphPortPair graphPortPair; + try { + graphPortPair = metadata->getSrcDestGraphPair(adfApiResource.first); + } + catch (...) 
{ + continue; } + + std::stringstream msg; + msg << "Total latency between " << graphPortPair.srcGraphName + << ":" << graphPortPair.srcGraphPort << " and " + << graphPortPair.destGraphName << ":" << graphPortPair.destGraphPort + << " is " << +adfApiResource.second.profileResult << " clock cycles."; + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); } } } } -// End ZOCL flow + } +// END ZOCL flow -// XDNA flow +// XDNA flow #else - namespace xdp { - using tile_type = xdp::tile_type; - using module_type = xdp::module_type; - using severity_level = xrt_core::message::severity_level; +namespace xdp { + using tile_type = xdp::tile_type; + using module_type = xdp::module_type; + using severity_level = xrt_core::message::severity_level; - AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr metadata, uint64_t deviceID) - : AieProfileImpl(database, metadata, deviceID) - { - auto hwGen = metadata->getHardwareGen(); + AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr metadata, uint64_t deviceID) + : AieProfileImpl(database, metadata, deviceID) + { + auto hwGen = metadata->getHardwareGen(); - coreStartEvents = aie::profile::getCoreEventSets(hwGen); - coreEndEvents = coreStartEvents; + coreStartEvents = aie::profile::getCoreEventSets(hwGen); + coreEndEvents = coreStartEvents; - memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); - memoryEndEvents = memoryStartEvents; + memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); + memoryEndEvents = memoryStartEvents; - shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); - shimEndEvents = shimStartEvents; - shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PERF_CNT_0_PL}; + shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); + shimEndEvents = shimStartEvents; + shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PERF_CNT_0_PL}; - memTileStartEvents = 
aie::profile::getMemoryTileEventSets(hwGen); - memTileEndEvents = memTileStartEvents; - - microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); - - tranxHandler = std::make_unique(); - - // Create debug buffer for AIE Profile results - auto context = metadata->getHwContext(); - uint32_t* output = nullptr; - std::map activeUCsegmentMap; - activeUCsegmentMap[0] = 0x20000; - try { - resultBO = xrt_core::bo_int::create_bo(context, 0x20000, xrt_core::bo_int::use_type::uc_debug); - xrt_core::bo_int::config_bo(resultBO, activeUCsegmentMap); - output = resultBO.map(); - memset(output, 0, 0x20000); - } catch (std::exception& e) { - std::stringstream msg; - msg << "Unable to create 128KB buffer for AIE Profile results. Cannot get AIE Profile info. " << e.what() << std::endl; - xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); - } + memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); + memTileEndEvents = memTileStartEvents; + + microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); + + tranxHandler = std::make_unique(); + + // Create debug buffer for AIE Profile results + auto context = metadata->getHwContext(); + uint32_t* output = nullptr; + std::map activeUCsegmentMap; + activeUCsegmentMap[0] = 0x20000; + try { + resultBO = xrt_core::bo_int::create_bo(context, 0x20000, xrt_core::bo_int::use_type::uc_debug); + xrt_core::bo_int::config_bo(resultBO, activeUCsegmentMap); + output = resultBO.map(); + memset(output, 0, 0x20000); + } catch (std::exception& e) { + std::stringstream msg; + msg << "Unable to create 128KB buffer for AIE Profile results. Cannot get AIE Profile info. 
" << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); } + } - void AieProfile_VE2Impl::updateDevice() { - bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); - if (!runtimeCounters) { - xrt_core::message::send(severity_level::warning, "XRT", - "AIE Profile Counters were not found for this design. Please specify " - "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " - "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " - "under \"AIE_profile_settings\" section in your xrt.ini."); - (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); - return; - } + void AieProfile_VE2Impl::updateDevice() { + bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); + if (!runtimeCounters) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile Counters were not found for this design. Please specify " + "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " + "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " + "under \"AIE_profile_settings\" section in your xrt.ini."); + (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); + return; + } + + // Build poll ASM/ELF after metrics are configured; submit is deferred to endPoll() (see plugin). + generatePollElf(); + } - // Build poll ASM/ELF after metrics are configured; submit is deferred to endPoll() (see plugin). 
- generatePollElf(); + // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings + bool + AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) + { + int counterId = 0; + bool runtimeCounters = false; + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + return false; } - // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings - bool - AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) - { - int counterId = 0; - bool runtimeCounters = false; - - xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); - XAie_Config cfg { - meta_config.hw_gen, - meta_config.base_address, - meta_config.column_shift, - meta_config.row_shift, - meta_config.num_rows, - meta_config.num_columns, - meta_config.shim_row, - meta_config.mem_row_start, - meta_config.mem_num_rows, - meta_config.aie_tile_row_start, - meta_config.aie_tile_num_rows, - {0} // PartProp - }; - - auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); - if (RC != XAIE_OK) { - xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); - return false; - } + const std::string tranxName = "AieProfileMetrics"; + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(severity_level::warning, "XRT", "Transaction Initialization Failed."); + return false; + } - const 
std::string tranxName = "AieProfileMetrics"; - if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { - xrt_core::message::send(severity_level::warning, "XRT", "Transaction Initialization Failed."); - return false; - } + auto hwGen = metadata->getHardwareGen(); + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + aie::displayColShiftInfo(startColShift); - auto hwGen = metadata->getHardwareGen(); - auto configChannel0 = metadata->getConfigChannel0(); - auto configChannel1 = metadata->getConfigChannel1(); - uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); - aie::displayColShiftInfo(startColShift); + for (int module = 0; module < metadata->getNumModules(); ++module) { + auto configMetrics = metadata->getConfigMetricsVec(module); + if (configMetrics.empty()) + continue; + + int numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; + XAie_ModuleType mod = aie::profile::getFalModuleType(module); + + // Iterate over tiles and metrics to configure all desired counters + for (auto& tileMetric : configMetrics) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col + startColShift; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); + if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) + type = module_type::dma; + + // Catch microcontroller event sets for MDM + if (module == static_cast(module_type::uc)) { + // Configure + auto events = microcontrollerEvents[metricSet]; + aie::profile::configMDMCounters(&aieDevInst, hwGen, col, row, events); + // Record + tile_type recordTile; + recordTile.col = col; + recordTile.row = row; + microcontrollerTileEvents[recordTile] = events; + runtimeCounters = true; + continue; + } - for (int module = 0; module < metadata->getNumModules(); 
++module) { - auto configMetrics = metadata->getConfigMetricsVec(module); - if (configMetrics.empty()) + // Ignore invalid types and inactive modules + // NOTE: Inactive core modules are configured when utilizing + // stream switch monitor ports to profile DMA channels + if (!aie::profile::isValidType(type, mod)) continue; - - int numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; - XAie_ModuleType mod = aie::profile::getFalModuleType(module); - - // Iterate over tiles and metrics to configure all desired counters - for (auto& tileMetric : configMetrics) { - auto& metricSet = tileMetric.second; - auto tile = tileMetric.first; - auto col = tile.col + startColShift; - auto row = tile.row; - auto subtype = tile.subtype; - auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); - if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) - type = module_type::dma; - - // Catch microcontroller event sets for MDM - if (module == static_cast(module_type::uc)) { - // Configure - auto events = microcontrollerEvents[metricSet]; - aie::profile::configMDMCounters(&aieDevInst, hwGen, col, row, events); - // Record - tile_type recordTile; - recordTile.col = col; - recordTile.row = row; - microcontrollerTileEvents[recordTile] = events; - runtimeCounters = true; + if ((type == module_type::dma) && !tile.active_memory) + continue; + if ((type == module_type::core) && !tile.active_core) { + if (metadata->getPairModuleIndex(metricSet, type) < 0) continue; - } + } - // Ignore invalid types and inactive modules - // NOTE: Inactive core modules are configured when utilizing - // stream switch monitor ports to profile DMA channels - if (!aie::profile::isValidType(type, mod)) + auto loc = XAie_TileLoc(col, row); + auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] + : ((type == module_type::dma) ? memoryStartEvents[metricSet] + : ((type == module_type::shim) ? 
shimStartEvents[metricSet] + : memTileStartEvents[metricSet])); + auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] + : ((type == module_type::dma) ? memoryEndEvents[metricSet] + : ((type == module_type::shim) ? shimEndEvents[metricSet] + : memTileEndEvents[metricSet])); + std::vector resetEvents = {}; + + int numCounters = 0; + auto numFreeCtr = static_cast(startEvents.size()); + + int numFreeCtrSS = numFreeCtr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + if (numFreeCtr < 2) { continue; - if ((type == module_type::dma) && !tile.active_memory) - continue; - if ((type == module_type::core) && !tile.active_core) { - if (metadata->getPairModuleIndex(metricSet, type) < 0) - continue; } + // We need to monitor single stream switch monitor port + // numFreeCtrSS = 1 ; + } - auto loc = XAie_TileLoc(col, row); - auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] - : ((type == module_type::dma) ? memoryStartEvents[metricSet] - : ((type == module_type::shim) ? shimStartEvents[metricSet] - : memTileStartEvents[metricSet])); - auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] - : ((type == module_type::dma) ? memoryEndEvents[metricSet] - : ((type == module_type::shim) ? shimEndEvents[metricSet] - : memTileEndEvents[metricSet])); - std::vector resetEvents = {}; - - int numCounters = 0; - auto numFreeCtr = static_cast(startEvents.size()); - - int numFreeCtrSS = numFreeCtr; - if (aie::profile::profileAPIMetricSet(metricSet)) { - if (numFreeCtr < 2) { - continue; - } - // We need to monitor single stream switch monitor port - // numFreeCtrSS = 1 ; - } + // Specify Sel0/Sel1 for memory tile events 21-44 + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 
1 : iter1->second; + std::vector channels = {channel0, channel1}; // TODO: do we also add channel 2 & 3 here? + + // Modify events as needed + aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); + endEvents = startEvents; - // Specify Sel0/Sel1 for memory tile events 21-44 - auto iter0 = configChannel0.find(tile); - auto iter1 = configChannel1.find(tile); - uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; - uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; - std::vector channels = {channel0, channel1}; // TODO: do we also add channel 2 & 3 here? - - // Modify events as needed - aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); - endEvents = startEvents; + // TBD : Placeholder to configure AIE core with required profile counters. + configEventSelections(loc, type, metricSet, channels); + // TBD : Placeholder to configure shim tile with required profile counters. - // TBD : Placeholder to configure AIE core with required profile counters. - configEventSelections(loc, type, metricSet, channels); - // TBD : Placeholder to configure shim tile with required profile counters. 
+ // TODO: support for VE2 XDNA for profile API metric sets + { + // // Identify the profiling API metric sets and configure graph events + // if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { + // XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; + // bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, + // metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); + // if (status) { + // graphIteratorBrodcastChannelEvent = bcEvent; + // graphItrBroadcastConfigDone = true; + // } + // } + + // if (aie::profile::profileAPIMetricSet(metricSet)) { + // // Re-use the existing port running event for both the counters + // startEvents[startEvents.size()-1] = startEvents[0]; + + // // Use start events as End events for profile counters if threshold is not provided + // endEvents[endEvents.size()-1] = endEvents[0]; + + // // Use the set values broadcast events for the reset of counter + // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; + // if (type == module_type::shim) { + // if (metadata->getUseGraphIterator()) + // resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; + // else + // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; + // } + // } + } + + // Request and configure all available counters for this tile + for (int i=0; i < numFreeCtr; ++i) { + auto startEvent = startEvents.at(i); + auto endEvent = endEvents.at(i); + auto resetEvent = XAIE_EVENT_NONE_CORE; + auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); + // For metric sets with multiple stream-switch ports, use modulo for channel mapping + uint8_t channelNum = portnum % 2; + uint8_t channel = (channelNum == 0) ? 
channel0 : channel1; + + // Configure group event before reserving and starting counter + aie::profile::configGroupEvents(&aieDevInst, loc, mod, type, metricSet, startEvent, channel); // TODO: support for VE2 XDNA for profile API metric sets - { - // // Identify the profiling API metric sets and configure graph events - // if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { - // XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; - // bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, - // metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); - // if (status) { - // graphIteratorBrodcastChannelEvent = bcEvent; - // graphItrBroadcastConfigDone = true; - // } - // } - - // if (aie::profile::profileAPIMetricSet(metricSet)) { - // // Re-use the existing port running event for both the counters - // startEvents[startEvents.size()-1] = startEvents[0]; - - // // Use start events as End events for profile counters if threshold is not provided - // endEvents[endEvents.size()-1] = endEvents[0]; - - // // Use the set values broadcast events for the reset of counter - // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; - // if (type == module_type::shim) { - // if (metadata->getUseGraphIterator()) - // resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; - // else - // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; - // } - // } - } + // // Configure the profile counters for profile APIs metric sets. 
+ // std::shared_ptr perfCounter = nullptr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + // resetEvent = resetEvents.at(i); + // threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); + // threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); - // Request and configure all available counters for this tile - for (int i=0; i < numFreeCtr; ++i) { - auto startEvent = startEvents.at(i); - auto endEvent = endEvents.at(i); - auto resetEvent = XAIE_EVENT_NONE_CORE; - auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); - // For metric sets with multiple stream-switch ports, use modulo for channel mapping - uint8_t channelNum = portnum % 2; - uint8_t channel = (channelNum == 0) ? channel0 : channel1; - - // Configure group event before reserving and starting counter - aie::profile::configGroupEvents(&aieDevInst, loc, mod, type, metricSet, startEvent, channel); - - // TODO: support for VE2 XDNA for profile API metric sets - // // Configure the profile counters for profile APIs metric sets. 
- // std::shared_ptr perfCounter = nullptr; - if (aie::profile::profileAPIMetricSet(metricSet)) { - // resetEvent = resetEvents.at(i); - // threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); - // threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); - - // if (i==0 && threshold>0) - // endEvent = XAIE_EVENT_PERF_CNT_1_PL; - - // if (i==1 && threshold == 0) - // continue; + // if (i==0 && threshold>0) + // endEvent = XAIE_EVENT_PERF_CNT_1_PL; - // XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; - // perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, - // mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), - // threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); - // if (!perfCounter) - // continue; - // perfCounters.push_back(perfCounter); - } else { - // No resource manager, so manually manage the counters - RC = XAie_PerfCounterReset(&aieDevInst, loc, mod, i); - if (RC != XAIE_OK) { - xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Reset Failed."); - break; - } - RC = XAie_PerfCounterControlSet(&aieDevInst, loc, mod, i, startEvent, endEvent); - if (RC != XAIE_OK) { - xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Set Failed."); - break; - } + // if (i==1 && threshold == 0) + // continue; + + // XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; + // perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, + // mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), + // threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); + // if (!perfCounter) + // continue; + // perfCounters.push_back(perfCounter); + } else { + // No resource manager, so manually manage the counters + RC = 
XAie_PerfCounterReset(&aieDevInst, loc, mod, i); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Reset Failed."); + break; } - - if (aie::isStreamSwitchPortEvent(startEvent)) - configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel, startEvent); - - // Generate user_event_1 for byte count metric set after configuration - if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { - XAie_LocType tileloc = XAie_TileLoc(tile.col, tile.row); - //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to - // to transition the FSM from Idle->State0->State1. - // eventC = Port Running and eventD = stop event (counter event). - XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); - XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + RC = XAie_PerfCounterControlSet(&aieDevInst, loc, mod, i, startEvent, endEvent); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Set Failed."); + break; } + } - // Convert enums to physical event IDs for reporting purposes - uint16_t tmpStart; - uint16_t tmpEnd; - XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, startEvent, &tmpStart); - XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, endEvent, &tmpEnd); - uint16_t phyStartEvent = tmpStart + aie::profile::getCounterBase(type); - uint16_t phyEndEvent = tmpEnd + aie::profile::getCounterBase(type); - auto payload = channel0; - - // Store counter info in database - std::string counterName = "AIE Counter " + std::to_string(counterId); - (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, - phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), - metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 
0 : tile.stream_ids[0])); - - auto tileOffset = XAie_GetTileAddr(&aieDevInst, row, col); - std::vector Regs = regValues.at(type); - op_profile_data.emplace_back((u32)(Regs[i] + tileOffset)); - - std::vector values; - values.insert(values.end(), {col, row, phyStartEvent, phyEndEvent, resetEvent, 0, 0, payload}); - outputValues.push_back(values); - - counterId++; - numCounters++; - } // numFreeCtr - - std::stringstream msg; - msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col - << "," << +row << ") using metric set " << metricSet << "."; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - numTileCounters[numCounters]++; - } // configMetrics - - // Report counters reserved per tile - { - std::stringstream msg; - msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; - for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { - if (numTileCounters[n] == 0) - continue; - msg << n << ": " << numTileCounters[n] << " tiles, "; - (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); + if (aie::isStreamSwitchPortEvent(startEvent)) + configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel, startEvent); + + // Generate user_event_1 for byte count metric set after configuration + if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { + XAie_LocType tileloc = XAie_TileLoc(tile.col, tile.row); + //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to + // to transition the FSM from Idle->State0->State1. + // eventC = Port Running and eventD = stop event (counter event). 
+ XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); } - xrt_core::message::send(severity_level::info, "XRT", msg.str().substr(0, msg.str().size()-2)); - } - runtimeCounters = true; - } // modules + // Convert enums to physical event IDs for reporting purposes + uint16_t tmpStart; + uint16_t tmpEnd; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, startEvent, &tmpStart); + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, endEvent, &tmpEnd); + uint16_t phyStartEvent = tmpStart + aie::profile::getCounterBase(type); + uint16_t phyEndEvent = tmpEnd + aie::profile::getCounterBase(type); + auto payload = channel0; + + // Store counter info in database + std::string counterName = "AIE Counter " + std::to_string(counterId); + (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, + phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), + metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 
0 : tile.stream_ids[0])); + + auto tileOffset = XAie_GetTileAddr(&aieDevInst, row, col); + std::vector Regs = regValues.at(type); + op_profile_data.emplace_back((u32)(Regs[i] + tileOffset)); - // Submit transaction if we were able to configure - if (runtimeCounters) { - auto hwCtx = metadata->getHwContext(); - tranxHandler->submitTransaction(&aieDevInst, hwCtx); - xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Profiling."); - } + std::vector values; + values.insert(values.end(), {col, row, phyStartEvent, phyEndEvent, resetEvent, 0, 0, payload}); + outputValues.push_back(values); + + counterId++; + numCounters++; + } // numFreeCtr - return runtimeCounters; - } + std::stringstream msg; + msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col + << "," << +row << ") using metric set " << metricSet << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + numTileCounters[numCounters]++; + } // configMetrics - /**************************************************************************** - * Configure selection index to monitor channel numbers - * NOTE: In NPU3, this is required in memory and interface tiles - ***************************************************************************/ - void - AieProfile_VE2Impl::configEventSelections(const XAie_LocType loc, const module_type type, - const std::string metricSet, std::vector& channels) - { - if ((type != module_type::mem_tile) && (type != module_type::shim)) - return; + // Report counters reserved per tile + { + std::stringstream msg; + msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; + for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { + if (numTileCounters[n] == 0) + continue; + msg << n << ": " << numTileCounters[n] << " tiles, "; + (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); + } + xrt_core::message::send(severity_level::info, "XRT", 
msg.str().substr(0, msg.str().size()-2)); + } - XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; - uint8_t numChannels = NUM_CHANNEL_SELECTS; + runtimeCounters = true; + } // modules - if (aie::isDebugVerbosity()) { - std::string tileType = (type == module_type::shim) ? "interface" : "memory"; - std::string dmaType = (dmaDir == DMA_S2MM) ? "S2MM" : "MM2S"; - std::stringstream channelsStr; - std::copy(channels.begin(), channels.end(), std::ostream_iterator(channelsStr, ", ")); + // Submit transaction if we were able to configure + if (runtimeCounters) { + auto hwCtx = metadata->getHwContext(); + tranxHandler->submitTransaction(&aieDevInst, hwCtx); + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Profiling."); + } - std::string msg = "Configuring event selections for " + tileType + " tile DMA " - + dmaType + " channels " + channelsStr.str(); - xrt_core::message::send(severity_level::debug, "XRT", msg); - } - - for (uint8_t c = 0; c < numChannels; ++c) - XAie_EventSelectDmaChannel(&aieDevInst, loc, c, dmaDir, channels.at(c)); + return runtimeCounters; + } + + /**************************************************************************** + * Configure selection index to monitor channel numbers + * NOTE: In NPU3, this is required in memory and interface tiles + ***************************************************************************/ + void + AieProfile_VE2Impl::configEventSelections(const XAie_LocType loc, const module_type type, + const std::string metricSet, std::vector& channels) + { + if ((type != module_type::mem_tile) && (type != module_type::shim)) + return; + + XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; + uint8_t numChannels = NUM_CHANNEL_SELECTS; + + if (aie::isDebugVerbosity()) { + std::string tileType = (type == module_type::shim) ? "interface" : "memory"; + std::string dmaType = (dmaDir == DMA_S2MM) ? 
"S2MM" : "MM2S"; + std::stringstream channelsStr; + std::copy(channels.begin(), channels.end(), std::ostream_iterator(channelsStr, ", ")); + + std::string msg = "Configuring event selections for " + tileType + " tile DMA " + + dmaType + " channels " + channelsStr.str(); + xrt_core::message::send(severity_level::debug, "XRT", msg); } - /**************************************************************************** - * Configure stream switch ports for monitoring purposes - * NOTE: Used to monitor streams: trace, interfaces, and memory tiles - ***************************************************************************/ - void - AieProfile_VE2Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, - const module_type& type, const std::string& metricSet, - const uint8_t channel, const XAie_Events startEvent) - { - // Hardcoded - uint8_t rscId = 0; - uint8_t portnum = aie::getPortNumberFromEvent(startEvent); - // AIE Tiles (e.g., trace streams) - if (type == module_type::core) { - auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? - XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; - XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); - std::stringstream msg; - msg << "Configured core tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") - << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - return; - } + for (uint8_t c = 0; c < numChannels; ++c) + XAie_EventSelectDmaChannel(&aieDevInst, loc, c, dmaDir, channels.at(c)); + } - // Interface tiles (e.g., PLIO, GMIO) - if (type == module_type::shim) { - // NOTE: skip configuration of extra ports for tile if stream_ids are not available. - if (portnum >= tile.stream_ids.size()) - return; - // Grab slave/master and stream ID - // NOTE: stored in getTilesForProfiling() above - auto slaveOrMaster = (tile.is_master_vec.at(portnum) == 0) ? 
XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; - uint8_t streamPortId = static_cast(tile.stream_ids.at(portnum)); - - // auto streamPortId = tile.stream_id; - // Define stream switch port to monitor interface - XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, SOUTH, streamPortId); - std::stringstream msg; - msg << "Configured shim tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and stream port id " << (int)streamPortId << "."; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + /**************************************************************************** + * Configure stream switch ports for monitoring purposes + * NOTE: Used to monitor streams: trace, interfaces, and memory tiles + ***************************************************************************/ + void + AieProfile_VE2Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, + const module_type& type, const std::string& metricSet, + const uint8_t channel, const XAie_Events startEvent) + { + // Hardcoded + uint8_t rscId = 0; + uint8_t portnum = aie::getPortNumberFromEvent(startEvent); + // AIE Tiles (e.g., trace streams) + if (type == module_type::core) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? + XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured core tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") + << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } + + // Interface tiles (e.g., PLIO, GMIO) + if (type == module_type::shim) { + // NOTE: skip configuration of extra ports for tile if stream_ids are not available. 
+ if (portnum >= tile.stream_ids.size()) return; - } + // Grab slave/master and stream ID + // NOTE: stored in getTilesForProfiling() above + auto slaveOrMaster = (tile.is_master_vec.at(portnum) == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + uint8_t streamPortId = static_cast(tile.stream_ids.at(portnum)); + + // auto streamPortId = tile.stream_id; + // Define stream switch port to monitor interface + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, SOUTH, streamPortId); + std::stringstream msg; + msg << "Configured shim tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and stream port id " << (int)streamPortId << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } - if (type == module_type::mem_tile) { - auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? - XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; - XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); - std::stringstream msg; - msg << "Configured mem tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - } + if (type == module_type::mem_tile) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? + XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured mem tile " << (aie::isInputSet(type,metricSet) ? 
"S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); } + } - void AieProfile_VE2Impl::generatePollElf() - { - auto context = metadata->getHwContext(); + void AieProfile_VE2Impl::generatePollElf() + { + auto context = metadata->getHwContext(); - std::string tranxName = "AieProfilePoll"; - if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", - "Unable to initialize transaction for AIE profile polling."); - return; - } - for (u32 i = 0; i < op_profile_data.size(); i++) { - XAie_SaveRegister(&aieDevInst, op_profile_data[i], i); - } - if (!tranxHandler->completeASM(&aieDevInst)) { - xrt_core::message::send(severity_level::warning, "XRT", - "AIE Profile: Failed to finalize poll ASM."); - return; - } - if (!tranxHandler->generateELF()) { - xrt_core::message::send(severity_level::warning, "XRT", - "AIE Profile: Failed to generate poll ELF."); - return; - } - finishedPoll = false; - xrt_core::message::send(severity_level::debug, "XRT", - "AIE Profile: Poll ASM/ELF ready (submit deferred to teardown)."); + std::string tranxName = "AieProfilePoll"; + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Unable to initialize transaction for AIE profile polling."); + return; } + for (u32 i = 0; i < op_profile_data.size(); i++) { + XAie_SaveRegister(&aieDevInst, op_profile_data[i], i); + } + if (!tranxHandler->completeASM(&aieDevInst)) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile: Failed to finalize poll ASM."); + return; + } + if (!tranxHandler->generateELF()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile: Failed to generate poll ELF."); + return; + } + finishedPoll = false; + 
xrt_core::message::send(severity_level::debug, "XRT", + "AIE Profile: Poll ASM/ELF ready (submit deferred to teardown)."); + } - void AieProfile_VE2Impl::poll(const uint64_t id) - { - // Wait until xclbin has been loaded and device has been updated in database - if (!(db->getStaticInfo().isDeviceReady(id))) - return; - - if (finishedPoll) - return; + void AieProfile_VE2Impl::poll(const uint64_t id) + { + // Wait until xclbin has been loaded and device has been updated in database + if (!(db->getStaticInfo().isDeviceReady(id))) + return; - if (db->infoAvailable(xdp::info::ml_timeline)) { - db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); - xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); - } + if (finishedPoll) + return; - auto context = metadata->getHwContext(); - if (!tranxHandler->submitELF(context)) - return; + if (db->infoAvailable(xdp::info::ml_timeline)) { + db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + } - resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - uint32_t* output = resultBO.map(); + auto context = metadata->getHwContext(); + if (!tranxHandler->submitELF(context)) + return; - // Get timestamp in milliseconds - double timestamp = xrt_core::time_ns() / 1.0e6; + resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + uint32_t* output = resultBO.map(); - //**************************TODO: Remove this after testing *************************** - for (u32 i = 0; i < op_profile_data.size() + 12 * 3; i++) { - std::stringstream msg; - msg << "Counter address/values: " << output[2 * i] << " - " << output[2 * i + 1]; - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); - } + // Get timestamp in milliseconds + double timestamp = xrt_core::time_ns() / 1.0e6; - // Process counter values and add to database - for (u32 i = 0; i < op_profile_data.size(); i++) { - 
// Update counter value in outputValues and add to database - std::vector values = outputValues[i]; - values[5] = static_cast(output[2 * i + 1]); // Write counter value - db->getDynamicInfo().addAIESample(id, timestamp, values); - } + //**************************TODO: Remove this after testing *************************** + for (u32 i = 0; i < op_profile_data.size() + 12 * 3; i++) { + std::stringstream msg; + msg << "Counter address/values: " << output[2 * i] << " - " << output[2 * i + 1]; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + } - finishedPoll = true; + // Process counter values and add to database + for (u32 i = 0; i < op_profile_data.size(); i++) { + // Update counter value in outputValues and add to database + std::vector values = outputValues[i]; + values[5] = static_cast(output[2 * i + 1]); // Write counter value + db->getDynamicInfo().addAIESample(id, timestamp, values); } - bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) {} - uint64_t AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, const tile_type& tile, const module_type type, uint8_t column, uint8_t row, uint16_t startEvent, const std::string metricSet, const uint8_t channel, uint8_t logicalPortIndex) {} - uint64_t AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) {} - void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, const tile_type& tile, XAie_ModuleType mod) {} - void AieProfile_VE2Impl::startPoll(const uint64_t id) {} - void AieProfile_VE2Impl::continuePoll(const uint64_t id) {} - void AieProfile_VE2Impl::endPoll() {} - void AieProfile_VE2Impl::freeResources() {} - void AieProfile_VE2Impl::displayAdfAPIResults() {} + finishedPoll = true; } + + bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) {} + uint64_t AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, const tile_type& tile, const module_type 
type, uint8_t column, uint8_t row, uint16_t startEvent, const std::string metricSet, const uint8_t channel, uint8_t logicalPortIndex) {} + uint64_t AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) {} + void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, const tile_type& tile, XAie_ModuleType mod) {} + void AieProfile_VE2Impl::startPoll(const uint64_t id) {} + void AieProfile_VE2Impl::continuePoll(const uint64_t id) {} + void AieProfile_VE2Impl::endPoll() {} + void AieProfile_VE2Impl::freeResources() {} + void AieProfile_VE2Impl::displayAdfAPIResults() {} +} // END XDNA flow #endif From e96c6cf261dfbad1c67d5ababa4978a268d7ae23 Mon Sep 17 00:00:00 2001 From: snigupta Date: Thu, 23 Apr 2026 15:45:27 -0600 Subject: [PATCH 14/19] Final fixes for aie_profile Signed-off-by: snigupta --- profile/plugin/aie_profile/ve2/aie_profile.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/profile/plugin/aie_profile/ve2/aie_profile.cpp b/profile/plugin/aie_profile/ve2/aie_profile.cpp index a6f7e92c..7b27e8d8 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.cpp +++ b/profile/plugin/aie_profile/ve2/aie_profile.cpp @@ -31,6 +31,9 @@ #include "core/common/api/hw_context_int.h" #include "shim_ve2/xdna_hwctx.h" +#include "core/common/api/bo_int.h" +#include "xrt/xrt_bo.h" + #ifdef XDP_VE2_ZOCL_BUILD namespace { static void* fetchAieDevInst(void* devHandle) @@ -809,7 +812,7 @@ namespace xdp { return; } - // Build poll ASM/ELF after metrics are configured; submit is deferred to endPoll() (see plugin). 
+ // Build poll ASM/ELF after metrics are configured; submit is deferred to endPoll() generatePollElf(); } From 71ba1ca199ceffe3ed5a9402ecdd1344be158c55 Mon Sep 17 00:00:00 2001 From: snigupta Date: Thu, 23 Apr 2026 16:01:40 -0600 Subject: [PATCH 15/19] Change all client headers to only use aie-codegen Signed-off-by: snigupta --- profile/device/aie_trace/client/aie_trace_offload_client.h | 5 ----- profile/plugin/aie_debug/client/aie_debug.h | 5 ----- profile/plugin/aie_halt/clientDev/aie_halt.cpp | 5 ----- profile/plugin/aie_pc/clientDev/aie_pc.cpp | 6 ------ profile/plugin/aie_pc/clientDev/aie_pc.h | 5 ----- profile/plugin/aie_profile/client/aie_profile.h | 5 ----- profile/plugin/aie_trace/client/aie_trace.h | 5 ----- 7 files changed, 36 deletions(-) diff --git a/profile/device/aie_trace/client/aie_trace_offload_client.h b/profile/device/aie_trace/client/aie_trace_offload_client.h index 513eaaf1..184effc0 100644 --- a/profile/device/aie_trace/client/aie_trace_offload_client.h +++ b/profile/device/aie_trace/client/aie_trace_offload_client.h @@ -28,13 +28,8 @@ extern "C" { -#ifdef XDP_USE_AIE_CODEGEN #include #include -#else - #include - #include -#endif } namespace xdp { diff --git a/profile/plugin/aie_debug/client/aie_debug.h b/profile/plugin/aie_debug/client/aie_debug.h index 652bf2e1..51c94eea 100755 --- a/profile/plugin/aie_debug/client/aie_debug.h +++ b/profile/plugin/aie_debug/client/aie_debug.h @@ -17,13 +17,8 @@ #include "core/include/xrt/xrt_hw_context.h" extern "C" { -#ifdef XDP_USE_AIE_CODEGEN #include #include -#else - #include - #include -#endif } namespace xdp { diff --git a/profile/plugin/aie_halt/clientDev/aie_halt.cpp b/profile/plugin/aie_halt/clientDev/aie_halt.cpp index bbedcd00..13fd926c 100644 --- a/profile/plugin/aie_halt/clientDev/aie_halt.cpp +++ b/profile/plugin/aie_halt/clientDev/aie_halt.cpp @@ -34,13 +34,8 @@ #include "core/include/xclbin.h" extern "C" { -#ifdef XDP_USE_AIE_CODEGEN #include #include -#else - #include - #include -#endif } 
#ifdef _WIN32 diff --git a/profile/plugin/aie_pc/clientDev/aie_pc.cpp b/profile/plugin/aie_pc/clientDev/aie_pc.cpp index d7674479..9c0ef0a8 100644 --- a/profile/plugin/aie_pc/clientDev/aie_pc.cpp +++ b/profile/plugin/aie_pc/clientDev/aie_pc.cpp @@ -38,15 +38,9 @@ #include "core/include/xclbin.h" extern "C" { -#ifdef XDP_USE_AIE_CODEGEN #include #include #include -#else - #include - #include - #include -#endif } namespace xdp { diff --git a/profile/plugin/aie_pc/clientDev/aie_pc.h b/profile/plugin/aie_pc/clientDev/aie_pc.h index e917fc98..20810e6d 100644 --- a/profile/plugin/aie_pc/clientDev/aie_pc.h +++ b/profile/plugin/aie_pc/clientDev/aie_pc.h @@ -21,13 +21,8 @@ #include "xdp/profile/plugin/aie_pc/aie_pc_impl.h" extern "C" { -#ifdef XDP_USE_AIE_CODEGEN #include #include -#else - #include - #include -#endif } #include diff --git a/profile/plugin/aie_profile/client/aie_profile.h b/profile/plugin/aie_profile/client/aie_profile.h index 0c0775ff..6cc355aa 100644 --- a/profile/plugin/aie_profile/client/aie_profile.h +++ b/profile/plugin/aie_profile/client/aie_profile.h @@ -13,13 +13,8 @@ #include "xdp/profile/device/common/client_transaction.h" extern "C" { -#ifdef XDP_USE_AIE_CODEGEN #include #include -#else -#include -#include -#endif } namespace xdp { diff --git a/profile/plugin/aie_trace/client/aie_trace.h b/profile/plugin/aie_trace/client/aie_trace.h index ba548303..9de51e17 100644 --- a/profile/plugin/aie_trace/client/aie_trace.h +++ b/profile/plugin/aie_trace/client/aie_trace.h @@ -12,13 +12,8 @@ #include "xdp/profile/device/common/client_transaction.h" extern "C" { -#ifdef XDP_USE_AIE_CODEGEN #include #include -#else - #include - #include -#endif } namespace xdp { From 386d5c220ec713e0ecab629fc47f837c752c886f Mon Sep 17 00:00:00 2001 From: snigupta Date: Fri, 24 Apr 2026 12:15:36 -0600 Subject: [PATCH 16/19] TODO: aie_status aie-codegen support Signed-off-by: snigupta --- profile/plugin/aie_status/CMakeLists.txt | 17 +++++++++++------ 1 file changed, 11 
insertions(+), 6 deletions(-) diff --git a/profile/plugin/aie_status/CMakeLists.txt b/profile/plugin/aie_status/CMakeLists.txt index aa9938f1..601e2ae7 100644 --- a/profile/plugin/aie_status/CMakeLists.txt +++ b/profile/plugin/aie_status/CMakeLists.txt @@ -17,12 +17,17 @@ file(GLOB AIE_STATUS_PLUGIN_FILES if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_status_plugin SHARED ${AIE_STATUS_PLUGIN_FILES}) add_dependencies(xdp_aie_status_plugin xdp_core) - target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core aie_codegen) - target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") - target_include_directories(xdp_aie_status_plugin PRIVATE - ${CMAKE_SOURCE_DIR}/src - ${AIEFAL_DIR} - ) + # TODO: add aie_codegen to aie_status once XAie_Read32 support is there + #target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core aie_codegen) + #target_link_options(xdp_aie_status_plugin PRIVATE -Wl,-Bsymbolic) + #target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") + #target_include_directories(xdp_aie_status_plugin PRIVATE + # ${CMAKE_SOURCE_DIR}/src + # ${AIEFAL_DIR} + #) + target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core xaiengine) + target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") + target_include_directories(xdp_aie_status_plugin PRIVATE ${CMAKE_SOURCE_DIR}/src) set_target_properties(xdp_aie_status_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_status_plugin From ec06c5f81244122b0fa70c7f8c74cb90f982d815 Mon Sep 17 00:00:00 2001 From: snigupta Date: Fri, 24 Apr 2026 13:31:09 -0600 Subject: [PATCH 17/19] Bsymbolic flag explanation Signed-off-by: snigupta --- profile/plugin/aie_profile/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/profile/plugin/aie_profile/CMakeLists.txt b/profile/plugin/aie_profile/CMakeLists.txt index 
4be2bb55..3d7f78ab 100644 --- a/profile/plugin/aie_profile/CMakeLists.txt +++ b/profile/plugin/aie_profile/CMakeLists.txt @@ -86,6 +86,9 @@ else() add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES} "${PROFILE_DIR}/device/common/ve2/ve2_transaction.cpp") add_dependencies(xdp_aie_profile_plugin_xdna xdp_core xrt_coreutil) target_link_libraries(xdp_aie_profile_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen aiebu_library_objects) + # -Bsymbolic: ensures XAie_* calls within this plugin resolve to the + # statically-linked aie_codegen rather than being interposed by the + # system libxaiengine.so.3 (required by libxrt_core.so.2) target_link_options(xdp_aie_profile_plugin_xdna PRIVATE -Wl,-Bsymbolic) target_compile_definitions(xdp_aie_profile_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") target_include_directories(xdp_aie_profile_plugin_xdna PRIVATE From edcbb80ee2adad3c84a1b32afd65effd3af4bc9f Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 29 Apr 2026 15:49:58 -0600 Subject: [PATCH 18/19] Changes for aie_trace Signed-off-by: snigupta --- .../aie_trace/ve2/aie_trace_offload_ve2.cpp | 211 ++- .../aie_trace/ve2/aie_trace_offload_ve2.h | 48 +- profile/device/common/ve2/ve2_transaction.cpp | 27 +- profile/plugin/aie_trace/CMakeLists.txt | 31 +- .../aie_trace/aie_trace_offload_manager.cpp | 44 +- .../aie_trace/aie_trace_offload_manager.h | 18 +- profile/plugin/aie_trace/aie_trace_plugin.cpp | 15 +- profile/plugin/aie_trace/ve2/aie_trace.cpp | 1363 ++++++++++++++++- profile/plugin/aie_trace/ve2/aie_trace.h | 59 +- 9 files changed, 1709 insertions(+), 107 deletions(-) diff --git a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp index f2c56981..b1bcdf46 100644 --- a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp +++ 
b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp @@ -1,3 +1,4 @@ +// VE2 class /** * Copyright (C) 2019-2022 Xilinx, Inc * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. - All rights reserved @@ -17,12 +18,7 @@ #define XDP_PLUGIN_SOURCE -#ifdef XDP_USE_AIE_CODEGEN -extern "C" { -#include -} -#endif - +#include #include #include "core/include/xrt.h" @@ -44,9 +40,6 @@ extern "C" { #include #include - - - namespace xdp { @@ -57,7 +50,12 @@ AIETraceOffload::AIETraceOffload , bool isPlio , uint64_t totalSize , uint64_t numStrm +#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) , XAie_DevInst* devInstance +#elif defined(XDP_VE2_BUILD) + , xrt::hw_context ctx + , std::shared_ptr md +#endif ) : deviceHandle(handle) , deviceId(id) @@ -72,8 +70,12 @@ AIETraceOffload::AIETraceOffload , offloadStatus(AIEOffloadThreadStatus::IDLE) , mEnCircularBuf(false) , mCircularBufOverwrite(false) +#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) , devInst(devInstance) - +#elif defined(XDP_VE2_BUILD) + , context(ctx) + , metadata(md) +#endif { bufAllocSz = deviceIntf->getAlignedTraceBufSize(totalSz, static_cast(numStream)); @@ -91,6 +93,7 @@ AIETraceOffload::~AIETraceOffload() offloadThread.join(); } +#ifdef XDP_VE2_ZOCL_BUILD bool AIETraceOffload::initReadTrace() { // Submit nop.elf to initialize AIE array before BD configuration @@ -167,8 +170,8 @@ bool AIETraceOffload::initReadTrace() // Compute BD: use metadata value if set, otherwise channelNumber * 4 uint16_t bdNum = (traceGMIO->bufferDescriptorId != UINT16_MAX) - ? traceGMIO->bufferDescriptorId - : channelNumber * 4; + ? 
traceGMIO->bufferDescriptorId + : channelNumber * 4; std::stringstream bdMsg; bdMsg << "AIE Trace: Using BD " << bdNum << " for channel " << (int)channelNumber << " on shim column " << (int)traceGMIO->shimColumn; @@ -183,7 +186,116 @@ bool AIETraceOffload::initReadTrace() bufferInitialized = true; return bufferInitialized; } +#else // XDNA flow +bool AIETraceOffload::initReadTrace() +{ + // TODO: is this only zocl specific or do we need it for XDNA as well? + // Submit nop.elf to initialize AIE array before BD configuration + if (!aie::submitNopElf(deviceHandle)) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "Failed to submit nop.elf. AIE trace configuration will not proceed."); + return false; + } + + xrt_core::message::send(severity_level::info, "XRT", "Starting configuration for VE2."); + + buffers.clear(); + buffers.resize(numStream); + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg{ + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE TRACE OFFLOAD: AIE Driver Initialization Failed."); + return false; + } + + tranxHandler = std::make_unique(); + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceOffload")) + return false; + + gmioDMAInsts.clear(); + gmioDMAInsts.resize(numStream); + + for (uint64_t i = 0; i < numStream ; ++i) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Allocating trace buffer of size " + std::to_string(bufAllocSz) + " for AIE Stream " + + std::to_string(i)); + xrt_bos.emplace_back(xrt::bo(context.get_device(), 
bufAllocSz, + XRT_BO_FLAGS_HOST_ONLY, tranxHandler->getGroupID(0, context))); + + buffers[i].bufId = xrt_bos.size(); + if (!buffers[i].bufId) { + bufferInitialized = false; + return bufferInitialized; + } + + if (!xrt_bos.empty()) { + auto bo_map = xrt_bos.back().map(); + memset(bo_map, 0, bufAllocSz); + } + + VPDatabase* db = VPDatabase::Instance(); + TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); + + // channelNumber: (0-S2MM0,1-S2MM1,2-MM2S0,3-MM2S1) + // Enable shim DMA channel, need to start first so the status is correct + uint16_t channelNumber = (traceGMIO->channelNumber > 1) ? (traceGMIO->channelNumber - 2) : traceGMIO->channelNumber; + XAie_DmaDirection dir = (traceGMIO->channelNumber > 1) ? DMA_MM2S : DMA_S2MM; + + gmioDMAInsts[i].gmioTileLoc = XAie_TileLoc(traceGMIO->shimColumn, 0); + + RC = XAie_DmaDescInit(&aieDevInst, &(gmioDMAInsts[i].shimDmaInst), gmioDMAInsts[i].gmioTileLoc); + RC = XAie_DmaChannelEnable(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir); + RC = XAie_DmaSetAxi(&(gmioDMAInsts[i].shimDmaInst), 0, traceGMIO->burstLength, 0, 0, 0); + + // TODO: get board-specific values + // // TODO: XAie_DmaSetAddrLen like this? + RC = XAie_DmaSetAddrLen(&(gmioDMAInsts[i].shimDmaInst), xrt_bos[i].address(), static_cast(bufAllocSz)); + // // TODO: or this? + // char* vaddr = reinterpret_cast(mmap(NULL, bufAllocSz, PROT_READ | PROT_WRITE, MAP_SHARED, boExportHandle, 0)); + // RC = XAie_DmaSetAddrLen(&(gmioDMAInsts[i].shimDmaInst), (uint64_t)vaddr, bufAllocSz); + + XAie_DmaEnableBd(&(gmioDMAInsts[i].shimDmaInst)); + + uint16_t bdNum = (traceGMIO->bufferDescriptorId != UINT16_MAX) ? 
traceGMIO->bufferDescriptorId : channelNumber * 4; + std::stringstream bdMsg; + bdMsg << "AIE Trace: Using BD " << bdNum << " for channel " << (int)channelNumber << " on shim column " << (int)traceGMIO->shimColumn; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", bdMsg.str()); + + // Write to shim DMA BD AxiMM registers + XAie_DmaWriteBd(&aieDevInst, &(gmioDMAInsts[i].shimDmaInst), gmioDMAInsts[i].gmioTileLoc, bdNum); + + XAie_DmaChannelPushBdToQueue(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir, bdNum); + + if (!tranxHandler->submitTransaction(&aieDevInst, context)) + return false; + + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Trace Offloading VE2."); + } + + bufferInitialized = true; + return bufferInitialized; +} +#endif +// TODO: NPU3 does not have lines 199-213. why? + +#ifdef XDP_VE2_ZOCL_BUILD void AIETraceOffload::endReadTrace() { // reset @@ -193,7 +305,7 @@ void AIETraceOffload::endReadTrace() if (isPLIO) { deviceIntf->resetAIETs2mm(i); -// deviceIntf->freeTraceBuf(b.bufId); + // deviceIntf->freeTraceBuf(b.bufId); } else { VPDatabase* db = VPDatabase::Instance(); TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); @@ -210,6 +322,29 @@ void AIETraceOffload::endReadTrace() } bufferInitialized = false; } +#else // XDNA +void AIETraceOffload::endReadTrace() +{ + // reset + for (uint64_t i = 0; i < numStream ; ++i) { + if (!buffers[i].bufId) + continue; + + VPDatabase* db = VPDatabase::Instance(); + TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); + + // channelNumber: (0-S2MM0,1-S2MM1,2-MM2S0,3-MM2S1) + // Enable shim DMA channel, need to start first so the status is correct + uint16_t channelNumber = (traceGMIO->channelNumber > 1) ? (traceGMIO->channelNumber - 2) : traceGMIO->channelNumber; + XAie_DmaDirection dir = (traceGMIO->channelNumber > 1) ? 
DMA_MM2S : DMA_S2MM; + + XAie_DmaChannelDisable(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir); + + buffers[i].bufId = 0; + } + bufferInitialized = false; +} +#endif void AIETraceOffload::readTraceGMIO(bool final) { @@ -231,8 +366,13 @@ void AIETraceOffload::readTraceGMIO(bool final) } } +// TODO: only for zocl right now since xdna does not support plio right now, and this function is only for plio void AIETraceOffload::readTracePLIO(bool final) { + #if defined(XDP_VE2_BUILD) && ! defined(XDP_VE2_ZOCL_BUILD) + return; + #endif + if (mCircularBufOverwrite) return; @@ -316,6 +456,7 @@ void AIETraceOffload::readTracePLIO(bool final) } } +#ifdef XDP_VE2_ZOCL_BUILD uint64_t AIETraceOffload::syncAndLog(uint64_t index) { auto& bd = buffers[index]; @@ -355,6 +496,39 @@ uint64_t AIETraceOffload::syncAndLog(uint64_t index) traceLogger->addAIETraceData(index, hostBuf, nBytes, mEnCircularBuf); return nBytes; } +#else // XDNA +uint64_t AIETraceOffload::syncAndLog(uint64_t index) +{ + auto& bd = buffers[index]; + + if (bd.offset >= bd.usedSz) + return 0; + + // Amount of newly written trace + uint64_t nBytes = bd.usedSz - bd.offset; + + // Sync to host + xrt_bos[index].sync(XCL_BO_SYNC_BO_FROM_DEVICE, nBytes, bd.offset); + auto in_bo_map = xrt_bos[index].map() + bd.offset; + + if (!in_bo_map) + return 0; + + // Find amount of non-zero data in buffer + if (!isPLIO) + nBytes = searchWrittenBytes((void*)in_bo_map, bufAllocSz); + + // check for full buffer + if ((bd.offset + nBytes >= bufAllocSz) && !mEnCircularBuf) { + bd.isFull = true; + bd.offloadDone = true; + } + + // Log nBytes of trace + traceLogger->addAIETraceData(index, (void*)in_bo_map, nBytes, mEnCircularBuf); + return nBytes; +} +#endif bool AIETraceOffload::isTraceBufferFull() { @@ -366,8 +540,13 @@ bool AIETraceOffload::isTraceBufferFull() return false; } +// TODO: only for zocl right now since xdna does not support plio right now, and this function is only for plio void 
AIETraceOffload::checkCircularBufferSupport() { + #if defined(XDP_VE2_BUILD) && ! defined(XDP_VE2_ZOCL_BUILD) + return; + #endif + mEnCircularBuf = xrt_core::config::get_aie_trace_settings_reuse_buffer(); if (!mEnCircularBuf) return; @@ -464,9 +643,9 @@ void AIETraceOffload::offloadFinished() uint64_t AIETraceOffload::searchWrittenBytes(void* buf, uint64_t bytes) { /* - * Look For trace boundary using binary search. - * Use Dword to be safe - */ + * Look For trace boundary using binary search. + * Use Dword to be safe + */ auto words = static_cast(buf); uint64_t wordcount = bytes / TRACE_PACKET_SIZE; diff --git a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h index 618353c0..e538a362 100644 --- a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h +++ b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h @@ -18,17 +18,22 @@ #ifndef XDP_PROFILE_AIE_TRACE_OFFLOAD_VE2_H_ #define XDP_PROFILE_AIE_TRACE_OFFLOAD_VE2_H_ +#include +#include +#include +#include +#include + +#include "core/include/xrt/xrt_bo.h" +#include "core/include/xrt/xrt_hw_context.h" + #include "xdp/profile/device/tracedefs.h" +#include "xdp/profile/device/common/ve2/ve2_transaction.h" +#include "xdp/profile/plugin/aie_trace/aie_trace_metadata.h" -extern "C" -{ -#ifdef XDP_USE_AIE_CODEGEN - #include - #include -#else - #include "xaiengine/xaiegbl.h" - #include -#endif +extern "C" { +#include +#include } namespace xdp { @@ -76,14 +81,23 @@ enum class AIEOffloadThreadStatus { class AIETraceOffload { public: + // ZOCL edge: live devInst pointer. VE2 XDNA (client-style): hw_context + metadata. 
+#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) AIETraceOffload(void* handle, uint64_t id, PLDeviceIntf*, AIETraceLogger*, bool isPlio, uint64_t totalSize, uint64_t numStrm, - XAie_DevInst* devInstance - ); - + XAie_DevInst* devInstance); +#elif defined(XDP_VE2_BUILD) + AIETraceOffload(void* handle, uint64_t id, + PLDeviceIntf*, AIETraceLogger*, + bool isPlio, + uint64_t totalSize, + uint64_t numStrm, + xrt::hw_context context, + std::shared_ptr metadata); +#endif virtual ~AIETraceOffload(); public: @@ -111,7 +125,15 @@ class AIETraceOffload uint64_t deviceId; PLDeviceIntf* deviceIntf; AIETraceLogger* traceLogger; +#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) XAie_DevInst* devInst; +#elif defined(XDP_VE2_BUILD) + XAie_DevInst aieDevInst = {0}; + std::unique_ptr tranxHandler; + xrt::hw_context context; + std::shared_ptr metadata; + std::vector xrt_bos; +#endif bool isPLIO; uint64_t totalSz; @@ -151,4 +173,4 @@ class AIETraceOffload } -#endif +#endif \ No newline at end of file diff --git a/profile/device/common/ve2/ve2_transaction.cpp b/profile/device/common/ve2/ve2_transaction.cpp index a3f57f68..c0ab83fb 100644 --- a/profile/device/common/ve2/ve2_transaction.cpp +++ b/profile/device/common/ve2/ve2_transaction.cpp @@ -80,7 +80,6 @@ namespace xdp::aie { libpaths.push_back("./"); try { -#if 1 //Read ASM file std::string asmFileName = getAsmFileName(); if (!std::filesystem::exists(asmFileName)) @@ -111,26 +110,6 @@ namespace xdp::aie { std::cout << "Elf size:" << e.size() << std::endl; std::ofstream outElf(getElfFileName(), std::ios_base::binary); outElf.write(e.data(), e.size()); -#else - auto check1 = std::getenv("AIEBU_REPO"); - auto check2 = std::getenv("PYTHONPATH"); - if ((check1 == nullptr) || (check2 == nullptr)) { - xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", - "Please define AIEBU_REPO and PYTHONPATH so elf generation can work."); - return false; - } - - std::stringstream command; - command << 
"${AIEBU_REPO}/src/python/aiebu/control_asm_disasm.py -t aie4 " - << getAsmFileName() << " -o " << getElfFileName(); - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", - "Generating ELF using: " + command.str()); - if (system(command.str().c_str())) { - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", - "Elf generation failed"); - return false; - } -#endif } catch(const std::exception& e) { xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", @@ -172,14 +151,14 @@ namespace xdp::aie { xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "XDP_KERNEL created"); xrt::run run{kernel}; - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Kernel run created"); + run.start(); - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Run started"); + run.wait2(); - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Wait done!"); + return true; } diff --git a/profile/plugin/aie_trace/CMakeLists.txt b/profile/plugin/aie_trace/CMakeLists.txt index fe02abb1..4c63c908 100644 --- a/profile/plugin/aie_trace/CMakeLists.txt +++ b/profile/plugin/aie_trace/CMakeLists.txt @@ -104,22 +104,21 @@ else() "${IMPL_DIR}/*.cpp" ) - add_library(xdp_aie_trace_plugin_xdna SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_TRACE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES}) + add_library(xdp_aie_trace_plugin_xdna SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_TRACE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES} "${PROFILE_DIR}/device/common/ve2/ve2_transaction.cpp") add_dependencies(xdp_aie_trace_plugin_xdna xdp_core xrt_coreutil) - # TODO: add aie_codegen to aie_trace once ASM->ELF support is there - # target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen) - # target_link_options(xdp_aie_trace_plugin_xdna PRIVATE -Wl,-Bsymbolic) - # 
target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") - # target_include_directories(xdp_aie_profile_plugin_xdna PRIVATE - # ${CMAKE_SOURCE_DIR}/src - # ${AIEFAL_DIR} - # ${AIEBU_SOURCE_DIR}/src/cpp/include - # ${AIERT_DIR}/include - # ${XRT_SOURCE_DIR}/runtime_src/core/common/elf - # ) - target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") - target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src) + target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen aiebu_library_objects) + target_link_options(xdp_aie_trace_plugin_xdna PRIVATE -Wl,-Bsymbolic) + target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") + target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE + ${CMAKE_SOURCE_DIR}/src + ${AIEFAL_DIR} + ${AIEBU_SOURCE_DIR}/src/cpp/include + ${AIERT_DIR}/include + ${XRT_SOURCE_DIR}/runtime_src/core/common/elf + ) + #target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) + #target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") + #target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src) set_target_properties(xdp_aie_trace_plugin_xdna PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_trace_plugin_xdna @@ -146,7 +145,7 @@ else() add_dependencies(xdp_aie_trace_plugin xdp_core xrt_coreutil) target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil xaiengine) if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") - target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_VE2_ZOCL_BUILD=1 FAL_LINUX="on") + target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_VE2_BUILD=1 XDP_VE2_ZOCL_BUILD=1 FAL_LINUX="on") 
else() target_compile_definitions(xdp_aie_trace_plugin PRIVATE FAL_LINUX="on") endif() diff --git a/profile/plugin/aie_trace/aie_trace_offload_manager.cpp b/profile/plugin/aie_trace/aie_trace_offload_manager.cpp index e31139c7..ddf8d3e2 100644 --- a/profile/plugin/aie_trace/aie_trace_offload_manager.cpp +++ b/profile/plugin/aie_trace/aie_trace_offload_manager.cpp @@ -49,6 +49,11 @@ uint64_t AIETraceOffloadManager::checkAndCapToBankSize(uint8_t memIndex, uint64_ {} void AIETraceOffloadManager::initPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t bufSize, uint64_t numStreams, XAie_DevInst* devInst) { + // VE2 XDNA: PLIO unsupported and AIETraceOffload has no devInst ctor — omit the body below so it is not instantiated. +#if defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + xrt_core::message::send(severity_level::debug, "XRT", "No support for VE2 XDNA PLIO right now"); + return; +#else if (!offloadEnabledPLIO) return; @@ -70,13 +75,14 @@ uint64_t AIETraceOffloadManager::checkAndCapToBankSize(uint8_t memIndex, uint64_ << " MB is used for AIE trace buffer for " << numStreams << " PLIO streams."; xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - +#endif } - #ifdef XDP_CLIENT_BUILD +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) void AIETraceOffloadManager::initGMIO(void* handle, PLDeviceIntf* deviceIntf, - uint64_t bufSize, uint64_t numStreams, xrt::hw_context context, - std::shared_ptr metadata) { + uint64_t bufSize, uint64_t numStreams, xrt::hw_context context, + std::shared_ptr metadata) + { if (!offloadEnabledGMIO) return; @@ -206,10 +212,14 @@ uint64_t AIETraceOffloadManager::checkAndCapToBankSize(uint8_t memIndex, uint64_ } } - bool AIETraceOffloadManager::configureAndInitPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t desiredBufSize, uint64_t numStreamsPLIO, XAie_DevInst* devInst) { +#if (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) + 
xrt_core::message::send(severity_level::debug, "XRT", "No support for VE2 XDNA PLIO right now"); + return true; +#endif + uint8_t memIndex = 0; if (deviceIntf) memIndex = deviceIntf->getAIETs2mmMemIndex(0); @@ -227,23 +237,23 @@ bool AIETraceOffloadManager::configureAndInitPLIO(void* handle, PLDeviceIntf* de return true; } -bool AIETraceOffloadManager::configureAndInitGMIO( - void* handle, PLDeviceIntf* deviceIntf, - uint64_t desiredBufSize, uint64_t numStreamsGMIO -#ifdef XDP_CLIENT_BUILD - , const xrt::hw_context& hwctx, const std::shared_ptr& md -#else - , XAie_DevInst* devInst -#endif - ) +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) +bool AIETraceOffloadManager::configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + const xrt::hw_context& hwctx, const std::shared_ptr& md) { desiredBufSize = checkAndCapToBankSize(/*bank 0*/ 0, desiredBufSize); desiredBufSize = aieTraceImpl->checkTraceBufSize(desiredBufSize); - -#ifdef XDP_CLIENT_BUILD initGMIO(handle, deviceIntf, desiredBufSize, numStreamsGMIO, hwctx, md); return true; +} #else +bool AIETraceOffloadManager::configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + XAie_DevInst* devInst) +{ + desiredBufSize = checkAndCapToBankSize(/*bank 0*/ 0, desiredBufSize); + desiredBufSize = aieTraceImpl->checkTraceBufSize(desiredBufSize); if (!devInst) { xrt_core::message::send(severity_level::warning, "XRT", "Unable to get AIE device instance. 
AIE event trace will not be available."); @@ -251,7 +261,7 @@ bool AIETraceOffloadManager::configureAndInitGMIO( } initGMIO(handle, deviceIntf, desiredBufSize, numStreamsGMIO, devInst); return true; -#endif } +#endif } // namespace xdp diff --git a/profile/plugin/aie_trace/aie_trace_offload_manager.h b/profile/plugin/aie_trace/aie_trace_offload_manager.h index 2594ba58..da2ffb63 100644 --- a/profile/plugin/aie_trace/aie_trace_offload_manager.h +++ b/profile/plugin/aie_trace/aie_trace_offload_manager.h @@ -62,7 +62,7 @@ class AIETraceOffloadManager { void initPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t bufSize, uint64_t numStreams, XAie_DevInst* devInst); // TODO: Use const references for parameters where applicable - #ifdef XDP_CLIENT_BUILD + #if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) void initGMIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t bufSize, uint64_t numStreams, xrt::hw_context context, std::shared_ptr metadata); @@ -78,14 +78,16 @@ class AIETraceOffloadManager { std::vector& writers); bool configureAndInitPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t desiredBufSize, uint64_t numStreamsPLIO, XAie_DevInst* devInst); + +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) bool configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, - uint64_t desiredBufSize, uint64_t numStreamsGMIO - #ifdef XDP_CLIENT_BUILD - , const xrt::hw_context& hwctx, const std::shared_ptr& md - #else - , XAie_DevInst* devInst - #endif - ); + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + const xrt::hw_context& hwctx, const std::shared_ptr& md); +#else + bool configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + XAie_DevInst* devInst); +#endif }; // class AIETraceOffloadManager diff --git a/profile/plugin/aie_trace/aie_trace_plugin.cpp b/profile/plugin/aie_trace/aie_trace_plugin.cpp index c2bda88f..d759eb10 
100644 --- a/profile/plugin/aie_trace/aie_trace_plugin.cpp +++ b/profile/plugin/aie_trace/aie_trace_plugin.cpp @@ -174,7 +174,11 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) AIEData.implementation = std::make_unique(db, AIEData.metadata); #elif defined(XRT_X86_BUILD) AIEData.implementation = std::make_unique(db, AIEData.metadata); -#elif XDP_VE2_BUILD +#elif defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(handle); + AIEData.metadata->setHwContext(context); + AIEData.implementation = std::make_unique(db, AIEData.metadata); +#elif defined(XDP_VE2_BUILD) AIEData.implementation = std::make_unique(db, AIEData.metadata); #else AIEData.implementation = std::make_unique(db, AIEData.metadata); @@ -260,7 +264,9 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) // uint64_t aieTraceBufSizePLIO = aieTraceBufSize; // uint64_t aieTraceBufSizeGMIO = aieTraceBufSize; if (isPLIO && !configuredOnePlioPartition) { - +#if defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + // TODO: if VE2 XDNA flow then we do not have devInst so we do something else +#else XAie_DevInst* devInst = static_cast(AIEData.implementation->setAieDeviceInst(handle, deviceID)); if(!devInst) { xrt_core::message::send(severity_level::warning, "XRT", @@ -269,11 +275,12 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) } AIEData.offloadManager->configureAndInitPLIO(handle, deviceIntf, aieTraceBufSize, AIEData.metadata->getNumStreamsPLIO(), devInst); - // Mark that we've successfully configured the first PLIO partition +#endif configuredOnePlioPartition = true; } + if (isGMIO) { -#ifdef XDP_CLIENT_BUILD +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) if (!AIEData.offloadManager->configureAndInitGMIO( handle, deviceIntf, aieTraceBufSize, 
AIEData.metadata->getNumStreamsGMIO(), diff --git a/profile/plugin/aie_trace/ve2/aie_trace.cpp b/profile/plugin/aie_trace/ve2/aie_trace.cpp index af5a47f1..33be68d1 100644 --- a/profile/plugin/aie_trace/ve2/aie_trace.cpp +++ b/profile/plugin/aie_trace/ve2/aie_trace.cpp @@ -18,6 +18,7 @@ #include "xdp/profile/plugin/aie_base/aie_base_util.h" #include "xdp/profile/plugin/vp_base/utility.h" +#include #include #include #include @@ -32,6 +33,11 @@ #include "core/common/api/hw_context_int.h" #include "shim_ve2/xdna_hwctx.h" +#ifndef XDP_VE2_ZOCL_BUILD +#include "xdp/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h" +#endif + +#ifdef XDP_VE2_ZOCL_BUILD namespace { static void* fetchAieDevInst(void* devHandle) { @@ -93,6 +99,9 @@ namespace xdp { memoryTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_MEM_TILE; interfaceTileTraceStartEvent = XAIE_EVENT_TRUE_PL; interfaceTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_PL; + + // TODO: tranxHandler to record ASM transaction + // TODO: XAie_cfg to create local aieDevInst } /**************************************************************************** @@ -1215,5 +1224,1357 @@ namespace xdp { aieDevice = static_cast(db->getStaticInfo().getAieDevice(allocateAieDevice, deallocateAieDevice, handle, deviceID)); return aieDevInst; } - } // namespace xdp + +#else // XDNA flow + +namespace xdp { + using severity_level = xrt_core::message::severity_level; + + /**************************************************************************** + * Constructor: AIE trace implementation for edge devices + ***************************************************************************/ + AieTrace_VE2Impl::AieTrace_VE2Impl(VPDatabase* database, std::shared_ptr metadata) + : AieTraceImpl(database, metadata) + { + auto hwGen = metadata->getHardwareGen(); + + // Pre-defined metric sets + coreEventSets = aie::trace::getCoreEventSets(hwGen); + memoryEventSets = aie::trace::getMemoryEventSets(hwGen); + memoryTileEventSets = 
aie::trace::getMemoryTileEventSets(hwGen); + interfaceTileEventSets = aie::trace::getInterfaceTileEventSets(hwGen); + + // Core trace start/end: these are also broadcast to memory module + coreTraceStartEvent = XAIE_EVENT_ACTIVE_CORE; + coreTraceEndEvent = XAIE_EVENT_USER_EVENT_3_CORE; + + // Memory/interface tile trace is flushed at end of run + memoryTileTraceStartEvent = XAIE_EVENT_TRUE_MEM_TILE; + memoryTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_MEM_TILE; + interfaceTileTraceStartEvent = XAIE_EVENT_TRUE_PL; + interfaceTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_PL; + + tranxHandler = std::make_unique(); + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + } + + /**************************************************************************** + * Verify correctness of trace buffer size + ***************************************************************************/ + uint64_t AieTrace_VE2Impl::checkTraceBufSize(uint64_t aieTraceBufSize) + { + uint64_t deviceMemorySize = getPSMemorySize(); + if (deviceMemorySize == 0) + return aieTraceBufSize; + + double percentSize = (100.0 * aieTraceBufSize) / deviceMemorySize; + + std::stringstream percentSizeStr; + percentSizeStr << std::fixed << std::setprecision(3) << percentSize; + + // Limit size of trace buffer if requested amount is too high + if (percentSize >= 80.0) { + aieTraceBufSize = static_cast(std::ceil(0.8 * deviceMemorySize)); + + std::stringstream newBufSizeStr; + newBufSizeStr << 
std::fixed << std::setprecision(3) << (aieTraceBufSize / (1024.0 * 1024.0)); // In MB + + std::string msg = "Requested AIE trace buffer is " + percentSizeStr.str() + "% of device memory." + + " You may run into errors depending upon memory usage" + " of your application." + + " Limiting to " + newBufSizeStr.str() + " MB."; + xrt_core::message::send(severity_level::warning, "XRT", msg); + } else { + std::string msg = "Requested AIE trace buffer is " + percentSizeStr.str() + "% of device memory."; + xrt_core::message::send(severity_level::info, "XRT", msg); + } + + return aieTraceBufSize; + } + + /**************************************************************************** + * Update device (e.g., after loading xclbin) + ***************************************************************************/ + void AieTrace_VE2Impl::updateDevice() + { + xrt_core::message::send(severity_level::info, "XRT", "Calling AIE Trace VE2 XDNA updateDevice."); + + // If runtime metrics are not enabled, do not configure trace + if(!metadata->getRuntimeMetrics()) + return; + + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(metadata->getHandle()); + if (aiePartitionPt.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace: no partition info for trace-start broadcast; skipping broadcast network."); + return; + } + uint8_t startCol = static_cast(aiePartitionPt.back().second.get("start_col")); + uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); // !!! should be 36 + + std::cout << "!!! updateDevice startCol (back): " << static_cast(startCol) << std::endl; + std::cout << "!!! updateDevice numCols (back): " << static_cast(numCols) << std::endl; + + startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + numCols = static_cast(aiePartitionPt.front().second.get("num_cols")); // !!! should be 36 + + std::cout << "!!! 
updateDevice startCol (front): " << static_cast(startCol) << std::endl; + std::cout << "!!! updateDevice numCols (front): " << static_cast(numCols) << std::endl; + + // Set metrics for counters and trace events + if (!setMetricsSettings(metadata->getDeviceID(), metadata->getHandle())) { + std::string msg("Unable to configure AIE trace control and events. No trace will be generated."); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return; + } + + // Configure windowed event trace if layer-based start is enabled + if (xrt_core::config::get_aie_trace_settings_start_type() == "layer") { + if (!configureWindowedEventTrace(metadata->getHandle())) { + std::string msg("Unable to configure AIE windowed event trace"); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return; + } + } + } + + /**************************************************************************** + * Configure windowed event trace for layer-based triggering + ***************************************************************************/ + bool AieTrace_VE2Impl::configureWindowedEventTrace(void* hwCtxImpl) + { + // Start recording the windowed event trace transaction + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceWindow")) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE TRACE: Failed to initialize transaction for Windowed Event Trace."); + return false; + } + + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.back().second.get("start_col")); + uint8_t startCol = 0; + uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + + std::cout << "!!! startCol: " << startCol << std::endl; + std::cout << "!!! 
numCols: " << numCols << std::endl; + + auto metadataReader = (VPDatabase::Instance()->getStaticInfo()).getAIEmetadataReader(metadata->getDeviceID()); + if (!metadataReader) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE metadata reader not available for windowed trace configuration"); + return false; + } + + // XDNA path has no xaiefal broadcast reservation; use fixed channels (see + // client/resources_def.h traceStartBroadcastChId1 / traceStartBroadcastChId2). + const uint8_t traceStartBroadcastChId1 = 6; + const uint8_t traceStartBroadcastChId2 = 7; + + // Define trace start events for different module types + XAie_Events shimTraceStartEvent = (XAie_Events) (XAIE_EVENT_BROADCAST_A_0_PL + traceStartBroadcastChId2); + XAie_Events memTileTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_MEM_TILE + traceStartBroadcastChId1); + XAie_Events coreModTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_CORE + traceStartBroadcastChId1); + XAie_Events memTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_MEM + traceStartBroadcastChId1); + + unsigned int startLayer = xrt_core::config::get_aie_trace_settings_start_layer(); + + // Configure trace start events for tiles + // NOTE: rows are stored as absolute as required by resource manager + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + auto type = aie::getModuleType(row, metadata->getRowOffset()); + auto loc = XAie_TileLoc(col, row); + + if (startLayer != UINT_MAX) { + if (type == module_type::shim) { + // Configure shim/interface tile trace start + if (col == startCol) + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_PERF_CNT_0_PL); + else + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_PL_MOD, shimTraceStartEvent); + } + else if (type == module_type::mem_tile) { + // Configure memory tile trace start + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_MEM_MOD, memTileTraceStartEvent); + } + else 
if (type == module_type::core) { + // Configure core module trace start + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreModTraceStartEvent); + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_MEM_MOD, memTraceStartEvent); + } + } + } + + if (startLayer != UINT_MAX) { + XAie_PerfCounterControlSet(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, 0, XAIE_EVENT_USER_EVENT_0_PL, XAIE_EVENT_USER_EVENT_0_PL); + XAie_PerfCounterEventValueSet(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, 0, startLayer); + } + + // Build 2-channel broadcast network for trace start synchronization + build2ChannelBroadcastNetwork(hwCtxImpl, traceStartBroadcastChId1, traceStartBroadcastChId2, XAIE_EVENT_PERF_CNT_0_PL); + + xrt_core::message::send(severity_level::info, "XRT", "Finished AIE Windowed Trace Settings."); + auto hwContext = metadata->getHwContext(); + + // Submit the windowed event trace transaction + if (!tranxHandler->submitTransaction(&aieDevInst, hwContext)) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE TRACE: Failed to submit windowed event trace transaction."); + return false; + } + + return true; + } + + /**************************************************************************** + * Configure requested tiles with trace metrics and settings + ***************************************************************************/ + bool AieTrace_VE2Impl::setMetricsSettings(uint64_t deviceId, void* handle) + { + if (!metadata->getIsValidMetrics()) { + std::string msg("AIE trace metrics were not specified in xrt.ini. 
AIE event trace will not be available."); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return false; + } + + // Get partition columns + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(handle); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + uint8_t startCol = 0; + uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + + std::string startType = xrt_core::config::get_aie_trace_settings_start_type(); + unsigned int startLayer = xrt_core::config::get_aie_trace_settings_start_layer(); + + tranxHandler->clearExternalAsmOverride(); + + std::string tranxName = "AieTraceMetrics"; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Starting transaction " + tranxName); + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE TRACE: Failed to initialize transaction for AIE TRACE Metrics."); + return false; + } + + // XDNA: fixed broadcast IDs for trace-start network (matches client/resources_def.h). + const uint8_t traceStartBroadcastChId1 = 6; + const uint8_t traceStartBroadcastChId2 = 7; + + // Get channel configurations (memory and interface tiles) + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + + // Get the column shift for partition + // NOTE: If partition is not used, this value is zero. 
+ uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + aie::displayColShiftInfo(startColShift); + + // Zero trace event tile counts + for (int m = 0; m < static_cast(module_type::num_types); ++m) { + for (int n = 0; n <= NUM_TRACE_EVENTS; ++n) + mNumTileTraceEvents[m][n] = 0; + } + + auto metadataReader = (VPDatabase::Instance()->getStaticInfo()).getAIEmetadataReader(deviceId); + if (!metadataReader) { + if (aie::isDebugVerbosity()) { + std::stringstream msg; + msg << "AIE metadata reader is null"; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + } + + // Using user event for trace end to enable flushing + // NOTE: Flush trace module always at the end because for some applications + // core might be running infinitely. + if (metadata->getUseUserControl()) + coreTraceStartEvent = XAIE_EVENT_INSTR_EVENT_0_CORE; + coreTraceEndEvent = XAIE_EVENT_USER_EVENT_3_CORE; + + // Iterate over all used/specified tiles + // NOTE: rows are stored as absolute as required by resource manager + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col + startColShift; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getRowOffset()); + auto typeInt = static_cast(type); + auto loc = XAie_TileLoc(col, row); + + if ((type == module_type::core) && !aie::isDmaSet(metricSet)) { + // If we're not looking at DMA events, then don't display the DMA + // If core is not active (i.e., DMA-only tile), then ignore this tile + if (tile.active_core) + tile.active_memory = false; + else + continue; + } + + std::string tileName = (type == module_type::mem_tile) ? "memory" + : ((type == module_type::shim) ? 
"interface" : "AIE"); + tileName.append(" tile (" + std::to_string(col) + "," + std::to_string(row) + ")"); + + if (aie::isInfoVerbosity()) { + std::stringstream infoMsg; + infoMsg << "Configuring " << tileName << " for trace using metric set " << metricSet; + xrt_core::message::send(severity_level::info, "XRT", infoMsg.str()); + } + + // Store location to flush at end of run + if (type == module_type::core || (type == module_type::mem_tile) + || (type == module_type::shim)) { + if (type == module_type::core) + traceFlushLocs.push_back(loc); + else if (type == module_type::mem_tile) + memoryTileTraceFlushLocs.push_back(loc); + else if (type == module_type::shim) + interfaceTileTraceFlushLocs.push_back(loc); + } + + // AIE config object for this tile + auto cfgTile = std::make_unique(col, row, type); + cfgTile->type = type; + cfgTile->trace_metric_set = metricSet; + cfgTile->active_core = tile.active_core; + cfgTile->active_memory = tile.active_memory; + + // Catch core execution trace + if ((type == module_type::core) && (metricSet == "execution")) { + // Set start/end events, use execution packets, and start trace module + XAie_TraceStopEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceEndEvent); + + // Driver requires at least one, non-zero trace event + XAie_TraceEvent(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_TRUE_CORE, 0); + + XAie_Packet pkt = {0, 0}; + XAie_TraceModeConfig(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_TRACE_INST_EXEC); + XAie_TracePktConfig(&aieDevInst, loc, XAIE_CORE_MOD, pkt); + + if(startType != "layer" || startLayer == UINT_MAX) + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceStartEvent); + (db->getStaticInfo()).addAIECfgTile(deviceId, cfgTile); + continue; + } + + // Get vector of pre-defined metrics for this set + // NOTE: these are local copies as we are adding tile/counter-specific events + EventVector coreEvents; + EventVector memoryEvents; + EventVector interfaceEvents; + if (type == module_type::core) { + coreEvents = 
coreEventSets[metricSet]; + memoryEvents = memoryEventSets[metricSet]; + } + else if (type == module_type::mem_tile) { + memoryEvents = memoryTileEventSets[metricSet]; + } + else if (type == module_type::shim) { + interfaceEvents = interfaceTileEventSets[metricSet]; + } + + if (coreEvents.empty() && memoryEvents.empty() && interfaceEvents.empty()) { + std::stringstream msg; + msg << "Event trace is not available for " << tileName << " using metric set " + << metricSet << " on hardware generation " << metadata->getHardwareGen() << "."; + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + continue; + } + + if (xrt_core::config::get_verbosity() >= static_cast(severity_level::info)) { + std::stringstream infoMsg; + auto tileName = (type == module_type::mem_tile) ? "memory" + : ((type == module_type::shim) ? "interface" : "AIE"); + infoMsg << "Configuring " << tileName << " tile (" << +col << "," + << +row << ") for trace using metric set " << metricSet; + xrt_core::message::send(severity_level::info, "XRT", infoMsg.str()); + } + + int numCoreTraceEvents = 0; + int numMemoryTraceEvents = 0; + int numInterfaceTraceEvents = 0; + + // + // 1. 
Configure Core Trace Events + // + if (type == module_type::core) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Core Trace Events"); + + XAie_ModuleType mod = XAIE_CORE_MOD; + uint16_t phyEvent = 0; + + // Configure combo & group events (e.g., DMA monitoring/group masks) + auto comboEvents = configComboEvents(loc, mod, type, metricSet, cfgTile->core_trace_config); + (void)comboEvents; + configGroupEvents(loc, mod, type, metricSet); + + // Set end event for trace capture + // NOTE: This needs to be done first + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, coreTraceEndEvent) != XAIE_OK) + break; + + // Program core trace event slots + for (uint8_t i = 0; i < coreEvents.size(); ++i) { + if (XAie_TraceEvent(&aieDevInst, loc, mod, coreEvents[i], i) != XAIE_OK) + break; + + ++numCoreTraceEvents; + + // Update config file + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreEvents[i], &phyEvent); + cfgTile->core_trace_config.traced_events[i] = phyEvent; + } + + // Update config file + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreTraceStartEvent, &phyEvent); + cfgTile->core_trace_config.start_event = phyEvent; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreTraceEndEvent, &phyEvent); + cfgTile->core_trace_config.stop_event = phyEvent; + + coreEvents.clear(); + mNumTileTraceEvents[typeInt][numCoreTraceEvents]++; + + XAie_Packet pkt = {0, 0}; // core trace uses PC packets + if (XAie_TraceModeConfig(&aieDevInst, loc, mod, XAIE_TRACE_EVENT_PC) != XAIE_OK) + break; + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if (startType != "layer" || startLayer == UINT_MAX) + XAie_TraceStartEvent(&aieDevInst, loc, mod, coreTraceStartEvent); + } // Core modules + + // + // 2. 
Configure Memory Trace Events + // + // Applicable to memory module in AIE tiles and memory tiles + if ((type == module_type::core) || (type == module_type::mem_tile)) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Memory Trace Events"); + + XAie_ModuleType mod = XAIE_MEM_MOD; + uint8_t firstBroadcastId = 8; + + XAie_Events traceStartEvent = + (type == module_type::core) ? coreTraceStartEvent : memoryTileTraceStartEvent; + XAie_Events traceEndEvent = + (type == module_type::core) ? coreTraceEndEvent : memoryTileTraceEndEvent; + + aie_cfg_base& aieConfig = cfgTile->core_trace_config; + if (type == module_type::mem_tile) + aieConfig = cfgTile->memory_tile_trace_config; + + // Combo event override (for DMA metric sets) + auto comboEvents = configComboEvents(loc, mod, type, metricSet, aieConfig); + if (comboEvents.size() == 2) { + traceStartEvent = comboEvents.at(0); + traceEndEvent = comboEvents.at(1); + } + else if (type == module_type::core) { + // Route core start/stop into memory-module trace via broadcast (same as client/NPU3). 
+ if (!m_trace_start_broadcast) { + if (XAie_EventBroadcast(&aieDevInst, loc, XAIE_CORE_MOD, 8, traceStartEvent) != XAIE_OK) + break; + } + if (XAie_EventBroadcast(&aieDevInst, loc, XAIE_CORE_MOD, 9, traceEndEvent) != XAIE_OK) + break; + + uint16_t phyBroadcast = 0; + if (!m_trace_start_broadcast) { + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_CORE_MOD, traceStartEvent, &phyBroadcast); + cfgTile->core_trace_config.internal_events_broadcast[8] = phyBroadcast; + } + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_CORE_MOD, traceEndEvent, &phyBroadcast); + cfgTile->core_trace_config.internal_events_broadcast[9] = phyBroadcast; + + if (m_trace_start_broadcast) + traceStartEvent = + static_cast(XAIE_EVENT_BROADCAST_0_MEM + traceStartBroadcastChId1); + else + traceStartEvent = XAIE_EVENT_BROADCAST_8_MEM; + traceEndEvent = XAIE_EVENT_BROADCAST_9_MEM; + firstBroadcastId = 10; + } + + if (type == module_type::core) { + if (XAie_EventBroadcastBlockMapDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, 0xFF00, + XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_NORTH | + XAIE_EVENT_BROADCAST_SOUTH) != XAIE_OK) + break; + if (XAie_EventBroadcastBlockMapDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, 0xFF00, + XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH | + XAIE_EVENT_BROADCAST_SOUTH) != XAIE_OK) + break; + + for (uint8_t bi = 8; bi < 16; bi++) { + if (XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, bi, + XAIE_EVENT_BROADCAST_EAST) != XAIE_OK) + break; + } + } + + // Configure stream switch ports (core SS DMA monitors feeding MEM-side trace) + configStreamSwitchPorts(tile, loc, type, metricSet, 0, 0, memoryEvents, aieConfig); + + memoryModTraceStartEvent = traceStartEvent; + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, traceEndEvent) != XAIE_OK) + break; + + { + uint16_t phyEvent1 = 0; + uint16_t phyEvent2 = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, traceStartEvent, &phyEvent1); 
+ XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, traceEndEvent, &phyEvent2); + if (type == module_type::core) { + cfgTile->memory_trace_config.start_event = phyEvent1; + cfgTile->memory_trace_config.stop_event = phyEvent2; + } else { + cfgTile->memory_tile_trace_config.start_event = phyEvent1; + cfgTile->memory_tile_trace_config.stop_event = phyEvent2; + } + } + + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + + if (type == module_type::mem_tile) { + configEventSelections(tile, loc, type, metricSet, channel0, channel1, cfgTile->memory_tile_trace_config); + } else { + if (!memoryEvents.empty()) { + auto channelNum = aie::getChannelNumberFromEvent(memoryEvents.at(0)); + if (channelNum >= 0) { + if (aie::isInputSet(type, metricSet)) { + cfgTile->core_trace_config.mm2s_channels[0] = channelNum; + if (static_cast(channelNum) < tile.mm2s_names.size()) + cfgTile->core_trace_config.mm2s_names[0] = tile.mm2s_names.at(channelNum); + } else { + cfgTile->core_trace_config.s2mm_channels[0] = channelNum; + if (static_cast(channelNum) < tile.s2mm_names.size()) + cfgTile->core_trace_config.s2mm_names[0] = tile.s2mm_names.at(channelNum); + } + } + } + } + + uint8_t bcId = firstBroadcastId; + int bcIndex = (firstBroadcastId == 10) ? 
2 : 0; + static const XAie_Events kMemBcEv[] = { + XAIE_EVENT_BROADCAST_8_MEM, XAIE_EVENT_BROADCAST_9_MEM, XAIE_EVENT_BROADCAST_10_MEM, + XAIE_EVENT_BROADCAST_11_MEM, XAIE_EVENT_BROADCAST_12_MEM, XAIE_EVENT_BROADCAST_13_MEM, + XAIE_EVENT_BROADCAST_14_MEM, XAIE_EVENT_BROADCAST_15_MEM}; + + for (uint8_t i = 0; i < memoryEvents.size(); i++) { + const bool isCoreEvent = xdp::aie::isCoreModuleEvent(memoryEvents[i]); + + if (isCoreEvent) { + if (XAie_EventBroadcast(&aieDevInst, loc, XAIE_CORE_MOD, bcId, memoryEvents[i]) != XAIE_OK) + break; + if (bcIndex >= static_cast(sizeof(kMemBcEv) / sizeof(kMemBcEv[0]))) + break; + if (XAie_TraceEvent(&aieDevInst, loc, XAIE_MEM_MOD, kMemBcEv[bcIndex++], i) != XAIE_OK) + break; + } else { + if (XAie_TraceEvent(&aieDevInst, loc, XAIE_MEM_MOD, memoryEvents[i], i) != XAIE_OK) + break; + } + + ++numMemoryTraceEvents; + + configEdgeEvents(tile, type, metricSet, memoryEvents[i], channel0); + + uint16_t phyEvent = 0; + const XAie_ModuleType phyModConv = isCoreEvent ? XAIE_CORE_MOD : XAIE_MEM_MOD; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, phyModConv, memoryEvents[i], &phyEvent); + + if (isCoreEvent) { + cfgTile->core_trace_config.internal_events_broadcast[bcId] = phyEvent; + cfgTile->memory_trace_config.traced_events[i] = bcIdToEvent(bcId); + ++bcId; + } else if (type == module_type::mem_tile) { + cfgTile->memory_tile_trace_config.traced_events[i] = phyEvent; + } else { + cfgTile->memory_trace_config.traced_events[i] = phyEvent; + } + } + + memoryEvents.clear(); + mNumTileTraceEvents[typeInt][numMemoryTraceEvents]++; + + uint8_t packetType = (type == module_type::mem_tile) ? 
3 : 1; + XAie_Packet pkt = {0, packetType}; + + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if ((startType != "layer") || (startLayer == UINT_MAX)) { + if (XAie_TraceStartEvent(&aieDevInst, loc, mod, traceStartEvent) != XAIE_OK) + break; + } + + if (type == module_type::mem_tile) + cfgTile->memory_tile_trace_config.packet_type = packetType; + else + cfgTile->memory_trace_config.packet_type = packetType; + } // Memory modules/tiles + + // + // 3. Configure Interface Tile Trace Events + // + if (type == module_type::shim) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Interface Tile Trace Events"); + XAie_ModuleType mod = XAIE_PL_MOD; + + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + std::vector channels = {channel0, channel1, 2, 3}; + + modifyEvents(type, subtype, metricSet, channel0, interfaceEvents); + configEventSelections(tile, loc, type, metricSet, channel0, channel1, cfgTile->interface_tile_trace_config); + configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel0, channel1, + interfaceEvents, cfgTile->interface_tile_trace_config); + + // Configure interface tile trace events + for (size_t i = 0; i < interfaceEvents.size(); ++i) { + auto event = interfaceEvents.at(i); + if (XAie_TraceEvent(&aieDevInst, loc, mod, event, static_cast(i)) != XAIE_OK) + break; + + ++numInterfaceTraceEvents; + + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, event, &phyEvent); + cfgTile->interface_tile_trace_config.traced_events[i] = phyEvent; + } + + // Update config file + { + // Add interface trace control events + // Start + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceStartEvent, &phyEvent); + 
cfgTile->interface_tile_trace_config.start_event = phyEvent; + // Stop + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceEndEvent, &phyEvent); + cfgTile->interface_tile_trace_config.stop_event = phyEvent; + } + + mNumTileTraceEvents[typeInt][numInterfaceTraceEvents]++; + + uint8_t packetType = 4; + XAie_Packet pkt = {0, packetType}; + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if (startType != "layer" || startLayer == UINT_MAX) { + if (XAie_TraceStartEvent(&aieDevInst, loc, mod, interfaceTileTraceStartEvent) != XAIE_OK) + break; + } + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, interfaceTileTraceEndEvent) != XAIE_OK) + break; + + cfgTile->interface_tile_trace_config.packet_type = packetType; + + if (!interfaceEvents.empty()) { + auto channelNum = aie::getChannelNumberFromEvent(interfaceEvents.at(0)); + if (channelNum >= 0) { + if (aie::isInputSet(type, metricSet)) + cfgTile->interface_tile_trace_config.mm2s_channels[channelNum] = channelNum; + else + cfgTile->interface_tile_trace_config.s2mm_channels[channelNum] = channelNum; + } + } + } // Interface tiles + + if (xrt_core::config::get_verbosity() >= static_cast(severity_level::debug)) { + std::stringstream msg; + msg << "Reserved "; + if (type == module_type::core) + msg << numCoreTraceEvents << " core and " << numMemoryTraceEvents << " memory"; + else if (type == module_type::mem_tile) + msg << numMemoryTraceEvents << " memory tile"; + else if (type == module_type::shim) + msg << numInterfaceTraceEvents << " interface tile"; + msg << " trace events for tile (" << +col << "," << +row + << "). 
Adding tile to static database."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + + // Add config info to static database + // NOTE: Do not access cfgTile after this + (db->getStaticInfo()).addAIECfgTile(deviceId, cfgTile); + xrt_core::message::send(severity_level::info, "XRT", "Debugging XDP: after (db->getStaticInfo()).addAIECfgTile"); + } // For tiles + + // Report and store trace events per tile + for (int m = 0; m < static_cast(module_type::num_types); ++m) { + aie::trace::printTraceEventStats(m, mNumTileTraceEvents[m]); + for (int n = 0; n <= NUM_TRACE_EVENTS; ++n) + (db->getStaticInfo()).addAIECoreEventResources(deviceId, n, mNumTileTraceEvents[m][n]); + } + + if (m_trace_start_broadcast) { + xrt_core::message::send(severity_level::info, "XRT", "before build2ChannelBroadcastNetwork"); + build2ChannelBroadcastNetwork(handle, traceStartBroadcastChId1, traceStartBroadcastChId2, interfaceTileTraceStartEvent); + xrt_core::message::send(severity_level::info, "XRT", "before XAie_EventGenerate"); + XAie_EventGenerate(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, interfaceTileTraceStartEvent); + reset2ChannelBroadcastNetwork(handle, traceStartBroadcastChId1, traceStartBroadcastChId2); + } + + auto hwContextSubmit = metadata->getHwContext(); + if (!tranxHandler->submitTransaction(&aieDevInst, hwContextSubmit)) { + xrt_core::message::send(severity_level::error, "XRT", + "Aie trace control-code transaction submission failed."); + return false; + } + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Trace."); + + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceFlush")) { + xrt_core::message::send(severity_level::error, "XRT", + "AIE trace flush transaction initialization failed."); + return false; + } + + // Flush trace by forcing end event + // NOTE: this informs tiles to output remaining packets (even if partial) + for (const auto& loc : traceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, 
XAIE_CORE_MOD, coreTraceEndEvent); + for (const auto& loc : memoryTileTraceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, XAIE_MEM_MOD, memoryTileTraceEndEvent); + for (const auto& loc : interfaceTileTraceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceEndEvent); + + tranxHandler->completeASM(&aieDevInst); + if (!tranxHandler->generateELF()) { + xrt_core::message::send(severity_level::error, "XRT", + "AIE trace flush ELF generation failed."); + return false; + } + + xrt_core::message::send(severity_level::info, "XRT", "Successfully generated ELF for AIE Trace Flush."); + + return true; + } // end setMetricsSettings + + /**************************************************************************** + * Flush trace modules by forcing end events + * + * Trace modules buffer partial packets. At end of run, this needs to be + * flushed using a custom end event. This applies to trace windowing and + * passive tiles like memory and interface. + * + ***************************************************************************/ + void AieTrace_VE2Impl::flushTraceModules() + { + //if (db->infoAvailable(xdp::info::ml_timeline)) { + // db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + // xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + //} + + if (traceFlushLocs.empty() && memoryTileTraceFlushLocs.empty() + && interfaceTileTraceFlushLocs.empty()) + return; + + if (aie::isDebugVerbosity()) { + std::stringstream msg; + msg << "Flushing AIE trace by forcing end event for " << traceFlushLocs.size() + << " AIE tiles, " << memoryTileTraceFlushLocs.size() << " memory tiles, and " + << interfaceTileTraceFlushLocs.size() << " interface tiles."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + + traceFlushLocs.clear(); + memoryTileTraceFlushLocs.clear(); + interfaceTileTraceFlushLocs.clear(); + + xrt_core::message::send(severity_level::info, "XRT", "Before AIE 
trace flush."); + auto hwContext = metadata->getHwContext(); + if (!tranxHandler->submitELF(hwContext)) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace flush control-code submission failed."); + return; + } + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE trace flush."); + } + + /*************************************************************************** + * Build broadcast network using specified channels + ***************************************************************************/ + void AieTrace_VE2Impl::build2ChannelBroadcastNetwork(void *hwCtxImpl, uint8_t broadcastId1, + uint8_t broadcastId2, XAie_Events event) + { + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + if (aiePartitionPt.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace: no partition info for trace-start broadcast; skipping broadcast network."); + return; + } + // Currently, assuming only one Hw Context is alive at a time + // uint8_t startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + uint8_t startCol = 0; + // uint8_t numCols = static_cast(aiePartitionPt.front().second.get("num_cols")); + uint8_t numCols = 36; + const uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + + std::vector maxRowAtCol(startCol + numCols, 0); + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + maxRowAtCol[startCol + col] = std::max(maxRowAtCol[col], (uint8_t)row); + } + + XAie_Events bcastEvent2_PL = static_cast(XAIE_EVENT_BROADCAST_A_0_PL + broadcastId2); + XAie_EventBroadcast(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, broadcastId2, event); + + for (uint8_t col = startCol; col < (startCol + numCols); col++) { + for (uint8_t row = 0; row <= maxRowAtCol[col]; row++) { + module_type tileType = aie::getModuleType(row, metadata->getRowOffset()); + auto 
loc = XAie_TileLoc(col, row); + + // shim tile + if (tileType == module_type::shim) { + // first channel is only used to send north + if (col == startCol) { + XAie_EventBroadcast(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1, event); + } else { + XAie_EventBroadcast(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1, bcastEvent2_PL); + } + if (maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST); + } else { + XAie_EventBroadcastBlockDir( + &aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + + // second channel is only used to send east + if (col != startCol + numCols - 1) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_NORTH); + } else { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_NORTH); + } + } + + // mem tile + else if (tileType == module_type::mem_tile) { + if (maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST); + } else { + XAie_EventBroadcastBlockDir( + &aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + } + + // core tile + else { + if (maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | 
XAIE_EVENT_BROADCAST_EAST); + } else { + XAie_EventBroadcastBlockDir( + &aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + } + } + } + } + + /*************************************************************************** + * Reset using broadcast network on specified channels + ***************************************************************************/ + void AieTrace_VE2Impl::reset2ChannelBroadcastNetwork(void *hwCtxImpl, uint8_t broadcastId1, + uint8_t broadcastId2) + { + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + if (aiePartitionPt.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace: no partition info for trace-start broadcast reset; skipping."); + return; + } + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.back().second.get("start_col")); + uint8_t startCol = 0; + //uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + uint8_t numCols = 36; + const uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + + std::vector maxRowAtCol(startCol + numCols, 0); + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + maxRowAtCol[startCol + col] = std::max(maxRowAtCol[col], (uint8_t)row); + } + + XAie_EventBroadcastReset(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, broadcastId2); + + for (uint8_t col = startCol; col < (startCol + numCols); col++) { + for (uint8_t row = 0; row <= maxRowAtCol[col]; row++) { + module_type tileType = aie::getModuleType(row, metadata->getRowOffset()); + auto loc = XAie_TileLoc(col, row); + + // shim tile + if (tileType == module_type::shim) { + XAie_EventBroadcastReset(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1); + 
XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_ALL); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, + XAIE_EVENT_BROADCAST_ALL); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_B, broadcastId2, + XAIE_EVENT_BROADCAST_ALL); + } + + // mem tile + else if (tileType == module_type::mem_tile) { + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_ALL); + } + + // core tile + else { + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_ALL); + } + } + } + } + + /**************************************************************************** + * Modify events in metric set based on type and channel + ***************************************************************************/ + void AieTrace_VE2Impl::modifyEvents(module_type type, io_type subtype, + const std::string metricSet, uint8_t channel, + std::vector& events) + { + // Only needed for GMIO DMA channel 1 + if ((type != module_type::shim) || (subtype == io_type::PLIO) || (channel == 0)) + return; + + // Check type to minimize replacements + if (aie::isInputSet(type, metricSet)) { + // Input or MM2S + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_START_TASK_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_START_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_FINISHED_BD_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_FINISHED_BD_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_FINISHED_TASK_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_FINISHED_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_STALLED_LOCK_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_STALLED_LOCK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, 
XAIE_EVENT_NOC0_DMA_MM2S_1_STREAM_BACKPRESSURE_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_MEMORY_STARVATION_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_MEMORY_STARVATION_PL); + } else { + // Output or S2MM + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_START_TASK_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_START_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_FINISHED_BD_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_FINISHED_BD_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_FINISHED_TASK_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_FINISHED_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_STALLED_LOCK_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_STALLED_LOCK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_STREAM_STARVATION_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_STREAM_STARVATION_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_MEMORY_BACKPRESSURE_PL); + } + } + + uint32_t AieTrace_VE2Impl::bcIdToEvent(int bcId) + { + return bcId + CORE_BROADCAST_EVENT_BASE; + } + + /**************************************************************************** + * Configure stream switch event ports for monitoring purposes + ***************************************************************************/ + void + AieTrace_VE2Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType loc, + const module_type type, const std::string metricSet, + const uint8_t channel0, const uint8_t channel1, + std::vector& events, aie_cfg_base& config) + { + std::set portSet; + + // Traverse all counters and request monitor ports as needed + for (int i=0; i < events.size(); ++i) { + // Ensure applicable event + auto event = events.at(i); + if (!xdp::aie::isStreamSwitchPortEvent(event)) + continue; + + auto portnum = xdp::aie::getPortNumberFromEvent(event); + uint8_t channelNum = portnum % 2; + 
uint8_t channel = (channelNum == 0) ? channel0 : channel1; + + // New port needed: reserve, configure, and store + if (portSet.find(portnum) == portSet.end()) { + portSet.insert(portnum); + + if (type == module_type::core) { + // AIE Tiles - Monitor DMA channels + bool isMaster = ((portnum >= 2) || (metricSet.find("s2mm") != std::string::npos)); + auto slaveOrMaster = isMaster ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE; + std::string typeName = isMaster ? "S2MM" : "MM2S"; + std::string msg = "Configuring core module stream switch to monitor DMA " + + typeName + " channel " + std::to_string(channelNum); + xrt_core::message::send(severity_level::debug, "XRT", msg); + XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, DMA, channelNum); + + // Record for runtime config file + // NOTE: channel info informs back-end there will be events on that channel + config.port_trace_ids[portnum] = channelNum; + config.port_trace_is_master[portnum] = isMaster; + config.port_trace_names[portnum] = tile.port_names.at(portnum); + + if (isMaster) { + config.s2mm_channels[channelNum] = channelNum; + if (channelNum < tile.s2mm_names.size()) + config.s2mm_names[channelNum] = tile.s2mm_names.at(channelNum); + } + else { + config.mm2s_channels[channelNum] = channelNum; + if (channelNum < tile.mm2s_names.size()) + config.mm2s_names[channelNum] = tile.mm2s_names.at(channelNum); + } + } + // Interface tiles (e.g., PLIO, GMIO) + else if (type == module_type::shim) { + // NOTE: skip configuration of extra ports for tile if stream_ids are not available. + if (portnum >= tile.stream_ids.size()) + continue; + + auto slaveOrMaster = (tile.is_master_vec.at(portnum) == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + std::string typeName = (tile.is_master_vec.at(portnum) == 0) ? 
"slave" : "master"; + uint8_t streamPortId = static_cast(tile.stream_ids.at(portnum)); + + std::string msg = "Configuring interface tile stream switch to monitor " + + typeName + " stream port " + std::to_string(streamPortId); + xrt_core::message::send(severity_level::debug, "XRT", msg); + XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, SOUTH, streamPortId); + + // Record for runtime config file + config.port_trace_ids[portnum] = (tile.subtype == io_type::PLIO) ? portnum : channel; + config.port_trace_is_master[portnum] = (tile.is_master_vec.at(portnum) != 0); + if (tile.subtype == io_type::PLIO) { + if (streamPortId < tile.port_names.size()) + config.port_trace_names[portnum] = tile.port_names.at(streamPortId); + } else { + if (channel < tile.port_names.size()) + config.port_trace_names[portnum] = tile.port_names.at(channel); + } + + if (tile.is_master_vec.at(portnum) == 0) { + config.mm2s_channels[channelNum] = channel; + if (channelNum < tile.mm2s_names.size()) + config.mm2s_names[channelNum] = tile.mm2s_names.at(channelNum); + } + else { + config.s2mm_channels[channelNum] = channel; + if (channelNum < tile.s2mm_names.size()) + config.s2mm_names[channelNum] = tile.s2mm_names.at(channelNum); + } + } + else { + // Memory tiles + auto slaveOrMaster = aie::isInputSet(type, metricSet) ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE; + std::string typeName = (slaveOrMaster == XAIE_STRMSW_MASTER) ? 
"master" : "slave"; + std::string msg = "Configuring memory tile stream switch to monitor " + + typeName + " stream port " + std::to_string(channel); + xrt_core::message::send(severity_level::debug, "XRT", msg); + XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, DMA, channel); + + // Record for runtime config file + config.port_trace_ids[portnum] = channel; + config.port_trace_is_master[portnum] = (slaveOrMaster == XAIE_STRMSW_MASTER); + if (portnum < tile.port_names.size()) + config.port_trace_names[portnum] = tile.port_names.at(portnum); + } + } + } + + if ((type == module_type::shim) && (tile.subtype == io_type::PLIO) && + (portSet.size() < tile.stream_ids.size())) { + std::string msg = "Interface tile " + std::to_string(tile.col) + " has more " + + "PLIO than can be monitored by metric set " + metricSet + ". Please " + + "run again with different trace settings or choose a different set."; + xrt_core::message::send(severity_level::warning, "XRT", msg); + } + + portSet.clear(); + } + + /**************************************************************************** + * Configure combo events (AIE tiles only) + ***************************************************************************/ + std::vector + AieTrace_VE2Impl::configComboEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet, + aie_cfg_base& config) + { + // Only needed for core/memory modules and metric sets that include DMA events + if (!xdp::aie::isDmaSet(metricSet) || ((type != module_type::core) && (type != module_type::dma))) + return {}; + + std::vector comboEvents; + + if (mod == XAIE_CORE_MOD) { + comboEvents.push_back(XAIE_EVENT_COMBO_EVENT_2_CORE); + + // Combo2 = Port_Idle_0 OR Port_Idle_1 OR Port_Idle_2 OR Port_Idle_3 + std::vector events = {XAIE_EVENT_PORT_IDLE_0_CORE, + XAIE_EVENT_PORT_IDLE_1_CORE, XAIE_EVENT_PORT_IDLE_2_CORE, + XAIE_EVENT_PORT_IDLE_3_CORE}; + std::vector opts = {XAIE_EVENT_COMBO_E1_OR_E2, + 
XAIE_EVENT_COMBO_E1_OR_E2, XAIE_EVENT_COMBO_E1_OR_E2}; + + // Capture in config class to report later + for (int i=0; i < NUM_COMBO_EVENT_CONTROL; ++i) + config.combo_event_control[i] = 2; + for (int i=0; i < events.size(); ++i) { + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, events.at(i), &phyEvent); + config.combo_event_input[i] = phyEvent; + } + + // Set events and trigger on OR of events + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO0, opts[0], events[0], events[1]); + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO1, opts[1], events[2], events[3]); + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO2, opts[2], XAIE_EVENT_COMBO_EVENT_0_PL, XAIE_EVENT_COMBO_EVENT_1_PL); + return comboEvents; + } + + // Combo events do not auto-broadcast from core to memory module, + // so let's avoid the complexity and find a different method. + #if 0 + // Below is for memory modules + + // Memory_Combo0 = (Active OR Group_Stream_Switch) + auto comboEvent0 = xaieTile.mem().comboEvent(); + comboEvents.push_back(XAIE_EVENT_COMBO_EVENT_0_MEM); + + std::vector events0; + events0.push_back(XAIE_EVENT_ACTIVE_CORE); + events0.push_back(XAIE_EVENT_GROUP_STREAM_SWITCH_CORE); + std::vector opts0; + opts0.push_back(XAIE_EVENT_COMBO_E1_OR_E2); + + comboEvent0->setEvents(events0, opts0); + + // Memory_Combo1 = (Group_Core_Program_Flow AND Core_Combo2) + auto comboEvent1 = xaieTile.mem().comboEvent(); + comboEvents.push_back(XAIE_EVENT_COMBO_EVENT_1_MEM); + + std::vector events1; + events1.push_back(XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE); + events1.push_back(XAIE_EVENT_COMBO_EVENT_2_CORE); + std::vector opts1; + opts1.push_back(XAIE_EVENT_COMBO_E1_AND_E2); + + comboEvent1->setEvents(events1, opts1); + #else + // Since we're tracing DMA events, start trace right away. + // Specify user event 0 as trace end so we can flush after run. 
+ comboEvents.push_back(XAIE_EVENT_TRUE_MEM); + comboEvents.push_back(XAIE_EVENT_USER_EVENT_0_MEM); + #endif + return comboEvents; + } + + /**************************************************************************** + * Configure group events (core modules only) + ***************************************************************************/ + void AieTrace_VE2Impl::configGroupEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet) + { + // Only needed for core module and metric sets that include DMA events + if (!aie::isDmaSet(metricSet) || (type != module_type::core)) + return; + + // Set masks for group events + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE, + GROUP_CORE_FUNCTIONS_MASK); + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_CORE_STALL_CORE, + GROUP_CORE_STALL_MASK); + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_STREAM_SWITCH_CORE, + GROUP_STREAM_SWITCH_RUNNING_MASK); + } + + /**************************************************************************** + * Configure event selection (memory tiles only) + ***************************************************************************/ + void AieTrace_VE2Impl::configEventSelections(const tile_type& tile, const XAie_LocType loc, const module_type type, + const std::string metricSet, const uint8_t channel0, + const uint8_t channel1, aie_cfg_base& config) + { + if (type != module_type::mem_tile) + return; + + XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; + + if (aie::isDebugVerbosity()) { + std::string typeName = (dmaDir == DMA_S2MM) ? 
"S2MM" : "MM2S"; + std::string msg = "Configuring event selections for DMA " + typeName + " channels " + + std::to_string(channel0) + " and " + std::to_string(channel1); + xrt_core::message::send(severity_level::debug, "XRT", msg); + } + + XAie_EventSelectDmaChannel(&aieDevInst, loc, 0, dmaDir, channel0); + XAie_EventSelectDmaChannel(&aieDevInst, loc, 1, dmaDir, channel1); + + // Record for runtime config file + config.port_trace_ids[0] = channel0; + config.port_trace_ids[1] = channel1; + if (aie::isInputSet(type, metricSet)) { + config.port_trace_is_master[0] = true; + config.port_trace_is_master[1] = true; + config.s2mm_channels[0] = channel0; + if (channel0 < tile.s2mm_names.size()) + config.s2mm_names[0] = tile.s2mm_names[channel0]; + if (channel0 != channel1) { + config.s2mm_channels[1] = channel1; + if (channel1 < tile.s2mm_names.size()) + config.s2mm_names[1] = tile.s2mm_names[channel1]; + } + } + else { + config.port_trace_is_master[0] = false; + config.port_trace_is_master[1] = false; + config.mm2s_channels[0] = channel0; + if (channel0 < tile.mm2s_names.size()) + config.mm2s_names[0] = tile.mm2s_names[channel0]; + if (channel0 != channel1) { + config.mm2s_channels[1] = channel1; + if (channel1 < tile.mm2s_names.size()) + config.mm2s_names[1] = tile.mm2s_names[channel1]; + } + } + } + + /**************************************************************************** + * Configure edge detection events + ***************************************************************************/ + void AieTrace_VE2Impl::configEdgeEvents(const tile_type& tile, + const module_type type, const std::string metricSet, + const XAie_Events event, const uint8_t channel) + { + if ((event != XAIE_EVENT_EDGE_DETECTION_EVENT_0_MEM_TILE) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_1_MEM_TILE) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_0_MEM) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_1_MEM)) + return; + + // Catch memory tiles + if (type == module_type::mem_tile) { + // Event 
is DMA_S2MM_Sel0_stream_starvation or DMA_MM2S_Sel0_stalled_lock + uint16_t eventNum = aie::isInputSet(type, metricSet) + ? EVENT_MEM_TILE_DMA_S2MM_SEL0_STREAM_STARVATION + : EVENT_MEM_TILE_DMA_MM2S_SEL0_STALLED_LOCK; + + // Register Edge_Detection_event_control + // 26 Event 1 triggered on falling edge + // 25 Event 1 triggered on rising edge + // 23:16 Input event for edge event 1 + // 10 Event 0 triggered on falling edge + // 9 Event 0 triggered on rising edge + // 7:0 Input event for edge event 0 + uint32_t edgeEventsValue = (1 << 26) + (eventNum << 16) + (1 << 9) + eventNum; + + xrt_core::message::send(severity_level::debug, "XRT", + "Configuring memory tile edge events to detect rise and fall of event " + + std::to_string(eventNum)); + + auto tileOffset = _XAie_GetTileAddr(&aieDevInst, tile.row, tile.col); + XAie_Write32(&aieDevInst, tileOffset + AIE_OFFSET_EDGE_CONTROL_MEM_TILE, + edgeEventsValue); + return; + } + + // Below is AIE tile support + + // Event is DMA_MM2S_stalled_lock or DMA_S2MM_stream_starvation + uint16_t eventNum = aie::isInputSet(type, metricSet) + ? ((channel == 0) ? EVENT_MEM_DMA_MM2S_0_STALLED_LOCK + : EVENT_MEM_DMA_MM2S_1_STALLED_LOCK) + : ((channel == 0) ? 
EVENT_MEM_DMA_S2MM_0_STREAM_STARVATION + : EVENT_MEM_DMA_S2MM_1_STREAM_STARVATION); + + // Register Edge_Detection_event_control + // 26 Event 1 triggered on falling edge + // 25 Event 1 triggered on rising edge + // 23:16 Input event for edge event 1 + // 10 Event 0 triggered on falling edge + // 9 Event 0 triggered on rising edge + // 7:0 Input event for edge event 0 + uint32_t edgeEventsValue = (1 << 26) + (eventNum << 16) + (1 << 9) + eventNum; + + xrt_core::message::send(severity_level::debug, "XRT", + "Configuring AIE tile edge events to detect rise and fall of event " + + std::to_string(eventNum)); + + auto tileOffset = _XAie_GetTileAddr(&aieDevInst, tile.row, tile.col); + XAie_Write32(&aieDevInst, tileOffset + AIE_OFFSET_EDGE_CONTROL_MEM, + edgeEventsValue); + } + + /**************************************************************************** + * Poll AIE timers (for system timeline only) + ***************************************************************************/ + void AieTrace_VE2Impl::pollTimers(uint64_t index, void* handle) + { + // TODO: Poll timers (needed for system timeline only) + (void)index; + (void)handle; + } + + /**************************************************************************** + * Set AIE device instance + ***************************************************************************/ + void* AieTrace_VE2Impl::setAieDeviceInst(void* handle, uint64_t deviceID) + { + (void)handle; + (void)deviceID; + // XDNA has no shim-provided XAie_DevInst for offload; trace uses hw_context + // in AIETraceOffloadManager / AIETraceOffload (see aie_trace_offload_ve2). 
+ return nullptr; + } + + bool AieTrace_VE2Impl::tileHasFreeRsc(xaiefal::XAieDev* aieDevice, XAie_LocType& loc, const module_type type, const std::string& metricSet) { /* Stub: no resource query is performed here, so the tile is reported as usable. A value must be returned; flowing off the end of a bool function is undefined behavior. NOTE(review): confirm that true is the intended default for this flow. */ (void)aieDevice; (void)loc; (void)type; (void)metricSet; return true; } + void AieTrace_VE2Impl::freeResources() {} + + +} // namespace xdp + +#endif \ No newline at end of file diff --git a/profile/plugin/aie_trace/ve2/aie_trace.h b/profile/plugin/aie_trace/ve2/aie_trace.h index 2ea803a6..bea1c49f 100755 --- a/profile/plugin/aie_trace/ve2/aie_trace.h +++ b/profile/plugin/aie_trace/ve2/aie_trace.h @@ -10,6 +10,10 @@ #include "xdp/profile/plugin/aie_trace/aie_trace_impl.h" #include "xdp/profile/plugin/aie_trace/util/aie_trace_config.h" +#ifndef XDP_VE2_ZOCL_BUILD +#include "xdp/profile/device/common/ve2/ve2_transaction.h" +#endif + namespace xdp { class AieTrace_VE2Impl : public AieTraceImpl { @@ -24,23 +28,17 @@ namespace xdp { void* setAieDeviceInst(void* handle, uint64_t deviceID) override; private: + // Common helpers used by both VE2 flows. uint64_t checkTraceBufSize(uint64_t size) override; bool tileHasFreeRsc(xaiefal::XAieDev* aieDevice, XAie_LocType& loc, const module_type type, const std::string& metricSet); bool setMetricsSettings(uint64_t deviceId, void* handle); - bool configureWindowedEventTrace(xaiefal::XAieDev* aieDevice); - private: typedef XAie_Events EventType; typedef std::vector EventVector; typedef std::vector ValueVector; - XAie_DevInst* aieDevInst = nullptr; - xaiefal::XAieDev* aieDevice = nullptr; // AIE resources - std::vector> perfCounters; - std::vector> streamPorts; - std::map coreEventSets; std::map memoryEventSets; std::map memoryTileEventSets; @@ -73,7 +71,52 @@ namespace xdp { // Keep track of number of events reserved per module and/or tile int mNumTileTraceEvents[static_cast(module_type::num_types)][NUM_TRACE_EVENTS + 1]; - }; + +#ifdef XDP_VE2_ZOCL_BUILD + // VE2 ZOCL flow (FAL-backed). 
+ XAie_DevInst* aieDevInst = nullptr; + xaiefal::XAieDev* aieDevice = nullptr; + std::vector> perfCounters; + std::vector> streamPorts; + + bool configureWindowedEventTrace(xaiefal::XAieDev* aieDevice); + +#else + // VE2 XDNA flow (no FAL resource ownership path). + // Control-code order: AieTraceOffload (initReadTrace), AieTraceMetrics (updateDevice), + // AieTraceFlush (end of setMetricsSettings). + XAie_DevInst aieDevInst = {0}; + std::unique_ptr tranxHandler; + bool m_trace_start_broadcast = false; + EventType memoryModTraceStartEvent; + + bool configureWindowedEventTrace(void* handle); + void build2ChannelBroadcastNetwork(void* handle, uint8_t broadcastId1, + uint8_t broadcastId2, XAie_Events event); + void reset2ChannelBroadcastNetwork(void* handle, uint8_t broadcastId1, + uint8_t broadcastId2); + uint32_t bcIdToEvent(int bcId); + + void configStreamSwitchPorts(const tile_type& tile, const XAie_LocType loc, + const module_type type, const std::string metricSet, + const uint8_t channel0, const uint8_t channel1, + std::vector& events, aie_cfg_base& config); + std::vector configComboEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet, + aie_cfg_base& config); + void configGroupEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet); + void configEventSelections(const tile_type& tile, const XAie_LocType loc, + const module_type type, const std::string metricSet, + const uint8_t channel0, const uint8_t channel1, + aie_cfg_base& config); + void configEdgeEvents(const tile_type& tile, const module_type type, + const std::string metricSet, const XAie_Events event, + const uint8_t channel = 0); + void modifyEvents(module_type type, io_type subtype, const std::string metricSet, + uint8_t channel, std::vector& events); +#endif +}; } // namespace xdp From 2f88ccd209e7d5a4c134a66304f0fb4df70e1851 Mon Sep 17 00:00:00 2001 From: snigupta Date: Wed, 29 Apr 
2026 16:02:07 -0600 Subject: [PATCH 19/19] some fixes Signed-off-by: snigupta --- profile/device/aie_trace/ve2/aie_trace_offload_ve2.h | 10 +++++++--- profile/device/common/ve2/ve2_transaction.h | 1 + profile/plugin/aie_trace/CMakeLists.txt | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h index e538a362..3f1b7950 100644 --- a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h +++ b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h @@ -28,13 +28,17 @@ #include "core/include/xrt/xrt_hw_context.h" #include "xdp/profile/device/tracedefs.h" -#include "xdp/profile/device/common/ve2/ve2_transaction.h" #include "xdp/profile/plugin/aie_trace/aie_trace_metadata.h" +#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) +// Edge ZOCL: xaiengine only (no aie_codegen / ve2_transaction). extern "C" { -#include -#include +#include +#include } +#else +#include "xdp/profile/device/common/ve2/ve2_transaction.h" +#endif namespace xdp { diff --git a/profile/device/common/ve2/ve2_transaction.h b/profile/device/common/ve2/ve2_transaction.h index 7758d239..d84a9a43 100644 --- a/profile/device/common/ve2/ve2_transaction.h +++ b/profile/device/common/ve2/ve2_transaction.h @@ -11,6 +11,7 @@ #include "xrt/xrt_hw_context.h" #include "xrt/xrt_kernel.h" +// XDNA-only: pulled from device offload via aie_trace_offload_ve2.h (ZOCL does not use this file). 
extern "C" { #include #include diff --git a/profile/plugin/aie_trace/CMakeLists.txt b/profile/plugin/aie_trace/CMakeLists.txt index 4c63c908..70e7148a 100644 --- a/profile/plugin/aie_trace/CMakeLists.txt +++ b/profile/plugin/aie_trace/CMakeLists.txt @@ -108,7 +108,7 @@ else() add_dependencies(xdp_aie_trace_plugin_xdna xdp_core xrt_coreutil) target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen aiebu_library_objects) target_link_options(xdp_aie_trace_plugin_xdna PRIVATE -Wl,-Bsymbolic) - target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") + target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 XAIE_FEATURE_MSVC FAL_LINUX="on") target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src ${AIEFAL_DIR} @@ -145,6 +145,7 @@ else() add_dependencies(xdp_aie_trace_plugin xdp_core xrt_coreutil) target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil xaiengine) if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") + # VE2 ZOCL (edge): xaiengine / FAL only — no aie_codegen (see aie_trace_offload_ve2.h). target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_VE2_BUILD=1 XDP_VE2_ZOCL_BUILD=1 FAL_LINUX="on") else() target_compile_definitions(xdp_aie_trace_plugin PRIVATE FAL_LINUX="on")