Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/runtime_src/core/common/api/xrt_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ alloc_device_index(unsigned int index)
// Obtain the core device object associated with a device handle.
//
// The lookup is delegated to xrt_core::get_userpf_device() and is
// enclosed in the xrt_device_alloc_handle trace scope.
static std::shared_ptr<xrt_core::device>
alloc_device_handle(xclDeviceHandle dhdl)
{
  XRT_TRACE_POINT_SCOPE(xrt_device_alloc_handle);
  std::shared_ptr<xrt_core::device> core_device = xrt_core::get_userpf_device(dhdl);
  return core_device;
}

Expand Down Expand Up @@ -261,7 +262,7 @@ device::error::
what() const noexcept
{
return handle->m_message.c_str();
}
}

device::
device(unsigned int index)
Expand Down
7 changes: 7 additions & 0 deletions src/runtime_src/core/common/api/xrt_elf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "core/common/config_reader.h"
#include "core/common/error.h"
#include "core/common/message.h"
#include "core/common/trace.h"
#include "core/common/xclbin_parser.h"

#include <boost/interprocess/streams/bufferstream.hpp>
Expand Down Expand Up @@ -512,6 +513,7 @@ void
elf_impl::
parse_sections()
{
XRT_TRACE_POINT_SCOPE(xrt_elf_parse_sections);
if (!is_group_elf()) { // older ELF format without .group sections
init_legacy_section_maps();
finalize_kernels();
Expand Down Expand Up @@ -766,6 +768,7 @@ class elf_aie_gen2 : public elf_impl
void
initialize_section_buffer_maps()
{
XRT_TRACE_POINT_SCOPE(xrt_elf_initialize_section_buffer_maps);
initialize_section_buf_map(patcher_buf_type::ctrltext, m_instr_buf_map);
initialize_section_buf_map(patcher_buf_type::ctrldata, m_ctrl_packet_map);
initialize_save_restore_buf_map();
Expand Down Expand Up @@ -823,6 +826,7 @@ class elf_aie_gen2 : public elf_impl
void
initialize_arg_patchers()
{
XRT_TRACE_POINT_SCOPE(xrt_elf_initialize_arg_patchers);
static constexpr const char* Control_ScratchPad_Symbol = "scratch-pad-ctrl";
static constexpr const char* ctrlpkt_pm_dynsym = "ctrlpkt-pm";

Expand Down Expand Up @@ -1210,6 +1214,7 @@ class elf_aie_gen2_plus : public elf_impl
void
initialize_arg_patchers(const std::map<uint32_t, std::vector<size_t>>& pad_offsets)
{
XRT_TRACE_POINT_SCOPE(xrt_elf_initialize_arg_patchers);
static constexpr auto pad_pattern = xrt_core::elf_patcher::get_section_name(patcher_buf_type::pad);
static constexpr auto ctrlpkt_pattern = xrt_core::elf_patcher::get_section_name(patcher_buf_type::ctrlpkt);

Expand Down Expand Up @@ -1312,6 +1317,7 @@ class elf_aie_gen2_plus : public elf_impl
void
initialize_section_buffer_maps()
{
XRT_TRACE_POINT_SCOPE(xrt_elf_initialize_section_buffer_maps);
std::map<uint32_t, std::vector<size_t>> pad_offsets;
initialize_column_ctrlcode(pad_offsets);
initialize_ctrlpkt_bufs();
Expand Down Expand Up @@ -1426,6 +1432,7 @@ namespace {
static std::shared_ptr<xrt::elf_impl>
create_elf_impl(ELFIO::elfio&& elfio)
{
XRT_TRACE_POINT_SCOPE(xrt_elf_create_impl);
auto os_abi = elfio.get_os_abi();

switch (os_abi) {
Expand Down
12 changes: 8 additions & 4 deletions src/runtime_src/core/common/api/xrt_hw_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
static std::unique_ptr<uc_log_buffer>
init_uc_log_buf(const std::shared_ptr<xrt_core::device>& device, xrt_core::hwctx_handle* ctx_hdl)
{
XRT_TRACE_POINT_SCOPE(xrt_hw_context_init_uc_log_buf);
// If uc log buffer is not supported then this function returns nullptr
static auto uc_log_enabled = xrt_core::config::get_uc_log();
if (!ctx_hdl || !uc_log_enabled)
Expand Down Expand Up @@ -347,6 +348,7 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
void
add_config(const xrt::elf& elf)
{
XRT_TRACE_POINT_SCOPE(xrt_hw_context_add_config);
auto part_size = elf.get_partition_size();

// create hw ctx handle if not already created
Expand Down Expand Up @@ -409,6 +411,7 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
void
dump_uc_log_buffer()
{
XRT_TRACE_POINT_SCOPE(xrt_hw_context_dump_uc_log_buffer);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it the dump() function you want to trace?

if (!m_uc_log_buf)
return;

Expand Down Expand Up @@ -537,6 +540,7 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
void
dump_scratchpad_mem()
{
XRT_TRACE_POINT_SCOPE(xrt_hw_context_dump_scratchpad_mem);
if (m_scratchpad_buf.size() == 0) {
xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_hw_context",
"preemption scratchpad memory is not available");
Expand Down Expand Up @@ -728,23 +732,23 @@ template<typename Cfg>
static std::shared_ptr<hw_context_impl>
alloc_hwctx_from_cfg(const xrt::device& device, const xrt::uuid& xclbin_id, Cfg&& cfg)
{
XRT_TRACE_POINT_SCOPE(xrt_hw_context);
XRT_TRACE_POINT_SCOPE(xrt_hw_context_alloc_from_cfg);
return post_alloc_hwctx(std::make_shared<hw_context_impl>(
device.get_handle(), xclbin_id, hwctx_cfg_storage{std::forward<Cfg>(cfg)}));
}

// Allocate a hw_context_impl for `device` from an xclbin uuid and an
// access mode, then pass the new object through post_alloc_hwctx().
//
// The allocation is covered by exactly one trace scope, named after
// this function, so that it can be told apart from the other
// hw context allocation paths.
static std::shared_ptr<hw_context_impl>
alloc_hwctx_from_mode(const xrt::device& device, const xrt::uuid& xclbin_id, xrt::hw_context::access_mode mode)
{
  XRT_TRACE_POINT_SCOPE(xrt_hw_context_alloc_from_mode);
  return post_alloc_hwctx(std::make_shared<hw_context_impl>(device.get_handle(), xclbin_id, mode));
}

// Allocate a hw_context_impl for `device` from a configuration and an
// access mode only (no xclbin uuid and no ELF is passed), then pass the
// new object through post_alloc_hwctx().
//
// `cfg` is perfectly forwarded into hwctx_cfg_storage.  The allocation
// is covered by exactly one trace scope, named after this function, so
// that it can be told apart from the other hw context allocation paths.
template<typename Cfg>
static std::shared_ptr<hw_context_impl>
alloc_empty_hwctx(const xrt::device& device, Cfg&& cfg, xrt::hw_context::access_mode mode)
{
  XRT_TRACE_POINT_SCOPE(xrt_hw_context_alloc_empty);
  return post_alloc_hwctx(std::make_shared<hw_context_impl>(
    device.get_handle(), hwctx_cfg_storage{std::forward<Cfg>(cfg)}, mode));
}
Expand All @@ -754,7 +758,7 @@ static std::shared_ptr<hw_context_impl>
alloc_hwctx_from_elf(const xrt::device& device, const xrt::elf& elf, Cfg&& cfg,
xrt::hw_context::access_mode mode)
{
XRT_TRACE_POINT_SCOPE(xrt_hw_context);
XRT_TRACE_POINT_SCOPE(xrt_hw_context_alloc_from_elf);
return post_alloc_hwctx(std::make_shared<hw_context_impl>(
device.get_handle(), elf, hwctx_cfg_storage{std::forward<Cfg>(cfg)}, mode));
}
Expand Down
20 changes: 19 additions & 1 deletion src/runtime_src/core/common/api/xrt_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3948,6 +3948,7 @@ get_run_update(xrtRunHandle rhdl)
static std::unique_ptr<xrt::run_impl>
alloc_run(const std::shared_ptr<xrt::kernel_impl>& khdl)
{
XRT_TRACE_POINT_SCOPE(xrt_run_alloc);
return khdl->has_mailbox()
? std::make_unique<xrt::mailbox_impl>(khdl)
: std::make_unique<xrt::run_impl>(khdl);
Expand All @@ -3959,6 +3960,7 @@ alloc_kernel(const std::shared_ptr<device_type>& dev,
const std::string& name,
xrt::kernel::cu_access_mode mode)
{
XRT_TRACE_POINT_SCOPE(xrt_kernel_alloc);
auto amode = hwctx_access_mode(mode); // legacy access mode to hwctx qos
return std::make_shared<xrt::kernel_impl>(dev, xrt::hw_context{dev->get_xrt_device(), xclbin_id, amode}, xrt::module{}, name);
}
Expand All @@ -3968,6 +3970,7 @@ alloc_kernel_from_ctx(const std::shared_ptr<device_type>& dev,
const xrt::hw_context& hwctx,
const std::string& name)
{
XRT_TRACE_POINT_SCOPE(xrt_kernel_alloc_from_ctx);
// Delegating constructor with no module
return std::make_shared<xrt::kernel_impl>(dev, hwctx, xrt::module{}, name);
}
Expand All @@ -3978,6 +3981,7 @@ alloc_kernel_from_module(const std::shared_ptr<device_type>& dev,
const xrt::module& module,
const std::string& name)
{
XRT_TRACE_POINT_SCOPE(xrt_kernel_alloc_from_module);
return std::make_shared<xrt::kernel_impl>(dev, hwctx, module, name);
}

Expand All @@ -3986,6 +3990,7 @@ alloc_kernel_from_name(const std::shared_ptr<device_type>& dev,
const xrt::hw_context& hwctx,
const std::string& name)
{
XRT_TRACE_POINT_SCOPE(xrt_kernel_alloc_from_name);
return std::make_shared<xrt::kernel_impl>(dev, hwctx, name);
}

Expand Down Expand Up @@ -4238,6 +4243,7 @@ void
run::
set_dtrace_control_file(const std::string& path)
{
XRT_TRACE_POINT_SCOPE(xrt_run_set_dtrace_control_file);
handle->set_dtrace_control_file(path);
}

Expand Down Expand Up @@ -4324,20 +4330,23 @@ void
run::
set_arg_at_index(int index, const void* value, size_t bytes)
{
XRT_TRACE_POINT_SCOPE(xrt_run_set_arg_value);
handle->set_arg_at_index(index, value, bytes);
}

// Set the argument at `index` to the buffer object `glb`.
//
// Forwards to the implementation object's set_arg_at_index() inside the
// xrt_run_set_arg_bo trace scope.
void
run::
set_arg_at_index(int index, const xrt::bo& glb)
{
  XRT_TRACE_POINT_SCOPE(xrt_run_set_arg_bo);
  auto& impl = *handle;
  impl.set_arg_at_index(index, glb);
}

// Update the argument at `index` with `bytes` bytes read from `value`.
//
// The updater is looked up with get_run_update() for this run's
// implementation object; lookup and update both happen inside the
// xrt_run_update_arg_value trace scope.
void
run::
update_arg_at_index(int index, const void* value, size_t bytes)
{
  XRT_TRACE_POINT_SCOPE(xrt_run_update_arg_value);
  get_run_update(handle.get())->update_arg_at_index(index, value, bytes);
}
Expand All @@ -4346,6 +4355,7 @@ void
run::
update_arg_at_index(int index, const xrt::bo& glb)
{
XRT_TRACE_POINT_SCOPE(xrt_run_update_arg_bo);
auto upd = get_run_update(handle.get());
upd->update_arg_at_index(index, glb);
}
Expand Down Expand Up @@ -4512,9 +4522,16 @@ set_read_range(const xrt::kernel& kernel, uint32_t start, uint32_t size)
ip->m_readrange = {start, size};
}

static std::shared_ptr<runlist_impl>
alloc_runlist(xrt::hw_context hwctx)
{
XRT_TRACE_POINT_SCOPE(xrt_runlist_alloc);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are we measuring? The constructor is trivial, with zero overhead. I understand why alloc_runlist was created: to make it possible to measure the time it takes to create runlist_impl, which does all of its initialization in its initializer list. That makes sense if we truly want to measure the construction time, but do we?

The function is kind of like an observer of hwctx, where the sink is really runlist_impl ctor. Shouldn't alloc_runlist take hwctx by const ref and leave the copying to the runlist_impl ctor?

return std::make_shared<runlist_impl>(std::move(hwctx));
}

// Construct a runlist bound to `hwctx`.
//
// The implementation object is created through alloc_runlist() rather
// than by calling std::make_shared directly, so that its construction
// happens inside that helper's trace scope.  A constructor can carry
// only one mem-initializer list, so this is the sole initializer.
runlist::
runlist(const xrt::hw_context& hwctx)
  : detail::pimpl<runlist_impl>(alloc_runlist(hwctx))
{}

runlist::
Expand All @@ -4527,6 +4544,7 @@ void
runlist::
add(const xrt::run& run)
{
XRT_TRACE_POINT_SCOPE(xrt_runlist_add);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we measuring here and not in the && overload? If we want to trace anything here, it should be runlist_impl::add().

if (!handle)
throw xrt_core::error("cannot add run object to uninitialized runlist");

Expand Down
32 changes: 30 additions & 2 deletions src/runtime_src/core/common/api/xrt_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#define XRT_CORE_COMMON_SOURCE // in same dll as core_common
#include "core/common/config_reader.h"
#include "core/common/message.h"
#include "core/common/trace.h"
#include "xrt/experimental/xrt_module.h"
#include "xrt/experimental/xrt_aie.h"
#include "xrt/experimental/xrt_elf.h"
Expand Down Expand Up @@ -374,6 +375,7 @@ class module_run_aie_gen2 : public module_run
void
create_ctrlpkt_buf(const xrt::bo& ctrlpkt_bo)
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_create_ctrlpkt_buf);
if (ctrlpkt_bo.size() == 0) {
XRT_DEBUGF("ctrpkt buf is empty\n");
return;
Expand All @@ -394,6 +396,7 @@ class module_run_aie_gen2 : public module_run
void
create_ctrlpkt_pm_bufs()
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_create_ctrlpkt_pm_bufs);
for (const auto& [key, buf] : m_config.ctrlpkt_pm_bufs) {
m_ctrlpkt_pm_bos[key] = xbi::create_bo(m_hwctx, buf.size(), xbi::use_type::ctrlpkt);
fill_bo_with_data(m_ctrlpkt_pm_bos.at(key), buf);
Expand All @@ -403,6 +406,7 @@ class module_run_aie_gen2 : public module_run
void
create_instruction_buf()
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_create_instruction_buf);
XRT_DEBUGF("-> module_run_aie_gen2::create_instruction_buf()\n");

// Get instruction buffer from config
Expand Down Expand Up @@ -573,6 +577,7 @@ class module_run_aie_gen2 : public module_run
void
patch(const std::string& argnm, size_t index, uint64_t value) override
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_patch_arg);
bool patched = false;

// patch control-packet buffer
Expand All @@ -595,6 +600,7 @@ class module_run_aie_gen2 : public module_run
void
sync_if_dirty() override
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_sync_if_dirty);
if (!m_dirty) {
if (!m_first_patch)
return;
Expand Down Expand Up @@ -808,6 +814,7 @@ class module_run_aie_gen2_plus : public module_run
void
initialize_dtrace_buf(const std::string& run_level_ct_file = "")
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_initialize_dtrace_buf);
if (!create_dtrace_util(run_level_ct_file))
return; // create failure

Expand All @@ -819,6 +826,7 @@ class module_run_aie_gen2_plus : public module_run
void
set_dtrace_control_file(const std::string& path) override
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_set_dtrace_control_file);
initialize_dtrace_buf(path);
// Only update dtrace addresses; instruction buffer layout is unchanged.
update_column_bo_dtrace_addresses();
Expand All @@ -832,6 +840,7 @@ class module_run_aie_gen2_plus : public module_run
void
create_ctrlpkt_bufs()
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_create_ctrlpkt_bufs);
if (m_config.ctrlpkt_bufs.empty())
return; // older ELFs have ctrlpkt in pad section

Expand Down Expand Up @@ -908,6 +917,7 @@ class module_run_aie_gen2_plus : public module_run
void
create_instruction_buffer()
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_create_instruction_buffer);
const auto& data = m_config.ctrlcodes;

// Create bo with combined size of all ctrlcodes
Expand All @@ -931,6 +941,7 @@ class module_run_aie_gen2_plus : public module_run
void
fill_column_bo_address()
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_fill_column_bo_address);
const auto& ctrlcodes = m_config.ctrlcodes;

m_column_bo_address.clear();
Expand Down Expand Up @@ -1015,6 +1026,7 @@ class module_run_aie_gen2_plus : public module_run
void
patch(const std::string& argnm, size_t index, uint64_t value) override
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_patch_arg);
bool patched = false;

// patch instruction buffer
Expand All @@ -1040,6 +1052,7 @@ class module_run_aie_gen2_plus : public module_run
void
sync_if_dirty() override
{
XRT_TRACE_POINT_SCOPE(xrt_module_run_sync_if_dirty);
if (!m_dirty) {
if (!m_first_patch)
return;
Expand Down Expand Up @@ -1124,14 +1137,28 @@ class module_run_aie_gen2_plus : public module_run
}
};

// Create a module_impl from `elf`.
//
// Construction is done in this helper so that it is enclosed in the
// xrt_module_alloc_impl trace scope.
static std::shared_ptr<module_impl>
alloc_module_impl(const xrt::elf& elf)
{
  XRT_TRACE_POINT_SCOPE(xrt_module_alloc_impl);
  std::shared_ptr<module_impl> impl = std::make_shared<module_impl>(elf);
  return impl;
}

// Create a module_impl from `elf` and `name`.
//
// `name` is taken by value and moved into the module_impl constructor.
// Construction is enclosed in the xrt_module_alloc_impl_with_name trace
// scope.
static std::shared_ptr<module_impl>
alloc_module_impl(const xrt::elf& elf, std::string name)
{
  XRT_TRACE_POINT_SCOPE(xrt_module_alloc_impl_with_name);
  std::shared_ptr<module_impl> impl = std::make_shared<module_impl>(elf, std::move(name));
  return impl;
}

// Construct a module from `elf`.
//
// The implementation object is created through alloc_module_impl()
// rather than by calling std::make_shared directly, so that its
// construction happens inside that helper's trace scope.  A constructor
// can carry only one mem-initializer list, so this is the sole
// initializer.
module::
module(const xrt::elf& elf)
  : detail::pimpl<module_impl>(alloc_module_impl(elf))
{}

// Construct a module from `elf` and `name`.
//
// The implementation object is created through the two-argument
// alloc_module_impl() rather than by calling std::make_shared directly,
// so that its construction happens inside that helper's trace scope.
// A constructor can carry only one mem-initializer list, so this is the
// sole initializer.
module::
module(const xrt::elf& elf, const std::string& name)
  : detail::pimpl<module_impl>(alloc_module_impl(elf, name))
{}

xrt::hw_context
Expand Down Expand Up @@ -1164,6 +1191,7 @@ xrt::module
create_module_run(const xrt::elf& elf, const xrt::hw_context& hwctx,
uint32_t ctrl_code_id, const xrt::bo& ctrlpkt_bo)
{
XRT_TRACE_POINT_SCOPE(xrt_module_run);
auto platform = elf.get_platform();
switch (platform) {
case xrt::elf::platform::aie2p:
Expand Down
Loading
Loading