Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 81 additions & 59 deletions src/runtime_src/core/common/api/xrt_hw_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,28 @@
#define XRT_API_SOURCE // exporting xrt_hwcontext.h
#define XCL_DRIVER_DLL_EXPORT // exporting xrt_xclbin.h
#define XRT_CORE_COMMON_SOURCE // in same dll as coreutil
#include "core/include/xrt/xrt_hw_context.h"
#include "hw_context_int.h"

#include "core/include/xrt/detail/span.h"
#include "core/include/xrt/experimental/xrt_ext.h"

#include "core/common/buffer_dumper.h"
#include "core/common/config_reader.h"
#include "core/common/device.h"
#include "core/common/message.h"
#include "core/common/query_requests.h"
#include "core/common/time.h"
#include "core/common/trace.h"
#include "core/common/usage_metrics.h"
#include "core/common/utils.h"
#include "core/include/xrt/xrt_hw_context.h"
#include "core/include/xrt/experimental/xrt_ext.h"
#include "core/common/shim/hwctx_handle.h"
#include "core/common/xdp/profile.h"

#include "bo_int.h"
#include "elf_int.h"
#include "hw_context_int.h"
#include "xclbin_int.h"

#include "core/common/device.h"
#include "core/common/trace.h"
#include "core/common/shim/hwctx_handle.h"
#include "core/common/usage_metrics.h"
#include "core/common/xdp/profile.h"

#include <cstddef>
#include <ctime>
#include <fstream>
Expand All @@ -42,22 +44,27 @@
#endif

namespace {

// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers)
constexpr std::size_t
operator""_kb(unsigned long long v) { return 1024U * v; } // NOLINT(cppcoreguidelines-avoid-magic-numbers)
operator""_kb(unsigned long long v) { return 1024U * v; }

constexpr double
operator""_mhz(long double v) { return static_cast<double>(v) * 1'000'000.0; } // NOLINT(cppcoreguidelines-avoid-magic-numbers)
operator""_mhz(long double v) { return static_cast<double>(v) * 1'000'000.0; }
// NOLINTEND(cppcoreguidelines-avoid-magic-numbers)

template <typename T>
using span = xrt::detail::span<T>;

// Dumps the content into a file with given size from given offset
static void
dump_bo(const char* buf_map, const std::string& filename, size_t size, size_t offset = 0)
dump_data_to_file(const std::string& fname, span<char> data)
{
std::ofstream ofs(filename, std::ios::out | std::ios::binary);
std::ofstream ofs(fname, std::ios::out | std::ios::binary);
if (!ofs.is_open())
throw std::runtime_error("Failure opening file " + filename + " for writing!");
throw std::runtime_error("Failure opening file " + fname + " for writing!");

const char* buf = buf_map + offset;
ofs.write(buf, static_cast<std::streamsize>(size));
ofs.write(data.data(), static_cast<std::streamsize>(data.size()));
}

// Convert experimental string config to cfg_param_type (uint32 values).
Expand Down Expand Up @@ -111,10 +118,10 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
}, st);
}

// Struct used for initializing and dumping firmware log buffer
// struct uc_log_buffer - initialize and dump firmware log buffer
struct uc_log_buffer
{
size_t m_num_uc; // number of uc's
size_t m_num_uc; // number of uc's
uint32_t m_slot_idx; // index of slot in which these uc's are present
std::unique_ptr<xrt_core::buffer_dumper> m_uc_log_bo_dumper;

Expand All @@ -130,30 +137,31 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
constexpr size_t count_size = 8;

// Get per-uC cert log size from xrt.ini.
// Make sure metadata_size bytes can fit in log size
const auto size_per_uc = xrt_core::config::get_uc_log_size_per_uc_kb() * 1_kb;
if (metadata_size > size_per_uc)
throw std::runtime_error("Bad log buffer size");

// Create a bo to collect all dump data across all uCs
auto bo = xrt_core::bo_int::create_bo(
ctx_hdl, device, (size_per_uc * num_uc), xrt_core::bo_int::use_type::log);

// So make sure for each uC metadata bytes are initialized with
// zero's before configuring
char* buf_map = bo.map<char*>();
if (!buf_map)
auto bo_data = bo.map<char*>();
if (!bo_data)
throw std::runtime_error("Failed to map uc log buffer");

// create map with uC index and log buffer size
// and also initialize metadata bytes to zero for each uC
// Build uc_buf_map and zero metadata for each uC
std::map<uint32_t, size_t> uc_buf_map;
for (uint32_t i = 0; i < num_uc; ++i) {
std::memset(buf_map + (i * size_per_uc), 0, metadata_size);
uc_buf_map[i] = size_per_uc;
std::memset(bo_data + (i * size_per_uc), 0, metadata_size);
uc_buf_map.emplace(i, size_per_uc);
}

// configure the log buffer
// Configure the log buffer
xrt_core::bo_int::config_bo(bo, uc_buf_map, ctx_hdl);

// create buffer dumper object to dump the log buffer contents
xrt_core::buffer_dumper::config config;
// Create buffer dumper object to dump the log buffer contents
xrt_core::buffer_dumper::config config {};
config.chunk_size = size_per_uc;
config.metadata_size = metadata_size;
config.count_offset = count_offset;
Expand Down Expand Up @@ -181,7 +189,7 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
{
m_uc_log_bo_dumper->flush();
}
};
}; // struct uc_log_buffer

std::shared_ptr<xrt_core::device> m_core_device;
xrt::xclbin m_xclbin;
Expand All @@ -190,7 +198,8 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
// Stores the Elf corresponding to each kernel name it contains.
// This map is used for lookup when creating xrt::kernel object
// using kernel name.
std::map<std::string, xrt::elf> m_elf_map;
using elf_map_type = std::map<std::string, xrt::elf>;
elf_map_type m_elf_map;

// mutex to protect the data members of hw_context_impl (elf_map, cfg_param etc.)
mutable std::mutex m_mutex;
Expand All @@ -205,30 +214,54 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
access_mode m_mode;
std::unique_ptr<xrt_core::hwctx_handle> m_hdl;
std::unique_ptr<uc_log_buffer> m_uc_log_buf;
// During preemption, when a hardware context is interrupted L2 memory contents
// are saved to a scratchpad memory allocated specifically for that context.

// During preemption, when a hardware context is interrupted
// L2 memory contents are saved to a scratchpad memory allocated
// specifically for that context.
std::once_flag m_scratchpad_init_flag; // used for thread safe lazy init of scratchpad
xrt::bo m_scratchpad_buf;

std::shared_ptr<xrt_core::usage_metrics::base_logger> m_usage_logger =
xrt_core::usage_metrics::get_usage_metrics_logger();

bool m_elf_flow = false;

// update_elf_map() - Insert all kernels in elf
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor fix
Mismatch b/w function name and the name in comment

// A hwctx can be configured with multiple ELFs. The elf map
// maintains a mapping from kernel name to the ELF containing
// the kernel.
void
create_elf_map(const xrt::elf& elf)
update_from_elf(const xrt::elf& elf)
{
// Store the ELF in the map against all available kernels in the it.
// This will be useful for ELF lookup when creating xrt::kernel object
// using kernel name
// Map each kernel in this elf Store the ELF in the map against all available kernels in the
// it. This will be useful for ELF lookup when creating
// xrt::kernel object using kernel name
for (const auto& kernel : elf.get_kernels()) {
auto kernel_name = kernel.get_name();
std::lock_guard lk(m_mutex);
if (m_elf_map.find(kernel_name) != m_elf_map.end())
throw std::runtime_error("kernel already exists, cannot use this ELF with this hw ctx\n");

m_elf_map.emplace(kernel_name, elf);
}
}

// init_from_elf() - Initialize this hwctx from an ELF
// This function is called if and only if the hwctx has no
// underlying representation (no m_hdl).
void
init_from_elf(const xrt::elf& elf, uint32_t partition_size)
{
m_hdl = m_core_device->create_hw_context(elf, effective_cfg_param(m_cfg_storage), m_mode);
m_partition_size = partition_size;
update_from_elf(elf);
m_elf_flow = true; // ELF flow
m_uc_log_buf = init_uc_log_buf(m_core_device, m_hdl.get()); // create only for first config

// XDP configuration is required only once,
// as all the elfs in one HWCtx will have the same partition size
xrt_core::xdp::update_device(this, true);
}

// Initializes uc log buffer
// Creates log buffer and starts a thread to dump the log buffer contents
// periodically per column to a file
Expand Down Expand Up @@ -308,7 +341,7 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
, m_uc_log_buf(init_uc_log_buf(m_core_device, m_hdl.get()))
, m_elf_flow{true}
{
create_elf_map(elf);
update_from_elf(elf);
}

std::shared_ptr<hw_context_impl>
Expand Down Expand Up @@ -351,27 +384,16 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
add_config(const xrt::elf& elf)
{
auto part_size = elf.get_partition_size();

// create hw ctx handle if not already created
if (!m_hdl) {
m_hdl = m_core_device->create_hw_context(elf, effective_cfg_param(m_cfg_storage), m_mode);
m_partition_size = part_size;
create_elf_map(elf);
m_elf_flow = true; // ELF flow
m_uc_log_buf = init_uc_log_buf(m_core_device, m_hdl.get()); // create only for first config
// XDP configuration is required only once,
// as all the elfs in one HWCtx will have the same partition size
xrt_core::xdp::update_device(this, true);
return;
}

// add ELF only if partition size matches existing configuration
if (m_partition_size != part_size)
if (m_hdl && m_partition_size != part_size)
throw std::runtime_error("can not add config to ctx with different configuration\n");

// Add kernels available in ELF to elf map
// This function throws if kernel with same name is already present
create_elf_map(elf);
std::lock_guard lk(m_mutex);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

init_from_elf() is currently invoked while holding the mutex and performs a driver call to create the HW context. This can potentially extend the lock hold time due to kernel latency.
So I was thinking whether this needs to be under the lock or if it could be done outside the critical section.
Ideally this should be okay as hw ctx creation is not in critical path but just a thought.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rbramand-xilinx . Thanks for the comments. My concern was that add_config() could theoretically be called from different threads, and without locking m_hdl creation could be created under race (multiple handles created). I don't think we have choice but to lock as I did?

if (m_hdl)
// Add ELF kernels to elf map. Throws if kernel already in map
update_from_elf(elf);
else
// Intialize this hwctx, and add kernels
init_from_elf(elf, part_size);
}

void
Expand Down Expand Up @@ -554,7 +576,7 @@ class hw_context_impl : public std::enable_shared_from_this<hw_context_impl>
std::string dump_file_name =
"preemption_scratchpad_mem_" + std::to_string(m_hdl->get_slotidx()) + "_" +
xrt_core::get_timestamp_for_filename() + ".bin";
dump_bo(m_scratchpad_buf.map<char*>(), dump_file_name, m_scratchpad_buf.size());
dump_data_to_file(dump_file_name, {m_scratchpad_buf.map<char*>(), m_scratchpad_buf.size()});

std::string msg {"Dumped scratchpad buffer into file : "};
msg.append(dump_file_name);
Expand Down
Loading