diff --git a/src/runtime_src/core/common/api/xrt_hw_context.cpp b/src/runtime_src/core/common/api/xrt_hw_context.cpp index ece104ff6b1..6a00346336e 100644 --- a/src/runtime_src/core/common/api/xrt_hw_context.cpp +++ b/src/runtime_src/core/common/api/xrt_hw_context.cpp @@ -6,26 +6,28 @@ #define XRT_API_SOURCE // exporting xrt_hwcontext.h #define XCL_DRIVER_DLL_EXPORT // exporting xrt_xclbin.h #define XRT_CORE_COMMON_SOURCE // in same dll as coreutil +#include "core/include/xrt/xrt_hw_context.h" +#include "hw_context_int.h" + +#include "core/include/xrt/detail/span.h" +#include "core/include/xrt/experimental/xrt_ext.h" #include "core/common/buffer_dumper.h" #include "core/common/config_reader.h" +#include "core/common/device.h" #include "core/common/message.h" #include "core/common/query_requests.h" #include "core/common/time.h" +#include "core/common/trace.h" +#include "core/common/usage_metrics.h" #include "core/common/utils.h" -#include "core/include/xrt/xrt_hw_context.h" -#include "core/include/xrt/experimental/xrt_ext.h" +#include "core/common/shim/hwctx_handle.h" +#include "core/common/xdp/profile.h" + #include "bo_int.h" #include "elf_int.h" -#include "hw_context_int.h" #include "xclbin_int.h" -#include "core/common/device.h" -#include "core/common/trace.h" -#include "core/common/shim/hwctx_handle.h" -#include "core/common/usage_metrics.h" -#include "core/common/xdp/profile.h" - #include #include #include @@ -42,22 +44,27 @@ #endif namespace { + +// NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers) constexpr std::size_t -operator""_kb(unsigned long long v) { return 1024U * v; } // NOLINT(cppcoreguidelines-avoid-magic-numbers) +operator""_kb(unsigned long long v) { return 1024U * v; } constexpr double -operator""_mhz(long double v) { return static_cast(v) * 1'000'000.0; } // NOLINT(cppcoreguidelines-avoid-magic-numbers) +operator""_mhz(long double v) { return static_cast(v) * 1'000'000.0; } +// NOLINTEND(cppcoreguidelines-avoid-magic-numbers) + +template +using span = xrt::detail::span; // Dumps the content into a file with given size from given offset static void -dump_bo(const char* buf_map, const std::string& filename, size_t size, size_t offset = 0) +dump_data_to_file(const std::string& fname, span data) { - std::ofstream ofs(filename, std::ios::out | std::ios::binary); + std::ofstream ofs(fname, std::ios::out | std::ios::binary); if (!ofs.is_open()) - throw std::runtime_error("Failure opening file " + filename + " for writing!"); + throw std::runtime_error("Failure opening file " + fname + " for writing!"); - const char* buf = buf_map + offset; - ofs.write(buf, static_cast(size)); + ofs.write(data.data(), static_cast(data.size())); } // Convert experimental string config to cfg_param_type (uint32 values). @@ -111,10 +118,10 @@ class hw_context_impl : public std::enable_shared_from_this }, st); } - // Struct used for initializing and dumping firmware log buffer + // struct uc_log_buffer - initialize and dump firmware log buffer struct uc_log_buffer { - size_t m_num_uc; // number of uc's + size_t m_num_uc; // number of uc's uint32_t m_slot_idx; // index of slot in which these uc's are present std::unique_ptr m_uc_log_bo_dumper; @@ -130,30 +137,31 @@ class hw_context_impl : public std::enable_shared_from_this constexpr size_t count_size = 8; // Get per-uC cert log size from xrt.ini. + // Make sure metadata_size bytes can fit in log size const auto size_per_uc = xrt_core::config::get_uc_log_size_per_uc_kb() * 1_kb; + if (metadata_size > size_per_uc) + throw std::runtime_error("Bad log buffer size"); + // Create a bo to collect all dump data across all uCs auto bo = xrt_core::bo_int::create_bo( ctx_hdl, device, (size_per_uc * num_uc), xrt_core::bo_int::use_type::log); - // So make sure for each uC metadata bytes are initialized with - // zero's before configuring - char* buf_map = bo.map(); - if (!buf_map) + auto bo_data = bo.map(); + if (!bo_data) throw std::runtime_error("Failed to map uc log buffer"); - // create map with uC index and log buffer size - // and also initialize metadata bytes to zero for each uC + // Build uc_buf_map and zero metadata for each uC std::map uc_buf_map; for (uint32_t i = 0; i < num_uc; ++i) { - std::memset(buf_map + (i * size_per_uc), 0, metadata_size); - uc_buf_map[i] = size_per_uc; + std::memset(bo_data + (i * size_per_uc), 0, metadata_size); + uc_buf_map.emplace(i, size_per_uc); } - // configure the log buffer + // Configure the log buffer xrt_core::bo_int::config_bo(bo, uc_buf_map, ctx_hdl); - // create buffer dumper object to dump the log buffer contents - xrt_core::buffer_dumper::config config; + // Create buffer dumper object to dump the log buffer contents + xrt_core::buffer_dumper::config config {}; config.chunk_size = size_per_uc; config.metadata_size = metadata_size; config.count_offset = count_offset; @@ -181,7 +189,7 @@ class hw_context_impl : public std::enable_shared_from_this { m_uc_log_bo_dumper->flush(); } - }; + }; // struct uc_log_buffer std::shared_ptr m_core_device; xrt::xclbin m_xclbin; @@ -190,7 +198,8 @@ class hw_context_impl : public std::enable_shared_from_this // Stores the Elf corresponding to each kernel name it contains. // This map is used for lookup when creating xrt::kernel object // using kernel name. - std::map m_elf_map; + using elf_map_type = std::map; + elf_map_type m_elf_map; // mutex to protect the data members of hw_context_impl (elf_map, cfg_param etc.) mutable std::mutex m_mutex; @@ -205,23 +214,30 @@ class hw_context_impl : public std::enable_shared_from_this access_mode m_mode; std::unique_ptr m_hdl; std::unique_ptr m_uc_log_buf; - // During preemption, when a hardware context is interrupted L2 memory contents - // are saved to a scratchpad memory allocated specifically for that context. + + // During preemption, when a hardware context is interrupted + // L2 memory contents are saved to a scratchpad memory allocated + // specifically for that context. std::once_flag m_scratchpad_init_flag; // used for thread safe lazy init of scratchpad xrt::bo m_scratchpad_buf; + std::shared_ptr m_usage_logger = xrt_core::usage_metrics::get_usage_metrics_logger(); + bool m_elf_flow = false; + // update_from_elf() - Insert all kernels in elf + // A hwctx can be configured with multiple ELFs. The elf map + // maintains a mapping from kernel name to the ELF containing + // the kernel. void - create_elf_map(const xrt::elf& elf) + update_from_elf(const xrt::elf& elf) { - // Store the ELF in the map against all available kernels in the it. - // This will be useful for ELF lookup when creating xrt::kernel object - // using kernel name + // Map each kernel in this elf Store the ELF in the map against all available kernels in the + // it. This will be useful for ELF lookup when creating + // xrt::kernel object using kernel name for (const auto& kernel : elf.get_kernels()) { auto kernel_name = kernel.get_name(); - std::lock_guard lk(m_mutex); if (m_elf_map.find(kernel_name) != m_elf_map.end()) throw std::runtime_error("kernel already exists, cannot use this ELF with this hw ctx\n"); @@ -229,6 +245,23 @@ class hw_context_impl : public std::enable_shared_from_this } } + // init_from_elf() - Initialize this hwctx from an ELF + // This function is called if and only if the hwctx has no + // underlying representation (no m_hdl). + void + init_from_elf(const xrt::elf& elf, uint32_t partition_size) + { + m_hdl = m_core_device->create_hw_context(elf, effective_cfg_param(m_cfg_storage), m_mode); + m_partition_size = partition_size; + update_from_elf(elf); + m_elf_flow = true; // ELF flow + m_uc_log_buf = init_uc_log_buf(m_core_device, m_hdl.get()); // create only for first config + + // XDP configuration is required only once, + // as all the elfs in one HWCtx will have the same partition size + xrt_core::xdp::update_device(this, true); + } + // Initializes uc log buffer // Creates log buffer and starts a thread to dump the log buffer contents // periodically per column to a file @@ -308,7 +341,7 @@ class hw_context_impl : public std::enable_shared_from_this , m_uc_log_buf(init_uc_log_buf(m_core_device, m_hdl.get())) , m_elf_flow{true} { - create_elf_map(elf); + update_from_elf(elf); } std::shared_ptr @@ -352,26 +385,19 @@ class hw_context_impl : public std::enable_shared_from_this { auto part_size = elf.get_partition_size(); - // create hw ctx handle if not already created - if (!m_hdl) { - m_hdl = m_core_device->create_hw_context(elf, effective_cfg_param(m_cfg_storage), m_mode); - m_partition_size = part_size; - create_elf_map(elf); - m_elf_flow = true; // ELF flow - m_uc_log_buf = init_uc_log_buf(m_core_device, m_hdl.get()); // create only for first config - // XDP configuration is required only once, - // as all the elfs in one HWCtx will have the same partition size - xrt_core::xdp::update_device(this, true); - return; - } - - // add ELF only if partition size matches existing configuration - if (m_partition_size != part_size) + // The lock must be held here to prevent race on m_hdl + // in case it is created in init_from_elf(). The lock + // also protects updated data structures. + std::lock_guard lk(m_mutex); + if (m_hdl && m_partition_size != part_size) throw std::runtime_error("can not add config to ctx with different configuration\n"); - // Add kernels available in ELF to elf map - // This function throws if kernel with same name is already present - create_elf_map(elf); + if (m_hdl) + // Add ELF kernels to elf map. Throws if kernel already in map + update_from_elf(elf); + else + // Intialize this hwctx, and add kernels + init_from_elf(elf, part_size); } void @@ -554,7 +580,7 @@ class hw_context_impl : public std::enable_shared_from_this std::string dump_file_name = "preemption_scratchpad_mem_" + std::to_string(m_hdl->get_slotidx()) + "_" + xrt_core::get_timestamp_for_filename() + ".bin"; - dump_bo(m_scratchpad_buf.map(), dump_file_name, m_scratchpad_buf.size()); + dump_data_to_file(dump_file_name, {m_scratchpad_buf.map(), m_scratchpad_buf.size()}); std::string msg {"Dumped scratchpad buffer into file : "}; msg.append(dump_file_name);