From f6be41e217cd49b6c0d0e1b165dbdb0a5d1f8fcd Mon Sep 17 00:00:00 2001 From: Jyotheeswar Ganne Date: Mon, 13 Apr 2026 21:50:18 -0600 Subject: [PATCH 1/4] Simplify AIE Dtrace CT file generation for bandwidth metrics This change simplifies the CT file generation for bandwidth monitoring: - Add self-contained CT generation that configures hardware directly via write_reg commands in the begin block (stream switch ports and perf counters) - Remove dependency on setMetricsSettings() - return early after nop.elf - Fix VE2 shim tile DMA port mappings: - S2MM ch0: master South1, port index 3 - S2MM ch1: master South3, port index 5 - MM2S ch0: slave South3, port index 5 - MM2S ch1: slave South7, port index 9 - Fix performance counter control register addresses: - Performance_Ctrl0 at 0x00031000 for counters 0,1 - Performance_Ctrl2 at 0x0003100C for counters 2,3 - Fix PORT_RUNNING event numbers for aie2ps shim tiles: - Port_Running_0 = 134 (0x86) - Port_Running_1 = 138 (0x8A) - Port_Running_2 = 142 (0x8E) - Port_Running_3 = 146 (0x92) - Use relative column indices for hardware configuration - Generate per-UC grouped counter metadata (compatible with vaianalyze) - Remove inline comments from write_reg/read_reg actions Post-processing filters bandwidth data by metric type: - read_bandwidth: S2MM counters (0, 1) - write_bandwidth: MM2S counters (2, 3) - ddr_bandwidth: all 4 counters Made-with: Cursor --- .../aie_dtrace/ve2/aie_dtrace_ct_writer.cpp | 411 +++++++++++++++++- .../aie_dtrace/ve2/aie_dtrace_ct_writer.h | 100 +++++ .../plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp | 27 +- 3 files changed, 530 insertions(+), 8 deletions(-) diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp index 51bc15b6..02dfe016 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp @@ -7,6 +7,7 @@ #include 
"xdp/profile/plugin/aie_profile/aie_profile_metadata.h" #include "xdp/profile/database/database.h" #include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/database/static_info/aie_util.h" #include "core/common/message.h" @@ -15,10 +16,12 @@ #include #include #include +#include #include #include #include -#include + +#include namespace xdp { @@ -628,5 +631,411 @@ bool AieDtraceCTWriter::writeCTFile(const std::vector& asmFiles, return true; } +std::vector AieDtraceCTWriter::getShimTileColumns(void* hwctx) +{ + std::vector columns; + + if (!hwctx) { + xrt_core::message::send(severity_level::debug, "XRT", + "AIE dtrace: No hwctx provided for shim column discovery"); + return columns; + } + + try { + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwctx); + if (aiePartitionPt.empty()) { + xrt_core::message::send(severity_level::debug, "XRT", + "AIE dtrace: No partition info available"); + return columns; + } + + uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + + // Return relative columns (0, 1, 2, ...) for hardware configuration + for (uint8_t i = 0; i < numCols; ++i) { + columns.push_back(i); + } + + std::stringstream msg; + msg << "AIE dtrace: Found " << static_cast(numCols) << " shim columns (relative: 0-" + << static_cast(numCols - 1) << ")"; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + catch (const std::exception& e) { + std::stringstream msg; + msg << "AIE dtrace: Error getting shim columns: " << e.what(); + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + } + + return columns; +} + +std::vector AieDtraceCTWriter::getBandwidthCounterConfigs() +{ + // VE2 shim tile DMA port indices for stream switch event monitoring + // These port indices are architecture-specific and map to the physical + // stream switch ports that connect to the DMA channels. 
+ // + // For VE2 shim tiles: + // - S2MM (master) ports: Stream switch master port feeds data to DMA S2MM + // - MM2S (slave) ports: Stream switch slave port receives data from DMA MM2S + // + // Port encoding in Stream_Switch_Event_Port_Selection register: + // - Bits [4:0]: Port index + // - Bit [5]: 0 = slave, 1 = master + // + // VE2 shim tile port mapping: + // - S2MM ch0: master South1 => port index 3 + // - S2MM ch1: master South3 => port index 5 + // - MM2S ch0: slave South3 => port index 5 + // - MM2S ch1: slave South7 => port index 9 + // + // counterNumber, channel, dmaPortIndex, isMaster, direction + return { + {0, 0, 3, true, "input"}, // Counter 0: S2MM Ch0 (master South1) - read_bandwidth + {1, 1, 5, true, "input"}, // Counter 1: S2MM Ch1 (master South3) - read_bandwidth + {2, 0, 5, false, "output"}, // Counter 2: MM2S Ch0 (slave South3) - write_bandwidth + {3, 1, 9, false, "output"} // Counter 3: MM2S Ch1 (slave South7) - write_bandwidth + }; +} + +std::vector AieDtraceCTWriter::generateStreamSwitchPortConfig(uint8_t column) +{ + std::vector writes; + + uint64_t tileAddress = (static_cast(column) << columnShift) | + (static_cast(SHIM_ROW) << rowShift); + uint64_t regAddr = tileAddress + STREAM_SWITCH_EVENT_PORT_SEL_OFFSET; + + auto configs = getBandwidthCounterConfigs(); + + // Build the register value: 4 ports packed into 32 bits, 8 bits per port + // Each port: bits [4:0] = DMA port index, bit [5] = slave(0)/master(1) + uint32_t regValue = 0; + for (size_t i = 0; i < configs.size() && i < PORTS_PER_REGISTER; ++i) { + const auto& cfg = configs[i]; + uint8_t slaveOrMaster = cfg.isMaster ? 
1 : 0; + uint8_t bitOffset = static_cast(i) * 8; + regValue |= (static_cast(cfg.dmaPortIndex) << bitOffset) + | (static_cast(slaveOrMaster) << (bitOffset + 5)); + } + + std::stringstream comment; + comment << "SS port sel @ col " << static_cast(column) + << " (S2MM ch0,ch1; MM2S ch0,ch1)"; + + CTRegisterWrite write; + write.address = regAddr; + write.value = regValue; + write.comment = comment.str(); + writes.push_back(write); + + return writes; +} + +std::vector AieDtraceCTWriter::generatePerfCounterConfig(uint8_t column) +{ + std::vector writes; + + uint64_t tileAddress = (static_cast(column) << columnShift) | + (static_cast(SHIM_ROW) << rowShift); + + // Performance control register addresses (aie2ps_pl_module): + // Performance_Ctrl0: 0x00031000 - Counters 0,1 start/stop events + // Performance_Ctrl2: 0x0003100C - Counters 2,3 start/stop events + constexpr uint64_t PERF_CTRL0_OFFSET = 0x00031000; + constexpr uint64_t PERF_CTRL2_OFFSET = 0x0003100C; + + // PORT_RUNNING events for byte counting (aie2ps shim tile events) + // Port_Running_N events: 134, 138, 142, 146 (decimal) + constexpr uint8_t PORT_RUNNING_0_PL_EVENT = 0x86; // 134 + constexpr uint8_t PORT_RUNNING_1_PL_EVENT = 0x8A; // 138 + constexpr uint8_t PORT_RUNNING_2_PL_EVENT = 0x8E; // 142 + constexpr uint8_t PORT_RUNNING_3_PL_EVENT = 0x92; // 146 + + uint8_t startEvents[4] = { + PORT_RUNNING_0_PL_EVENT, + PORT_RUNNING_1_PL_EVENT, + PORT_RUNNING_2_PL_EVENT, + PORT_RUNNING_3_PL_EVENT + }; + + // Performance_Ctrl0: counters 0 and 1 + // Bit layout: [31:24]=Cnt1_Stop, [23:16]=Cnt1_Start, [15:8]=Cnt0_Stop, [7:0]=Cnt0_Start + { + uint32_t regValue = 0; + regValue |= (static_cast(startEvents[0]) & 0xFF) << 0; // Cnt0_Start_Event + regValue |= (static_cast(startEvents[0]) & 0xFF) << 8; // Cnt0_Stop_Event + regValue |= (static_cast(startEvents[1]) & 0xFF) << 16; // Cnt1_Start_Event + regValue |= (static_cast(startEvents[1]) & 0xFF) << 24; // Cnt1_Stop_Event + + CTRegisterWrite write; + write.address = 
tileAddress + PERF_CTRL0_OFFSET; + write.value = regValue; + write.comment = "PerfCtrl0 @ col " + std::to_string(column) + " (ctr0,ctr1)"; + writes.push_back(write); + } + + // Performance_Ctrl2: counters 2 and 3 + // Bit layout: [31:24]=Cnt3_Stop, [23:16]=Cnt3_Start, [15:8]=Cnt2_Stop, [7:0]=Cnt2_Start + { + uint32_t regValue = 0; + regValue |= (static_cast(startEvents[2]) & 0xFF) << 0; // Cnt2_Start_Event + regValue |= (static_cast(startEvents[2]) & 0xFF) << 8; // Cnt2_Stop_Event + regValue |= (static_cast(startEvents[3]) & 0xFF) << 16; // Cnt3_Start_Event + regValue |= (static_cast(startEvents[3]) & 0xFF) << 24; // Cnt3_Stop_Event + + CTRegisterWrite write; + write.address = tileAddress + PERF_CTRL2_OFFSET; + write.value = regValue; + write.comment = "PerfCtrl2 @ col " + std::to_string(column) + " (ctr2,ctr3)"; + writes.push_back(write); + } + + return writes; +} + +std::vector AieDtraceCTWriter::generateBandwidthCounters( + const std::vector& shimColumns) +{ + std::vector counters; + auto configs = getBandwidthCounterConfigs(); + + for (uint8_t column : shimColumns) { + for (const auto& cfg : configs) { + CTCounterInfo info; + info.column = column; + info.row = SHIM_ROW; + info.counterNumber = cfg.counterNumber; + info.module = "interface_tile"; + info.address = calculateCounterAddress(column, SHIM_ROW, cfg.counterNumber, "interface_tile"); + info.metricSet = "ddr_bandwidth"; + info.portDirection = cfg.direction; + counters.push_back(info); + } + } + + return counters; +} + +bool AieDtraceCTWriter::writeBandwidthCTFile( + const std::vector& asmFiles, + const std::vector& allCounters, + const std::vector& beginBlockWrites, + const std::string& outputPath) +{ + std::ofstream ctFile(outputPath); + + if (!ctFile.is_open()) { + std::stringstream msg; + msg << "Unable to create CT file: " << outputPath; + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + return false; + } + + ctFile << "# Auto-generated CT file for AIE bandwidth monitoring\n"; + 
ctFile << "# Generated by XRT AIE Dtrace Plugin (simplified bandwidth mode)\n"; + ctFile << "# Fixed 4 counters per shim tile: S2MM ch0,ch1 + MM2S ch0,ch1\n"; + ctFile << "# Post-processing filters by metric: read_bandwidth, write_bandwidth, ddr_bandwidth\n\n"; + + ctFile << "begin\n"; + ctFile << "{\n"; + ctFile << " ts_start = timestamp32()\n"; + + if (!beginBlockWrites.empty()) { + ctFile << "\n # Hardware configuration for bandwidth counters\n"; + for (const auto& write : beginBlockWrites) { + if (!write.comment.empty()) + ctFile << " # " << write.comment << "\n"; + ctFile << " write_reg(" << formatAddress(write.address) + << ", 0x" << std::hex << std::setfill('0') << std::setw(8) + << write.value << std::dec << ")\n"; + } + ctFile << "\n"; + } + + ctFile << "@blockopen\n"; + ctFile << "# COUNTER_METADATA_BEGIN\n"; + ctFile << "# {\n"; + + // Per-UC counter metadata groupings only + std::vector metaGroups; + for (const auto& asmFile : asmFiles) { + if (!asmFile.counters.empty()) + metaGroups.push_back(&asmFile); + } + + for (size_t g = 0; g < metaGroups.size(); g++) { + const auto& asmFile = *metaGroups[g]; + ctFile << "# \"" << asmFile.asmId << "\": [\n"; + + for (size_t c = 0; c < asmFile.counters.size(); c++) { + const auto& ctr = asmFile.counters[c]; + uint8_t channel = ctr.counterNumber % 2; + ctFile << "# {\"col\": " << static_cast(ctr.column) + << ", \"row\": " << static_cast(ctr.row) + << ", \"ctr\": " << static_cast(ctr.counterNumber) + << ", \"ch\": " << static_cast(channel) + << ", \"dir\": "; + + if (ctr.portDirection == "input") + ctFile << "\"i\""; + else if (ctr.portDirection == "output") + ctFile << "\"o\""; + else + ctFile << "null"; + + ctFile << "}"; + if (c < asmFile.counters.size() - 1) + ctFile << ","; + ctFile << "\n"; + } + + ctFile << "# ]"; + if (g < metaGroups.size() - 1) + ctFile << ","; + ctFile << "\n"; + } + + ctFile << "# }\n"; + ctFile << "# COUNTER_METADATA_END\n"; + ctFile << "@blockclose\n"; + ctFile << "}\n\n"; + + for 
(const auto& asmFile : asmFiles) { + if (asmFile.timestamps.empty() || asmFile.counters.empty()) + continue; + + std::string basename = fs::path(asmFile.filename).filename().string(); + + ctFile << "# Probes for " << basename + << " (columns " << asmFile.colStart << "-" << asmFile.colEnd << ")\n"; + + std::stringstream lineList; + lineList << "line"; + for (size_t i = 0; i < asmFile.timestamps.size(); i++) { + if (i > 0) + lineList << ","; + lineList << asmFile.timestamps[i].lineNumber; + } + + ctFile << "jprobe:" << basename + << ":uc" << asmFile.ucNumber + << ":" << lineList.str() << "\n"; + ctFile << "{\n"; + ctFile << " ts_" << asmFile.asmId << " = timestamp32()\n"; + + for (size_t i = 0; i < asmFile.counters.size(); i++) { + const auto& ctr = asmFile.counters[i]; + ctFile << " _ = read_reg(" << formatAddress(ctr.address) << ")\n"; + } + + ctFile << "}\n\n"; + } + + ctFile << "end\n"; + ctFile << "{\n"; + ctFile << " ts_end = timestamp32()\n"; + ctFile << "}\n"; + + ctFile.close(); + + std::stringstream msg; + msg << "Generated bandwidth CT file: " << outputPath + << " (" << allCounters.size() << " counters)"; + xrt_core::message::send(severity_level::info, "XRT", msg.str()); + + return true; +} + +bool AieDtraceCTWriter::generateBandwidthCT( + const std::string& outputPath, + void* hwctx, + const std::vector& opLocations) +{ + if (opLocations.empty()) { + xrt_core::message::send(severity_level::debug, "XRT", + "AIE dtrace: No op_locations provided for bandwidth CT generation"); + return false; + } + + auto shimColumns = getShimTileColumns(hwctx); + if (shimColumns.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE dtrace: No shim columns found in partition. 
Cannot generate bandwidth CT."); + return false; + } + + std::vector asmFiles; + std::regex filenamePattern(R"(aie_runtime_control(\d+)?\.asm)"); + + for (const auto& loc : opLocations) { + for (const auto& li : loc.line_info) { + if (li.entries.empty()) + continue; + + const auto& fname = li.entries.front().second; + std::smatch match; + if (!std::regex_search(fname, match, filenamePattern)) + continue; + + auto it = std::find_if(asmFiles.begin(), asmFiles.end(), + [&fname](const ASMFileInfo& a) { return a.filename == fname; }); + + if (it == asmFiles.end()) { + ASMFileInfo info; + info.filename = fname; + info.asmId = match[1].matched ? std::stoi(match[1].str()) : 0; + info.opLocMinCol = li.col; + info.opLocMaxCol = li.col; + asmFiles.push_back(info); + it = asmFiles.end() - 1; + } else { + it->opLocMinCol = std::min(it->opLocMinCol, li.col); + it->opLocMaxCol = std::max(it->opLocMaxCol, li.col); + } + + for (const auto& entry : li.entries) { + SaveTimestampInfo ts; + ts.lineNumber = entry.first; + ts.optionalIndex = -1; + it->timestamps.push_back(ts); + } + } + } + + if (asmFiles.empty()) { + xrt_core::message::send(severity_level::debug, "XRT", + "AIE dtrace: No ASM files found in op_locations for bandwidth CT generation"); + return false; + } + + applyUcSpansFromOpLoc(asmFiles); + + auto allCounters = generateBandwidthCounters(shimColumns); + if (allCounters.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE dtrace: No bandwidth counters generated"); + return false; + } + + extendLastUcToMaxConfiguredColumn(asmFiles, allCounters); + + for (auto& asmFile : asmFiles) { + asmFile.counters = filterCountersByColumn(allCounters, asmFile.colStart, asmFile.colEnd); + } + + std::vector beginBlockWrites; + for (uint8_t column : shimColumns) { + auto ssWrites = generateStreamSwitchPortConfig(column); + beginBlockWrites.insert(beginBlockWrites.end(), ssWrites.begin(), ssWrites.end()); + + auto pcWrites = generatePerfCounterConfig(column); + 
beginBlockWrites.insert(beginBlockWrites.end(), pcWrites.begin(), pcWrites.end()); + } + + return writeBandwidthCTFile(asmFiles, allCounters, beginBlockWrites, outputPath); +} + } // namespace xdp diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h index 328a3eca..877630c5 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h @@ -58,6 +58,33 @@ struct ASMFileInfo { std::vector counters; // Filtered counters for this ASM }; +/** + * @brief Register write operation for CT file begin block + */ +struct CTRegisterWrite { + uint64_t address; + uint32_t value; + std::string comment; +}; + +/** + * @brief Configuration for a single bandwidth counter in a shim tile + * + * For VE2 shim tiles, DMA channels are accessed via stream switch ports: + * - S2MM (master): Stream switch master port feeds data to DMA (input to AIE) + * - MM2S (slave): Stream switch slave port receives data from DMA (output from AIE) + * + * The dmaPortIndex is the physical stream switch port index that connects + * to the DMA channel. This is architecture-specific. + */ +struct BandwidthCounterConfig { + uint8_t counterNumber; // Counter number (0-3) + uint8_t channel; // DMA channel number (0 or 1) + uint8_t dmaPortIndex; // Physical port index for stream switch (VE2-specific) + bool isMaster; // true=S2MM/input (master), false=MM2S/output (slave) + std::string direction; // "input" or "output" +}; + /** * @class AieDtraceCTWriter * @brief Generates CT (CERT Tracing) files for VE2 AIE profiling @@ -110,6 +137,23 @@ class AieDtraceCTWriter { bool generate(const std::string& outputPath, const std::vector& opLocations); + /** + * @brief Generate a self-contained CT file for bandwidth metrics + * + * This method generates a CT file that configures a fixed set of 4 performance + * counters and 4 stream switch event ports per shim tile for bandwidth monitoring. 
+ * It does not depend on setMetricsSettings() - only needs partition info and + * SAVE_TIMESTAMPS locations. + * + * @param outputPath Full path for the generated CT file + * @param hwctx Hardware context handle for partition info access + * @param opLocations Vector of op_loc from aiebu_assembler::get_op_locations + * @return true if CT file was generated successfully, false otherwise + */ + bool generateBandwidthCT(const std::string& outputPath, + void* hwctx, + const std::vector& opLocations); + private: /** * @brief Read ASM file information from CSV file @@ -187,6 +231,53 @@ class AieDtraceCTWriter { */ std::string getPortDirection(const std::string& metricSet, uint64_t payload); + /** + * @brief Get shim tile columns from partition info + * @param hwctx Hardware context handle + * @return Vector of shim tile column numbers in the partition + */ + std::vector getShimTileColumns(void* hwctx); + + /** + * @brief Generate stream switch port configuration for 4 DMA channels per shim tile + * @param column Shim tile column + * @return Vector of register writes to configure stream switch ports + */ + std::vector generateStreamSwitchPortConfig(uint8_t column); + + /** + * @brief Generate performance counter configuration for 4 counters per shim tile + * @param column Shim tile column + * @return Vector of register writes to configure performance counters + */ + std::vector generatePerfCounterConfig(uint8_t column); + + /** + * @brief Get fixed bandwidth counter configurations for a shim tile + * @return Vector of BandwidthCounterConfig for the 4 fixed counters + */ + std::vector getBandwidthCounterConfigs(); + + /** + * @brief Generate bandwidth counters for all shim tiles in the partition + * @param shimColumns Vector of shim tile columns + * @return Vector of CTCounterInfo for all bandwidth counters + */ + std::vector generateBandwidthCounters(const std::vector& shimColumns); + + /** + * @brief Write the bandwidth CT file content with register configuration + * 
@param asmFiles Vector of ASMFileInfo with timestamps + * @param allCounters Vector of all CTCounterInfo for metadata + * @param beginBlockWrites Vector of register writes for begin block + * @param outputPath Full path for the output CT file + * @return true if file was written successfully + */ + bool writeBandwidthCTFile(const std::vector& asmFiles, + const std::vector& allCounters, + const std::vector& beginBlockWrites, + const std::string& outputPath); + private: VPDatabase* db; std::shared_ptr metadata; @@ -203,6 +294,15 @@ class AieDtraceCTWriter { static constexpr uint64_t MEM_TILE_BASE_OFFSET = 0x00091020; static constexpr uint64_t SHIM_TILE_BASE_OFFSET = 0x00031020; + // Stream switch and performance counter configuration offsets + static constexpr uint64_t STREAM_SWITCH_EVENT_PORT_SEL_OFFSET = 0x0003FF00; + static constexpr uint64_t PERF_CTRL_OFFSET = 0x00031000; + + // Bandwidth monitoring constants + static constexpr uint8_t NUM_BANDWIDTH_COUNTERS = 4; + static constexpr uint8_t SHIM_ROW = 0; + static constexpr uint8_t PORTS_PER_REGISTER = 4; + // Output filename static constexpr const char* CT_OUTPUT_FILENAME = "aie_profile.ct"; }; diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp index 86e28468..fc8ea236 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp @@ -126,6 +126,10 @@ namespace xdp { return; } + // CT file handles hardware configuration via write_reg commands in begin block. + // Skip setMetricsSettings for now; can be re-enabled for fallback flow later. 
+ return; + bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); if (!runtimeCounters) { @@ -231,9 +235,6 @@ namespace xdp { if (!xrt_core::config::get_aie_dtrace()) return; - if (db->getStaticInfo().getNumAIECounter(deviceID) == 0) - return; - auto ctx = xrt_core::hw_context_int::create_hw_context_from_implementation(hwctx); auto slotIdx = static_cast(ctx)->get_slotidx(); @@ -251,15 +252,27 @@ namespace xdp { bool generated = false; auto it = m_op_locations_cache.find(kernel_name); + if (it != m_op_locations_cache.end() && !it->second.empty()) { - generated = ctWriter.generate(outputPath, it->second); - if (generated) + generated = ctWriter.generateBandwidthCT(outputPath, hwctx, it->second); + if (generated) { xrt_core::message::send(severity_level::debug, "XRT", - "AIE dtrace: CT generated using aiebu API (get_op_locations) for kernel '" + "AIE dtrace: Bandwidth CT generated (self-contained) for kernel '" + kernel_name + "'"); + } + } + + if (!generated && it != m_op_locations_cache.end() && !it->second.empty()) { + if (db->getStaticInfo().getNumAIECounter(deviceID) > 0) { + generated = ctWriter.generate(outputPath, it->second); + if (generated) + xrt_core::message::send(severity_level::debug, "XRT", + "AIE dtrace: CT generated using aiebu API (get_op_locations) for kernel '" + + kernel_name + "'"); + } } - if (!generated) { + if (!generated && db->getStaticInfo().getNumAIECounter(deviceID) > 0) { generated = ctWriter.generate(outputPath); if (generated) xrt_core::message::send(severity_level::debug, "XRT", From 9573c396946e45b4ec491c8ec8787137b8b1e762 Mon Sep 17 00:00:00 2001 From: Jyotheeswar Ganne Date: Tue, 14 Apr 2026 01:17:07 -0600 Subject: [PATCH 2/4] Reset performance counters to zero in CT begin block Add write_reg commands to reset performance counters 0-3 to zero before configuring the counter control registers. This ensures counters start from a known state for accurate bandwidth measurement. 
Counter addresses (aie2ps_pl_module): - Performance_Counter0: 0x00031020 - Performance_Counter1: 0x00031024 - Performance_Counter2: 0x00031028 - Performance_Counter3: 0x0003102C Made-with: Cursor --- .../plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp index 02dfe016..ded3d93d 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp @@ -740,6 +740,19 @@ std::vector AieDtraceCTWriter::generatePerfCounterConfig(uint8_ uint64_t tileAddress = (static_cast(column) << columnShift) | (static_cast(SHIM_ROW) << rowShift); + // Performance counter register addresses (aie2ps_pl_module): + // Performance_Counter0-3: 0x00031020, 0x00031024, 0x00031028, 0x0003102C + constexpr uint64_t PERF_COUNTER0_OFFSET = 0x00031020; + + // Reset performance counters 0-3 to zero + for (uint8_t i = 0; i < 4; ++i) { + CTRegisterWrite write; + write.address = tileAddress + PERF_COUNTER0_OFFSET + (i * 4); + write.value = 0; + write.comment = "Reset PerfCounter" + std::to_string(i) + " @ col " + std::to_string(column); + writes.push_back(write); + } + // Performance control register addresses (aie2ps_pl_module): // Performance_Ctrl0: 0x00031000 - Counters 0,1 start/stop events // Performance_Ctrl2: 0x0003100C - Counters 2,3 start/stop events From b3a34df8b06ff66e209119cea5513660e003fcb6 Mon Sep 17 00:00:00 2001 From: Jyotheeswar Ganne Date: Tue, 14 Apr 2026 03:00:41 -0600 Subject: [PATCH 3/4] Rename asmFiles/asmFile to asmFileInfoList/asmFileInfo for clarity Improve code readability by using more descriptive variable names: - asmFiles -> asmFileInfoList - asmFile -> asmFileInfo The struct ASMFileInfo name remains unchanged. 
Made-with: Cursor --- .../aie_dtrace/ve2/aie_dtrace_ct_writer.cpp | 172 +++++++++--------- .../aie_dtrace/ve2/aie_dtrace_ct_writer.h | 8 +- 2 files changed, 90 insertions(+), 90 deletions(-) diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp index ded3d93d..8a9a8d20 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.cpp @@ -29,25 +29,25 @@ namespace { // Order UCs by aiebu min column; each UC's width is [colStart, nextUcStart - 1] (last UC ends at opLocMaxCol). void -applyUcSpansFromOpLoc(std::vector& asmFiles) +applyUcSpansFromOpLoc(std::vector& asmFileInfoList) { - if (asmFiles.empty()) + if (asmFileInfoList.empty()) return; - std::sort(asmFiles.begin(), asmFiles.end(), + std::sort(asmFileInfoList.begin(), asmFileInfoList.end(), [](const ASMFileInfo& a, const ASMFileInfo& b) { if (a.opLocMinCol != b.opLocMinCol) return a.opLocMinCol < b.opLocMinCol; return a.filename < b.filename; }); - const size_t n = asmFiles.size(); + const size_t n = asmFileInfoList.size(); for (size_t i = 0; i < n; ++i) { - auto& af = asmFiles[i]; + auto& af = asmFileInfoList[i]; af.colStart = static_cast(af.opLocMinCol); af.ucNumber = af.colStart; if (i + 1 < n) { - const int nextStart = static_cast(asmFiles[i + 1].opLocMinCol); + const int nextStart = static_cast(asmFileInfoList[i + 1].opLocMinCol); af.colEnd = nextStart - 1; if (af.colEnd < af.colStart) af.colEnd = static_cast(af.opLocMaxCol); @@ -60,10 +60,10 @@ applyUcSpansFromOpLoc(std::vector& asmFiles) // Last UC spans through the rightmost column that has a configured counter (op_loc may only // list columns where SAVE_TIMESTAMPS appears, so colEnd would otherwise stop at opLocMaxCol). 
void -extendLastUcToMaxConfiguredColumn(std::vector& asmFiles, +extendLastUcToMaxConfiguredColumn(std::vector& asmFileInfoList, const std::vector& allCounters) { - if (asmFiles.empty() || allCounters.empty()) + if (asmFileInfoList.empty() || allCounters.empty()) return; int maxCfgCol = -1; @@ -72,7 +72,7 @@ extendLastUcToMaxConfiguredColumn(std::vector& asmFiles, if (maxCfgCol < 0) return; - auto& last = asmFiles.back(); + auto& last = asmFileInfoList.back(); if (maxCfgCol >= last.colStart) last.colEnd = std::max(last.colEnd, maxCfgCol); } @@ -110,7 +110,7 @@ bool AieDtraceCTWriter::generate(const std::string& outputPath, return false; // Convert op_loc data to ASMFileInfo structures - std::vector asmFiles; + std::vector asmFileInfoList; std::regex filenamePattern(R"(aie_runtime_control(\d+)?\.asm)"); for (const auto& loc : opLocations) { @@ -125,17 +125,17 @@ bool AieDtraceCTWriter::generate(const std::string& outputPath, continue; // Check if we already have an ASMFileInfo for this filename - auto it = std::find_if(asmFiles.begin(), asmFiles.end(), + auto it = std::find_if(asmFileInfoList.begin(), asmFileInfoList.end(), [&fname](const ASMFileInfo& a) { return a.filename == fname; }); - if (it == asmFiles.end()) { + if (it == asmFileInfoList.end()) { ASMFileInfo info; info.filename = fname; info.asmId = match[1].matched ? 
std::stoi(match[1].str()) : 0; info.opLocMinCol = li.col; info.opLocMaxCol = li.col; - asmFiles.push_back(info); - it = asmFiles.end() - 1; + asmFileInfoList.push_back(info); + it = asmFileInfoList.end() - 1; } else { it->opLocMinCol = std::min(it->opLocMinCol, li.col); it->opLocMaxCol = std::max(it->opLocMaxCol, li.col); @@ -150,30 +150,30 @@ bool AieDtraceCTWriter::generate(const std::string& outputPath, } } - if (asmFiles.empty()) + if (asmFileInfoList.empty()) return false; - applyUcSpansFromOpLoc(asmFiles); + applyUcSpansFromOpLoc(asmFileInfoList); auto allCounters = getConfiguredCounters(); if (allCounters.empty()) return false; - extendLastUcToMaxConfiguredColumn(asmFiles, allCounters); + extendLastUcToMaxConfiguredColumn(asmFileInfoList, allCounters); - for (auto& asmFile : asmFiles) { - asmFile.counters = filterCountersByColumn(allCounters, - asmFile.colStart, asmFile.colEnd); + for (auto& asmFileInfo : asmFileInfoList) { + asmFileInfo.counters = filterCountersByColumn(allCounters, + asmFileInfo.colStart, asmFileInfo.colEnd); } - return writeCTFile(asmFiles, allCounters, outputPath); + return writeCTFile(asmFileInfoList, allCounters, outputPath); } bool AieDtraceCTWriter::generate(const std::string& outputPath) { std::string csvPath = (fs::current_path() / "aie_profile_timestamps.csv").string(); - auto asmFiles = readASMInfoFromCSV(csvPath); - if (asmFiles.empty()) { + auto asmFileInfoList = readASMInfoFromCSV(csvPath); + if (asmFileInfoList.empty()) { xrt_core::message::send(severity_level::debug, "XRT", "No ASM file information found in CSV. 
CT file will not be generated."); return false; @@ -186,16 +186,16 @@ bool AieDtraceCTWriter::generate(const std::string& outputPath) return false; } - extendLastUcToMaxConfiguredColumn(asmFiles, allCounters); + extendLastUcToMaxConfiguredColumn(asmFileInfoList, allCounters); bool hasTimestamps = false; - for (auto& asmFile : asmFiles) { - if (!asmFile.timestamps.empty()) + for (auto& asmFileInfo : asmFileInfoList) { + if (!asmFileInfo.timestamps.empty()) hasTimestamps = true; - asmFile.counters = filterCountersByColumn(allCounters, - asmFile.colStart, - asmFile.colEnd); + asmFileInfo.counters = filterCountersByColumn(allCounters, + asmFileInfo.colStart, + asmFileInfo.colEnd); } if (!hasTimestamps) { @@ -204,19 +204,19 @@ bool AieDtraceCTWriter::generate(const std::string& outputPath) return false; } - return writeCTFile(asmFiles, allCounters, outputPath); + return writeCTFile(asmFileInfoList, allCounters, outputPath); } std::vector AieDtraceCTWriter::readASMInfoFromCSV(const std::string& csvPath) { - std::vector asmFiles; + std::vector asmFileInfoList; std::ifstream csvFile(csvPath); if (!csvFile.is_open()) { std::stringstream msg; msg << "Unable to open CSV file: " << csvPath << ". 
Please run parse_aie_runtime_to_csv.py first."; xrt_core::message::send(severity_level::warning, "XRT", msg.str()); - return asmFiles; + return asmFileInfoList; } std::string line; @@ -303,7 +303,7 @@ std::vector AieDtraceCTWriter::readASMInfoFromCSV(const std::string } } - asmFiles.push_back(info); + asmFileInfoList.push_back(info); std::stringstream msg; msg << "Loaded " << info.filename << " (id=" << info.asmId @@ -321,7 +321,7 @@ std::vector AieDtraceCTWriter::readASMInfoFromCSV(const std::string csvFile.close(); // Sort by UC start column for consistent output - std::sort(asmFiles.begin(), asmFiles.end(), + std::sort(asmFileInfoList.begin(), asmFileInfoList.end(), [](const ASMFileInfo& a, const ASMFileInfo& b) { if (a.colStart != b.colStart) return a.colStart < b.colStart; @@ -329,15 +329,15 @@ std::vector AieDtraceCTWriter::readASMInfoFromCSV(const std::string }); std::stringstream msg; - msg << "Loaded " << asmFiles.size() << " ASM files from CSV with " - << std::accumulate(asmFiles.begin(), asmFiles.end(), 0, + msg << "Loaded " << asmFileInfoList.size() << " ASM files from CSV with " + << std::accumulate(asmFileInfoList.begin(), asmFileInfoList.end(), 0, [](int sum, const ASMFileInfo& info) { return sum + info.timestamps.size(); }) << " total SAVE_TIMESTAMPS"; xrt_core::message::send(severity_level::info, "XRT", msg.str()); - return asmFiles; + return asmFileInfoList; } std::vector AieDtraceCTWriter::getConfiguredCounters() @@ -489,7 +489,7 @@ std::string AieDtraceCTWriter::getPortDirection(const std::string& metricSet, ui return ""; // Not a throughput metric with port direction } -bool AieDtraceCTWriter::writeCTFile(const std::vector& asmFiles, +bool AieDtraceCTWriter::writeCTFile(const std::vector& asmFileInfoList, const std::vector& allCounters, const std::string& outputPath) { @@ -539,17 +539,17 @@ bool AieDtraceCTWriter::writeCTFile(const std::vector& asmFiles, // Collect ASM groups that have counters std::vector metaGroups; - for (const auto& asmFile 
: asmFiles) { - if (!asmFile.counters.empty()) - metaGroups.push_back(&asmFile); + for (const auto& asmFileInfo : asmFileInfoList) { + if (!asmFileInfo.counters.empty()) + metaGroups.push_back(&asmFileInfo); } for (size_t g = 0; g < metaGroups.size(); g++) { - const auto& asmFile = *metaGroups[g]; - ctFile << "# \"" << asmFile.asmId << "\": [\n"; + const auto& asmFileInfo = *metaGroups[g]; + ctFile << "# \"" << asmFileInfo.asmId << "\": [\n"; - for (size_t c = 0; c < asmFile.counters.size(); c++) { - const auto& ctr = asmFile.counters[c]; + for (size_t c = 0; c < asmFileInfo.counters.size(); c++) { + const auto& ctr = asmFileInfo.counters[c]; ctFile << "# {\"col\": " << static_cast(ctr.column) << ", \"row\": " << static_cast(ctr.row) << ", \"ctr\": " << static_cast(ctr.counterNumber) @@ -564,7 +564,7 @@ bool AieDtraceCTWriter::writeCTFile(const std::vector& asmFiles, ctFile << "null"; ctFile << "}"; - if (c < asmFile.counters.size() - 1) + if (c < asmFileInfo.counters.size() - 1) ctFile << ","; ctFile << "\n"; } @@ -581,36 +581,36 @@ bool AieDtraceCTWriter::writeCTFile(const std::vector& asmFiles, ctFile << "}\n\n"; // Write jprobe blocks for each ASM file - for (const auto& asmFile : asmFiles) { - if (asmFile.timestamps.empty() || asmFile.counters.empty()) + for (const auto& asmFileInfo : asmFileInfoList) { + if (asmFileInfo.timestamps.empty() || asmFileInfo.counters.empty()) continue; - std::string basename = fs::path(asmFile.filename).filename().string(); + std::string basename = fs::path(asmFileInfo.filename).filename().string(); // Write comment ctFile << "# Probes for " << basename - << " (columns " << asmFile.colStart << "-" << asmFile.colEnd << ")\n"; + << " (columns " << asmFileInfo.colStart << "-" << asmFileInfo.colEnd << ")\n"; // Build line number list for jprobe std::stringstream lineList; lineList << "line"; - for (size_t i = 0; i < asmFile.timestamps.size(); i++) { + for (size_t i = 0; i < asmFileInfo.timestamps.size(); i++) { if (i > 0) lineList << 
","; - lineList << asmFile.timestamps[i].lineNumber; + lineList << asmFileInfo.timestamps[i].lineNumber; } // Write jprobe declaration ctFile << "jprobe:" << basename - << ":uc" << asmFile.ucNumber + << ":uc" << asmFileInfo.ucNumber << ":" << lineList.str() << "\n"; ctFile << "{\n"; - ctFile << " ts_" << asmFile.asmId << " = timestamp32()\n"; + ctFile << " ts_" << asmFileInfo.asmId << " = timestamp32()\n"; // Write counter reads using _ as throwaway variable - for (size_t i = 0; i < asmFile.counters.size(); i++) { + for (size_t i = 0; i < asmFileInfo.counters.size(); i++) { ctFile << " _ = read_reg(" - << formatAddress(asmFile.counters[i].address) << ")\n"; + << formatAddress(asmFileInfo.counters[i].address) << ")\n"; } ctFile << "}\n\n"; @@ -832,7 +832,7 @@ std::vector AieDtraceCTWriter::generateBandwidthCounters( } bool AieDtraceCTWriter::writeBandwidthCTFile( - const std::vector& asmFiles, + const std::vector& asmFileInfoList, const std::vector& allCounters, const std::vector& beginBlockWrites, const std::string& outputPath) @@ -873,17 +873,17 @@ bool AieDtraceCTWriter::writeBandwidthCTFile( // Per-UC counter metadata groupings only std::vector metaGroups; - for (const auto& asmFile : asmFiles) { - if (!asmFile.counters.empty()) - metaGroups.push_back(&asmFile); + for (const auto& asmFileInfo : asmFileInfoList) { + if (!asmFileInfo.counters.empty()) + metaGroups.push_back(&asmFileInfo); } for (size_t g = 0; g < metaGroups.size(); g++) { - const auto& asmFile = *metaGroups[g]; - ctFile << "# \"" << asmFile.asmId << "\": [\n"; + const auto& asmFileInfo = *metaGroups[g]; + ctFile << "# \"" << asmFileInfo.asmId << "\": [\n"; - for (size_t c = 0; c < asmFile.counters.size(); c++) { - const auto& ctr = asmFile.counters[c]; + for (size_t c = 0; c < asmFileInfo.counters.size(); c++) { + const auto& ctr = asmFileInfo.counters[c]; uint8_t channel = ctr.counterNumber % 2; ctFile << "# {\"col\": " << static_cast(ctr.column) << ", \"row\": " << static_cast(ctr.row) @@ -899,7 
+899,7 @@ bool AieDtraceCTWriter::writeBandwidthCTFile( ctFile << "null"; ctFile << "}"; - if (c < asmFile.counters.size() - 1) + if (c < asmFileInfo.counters.size() - 1) ctFile << ","; ctFile << "\n"; } @@ -915,31 +915,31 @@ bool AieDtraceCTWriter::writeBandwidthCTFile( ctFile << "@blockclose\n"; ctFile << "}\n\n"; - for (const auto& asmFile : asmFiles) { - if (asmFile.timestamps.empty() || asmFile.counters.empty()) + for (const auto& asmFileInfo : asmFileInfoList) { + if (asmFileInfo.timestamps.empty() || asmFileInfo.counters.empty()) continue; - std::string basename = fs::path(asmFile.filename).filename().string(); + std::string basename = fs::path(asmFileInfo.filename).filename().string(); ctFile << "# Probes for " << basename - << " (columns " << asmFile.colStart << "-" << asmFile.colEnd << ")\n"; + << " (columns " << asmFileInfo.colStart << "-" << asmFileInfo.colEnd << ")\n"; std::stringstream lineList; lineList << "line"; - for (size_t i = 0; i < asmFile.timestamps.size(); i++) { + for (size_t i = 0; i < asmFileInfo.timestamps.size(); i++) { if (i > 0) lineList << ","; - lineList << asmFile.timestamps[i].lineNumber; + lineList << asmFileInfo.timestamps[i].lineNumber; } ctFile << "jprobe:" << basename - << ":uc" << asmFile.ucNumber + << ":uc" << asmFileInfo.ucNumber << ":" << lineList.str() << "\n"; ctFile << "{\n"; - ctFile << " ts_" << asmFile.asmId << " = timestamp32()\n"; + ctFile << " ts_" << asmFileInfo.asmId << " = timestamp32()\n"; - for (size_t i = 0; i < asmFile.counters.size(); i++) { - const auto& ctr = asmFile.counters[i]; + for (size_t i = 0; i < asmFileInfo.counters.size(); i++) { + const auto& ctr = asmFileInfo.counters[i]; ctFile << " _ = read_reg(" << formatAddress(ctr.address) << ")\n"; } @@ -979,7 +979,7 @@ bool AieDtraceCTWriter::generateBandwidthCT( return false; } - std::vector asmFiles; + std::vector asmFileInfoList; std::regex filenamePattern(R"(aie_runtime_control(\d+)?\.asm)"); for (const auto& loc : opLocations) { @@ -992,17 
+992,17 @@ bool AieDtraceCTWriter::generateBandwidthCT( if (!std::regex_search(fname, match, filenamePattern)) continue; - auto it = std::find_if(asmFiles.begin(), asmFiles.end(), + auto it = std::find_if(asmFileInfoList.begin(), asmFileInfoList.end(), [&fname](const ASMFileInfo& a) { return a.filename == fname; }); - if (it == asmFiles.end()) { + if (it == asmFileInfoList.end()) { ASMFileInfo info; info.filename = fname; info.asmId = match[1].matched ? std::stoi(match[1].str()) : 0; info.opLocMinCol = li.col; info.opLocMaxCol = li.col; - asmFiles.push_back(info); - it = asmFiles.end() - 1; + asmFileInfoList.push_back(info); + it = asmFileInfoList.end() - 1; } else { it->opLocMinCol = std::min(it->opLocMinCol, li.col); it->opLocMaxCol = std::max(it->opLocMaxCol, li.col); @@ -1017,13 +1017,13 @@ bool AieDtraceCTWriter::generateBandwidthCT( } } - if (asmFiles.empty()) { + if (asmFileInfoList.empty()) { xrt_core::message::send(severity_level::debug, "XRT", "AIE dtrace: No ASM files found in op_locations for bandwidth CT generation"); return false; } - applyUcSpansFromOpLoc(asmFiles); + applyUcSpansFromOpLoc(asmFileInfoList); auto allCounters = generateBandwidthCounters(shimColumns); if (allCounters.empty()) { @@ -1032,10 +1032,10 @@ bool AieDtraceCTWriter::generateBandwidthCT( return false; } - extendLastUcToMaxConfiguredColumn(asmFiles, allCounters); + extendLastUcToMaxConfiguredColumn(asmFileInfoList, allCounters); - for (auto& asmFile : asmFiles) { - asmFile.counters = filterCountersByColumn(allCounters, asmFile.colStart, asmFile.colEnd); + for (auto& asmFileInfo : asmFileInfoList) { + asmFileInfo.counters = filterCountersByColumn(allCounters, asmFileInfo.colStart, asmFileInfo.colEnd); } std::vector beginBlockWrites; @@ -1047,7 +1047,7 @@ bool AieDtraceCTWriter::generateBandwidthCT( beginBlockWrites.insert(beginBlockWrites.end(), pcWrites.begin(), pcWrites.end()); } - return writeBandwidthCTFile(asmFiles, allCounters, beginBlockWrites, outputPath); + return 
writeBandwidthCTFile(asmFileInfoList, allCounters, beginBlockWrites, outputPath); } } // namespace xdp diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h index 877630c5..76439b45 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ct_writer.h @@ -193,12 +193,12 @@ class AieDtraceCTWriter { /** * @brief Write the CT file content - * @param asmFiles Vector of ASMFileInfo with all parsed information + * @param asmFileInfoList Vector of ASMFileInfo with all parsed information * @param allCounters Vector of all CTCounterInfo for metadata * @param outputPath Full path for the output CT file * @return true if file was written successfully */ - bool writeCTFile(const std::vector& asmFiles, + bool writeCTFile(const std::vector& asmFileInfoList, const std::vector& allCounters, const std::string& outputPath); @@ -267,13 +267,13 @@ class AieDtraceCTWriter { /** * @brief Write the bandwidth CT file content with register configuration - * @param asmFiles Vector of ASMFileInfo with timestamps + * @param asmFileInfoList Vector of ASMFileInfo with timestamps * @param allCounters Vector of all CTCounterInfo for metadata * @param beginBlockWrites Vector of register writes for begin block * @param outputPath Full path for the output CT file * @return true if file was written successfully */ - bool writeBandwidthCTFile(const std::vector& asmFiles, + bool writeBandwidthCTFile(const std::vector& asmFileInfoList, const std::vector& allCounters, const std::vector& beginBlockWrites, const std::string& outputPath); From b5cb43dbc30ca6f0549504bc4d27891938822662 Mon Sep 17 00:00:00 2001 From: Jyotheeswar Ganne Date: Thu, 16 Apr 2026 00:20:52 -0600 Subject: [PATCH 4/4] Remove nop.elf submission from AieDtrace_VE2Impl::updateDevice CT file handles all hardware configuration via write_reg commands in the begin block. 
No need to submit nop.elf or call setMetricsSettings in updateDevice. The nop.elf submission is removed; the setMetricsSettings code path is left in place (unreachable after the early return) for a potential fallback flow in the future. Made-with: Cursor --- profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp index fc8ea236..4bfd1029 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.cpp @@ -119,15 +119,9 @@ namespace xdp { if(!checkAieDevice(deviceID, metadata->getHandle())) return; - // Same sequence as AieProfile_VE2Impl::updateDevice: nop.elf then setMetricsSettings. - if (!aie::submitNopElf(metadata->getHandle())) { - xrt_core::message::send(severity_level::warning, "XRT", - "Failed to submit nop.elf. AIE dtrace configuration will not proceed."); - return; - } - - // CT file handles hardware configuration via write_reg commands in begin block. - // Skip setMetricsSettings for now; can be re-enabled for fallback flow later. + // CT file handles all hardware configuration via write_reg commands in begin block. + // No need to submit nop.elf or call setMetricsSettings here. + // The code below is preserved for potential fallback flow in the future. return; bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle());