@@ -360,6 +360,7 @@ std::vector<CTCounterInfo> AieDtraceCTWriter::getConfiguredCounters()
360360 info.column = aieCounter->column ;
361361 info.row = aieCounter->row ;
362362 info.counterNumber = aieCounter->counterNumber ;
363+ info.channel = 0 ; // Default; overwritten for bandwidth metrics
363364 info.module = aieCounter->module ;
364365 info.address = calculateCounterAddress (info.column , info.row ,
365366 info.counterNumber , info.module );
@@ -464,6 +465,14 @@ bool AieDtraceCTWriter::isThroughputMetric(const std::string& metricSet)
464465
465466std::string AieDtraceCTWriter::getPortDirection (const std::string& metricSet, uint64_t payload)
466467{
468+ // Direction is from AIE/application perspective:
469+ // - "input" = data read FROM DDR into AIE = MM2S channels (Memory-Mapped to Stream)
470+ // - "output" = data written TO DDR from AIE = S2MM channels (Stream to Memory-Mapped)
471+ //
472+ // Stream switch port types:
473+ // - S2MM channels use master ports (isMaster=true) = output (AIE writing to DDR)
474+ // - MM2S channels use slave ports (isMaster=false) = input (AIE reading from DDR)
475+
467476 // For interface tile ddr_bandwidth, read_bandwidth, write_bandwidth - determine the
468477 // direction from payload, since these metrics can mix input and output ports per tile
469478 if (metricSet == " ddr_bandwidth" ||
@@ -474,25 +483,25 @@ std::string AieDtraceCTWriter::getPortDirection(const std::string& metricSet, ui
474483 return isMaster ? " output" : " input" ;
475484 }
476485
477- // peak_read_bandwidth: S2MM channels (input/ read from DDR)
486+ // peak_read_bandwidth: MM2S channels (read from DDR = input to AIE )
478487 if (metricSet == " peak_read_bandwidth" ) {
479488 return " input" ;
480489 }
481490
482- // peak_write_bandwidth: MM2S channels (output/ write to DDR)
491+ // peak_write_bandwidth: S2MM channels (write to DDR = output from AIE )
483492 if (metricSet == " peak_write_bandwidth" ) {
484493 return " output" ;
485494 }
486495
487- // For input/s2mm metrics - always input direction
496+ // For input/mm2s metrics - always input direction (data into AIE)
488497 if (metricSet.find (" input" ) != std::string::npos ||
489- metricSet.find (" s2mm " ) != std::string::npos) {
498+ metricSet.find (" mm2s " ) != std::string::npos) {
490499 return " input" ;
491500 }
492501
493- // For output/mm2s metrics - always output direction
502+ // For output/s2mm metrics - always output direction (data from AIE)
494503 if (metricSet.find (" output" ) != std::string::npos ||
495- metricSet.find (" mm2s " ) != std::string::npos) {
504+ metricSet.find (" s2mm " ) != std::string::npos) {
496505 return " output" ;
497506 }
498507
@@ -687,49 +696,53 @@ std::vector<BandwidthCounterConfig> AieDtraceCTWriter::getBandwidthCounterConfig
687696 // These port indices are architecture-specific and map to the physical
688697 // stream switch ports that connect to the DMA channels.
689698 //
699+ // Direction is from AIE/application perspective:
700+ // - "input" = data read FROM DDR into AIE = MM2S channels (Memory-Mapped to Stream)
701+ // - "output" = data written TO DDR from AIE = S2MM channels (Stream to Memory-Mapped)
702+ //
690703 // For VE2 shim tiles:
691- // - S2MM (master) ports: Stream switch master port feeds data to DMA S2MM
692- // - MM2S (slave) ports: Stream switch slave port receives data from DMA MM2S
704+ // - S2MM (master) ports: Stream switch master port feeds data to DMA S2MM = output
705+ // - MM2S (slave) ports: Stream switch slave port receives data from DMA MM2S = input
693706 //
694707 // Port encoding in Stream_Switch_Event_Port_Selection register:
695708 // - Bits [4:0]: Port index
696709 // - Bit [5]: 0 = slave, 1 = master
697710 //
698711 // VE2 shim tile port mapping:
699- // - S2MM ch0: master South1 => port index 3
700- // - S2MM ch1: master South3 => port index 5
701- // - MM2S ch0: slave South3 => port index 5
702- // - MM2S ch1: slave South7 => port index 9
712+ // - S2MM ch0: master South1 => port index 3 (output)
713+ // - S2MM ch1: master South3 => port index 5 (output)
714+ // - MM2S ch0: slave South3 => port index 5 (input)
715+ // - MM2S ch1: slave South7 => port index 9 (input)
703716 //
704- // For peak_read_bandwidth: 2 S2MM channels with RUNNING + STALL events
705- // For peak_write_bandwidth: 2 MM2S channels with RUNNING + STALL events
717+ // For peak_read_bandwidth: 2 MM2S channels with RUNNING + STALL events (read from DDR = input)
718+ // For peak_write_bandwidth: 2 S2MM channels with RUNNING + STALL events (write to DDR = output)
706719 // For ddr_bandwidth/read_bandwidth/write_bandwidth: 4 ports with RUNNING events only
707720 //
708721 // Field order of each entry below: counterNumber, channel, dmaPortIndex, isMaster, direction, eventType
709722 if (metricSet == " peak_read_bandwidth" ) {
710- // S2MM ch0/ch1 with RUNNING + STALL events for peak read bandwidth
723+ // MM2S ch0/ch1 with RUNNING + STALL events for peak read bandwidth (read from DDR = input)
711724 return {
712- {0 , 0 , 3 , true , " input" , " running" }, // Counter 0: S2MM Ch0 RUNNING
713- {1 , 0 , 3 , true , " input" , " stalled" }, // Counter 1: S2MM Ch0 STALL
714- {2 , 1 , 5 , true , " input" , " running" }, // Counter 2: S2MM Ch1 RUNNING
715- {3 , 1 , 5 , true , " input" , " stalled" } // Counter 3: S2MM Ch1 STALL
725+ {0 , 0 , 5 , false , " input" , " running" }, // Counter 0: MM2S Ch0 RUNNING
726+ {1 , 0 , 5 , false , " input" , " stalled" }, // Counter 1: MM2S Ch0 STALL
727+ {2 , 1 , 9 , false , " input" , " running" }, // Counter 2: MM2S Ch1 RUNNING
728+ {3 , 1 , 9 , false , " input" , " stalled" } // Counter 3: MM2S Ch1 STALL
716729 };
717730 }
718731 else if (metricSet == " peak_write_bandwidth" ) {
719- // MM2S ch0/ch1 with RUNNING + STALL events for peak write bandwidth
732+ // S2MM ch0/ch1 with RUNNING + STALL events for peak write bandwidth (write to DDR = output)
720733 return {
721- {0 , 0 , 5 , false , " output" , " running" }, // Counter 0: MM2S Ch0 RUNNING
722- {1 , 0 , 5 , false , " output" , " stalled" }, // Counter 1: MM2S Ch0 STALL
723- {2 , 1 , 9 , false , " output" , " running" }, // Counter 2: MM2S Ch1 RUNNING
724- {3 , 1 , 9 , false , " output" , " stalled" } // Counter 3: MM2S Ch1 STALL
734+ {0 , 0 , 3 , true , " output" , " running" }, // Counter 0: S2MM Ch0 RUNNING
735+ {1 , 0 , 3 , true , " output" , " stalled" }, // Counter 1: S2MM Ch0 STALL
736+ {2 , 1 , 5 , true , " output" , " running" }, // Counter 2: S2MM Ch1 RUNNING
737+ {3 , 1 , 5 , true , " output" , " stalled" } // Counter 3: S2MM Ch1 STALL
725738 };
726739 }
727740 // Default: ddr_bandwidth, read_bandwidth, write_bandwidth
728741 return {
729- {0 , 0 , 3 , true , " input" , " running" }, // Counter 0: S2MM Ch0 (master South1 ) - read_bandwidth
730- {1 , 1 , 5 , true , " input" , " running" }, // Counter 1: S2MM Ch1 (master South3 ) - read_bandwidth
731- {2 , 0 , 5 , false , " output" , " running" }, // Counter 2: MM2S Ch0 (slave South3 ) - write_bandwidth
732- {3 , 1 , 9 , false , " output" , " running" } // Counter 3: MM2S Ch1 (slave South7 ) - write_bandwidth
742+ {0 , 0 , 5 , false , " input" , " running" }, // Counter 0: MM2S Ch0 (slave South3 ) - input from DDR
743+ {1 , 1 , 9 , false , " input" , " running" }, // Counter 1: MM2S Ch1 (slave South7 ) - input from DDR
744+ {2 , 0 , 3 , true , " output" , " running" }, // Counter 2: S2MM Ch0 (master South1 ) - output to DDR
745+ {3 , 1 , 5 , true , " output" , " running" } // Counter 3: S2MM Ch1 (master South3 ) - output to DDR
733746 };
734747}
735748
@@ -758,11 +771,11 @@ std::vector<CTRegisterWrite> AieDtraceCTWriter::generateStreamSwitchPortConfig(
758771 std::stringstream comment;
759772 comment << " SS port sel @ col " << static_cast <int >(column);
760773 if (metricSet == " peak_read_bandwidth" )
761- comment << " (S2MM ch0,ch1 x2 for running+stall)" ;
762- else if (metricSet == " peak_write_bandwidth" )
763774 comment << " (MM2S ch0,ch1 x2 for running+stall)" ;
775+ else if (metricSet == " peak_write_bandwidth" )
776+ comment << " (S2MM ch0,ch1 x2 for running+stall)" ;
764777 else
765- comment << " (S2MM ch0,ch1; MM2S ch0,ch1)" ;
778+ comment << " (MM2S ch0,ch1; S2MM ch0,ch1)" ;
766779
767780 CTRegisterWrite write;
768781 write.address = regAddr;
@@ -875,6 +888,7 @@ std::vector<CTCounterInfo> AieDtraceCTWriter::generateBandwidthCounters(
875888 info.column = column;
876889 info.row = SHIM_ROW;
877890 info.counterNumber = cfg.counterNumber ;
891+ info.channel = cfg.channel ;
878892 info.module = " interface_tile" ;
879893 info.address = calculateCounterAddress (column, SHIM_ROW, cfg.counterNumber , " interface_tile" );
880894 info.metricSet = metricSet;
@@ -940,11 +954,10 @@ bool AieDtraceCTWriter::writeBandwidthCTFile(
940954
941955 for (size_t c = 0 ; c < asmFileInfo.counters .size (); c++) {
942956 const auto & ctr = asmFileInfo.counters [c];
943- uint8_t channel = ctr.counterNumber % 2 ;
944957 ctFile << " # {\" col\" : " << static_cast <int >(ctr.column )
945958 << " , \" row\" : " << static_cast <int >(ctr.row )
946959 << " , \" ctr\" : " << static_cast <int >(ctr.counterNumber )
947- << " , \" ch\" : " << static_cast <int >(channel)
960+ << " , \" ch\" : " << static_cast <int >(ctr. channel )
948961 << " , \" dir\" : " ;
949962
950963 if (ctr.portDirection == " input" )
0 commit comments