Skip to content

Commit 7e759ab

Browse files
authored
feat: module performance histogram (#476)
1 parent 0098eaa commit 7e759ab

File tree

24 files changed

+1239
-66
lines changed

24 files changed

+1239
-66
lines changed

api/counter.h

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,103 @@ yanet_get_counter_value(
7676

7777
void
7878
yanet_counter_handle_list_free(struct counter_handle_list *counters);
79+
80+
/**
81+
* Represents a single latency bucket in a performance histogram.
82+
*
83+
* Each bucket tracks how many packet batches were processed with latency
84+
* greater than or equal to min_latency. The histogram uses a hybrid approach
85+
* with linear buckets for fine-grained resolution at typical latencies and
86+
* exponential buckets for efficient coverage of outliers.
87+
*/
88+
struct module_performance_counter_latency_range {
89+
/** Minimum latency in nanoseconds for this bucket */
90+
uint64_t min_latency;
91+
92+
/** Number of packet batches that fell into this latency bucket */
/* NOTE(review): the struct-level comment says a bucket counts batches with
 * latency greater than or equal to min_latency, while the line above says
 * "fell into this bucket". Confirm whether the count is per-bucket or
 * cumulative over all higher buckets. */
93+
size_t batches;
94+
};
95+
96+
/**
97+
* Performance metrics for a specific packet batch size range.
98+
*
99+
* Modules process packets in batches, and this structure contains latency
100+
* statistics for a particular batch size range (e.g., 1 packet, 2-3 packets,
101+
* 4-7 packets, etc.). The latency distribution is captured using a hybrid
102+
* histogram with both linear and exponential buckets.
103+
*/
104+
struct module_performance_counter {
105+
/** Mean processing latency in nanoseconds across all batches */
/* NOTE(review): presumably the mean over batches belonging to this
 * batch-size range only, not over every batch the module processed —
 * TODO confirm. */
106+
float mean_latency;
107+
108+
/** Minimum batch size for this counter (e.g., 1, 2, 4, 8, 16, 32) */
109+
uint64_t min_batch_size;
110+
111+
/** Number of latency histogram buckets */
/* Length of the latency_ranges array below; the Go binding slices
 * latency_ranges with exactly this count. */
112+
size_t latency_ranges_count;
113+
114+
/** Array of latency histogram buckets, sorted by increasing min_latency
115+
*/
116+
struct module_performance_counter_latency_range *latency_ranges;
117+
};
118+
119+
/**
120+
* Collection of all performance counters for a module.
121+
*
122+
* Contains performance metrics for all 6 batch size ranges tracked by the
123+
* module: 1, 2-3, 4-7, 8-15, 16-31, and 32+ packets. Each counter includes
124+
* mean latency and a detailed histogram of latency measurements.
125+
*/
126+
struct module_performance_counters {
127+
/** Number of performance counters (typically 6, one per batch size
128+
* range) */
/* Length of the counters array below; the Go binding slices counters
 * with exactly this count. */
129+
size_t counters_count;
130+
131+
/** Array of performance counters, ordered by min_batch_size */
132+
struct module_performance_counter *counters;
133+
};
134+
135+
/**
136+
* Retrieve module performance counters from the dataplane configuration.
137+
*
138+
* This function extracts and aggregates performance metrics for a specific
139+
* module across all worker threads. The metrics include latency histograms
140+
* for different packet batch sizes (1, 2-3, 4-7, 8-15, 16-31, 32+ packets).
141+
*
142+
* The returned structure must be freed using
143+
* yanet_module_performance_counters_free() when no longer needed.
144+
*
145+
* @param counters Output parameter for the performance counters structure
146+
* @param dp_config Pointer to the dataplane configuration
147+
* @param device_name Name of the device
148+
* @param pipeline_name Name of the pipeline
149+
* @param function_name Name of the function
150+
* @param chain_name Name of the chain
151+
* @param module_type Type identifier of the module
152+
* @param module_name Name identifier of the module
153+
* @return 0 on success, negative error code on failure
154+
*/
155+
/* NOTE(review): the Go binding in controlplane/ffi/shm.go calls
 * yanet_module_performance_counters_free() on the output structure even when
 * this function returns non-zero. Confirm that *counters is always left in a
 * state that is safe to pass to the free function, including on failure. */
int
156+
yanet_module_performance_counters(
157+
struct module_performance_counters *counters,
158+
struct dp_config *dp_config,
159+
const char *device_name,
160+
const char *pipeline_name,
161+
const char *function_name,
162+
const char *chain_name,
163+
const char *module_type,
164+
const char *module_name
165+
);
166+
167+
/**
168+
* Free resources allocated for module performance counters.
169+
*
170+
* Releases all memory allocated by yanet_module_performance_counters(),
171+
* including the latency_ranges arrays within each counter.
172+
*
173+
* @param counters Pointer to the performance counters structure to free
174+
*/
175+
/* NOTE(review): it is not visible here whether the structure pointed to by
 * counters is itself released or only the arrays it references. The Go binding
 * passes the address of a Go-declared variable, which suggests only the
 * contents are freed — TODO confirm and state it in the contract above. */
void
176+
yanet_module_performance_counters_free(
177+
struct module_performance_counters *counters
178+
);

cli/modules/counters/src/main.rs

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ use core::error::Error;
55
use clap::{ArgAction, CommandFactory, Parser};
66
use clap_complete::CompleteEnv;
77
use code::{
8-
counters_service_client::CountersServiceClient, ChainCountersRequest, DeviceCountersRequest,
9-
FunctionCountersRequest, ModuleCountersRequest, PipelineCountersRequest,
8+
counters_service_client::CountersServiceClient, ModulePerfCountersRequest,
9+
ChainCountersRequest, DeviceCountersRequest, FunctionCountersRequest, ModuleCountersRequest,
10+
PipelineCountersRequest,
1011
};
1112
use tonic::{codec::CompressionEncoding, transport::Channel};
1213
use ync::logging;
@@ -98,6 +99,9 @@ pub struct ModuleCmd {
9899
pub module_type: String,
99100
#[arg(long)]
100101
pub module_name: String,
102+
/// Show performance counters instead of raw counters.
103+
#[arg(long)]
104+
pub perf: bool,
101105
}
102106

103107
#[tokio::main(flavor = "current_thread")]
@@ -130,16 +134,29 @@ async fn run(cmd: Cmd) -> Result<(), Box<dyn Error>> {
130134
.await?
131135
}
132136
ModeCmd::Module(cmd) => {
133-
service
134-
.show_module(
135-
cmd.device_name,
136-
cmd.pipeline_name,
137-
cmd.function_name,
138-
cmd.chain_name,
139-
cmd.module_type,
140-
cmd.module_name,
141-
)
142-
.await?
137+
if cmd.perf {
138+
service
139+
.show_perf_module(
140+
cmd.device_name,
141+
cmd.pipeline_name,
142+
cmd.function_name,
143+
cmd.chain_name,
144+
cmd.module_type,
145+
cmd.module_name,
146+
)
147+
.await?
148+
} else {
149+
service
150+
.show_module(
151+
cmd.device_name,
152+
cmd.pipeline_name,
153+
cmd.function_name,
154+
cmd.chain_name,
155+
cmd.module_type,
156+
cmd.module_name,
157+
)
158+
.await?
159+
}
143160
}
144161
}
145162

@@ -232,4 +249,26 @@ impl CountersService {
232249
println!("{}", serde_json::to_string(response.get_ref())?);
233250
Ok(())
234251
}
252+
253+
/// Fetches performance counters for the module identified by the given
/// device, pipeline, function, chain, module type and module name, and
/// prints them to stdout.
///
/// Builds a `ModulePerfCountersRequest` from the arguments, sends it through
/// `self.client.module_perf`, and prints the response body as JSON. Errors
/// from the request or from JSON serialization are returned to the caller.
pub async fn show_perf_module(
254+
&mut self,
255+
device_name: String,
256+
pipeline_name: String,
257+
function_name: String,
258+
chain_name: String,
259+
module_type: String,
260+
module_name: String,
261+
) -> Result<(), Box<dyn Error>> {
262+
let request = ModulePerfCountersRequest {
263+
device: device_name,
264+
pipeline: pipeline_name,
265+
function: function_name,
266+
chain: chain_name,
267+
module_type,
268+
module_name,
269+
};
270+
let response = self.client.module_perf(request).await?;
// NOTE(review): this prints pretty-printed JSON, whereas the method just
// before it in this impl prints compact JSON via serde_json::to_string —
// confirm the difference in output format is intended.
271+
println!("{}", serde_json::to_string_pretty(response.get_ref())?);
272+
Ok(())
273+
}
235274
}

common/exp_array.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ mem_array_free_exp(
4040
if (!count)
4141
return;
4242

43-
uint64_t capacity = 1 << uint64_log(count);
43+
uint64_t capacity = 1 << uint64_log_up(count);
4444
memory_bfree(memory_context, array, capacity * item_size);
4545
}
4646

@@ -57,7 +57,7 @@ mem_array_alloc_exp(
5757
}
5858
return NULL;
5959
}
60-
uint64_t capacity = 1 << uint64_log(count);
60+
uint64_t capacity = 1 << uint64_log_up(count);
6161
if (res_capacity) {
6262
*res_capacity = capacity;
6363
}

common/numutils.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,30 @@
11
#pragma once
22

3+
#include "common/likely.h"
34
#include <stdint.h>
45

6+
/// Base-2 logarithm of `value`, rounded up: the smallest n such that
/// 2^n >= value. Returns 0 for value == 0 (the same result as for value == 1).
57
static inline uint64_t
6-
uint64_log(uint64_t value) {
7-
if (value == 0)
8+
uint64_log_up(uint64_t value) {
9+
if (unlikely(value == 0))
810
return 0;
911

1012
// Bit width minus leading zeros is floor(log2(value)) + 1 ...
return sizeof(long long) * 8 - __builtin_clzll(value) -
1113
// ... and exact powers of two (only one bit set) subtract that extra 1.
!(value & (value - 1));
1214
}
1315

16+
/// Base-2 logarithm of `value`, rounded down: the index of the highest set
/// bit. Returns 0 for value == 0 (the same result as for value == 1).
17+
static inline uint64_t
18+
uint64_log_down(uint64_t value) {
19+
if (unlikely(value == 0)) {
20+
return 0;
21+
}
22+
23+
// Highest bit index (bit width - 1) minus the number of leading zero bits.
return sizeof(long long) * 8 - 1 - __builtin_clzll(value);
24+
}
25+
1426
/**
15-
* @brief Align number up to next power of 2
27+
* @brief Align number up to next power of 2
1628
* @param n Input number
1729
* @return Next power of 2, or 0 if overflow
1830
*/

common/range_collector.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ range_collector_init(
3636
static inline void
3737
range_collector_free(struct range_collector *collector, uint8_t key_size) {
3838
if (collector->mask_count) {
39-
uint64_t capacity = 1 << uint64_log(collector->mask_count);
39+
uint64_t capacity = 1 << uint64_log_up(collector->mask_count);
4040
memory_bfree(
4141
collector->memory_context,
4242
ADDR_OF(&collector->masks),

common/range_index.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ range_index_remap(
107107

108108
static inline void
109109
range_index_free(struct range_index *range_index) {
110-
uint64_t capacity = 1 << uint64_log(range_index->count);
110+
uint64_t capacity = 1 << uint64_log_up(range_index->count);
111111
memory_bfree(
112112
ADDR_OF(&range_index->memory_context),
113113
ADDR_OF(&range_index->values),

common/registry.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ value_registry_free(struct value_registry *registry) {
284284
}
285285

286286
if (registry->range_count) {
287-
uint64_t capacity = 1 << uint64_log(registry->range_count);
287+
uint64_t capacity = 1 << uint64_log_up(registry->range_count);
288288

289289
memory_bfree(
290290
registry->memory_context,

controlplane/ffi/shm.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package ffi
66
//#cgo LDFLAGS: -L../../build/lib/counters -lcounters
77
//#cgo LDFLAGS: -L../../build/lib/dataplane/config -lconfig_dp
88
//#include "api/agent.h"
9+
//#include "api/counter.h"
910
import "C"
1011
import (
1112
"fmt"
@@ -560,3 +561,87 @@ func (m *DPConfig) ModuleCounters(
560561

561562
return m.encodeCounters(counters)
562563
}
564+
565+
// ModulePerformanceCounterLatencyRange represents a latency range in performance counters.
566+
type ModulePerformanceCounterLatencyRange struct {
567+
// MinLatency is copied from the C field min_latency, which api/counter.h
// documents as the bucket's minimum latency in nanoseconds.
MinLatency uint64
568+
// Batches is copied from the C field batches (a size_t widened to uint64):
// the number of packet batches counted for this bucket.
Batches uint64
569+
}
570+
571+
// ModulePerformanceCounter represents performance counter data for a module.
572+
type ModulePerformanceCounter struct {
573+
// MeanLatency is copied from the C field mean_latency (a C float), which
// api/counter.h documents as mean processing latency in nanoseconds.
MeanLatency float32
574+
// MinBatchSize is copied from the C field min_batch_size: the lower bound
// of the batch-size range this counter describes.
MinBatchSize uint64
575+
// LatencyRanges is a Go-owned copy of the C latency_ranges array, in the
// same order as the C array.
LatencyRanges []ModulePerformanceCounterLatencyRange
576+
}
577+
578+
// ModulePerformanceCounters retrieves performance counters for a specific module.
579+
//
580+
// Performance counters provide detailed timing and batch processing statistics
581+
// for module execution, including mean latency and latency distribution across
582+
// different batch sizes.
583+
func (m *DPConfig) ModulePerformanceCounters(
584+
deviceName string,
585+
pipelineName string,
586+
functionName string,
587+
chainName string,
588+
moduleType string,
589+
moduleName string,
590+
) ([]ModulePerformanceCounter, error) {
// Copy every Go string into C-allocated memory for the call below; each
// copy is released with C.free when this function returns.
591+
cDeviceName := C.CString(deviceName)
592+
defer C.free(unsafe.Pointer(cDeviceName))
593+
cPipelineName := C.CString(pipelineName)
594+
defer C.free(unsafe.Pointer(cPipelineName))
595+
cFunctionName := C.CString(functionName)
596+
defer C.free(unsafe.Pointer(cFunctionName))
597+
cChainName := C.CString(chainName)
598+
defer C.free(unsafe.Pointer(cChainName))
599+
cModuleType := C.CString(moduleType)
600+
defer C.free(unsafe.Pointer(cModuleType))
601+
cModuleName := C.CString(moduleName)
602+
defer C.free(unsafe.Pointer(cModuleName))
603+
604+
// counters starts as Go's zero value and is filled in by the C call.
var counters C.struct_module_performance_counters
605+
rc := C.yanet_module_performance_counters(
606+
&counters,
607+
m.ptr,
608+
cDeviceName,
609+
cPipelineName,
610+
cFunctionName,
611+
cChainName,
612+
cModuleType,
613+
cModuleName,
614+
)
// NOTE(review): this defer is registered before the rc check, so the free
// also runs on the failure path, where counters holds either the zero
// value or whatever the C call left behind — confirm the free function
// tolerates both.
615+
defer C.yanet_module_performance_counters_free(&counters)
616+
617+
// The numeric rc is not included in the returned error.
if rc != 0 {
618+
return nil, fmt.Errorf("failed to get module performance counters")
619+
}
620+
621+
result := make([]ModulePerformanceCounter, counters.counters_count)
622+
623+
// Convert C array to Go slice for iteration
624+
cCounters := unsafe.Slice(counters.counters, counters.counters_count)
625+
626+
for i := range result {
627+
cCounter := &cCounters[i]
628+
629+
// View the per-counter C bucket array as a slice and copy each bucket
// into a Go-owned slice of the same length.
latencyRanges := make([]ModulePerformanceCounterLatencyRange, cCounter.latency_ranges_count)
630+
cLatencyRanges := unsafe.Slice(cCounter.latency_ranges, cCounter.latency_ranges_count)
631+
632+
for j := range latencyRanges {
633+
latencyRanges[j] = ModulePerformanceCounterLatencyRange{
634+
MinLatency: uint64(cLatencyRanges[j].min_latency),
635+
Batches: uint64(cLatencyRanges[j].batches),
636+
}
637+
}
638+
639+
result[i] = ModulePerformanceCounter{
640+
MeanLatency: float32(cCounter.mean_latency),
641+
MinBatchSize: uint64(cCounter.min_batch_size),
642+
LatencyRanges: latencyRanges,
643+
}
644+
}
645+
646+
// Every value was copied by value into Go-owned slices above, so result
// does not reference the C memory released by the deferred free.
return result, nil
647+
}

0 commit comments

Comments
 (0)