Skip to content

Commit 3077fae

Browse files
committed
feat: add config ingester_traffic_overflow_action
1 parent b067070 commit 3077fae

File tree

6 files changed

+153
-15
lines changed

6 files changed

+153
-15
lines changed

agent/src/config/config.rs

+2
Original file line numberDiff line numberDiff line change
@@ -2129,6 +2129,7 @@ pub struct Communication {
21292129
#[serde(deserialize_with = "deser_usize_with_mega_unit")]
21302130
pub grpc_buffer_size: usize,
21312131
pub max_throughput_to_ingester: u64,
2132+
pub ingester_traffic_overflow_action: u8, // 0: wait, 1: drop
21322133
pub request_via_nat_ip: bool,
21332134
pub proxy_controller_ip: String,
21342135
pub proxy_controller_port: u16,
@@ -2145,6 +2146,7 @@ impl Default for Communication {
21452146
ingester_port: 30033,
21462147
grpc_buffer_size: 5 << 20,
21472148
max_throughput_to_ingester: 100,
2149+
ingester_traffic_overflow_action: 0,
21482150
request_via_nat_ip: false,
21492151
}
21502152
}

agent/src/config/handler.rs

+17-1
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,8 @@ pub struct SenderConfig {
231231
pub npb_bps_threshold: u64,
232232
pub npb_socket_type: agent::SocketType,
233233
pub multiple_sockets_to_ingester: bool,
234-
pub max_throughput_to_ingester: u64, // unit: Mbps
234+
pub max_throughput_to_ingester: u64, // unit: Mbps
235+
pub ingester_traffic_overflow_action: u8, // 0: wait, 1: drop
235236
pub collector_socket_type: agent::SocketType,
236237
pub standalone_data_file_size: u32,
237238
pub standalone_data_file_dir: String,
@@ -1740,6 +1741,10 @@ impl TryFrom<(Config, UserConfig)> for ModuleConfig {
17401741
.throughput_monitoring_interval,
17411742
multiple_sockets_to_ingester: conf.outputs.socket.multiple_sockets_to_ingester,
17421743
max_throughput_to_ingester: conf.global.communication.max_throughput_to_ingester,
1744+
ingester_traffic_overflow_action: conf
1745+
.global
1746+
.communication
1747+
.ingester_traffic_overflow_action,
17431748
collector_socket_type: conf.outputs.socket.data_socket_type,
17441749
standalone_data_file_size: conf.global.standalone_mode.max_data_file_size,
17451750
standalone_data_file_dir: conf.global.standalone_mode.data_file_dir.clone(),
@@ -3837,6 +3842,17 @@ impl ConfigHandler {
38373842
);
38383843
communication.max_throughput_to_ingester = new_communication.max_throughput_to_ingester;
38393844
}
3845+
if communication.ingester_traffic_overflow_action
3846+
!= new_communication.ingester_traffic_overflow_action
3847+
{
3848+
info!(
3849+
"Update global.communication.ingester_traffic_overflow_action from {:?} to {:?}.",
3850+
communication.ingester_traffic_overflow_action,
3851+
new_communication.ingester_traffic_overflow_action
3852+
);
3853+
communication.ingester_traffic_overflow_action =
3854+
new_communication.ingester_traffic_overflow_action;
3855+
}
38403856
if communication.ingester_ip != new_communication.ingester_ip {
38413857
info!(
38423858
"Update global.communication.ingester_ip from {:?} to {:?}.",

agent/src/sender/uniform_sender.rs

+35-10
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ pub struct SenderCounter {
5353
pub tx: AtomicU64,
5454
pub tx_bytes: AtomicU64,
5555
pub dropped: AtomicU64,
56+
pub waited: AtomicU64,
5657
}
5758

5859
impl RefCountable for SenderCounter {
@@ -89,6 +90,11 @@ impl RefCountable for SenderCounter {
8990
CounterType::Counted,
9091
CounterValue::Unsigned(self.dropped.swap(0, Ordering::Relaxed)),
9192
),
93+
(
94+
"waited",
95+
CounterType::Counted,
96+
CounterValue::Unsigned(self.waited.swap(0, Ordering::Relaxed)),
97+
),
9298
]
9399
}
94100
}
@@ -626,7 +632,11 @@ impl<T: Sendable> UniformSender<T> {
626632
}
627633
}
628634

629-
fn is_exceed_max_throughput(&mut self, max_throughput_mbps: u64) -> bool {
635+
fn is_exceed_max_throughput(
636+
&mut self,
637+
max_throughput_mbps: u64,
638+
ingester_traffic_overflow_action: u8,
639+
) -> bool {
630640
if max_throughput_mbps == 0 {
631641
return false;
632642
}
@@ -637,7 +647,7 @@ impl<T: Sendable> UniformSender<T> {
637647
.unwrap();
638648

639649
let used = now - Duration::from_nanos(SENT_START_DURATION.load(Ordering::Relaxed));
640-
if used > Duration::from_secs(1) {
650+
if used >= Duration::from_secs(1) {
641651
SENT_START_DURATION.store(now.as_nanos() as u64, Ordering::Relaxed);
642652
TOTAL_SENT_BYTES.store(0, Ordering::Relaxed);
643653
} else {
@@ -646,11 +656,16 @@ impl<T: Sendable> UniformSender<T> {
646656
> Duration::from_secs(5)
647657
{
648658
warn!(
649-
"{} sender dropping message, throughput execeed setting value 'max_throughput_to_ingester' {}Mbps",
650-
self.name, max_throughput_mbps
659+
"{} sender dropping message, throughput execeed setting value 'max_throughput_to_ingester' {}Mbps, action {} (0: wait, 1: drop)",
660+
self.name, max_throughput_mbps, ingester_traffic_overflow_action
651661
);
652662
LAST_LOGGING_DURATION.store(now.as_nanos() as u64, Ordering::Relaxed);
653663
}
664+
// action is wait
665+
if ingester_traffic_overflow_action == 0 {
666+
thread::sleep(Duration::from_secs(1) - used);
667+
return true;
668+
}
654669
self.exception_handler
655670
.set(Exception::DataBpsThresholdExceeded);
656671
return true;
@@ -678,6 +693,7 @@ impl<T: Sendable> UniformSender<T> {
678693
let config = self.config.load();
679694
let socket_type = config.collector_socket_type;
680695
let max_throughput_mpbs = config.max_throughput_to_ingester;
696+
let ingester_traffic_overflow_action = config.ingester_traffic_overflow_action;
681697
match self.input.recv_all(
682698
&mut batch,
683699
Some(Duration::from_secs(Self::QUEUE_READ_TIMEOUT)),
@@ -688,12 +704,21 @@ impl<T: Sendable> UniformSender<T> {
688704
start_cached = Instant::now();
689705
self.cached = false;
690706
}
691-
if self.is_exceed_max_throughput(max_throughput_mpbs) {
692-
self.counter
693-
.dropped
694-
.fetch_add(batch.len() as u64, Ordering::Relaxed);
695-
batch.clear();
696-
continue;
707+
if self.is_exceed_max_throughput(
708+
max_throughput_mpbs,
709+
ingester_traffic_overflow_action,
710+
) {
711+
if ingester_traffic_overflow_action == 0 {
712+
self.counter
713+
.waited
714+
.fetch_add(batch.len() as u64, Ordering::Relaxed);
715+
} else {
716+
self.counter
717+
.dropped
718+
.fetch_add(batch.len() as u64, Ordering::Relaxed);
719+
batch.clear();
720+
continue;
721+
}
697722
}
698723
for send_item in batch.drain(..) {
699724
if !self.running.load(Ordering::Relaxed) {

server/agent_config/README-CH.md

+35-1
Original file line numberDiff line numberDiff line change
@@ -1051,9 +1051,43 @@ global:
10511051
**详细描述**:
10521052

10531053
向 Server 端 Ingester 模块发送可观测性数据的最大允许流量,
1054-
超过此限速时数据将会主动丢弃、且采集器会标记为异常状态并触发告警。
1054+
若`ingester_traffic_overflow_action` 配置为`丢弃`,超过此限速时数据将会主动丢弃、且采集器会标记为异常状态并触发告警。
10551055
配置为 0 表示不限速。
10561056

1057+
### Ingester 流量超限的动作 {#global.communication.ingester_traffic_overflow_action}
1058+
1059+
**标签**:
1060+
1061+
`hot_update`
1062+
1063+
**FQCN**:
1064+
1065+
`global.communication.ingester_traffic_overflow_action`
1066+
1067+
**默认值**:
1068+
```yaml
1069+
global:
1070+
communication:
1071+
ingester_traffic_overflow_action: 0
1072+
```
1073+
1074+
**枚举可选值**:
1075+
| Value | Note |
1076+
| ----- | ---------------------------- |
1077+
| 0 | 等待 |
1078+
| 1 | 丢弃 |
1079+
1080+
**模式**:
1081+
| Key | Value |
1082+
| ---- | ---------------------------- |
1083+
| Type | int |
1084+
1085+
**详细描述**:
1086+
1087+
Ingester 流量超限的动作
1088+
- 等待:暂停发送,数据缓存到队列,等待下次发送。
1089+
- 丢弃:直接丢弃数据,并触发 Agent `数据流量达到限速`异常。
1090+
10571091
### 请求 NAT IP 地址 {#global.communication.request_via_nat_ip}
10581092

10591093
**标签**:

server/agent_config/README.md

+36-1
Original file line numberDiff line numberDiff line change
@@ -1075,10 +1075,45 @@ global:
10751075
**Description**:
10761076

10771077
The maximum allowed flow rate for sending observability data to the server-side Ingester module.
1078-
When this rate limit is exceeded, the data will be actively discarded,
1078+
If `ingester_traffic_overflow_action` is set to `drop`,
1079+
when this rate limit is exceeded, the data will be actively discarded,
10791080
and the agent will be marked as abnormal and an alarm will be triggered.
10801081
Setting it to 0 means no speed limit.
10811082

1083+
### Action when the Ingester traffic exceeds the limit {#global.communication.ingester_traffic_overflow_action}
1084+
1085+
**Tags**:
1086+
1087+
`hot_update`
1088+
1089+
**FQCN**:
1090+
1091+
`global.communication.ingester_traffic_overflow_action`
1092+
1093+
**Default value**:
1094+
```yaml
1095+
global:
1096+
communication:
1097+
ingester_traffic_overflow_action: 0
1098+
```
1099+
1100+
**Enum options**:
1101+
| Value | Note |
1102+
| ----- | ---------------------------- |
1103+
| 0 | wait |
1104+
| 1 | drop |
1105+
1106+
**Schema**:
1107+
| Key | Value |
1108+
| ---- | ---------------------------- |
1109+
| Type | int |
1110+
1111+
**Description**:
1112+
1113+
Action when the Ingester traffic exceeds the limit
1114+
- wait: pause sending and keep the data buffered in the queue until sending resumes
1115+
- drop: discard the data immediately and raise the Agent `DATA_BPS_THRESHOLD_EXCEEDED` exception
1116+
10821117
### Request via NAT IP Address {#global.communication.request_via_nat_ip}
10831118

10841119
**Tags**:

server/agent_config/template.yaml

+28-2
Original file line numberDiff line numberDiff line change
@@ -713,14 +713,40 @@ global:
713713
# description:
714714
# en: |-
715715
# The maximum allowed flow rate for sending observability data to the server-side Ingester module.
716-
# When this rate limit is exceeded, the data will be actively discarded,
716+
# If `ingester_traffic_overflow_action` is set to `drop`,
717+
# when this rate limit is exceeded, the data will be actively discarded,
717718
# and the agent will be marked as abnormal and an alarm will be triggered.
718719
# Setting it to 0 means no speed limit.
719720
# ch: |-
720721
# 向 Server 端 Ingester 模块发送可观测性数据的最大允许流量,
721-
# 超过此限速时数据将会主动丢弃、且采集器会标记为异常状态并触发告警。
722+
# 若`ingester_traffic_overflow_action` 配置为`丢弃`,超过此限速时数据将会主动丢弃、且采集器会标记为异常状态并触发告警。
722723
# 配置为 0 表示不限速。
723724
max_throughput_to_ingester: 100
725+
# type: int
726+
# name:
727+
# en: Action when the Ingester traffic exceeds the limit
728+
# ch: Ingester 流量超限的动作
729+
# unit:
730+
# range: []
731+
# enum_options:
732+
# - 0:
733+
# en: wait
734+
# ch: 等待
735+
# - 1:
736+
# en: drop
737+
# ch: 丢弃
738+
# modification: hot_update
739+
# ee_feature: false
740+
# description:
741+
# en: |-
742+
# Action when the Ingester traffic exceeds the limit
743+
# - wait: pause sending and keep the data buffered in the queue until sending resumes
744+
# - drop: discard the data immediately and raise the Agent `DATA_BPS_THRESHOLD_EXCEEDED` exception
745+
# ch: |-
746+
# Ingester 流量超限的动作
747+
# - 等待:暂停发送,数据缓存到队列,等待下次发送。
748+
# - 丢弃:直接丢弃数据,并触发 Agent `数据流量达到限速`异常。
749+
ingester_traffic_overflow_action: 0
724750
# type: bool
725751
# name:
726752
# en: Request via NAT IP Address

0 commit comments

Comments
 (0)