Skip to content

Commit 7c75f67

Browse files
TomatoMrrvql
authored and committed
refactor: agent adds a disable_cgroups option
1 parent 7f0f2a1 commit 7c75f67

File tree

4 files changed

+19
-7
lines changed

4 files changed

+19
-7
lines changed

agent/src/config/handler.rs

+2
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ pub struct EnvironmentConfig {
205205
pub sys_free_memory_limit: u32,
206206
pub log_file_size: u32,
207207
pub tap_mode: TapMode,
208+
pub guard_interval: Duration,
208209
pub system_load_circuit_breaker_threshold: f32,
209210
pub system_load_circuit_breaker_recover: f32,
210211
pub system_load_circuit_breaker_metric: trident::SystemLoadMetric,
@@ -1462,6 +1463,7 @@ impl TryFrom<(Config, RuntimeConfig)> for ModuleConfig {
14621463
sys_free_memory_limit: conf.sys_free_memory_limit,
14631464
log_file_size: conf.log_file_size,
14641465
tap_mode: conf.tap_mode,
1466+
guard_interval: conf.yaml_config.guard_interval,
14651467
system_load_circuit_breaker_threshold: conf.system_load_circuit_breaker_threshold,
14661468
system_load_circuit_breaker_recover: conf.system_load_circuit_breaker_recover,
14671469
system_load_circuit_breaker_metric: conf.system_load_circuit_breaker_metric,

agent/src/main.rs

+5
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ struct Opts {
7070
/// optionally `K8S_POD_IP_FOR_DEEPFLOW` can be set to override ip address.
7171
#[clap(long)]
7272
sidecar: bool,
73+
74+
/// Disable cgroups, deepflow-agent will default to checking the CPU and memory resource usage in a loop every 10 seconds to prevent resource usage from exceeding limits.
75+
#[clap(long)]
76+
disable_cgroups: bool,
7377
}
7478

7579
#[cfg(unix)]
@@ -117,6 +121,7 @@ fn main() -> Result<()> {
117121
trident::RunningMode::Managed
118122
},
119123
opts.sidecar,
124+
opts.disable_cgroups,
120125
)?;
121126
wait_on_signals();
122127
t.stop();

agent/src/trident.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ impl Trident {
242242
version_info: &'static VersionInfo,
243243
agent_mode: RunningMode,
244244
sidecar_mode: bool,
245+
disable_cgroups: bool,
245246
) -> Result<Trident> {
246247
let config = match agent_mode {
247248
RunningMode::Managed => {
@@ -404,6 +405,7 @@ impl Trident {
404405
exception_handler,
405406
config_path,
406407
sidecar_mode,
408+
disable_cgroups,
407409
ntp_diff,
408410
) {
409411
warn!(
@@ -432,6 +434,7 @@ impl Trident {
432434
exception_handler: ExceptionHandler,
433435
config_path: Option<PathBuf>,
434436
sidecar_mode: bool,
437+
disable_cgroups: bool,
435438
ntp_diff: Arc<AtomicI64>,
436439
) -> Result<()> {
437440
info!("==================== Launching DeepFlow-Agent ====================");
@@ -583,6 +586,8 @@ impl Trident {
583586
} else if !is_kernel_available_for_cgroups() {
584587
// fixme: Linux after kernel version 2.6.24 can use cgroups
585588
info!("don't initialize cgroups controller, because kernel version < 3 or agent is in Windows");
589+
} else if disable_cgroups {
590+
info!("don't initialize cgroups controller, disable cgroups, deepflow-agent will default to checking the CPU and memory resource usage in a loop every 10 seconds to prevent resource usage from exceeding limits");
586591
} else {
587592
match Cgroups::new(process::id() as u64, config_handler.environment()) {
588593
Ok(cg_controller) => {
@@ -603,14 +608,14 @@ impl Trident {
603608
let guard = match Guard::new(
604609
config_handler.environment(),
605610
log_dir.to_string(),
606-
config_handler.candidate_config.yaml_config.guard_interval,
607611
exception_handler.clone(),
608612
cgroup_mount_path,
609613
is_cgroup_v2,
610614
config_handler
611615
.candidate_config
612616
.yaml_config
613617
.memory_trim_disabled,
618+
disable_cgroups,
614619
) {
615620
Ok(g) => g,
616621
Err(e) => {

agent/src/utils/guard.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ impl SystemLoadGuard {
132132
pub struct Guard {
133133
config: EnvironmentAccess,
134134
log_dir: String,
135-
interval: Duration,
136135
thread: Mutex<Option<JoinHandle<()>>>,
137136
running: Arc<(Mutex<bool>, Condvar)>,
138137
exception_handler: ExceptionHandler,
@@ -141,25 +140,25 @@ pub struct Guard {
141140
memory_trim_disabled: bool,
142141
system: Arc<Mutex<System>>,
143142
pid: Pid,
143+
disable_cgroups: bool,
144144
}
145145

146146
impl Guard {
147147
pub fn new(
148148
config: EnvironmentAccess,
149149
log_dir: String,
150-
interval: Duration,
151150
exception_handler: ExceptionHandler,
152151
cgroup_mount_path: String,
153152
is_cgroup_v2: bool,
154153
memory_trim_disabled: bool,
154+
disable_cgroups: bool,
155155
) -> Result<Self, &'static str> {
156156
let Ok(pid) = get_current_pid() else {
157157
return Err("get the process' pid failed: {}, deepflow-agent restart...");
158158
};
159159
Ok(Self {
160160
config,
161161
log_dir,
162-
interval,
163162
thread: Mutex::new(None),
164163
running: Arc::new((Mutex::new(false), Condvar::new())),
165164
exception_handler,
@@ -168,6 +167,7 @@ impl Guard {
168167
memory_trim_disabled,
169168
system: Arc::new(Mutex::new(System::new())),
170169
pid,
170+
disable_cgroups,
171171
})
172172
}
173173

@@ -326,7 +326,6 @@ impl Guard {
326326
let running = self.running.clone();
327327
let exception_handler = self.exception_handler.clone();
328328
let log_dir = self.log_dir.clone();
329-
let interval = self.interval;
330329
let mut over_memory_limit = false; // Higher than the limit does not meet expectations
331330
let mut over_cpu_limit = false; // Higher than the limit does not meet expectations
332331
let mut under_sys_free_memory_limit = false; // Below the limit, it does not meet expectations
@@ -339,6 +338,7 @@ impl Guard {
339338
let pid: Pid = self.pid.clone();
340339
let cgroups_available = is_kernel_available_for_cgroups();
341340
let in_container = running_in_container();
341+
let disable_cgroups = self.disable_cgroups;
342342
let mut last_exceeded = get_timestamp(0);
343343

344344
let thread = thread::Builder::new().name("guard".to_owned()).spawn(move || {
@@ -377,7 +377,7 @@ impl Guard {
377377
}
378378
// If it is in a container or tap_mode is Analyzer, there is no need to limit resource, so there is no need to check cgroups
379379
if !in_container && config.tap_mode != TapMode::Analyzer {
380-
if cgroups_available {
380+
if cgroups_available && !disable_cgroups {
381381
if check_cgroup_result {
382382
check_cgroup_result = Self::check_cgroups(cgroup_mount_path.clone(), is_cgroup_v2);
383383
if !check_cgroup_result {
@@ -483,7 +483,7 @@ impl Guard {
483483
if !*running {
484484
break;
485485
}
486-
running = timer.wait_timeout(running, interval).unwrap().0;
486+
running = timer.wait_timeout(running, config.guard_interval).unwrap().0;
487487
if !*running {
488488
break;
489489
}

0 commit comments

Comments
 (0)