Skip to content

Commit d424dca

Browse files
authored
enh(cloud::kubernetes::plugin): CPU/Memory requests inflated due to non-Running and unscheduled pods included in allocation calculation for node-usage mode (#6133)
Refs: CTOR-2252
1 parent 06497a8 commit d424dca

5 files changed

Lines changed: 610 additions & 21 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
22
"dependencies": [
3+
"libfile-homedir-perl"
34
]
45
}
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"dependencies": [
3-
"perl(DateTime)"
3+
"perl(DateTime)",
4+
"perl-File-HomeDir"
45
]
56
}

src/cloud/kubernetes/mode/nodeusage.pm

Lines changed: 119 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright 2024 Centreon (http://www.centreon.com/)
2+
# Copyright 2026-Present Centreon (http://www.centreon.com/)
33
#
44
# Centreon is a full-fledged industry-strength solution that meets
55
# the needs in IT infrastructure and application monitoring for
@@ -21,6 +21,8 @@
2121
package cloud::kubernetes::mode::nodeusage;
2222

2323
use base qw(centreon::plugins::templates::counter);
24+
use centreon::plugins::constants qw/:values :counters/;
25+
use centreon::plugins::misc qw/is_excluded/;
2426

2527
use strict;
2628
use warnings;
@@ -108,8 +110,8 @@ sub set_counters {
108110
my ($self, %options) = @_;
109111

110112
$self->{maps_counters_type} = [
111-
{ name => 'nodes', type => 1, cb_prefix_output => 'prefix_node_output',
112-
message_multiple => 'All Nodes usage are ok', skipped_code => { -11 => 1 } },
113+
{ name => 'nodes', type => COUNTER_TYPE_INSTANCE, cb_prefix_output => 'prefix_node_output',
114+
message_multiple => 'All Nodes usage are ok', skipped_code => { NO_VALUE() => 1 } },
113115
];
114116

115117
$self->{maps_counters}->{nodes} = [
@@ -183,8 +185,12 @@ sub new {
183185
bless $self, $class;
184186

185187
$options{options}->add_options(arguments => {
186-
'filter-name:s' => { name => 'filter_name' },
187-
'units:s' => { name => 'units', default => '%' } # Keep compat
188+
'filter-name:s' => { redirect => 'include_name' },
189+
'include-name:s' => { name => 'include_name', default => '' },
190+
'exclude-name:s' => { name => 'exclude_name', default => '' },
191+
'include-status:s' => { name => 'include_status', default => 'running' },
192+
'exclude-status:s' => { name => 'exclude_status', default => '' },
193+
'units:s' => { name => 'units', default => '%' } # Keep compat
188194
});
189195

190196
return $self;
@@ -202,12 +208,9 @@ sub manage_selection {
202208

203209
my $nodes = $options{custom}->kubernetes_list_nodes();
204210

211+
my $found = 0;
205212
foreach my $node (@{$nodes}) {
206-
if (defined($self->{option_results}->{filter_name}) && $self->{option_results}->{filter_name} ne '' &&
207-
$node->{metadata}->{name} !~ /$self->{option_results}->{filter_name}/) {
208-
$self->{output}->output_add(long_msg => "skipping '" . $node->{metadata}->{name} . "': no matching filter name.", debug => 1);
209-
next;
210-
}
213+
next if is_excluded($node->{metadata}->{name}, $self->{option_results}->{include_name}, $self->{option_results}->{exclude_name}, output => $self->{output});
211214

212215
$self->{nodes}->{$node->{metadata}->{name}} = {
213216
display => $node->{metadata}->{name},
@@ -217,23 +220,30 @@ sub manage_selection {
217220
}
218221
}
219222

220-
if (scalar(keys %{$self->{nodes}}) <= 0) {
221-
$self->{output}->add_option_msg(short_msg => "No Nodes found.");
222-
$self->{output}->option_exit();
223-
}
223+
$self->{output}->option_exit(short_msg => "No Nodes found.")
224+
unless keys %{$self->{nodes}};
224225

225226
my $pods = $options{custom}->kubernetes_list_pods();
226227

227228
foreach my $pod (@{$pods}) {
228-
next if (defined($pod->{spec}->{nodeName}) && !defined($self->{nodes}->{$pod->{spec}->{nodeName}}));
229+
next unless defined $pod->{spec}->{nodeName} &&
230+
defined $pod->{status}->{phase} &&
231+
defined $self->{nodes}->{$pod->{spec}->{nodeName}};
232+
$pod->{status}->{phase} = lc $pod->{status}->{phase};
233+
next if is_excluded($pod->{status}->{phase}, $self->{option_results}->{include_status}, $self->{option_results}->{exclude_status}, output => $self->{output});
234+
229235
$self->{nodes}->{$pod->{spec}->{nodeName}}->{pods_allocated}++;
230236
foreach my $container (@{$pod->{spec}->{containers}}) {
231237
$self->{nodes}->{$pod->{spec}->{nodeName}}->{cpu_requests} += $self->to_core(value => $container->{resources}->{requests}->{cpu}) if (defined($container->{resources}->{requests}->{cpu}));
232238
$self->{nodes}->{$pod->{spec}->{nodeName}}->{cpu_limits} += $self->to_core(value => $container->{resources}->{limits}->{cpu}) if (defined($container->{resources}->{limits}->{cpu}));
233239
$self->{nodes}->{$pod->{spec}->{nodeName}}->{memory_requests} += $self->to_bytes(value => $container->{resources}->{requests}->{memory}) if (defined($container->{resources}->{requests}->{memory}));
234240
$self->{nodes}->{$pod->{spec}->{nodeName}}->{memory_limits} += $self->to_bytes(value => $container->{resources}->{limits}->{memory}) if (defined($container->{resources}->{limits}->{memory}));
235241
}
242+
$found++
236243
}
244+
245+
$self->{output}->option_exit(short_msg => "No Pods found.")
246+
unless $found;
237247
}
238248

239249
sub to_bytes {
@@ -280,15 +290,104 @@ Check node usage.
280290
281291
=over 8
282292
283-
=item B<--filter-name>
293+
=item B<--include-name>
284294
285295
Filter by node name (can be a regexp).
286296
287-
=item B<--warning-*> B<--critical-*>
297+
=item B<--exclude-name>
298+
299+
Exclude by node name (can be a regexp).
300+
301+
=item B<--include-status>
302+
303+
Filter by pod status, i.e. the pod phase (can be a regexp).
304+
Default: 'running'
305+
Status can be 'pending', 'running', 'succeeded', 'failed', 'unknown'
306+
307+
=item B<--exclude-status>
308+
309+
Exclude by pod status, i.e. the pod phase (can be a regexp).
310+
Status can be 'pending', 'running', 'succeeded', 'failed', 'unknown'
311+
312+
=item B<--warning-allocated-pods>
313+
314+
Threshold in percentage.
315+
316+
=item B<--critical-allocated-pods>
317+
318+
Threshold in percentage.
319+
320+
=item B<--warning-cpu-limits>
321+
322+
Threshold in percentage.
323+
324+
=item B<--critical-cpu-limits>
325+
326+
Threshold in percentage.
327+
328+
=item B<--warning-cpu-requests>
329+
330+
Threshold in percentage.
331+
332+
=item B<--critical-cpu-requests>
333+
334+
Threshold in percentage.
335+
336+
=item B<--warning-memory-limits>
337+
338+
Threshold in percentage.
339+
340+
=item B<--critical-memory-limits>
341+
342+
Threshold in percentage.
343+
344+
=item B<--warning-memory-requests>
345+
346+
Threshold in percentage.
347+
348+
=item B<--critical-memory-requests>
349+
350+
Threshold in percentage.
351+
352+
=item B<--warning-allocated-pods>
353+
354+
Threshold in percentage.
355+
356+
=item B<--critical-allocated-pods>
357+
358+
Threshold in percentage.
359+
360+
=item B<--warning-cpu-limits>
361+
362+
Threshold in percentage.
363+
364+
=item B<--critical-cpu-limits>
365+
366+
Threshold in percentage.
367+
368+
=item B<--warning-cpu-requests>
369+
370+
Threshold in percentage.
371+
372+
=item B<--critical-cpu-requests>
373+
374+
Threshold in percentage.
375+
376+
=item B<--warning-memory-limits>
377+
378+
Threshold in percentage.
379+
380+
=item B<--critical-memory-limits>
381+
382+
Threshold in percentage.
383+
384+
=item B<--warning-memory-requests>
385+
386+
Threshold in percentage.
387+
388+
=item B<--critical-memory-requests>
288389
289-
Thresholds (in percentage).
290-
Can be: 'cpu-requests', 'cpu-limits', 'memory-requests', 'memory-limits',
291-
'allocated-pods'.
390+
Threshold in percentage.
292391
293392
=back
294393

0 commit comments

Comments
 (0)