Skip to content

Commit 5c804c9

Browse files
authored
feat: modify CPU collector (alibaba#2296)
* feat: modify CPU collector add multi-values pushing Signed-off-by: chenshiyan <chenshiyan@linux.alibaba.com> * feat: add cpu collector unit tests Signed-off-by: chenshiyan <chenshiyan@linux.alibaba.com> * Revert coolbpf submodule to cf9b9300 * bugfix: fix cpuTotal spelling mistake Signed-off-by: chenshiyan <chenshiyan@linux.alibaba.com> --------- Signed-off-by: chenshiyan <chenshiyan@linux.alibaba.com>
1 parent 17ea512 commit 5c804c9

4 files changed

Lines changed: 328 additions & 101 deletions

File tree

core/host_monitor/LinuxSystemInterface.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,7 @@ bool LinuxSystemInterface::GetCPUInformationOnce(CPUInformation& cpuInfo) {
405405
std::vector<std::string> cpuLines;
406406
std::string errorMessage;
407407
if (!GetHostSystemStat(cpuLines, errorMessage)) {
408+
LOG_ERROR(sLogger, ("failed to get CPU information", errorMessage));
408409
return false;
409410
}
410411
// cpu 1195061569 1728645 418424132 203670447952 14723544 0 773400 0 0 0

core/host_monitor/collector/CPUCollector.cpp

Lines changed: 99 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -24,51 +24,122 @@
2424
namespace logtail {
2525

2626
// Collector name; this is the string returned by CPUCollector::Name().
const std::string CPUCollector::sName = "cpu";
27-
const std::string kMetricLabelCPU = "cpu";
28-
const std::string kMetricLabelMode = "mode";
2927

28+
// Default-constructs the collector and applies Init()'s default
// samples-per-report setting.
CPUCollector::CPUCollector() {
    // Init() only stores its argument and resets the sample counter; its
    // status code is always 0, so it is intentionally discarded here.
    static_cast<void>(Init());
}
31+
// Configures how many samples Collect() accumulates before it emits a report
// and restarts the current accumulation window.
//
// @param totalCount number of samples per report
// @return always 0
int CPUCollector::Init(int totalCount) {
    mCount = 0;
    mCountPerReport = totalCount;
    return 0;
}
3036
// Samples the aggregate CPU counters and, once mCountPerReport samples have
// been accumulated, appends one multi-value metric event (tag m=system.cpu)
// holding avg/min/max usage percentages and the core count to `group`.
//
// The first successful call only records a baseline (CalculateCPUPercent
// stores it in lastCpu) and the core count; it returns true without adding a
// sample, because a percentage needs two snapshots.
//
// @param collectConfig collection configuration (not used by this collector)
// @param group         destination event group; must not be null
// @return false when `group` is null, the CPU information cannot be read, the
//         stats contain no per-core entries or no aggregate entry, the
//         percentage cannot be computed, or the metric event cannot be
//         created; true otherwise (including calls that only accumulate).
bool CPUCollector::Collect(const HostMonitorTimerEvent::CollectConfig& collectConfig, PipelineEventGroup* group) {
    if (group == nullptr) {
        LOG_ERROR(sLogger, ("PipelineEventGroup got nullptr", "skip"));
        return false;
    }

    CPUInformation cpuInfo;
    if (!SystemInterface::GetInstance()->GetCPUInformation(cpuInfo)) {
        return false;
    }

    // The core count is derived as stats.size() - 1 (one entry is the
    // aggregate), so at least two entries are required.
    if (cpuInfo.stats.size() <= 1) {
        LOG_ERROR(sLogger, ("cpu count is not positive", cpuInfo.stats.size()));
        return false;
    }

    // Only the aggregate entry (index == -1) is reported by this collector.
    const CPUStat* aggregate = nullptr;
    for (const auto& cpu : cpuInfo.stats) {
        if (cpu.index == -1) {
            aggregate = &cpu;
            break;
        }
    }
    if (aggregate == nullptr) {
        // Previously this case fell through and reported success with no data.
        LOG_ERROR(sLogger, ("aggregate cpu stat not found", "skip"));
        return false;
    }

    const time_t now = time(nullptr);
    const int currentCpuCount = static_cast<int>(cpuInfo.stats.size()) - 1;
    // cpuCount == 0 doubles as the "no baseline yet" marker; it must be read
    // before CalculateCPUPercent() runs and before cpuCount is refreshed.
    const bool isFirstSample = (cpuCount == 0);

    CPUStat cpuTotal = *aggregate;
    CPUPercent totalCpuPercent{};
    if (!CalculateCPUPercent(totalCpuPercent, cpuTotal)) {
        return false;
    }

    cpuCount = currentCpuCount;
    if (isFirstSample) {
        // Baseline only: nothing to accumulate yet.
        return true;
    }

    mCalculate.AddValue(totalCpuPercent);
    ++mCount;
    if (mCount < mCountPerReport) {
        return true;
    }

    CPUPercent minCPU{};
    CPUPercent maxCPU{};
    CPUPercent avgCPU{};
    CPUPercent lastCPU{};
    mCalculate.Stat(maxCPU, minCPU, avgCPU, &lastCPU);

    // Start a fresh accumulation window for the next report.
    mCount = 0;
    mCalculate.Reset();

    // Report the core count observed in this sample rather than the one
    // remembered from the previous call.
    const double cpuCores = currentCpuCount;

    const struct MetricDef {
        const char* name;
        const double* value;
    } metrics[] = {
        {"cpu_system_avg", &avgCPU.sys},     {"cpu_system_min", &minCPU.sys},     {"cpu_system_max", &maxCPU.sys},
        {"cpu_idle_avg", &avgCPU.idle},      {"cpu_idle_min", &minCPU.idle},      {"cpu_idle_max", &maxCPU.idle},
        {"cpu_user_avg", &avgCPU.user},      {"cpu_user_min", &minCPU.user},      {"cpu_user_max", &maxCPU.user},
        {"cpu_wait_avg", &avgCPU.wait},      {"cpu_wait_min", &minCPU.wait},      {"cpu_wait_max", &maxCPU.wait},
        {"cpu_other_avg", &avgCPU.other},    {"cpu_other_min", &minCPU.other},    {"cpu_other_max", &maxCPU.other},
        {"cpu_total_avg", &avgCPU.total},    {"cpu_total_min", &minCPU.total},    {"cpu_total_max", &maxCPU.total},
        {"cpu_cores_value", &cpuCores},
    };

    MetricEvent* metricEvent = group->AddMetricEvent(true);
    if (!metricEvent) {
        return false;
    }
    metricEvent->SetTimestamp(now, 0);
    metricEvent->SetValue<UntypedMultiDoubleValues>(metricEvent);
    metricEvent->SetTag(std::string("m"), std::string("system.cpu"));
    auto* multiDoubleValues = metricEvent->MutableValue<UntypedMultiDoubleValues>();
    for (const auto& def : metrics) {
        multiDoubleValues->SetValue(std::string(def.name),
                                    UntypedMultiDoubleValue{UntypedValueMetricType::MetricTypeGauge, *def.value});
    }
    return true;
}
73111

112+
bool CPUCollector::CalculateCPUPercent(CPUPercent& cpuPercent, CPUStat& currentCpu) {
113+
if (cpuCount == 0) {
114+
lastCpu = currentCpu;
115+
cpuPercent.sys = cpuPercent.user = cpuPercent.wait = cpuPercent.idle = cpuPercent.other = cpuPercent.total
116+
= 0.0;
117+
LOG_DEBUG(sLogger, ("first time collect Cpu info", "empty"));
118+
return true;
119+
}
120+
121+
double currentJiffies, lastJiffies, jiffiesDelta;
122+
currentJiffies = currentCpu.user + currentCpu.nice + currentCpu.system + currentCpu.idle + currentCpu.iowait
123+
+ currentCpu.irq + currentCpu.softirq + currentCpu.steal;
124+
lastJiffies = lastCpu.user + lastCpu.nice + lastCpu.system + lastCpu.idle + lastCpu.iowait + lastCpu.irq
125+
+ lastCpu.softirq + lastCpu.steal;
126+
jiffiesDelta = currentJiffies - lastJiffies;
127+
128+
if (jiffiesDelta <= 0) {
129+
LOG_ERROR(sLogger, ("jiffies delta is negative", "skip"));
130+
return false;
131+
}
132+
133+
cpuPercent.sys = (currentCpu.system - lastCpu.system) / jiffiesDelta * 100;
134+
cpuPercent.user = (currentCpu.user - lastCpu.user) / jiffiesDelta * 100;
135+
cpuPercent.wait = (currentCpu.iowait - lastCpu.iowait) / jiffiesDelta * 100;
136+
cpuPercent.idle = (currentCpu.idle - lastCpu.idle) / jiffiesDelta * 100;
137+
cpuPercent.other = (currentCpu.nice + currentCpu.irq + currentCpu.softirq + currentCpu.steal - lastCpu.nice
138+
- lastCpu.irq - lastCpu.softirq - lastCpu.steal)
139+
/ jiffiesDelta * 100;
140+
cpuPercent.total = 100 - cpuPercent.idle;
141+
lastCpu = currentCpu;
142+
return true;
143+
}
144+
74145
} // namespace logtail

core/host_monitor/collector/CPUCollector.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,64 @@
1616

1717
#pragma once
1818

19+
#include "host_monitor/Constants.h"
20+
#include "host_monitor/SystemInterface.h"
1921
#include "host_monitor/collector/BaseCollector.h"
22+
#include "host_monitor/collector/MetricCalculate.h"
23+
#include "plugin/input/InputHostMonitor.h"
2024

2125
namespace logtail {
2226

27+
// Host-monitor interval constants, defined in another translation unit.
// Their quotient (default / min) is the default argument of
// CPUCollector::Init(), i.e. the default number of samples per report.
// NOTE(review): units and definition site are not visible here — confirm.
extern const uint32_t kHostMonitorMinInterval;
28+
extern const uint32_t kHostMonitorDefaultInterval;
29+
30+
struct CPUPercent {
31+
double sys;
32+
double user;
33+
double wait;
34+
double idle;
35+
double other;
36+
double total;
37+
38+
// Define the field descriptors
39+
static inline const FieldName<CPUPercent> CPUMetricFields[] = {
40+
FIELD_ENTRY(CPUPercent, sys),
41+
FIELD_ENTRY(CPUPercent, user),
42+
FIELD_ENTRY(CPUPercent, wait),
43+
FIELD_ENTRY(CPUPercent, idle),
44+
FIELD_ENTRY(CPUPercent, other),
45+
FIELD_ENTRY(CPUPercent, total),
46+
};
47+
48+
// Define the enumerate function for your metric type
49+
static void enumerate(const std::function<void(const FieldName<CPUPercent, double>&)>& callback) {
50+
for (const auto& field : CPUMetricFields) {
51+
callback(field);
52+
}
53+
}
54+
};
55+
2356
class CPUCollector : public BaseCollector {
2457
public:
58+
CPUCollector();
59+
int Init(int totalCount = kHostMonitorDefaultInterval / kHostMonitorMinInterval);
60+
2561
~CPUCollector() override = default;
2662

2763
bool Collect(const HostMonitorTimerEvent::CollectConfig& collectConfig, PipelineEventGroup* group) override;
2864

2965
static const std::string sName;
3066
const std::string& Name() const override { return sName; }
67+
68+
private:
69+
bool CalculateCPUPercent(CPUPercent& cpuPercent, CPUStat& cpu);
70+
71+
private:
72+
int mCountPerReport = 0;
73+
int mCount = 0;
74+
int cpuCount = 0;
75+
MetricCalculate<CPUPercent> mCalculate;
76+
CPUStat lastCpu{};
3177
};
3278

3379
} // namespace logtail

0 commit comments

Comments
 (0)