Skip to content

Commit e746c05

Browse files
authored
feat: input host monitor support CPU metric (#2153)
* feat: input host monitor support CPU metric * fix lint * fix * fix * fix * add more ut
1 parent 3a1b24b commit e746c05

22 files changed

+779
-81
lines changed

core/collection_pipeline/plugin/PluginRegistry.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "plugin/input/InputContainerStdio.h"
3232
#include "plugin/input/InputFile.h"
3333
#include "plugin/input/InputHostMeta.h"
34+
#include "plugin/input/InputHostMonitor.h"
3435
#include "plugin/input/InputPrometheus.h"
3536
#if defined(__linux__) && !defined(__ANDROID__)
3637
#include "plugin/input/InputFileSecurity.h"
@@ -144,7 +145,8 @@ void PluginRegistry::LoadStaticPlugins() {
144145
RegisterInputCreator(new StaticInputCreator<InputNetworkObserver>(), true);
145146
RegisterInputCreator(new StaticInputCreator<InputNetworkSecurity>(), true);
146147
RegisterInputCreator(new StaticInputCreator<InputProcessSecurity>(), true);
147-
RegisterInputCreator(new StaticInputCreator<InputHostMeta>());
148+
RegisterInputCreator(new StaticInputCreator<InputHostMeta>(), true);
149+
RegisterInputCreator(new StaticInputCreator<InputHostMonitor>(), true);
148150
#endif
149151

150152
RegisterProcessorCreator(new StaticProcessorCreator<ProcessorSplitLogStringNative>());

core/host_monitor/Constants.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414
* limitations under the License.
1515
*/
1616

17-
#include "Constants.h"
17+
#include "host_monitor/Constants.h"
1818

19+
#include <unistd.h>
1920

2021
namespace logtail {
2122

2223
std::filesystem::path PROCESS_DIR = "/proc";
2324
const std::filesystem::path PROCESS_STAT = "stat";
25+
const int64_t SYSTEM_HERTZ = sysconf(_SC_CLK_TCK);
2426

2527
} // namespace logtail

core/host_monitor/Constants.h

+1
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,6 @@ namespace logtail {
2222

2323
extern std::filesystem::path PROCESS_DIR;
2424
const extern std::filesystem::path PROCESS_STAT;
25+
const extern int64_t SYSTEM_HERTZ;
2526

2627
} // namespace logtail

core/host_monitor/HostMonitorInputRunner.cpp

+16-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "common/Flags.h"
3131
#include "common/timer/Timer.h"
3232
#include "host_monitor/HostMonitorTimerEvent.h"
33+
#include "host_monitor/collector/CPUCollector.h"
3334
#include "host_monitor/collector/ProcessEntityCollector.h"
3435
#include "logger/Logger.h"
3536
#include "runner/ProcessorRunner.h"
@@ -40,6 +41,8 @@ namespace logtail {
4041

4142
HostMonitorInputRunner::HostMonitorInputRunner() {
4243
RegisterCollector<ProcessEntityCollector>();
44+
RegisterCollector<CPUCollector>();
45+
4346
size_t threadPoolSize = 1;
4447
// threadPoolSize should be greater than 0
4548
if (INT32_FLAG(host_monitor_thread_pool_size) > 0) {
@@ -73,7 +76,18 @@ void HostMonitorInputRunner::UpdateCollector(const std::vector<std::string>& new
7376
}
7477
}
7578

76-
void HostMonitorInputRunner::RemoveCollector() {
79+
void HostMonitorInputRunner::RemoveCollector(const std::vector<std::string>& collectorNames) {
80+
std::unique_lock<std::shared_mutex> lock(mRegisteredCollectorMapMutex);
81+
for (const auto& name : collectorNames) {
82+
auto iter = mRegisteredCollectorMap.find(name);
83+
if (iter == mRegisteredCollectorMap.end()) {
84+
continue;
85+
}
86+
iter->second.Disable();
87+
}
88+
}
89+
90+
void HostMonitorInputRunner::RemoveAllCollector() {
7791
std::unique_lock<std::shared_mutex> lock(mRegisteredCollectorMapMutex);
7892
for (auto& collector : mRegisteredCollectorMap) {
7993
collector.second.Disable();
@@ -95,7 +109,7 @@ void HostMonitorInputRunner::Stop() {
95109
if (!mIsStarted.exchange(false)) {
96110
return;
97111
}
98-
RemoveCollector();
112+
RemoveAllCollector();
99113
#ifndef APSARA_UNIT_TEST_MAIN
100114
std::future<void> result = std::async(std::launch::async, [this]() { mThreadPool->Stop(); });
101115
if (result.wait_for(std::chrono::seconds(3)) == std::future_status::timeout) {

core/host_monitor/HostMonitorInputRunner.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ class HostMonitorInputRunner : public InputRunner {
7272
const std::vector<uint32_t>& newCollectorIntervals,
7373
QueueKey processQueueKey,
7474
size_t inputIndex);
75-
void RemoveCollector();
75+
void RemoveCollector(const std::vector<std::string>& collectorNames);
76+
void RemoveAllCollector();
7677

7778
void Init() override;
7879
void Stop() override;

core/host_monitor/SystemInformationTools.cpp

+9-33
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616

1717
#include "host_monitor/SystemInformationTools.h"
1818

19+
#include <iostream>
1920
#include <string>
2021
#include <vector>
2122

2223
#include "common/FileSystemUtil.h"
23-
#include "common/StringTools.h"
2424
#include "constants/EntityConstants.h"
2525
#include "host_monitor/Constants.h"
2626
#include "logger/Logger.h"
@@ -30,42 +30,18 @@ using namespace std::chrono;
3030

3131
namespace logtail {
3232

33-
int64_t GetHostSystemBootTime() {
34-
static int64_t systemBootSeconds = 0;
35-
if (systemBootSeconds != 0) {
36-
return systemBootSeconds;
37-
}
38-
int64_t currentSeconds = duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
33+
bool GetHostSystemStat(vector<string>& lines, string& errorMessage) {
34+
errorMessage.clear();
3935
if (!CheckExistance(PROCESS_DIR / PROCESS_STAT)) {
40-
LOG_WARNING(sLogger,
41-
("failed to get system boot time", "use process start time instead")(
42-
"error msg", "file not exists")("process start time", currentSeconds));
43-
return currentSeconds;
44-
}
45-
46-
vector<string> cpuLines = {};
47-
string errorMessage;
48-
int ret = GetFileLines(PROCESS_DIR / PROCESS_STAT, cpuLines, true, &errorMessage);
49-
if (ret != 0 || cpuLines.empty()) {
50-
LOG_WARNING(sLogger,
51-
("failed to get system boot time", "use process start time instead")("error msg", errorMessage)(
52-
"process start time", currentSeconds));
53-
return currentSeconds;
36+
errorMessage = "file does not exist: " + (PROCESS_DIR / PROCESS_STAT).string();
37+
return false;
5438
}
5539

56-
for (auto const& cpuLine : cpuLines) {
57-
auto cpuMetric = SplitString(cpuLine);
58-
// example: btime 1719922762
59-
if (cpuMetric.size() >= 2 && cpuMetric[0] == "btime") {
60-
systemBootSeconds = StringTo<int64_t>(cpuMetric[1]);
61-
return systemBootSeconds;
62-
}
40+
int ret = GetFileLines(PROCESS_DIR / PROCESS_STAT, lines, true, &errorMessage);
41+
if (ret != 0 || lines.empty()) {
42+
return false;
6343
}
64-
65-
LOG_WARNING(sLogger,
66-
("failed to get system boot time", "use process start time instead")(
67-
"error msg", "btime not found in stat")("process start time", currentSeconds));
68-
return currentSeconds;
44+
return true;
6945
}
7046

7147
} // namespace logtail

core/host_monitor/SystemInformationTools.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616

1717
#pragma once
1818

19-
#include <cstdint>
19+
#include <string>
20+
#include <vector>
2021

2122
namespace logtail {
2223

23-
int64_t GetHostSystemBootTime();
24+
bool GetHostSystemStat(std::vector<std::string>& lines, std::string& errorMessage);
2425

2526
} // namespace logtail

core/host_monitor/collector/BaseCollector.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class BaseCollector {
3131
virtual const std::string& Name() const = 0;
3232

3333
protected:
34-
bool mValidState = false;
34+
bool mValidState = true;
3535
};
3636

3737
} // namespace logtail
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*
2+
* Copyright 2025 iLogtail Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "host_monitor/collector/CPUCollector.h"
18+
19+
#include <string>
20+
21+
#include "boost/algorithm/string.hpp"
22+
#include "boost/algorithm/string/split.hpp"
23+
24+
#include "MetricValue.h"
25+
#include "common/StringTools.h"
26+
#include "host_monitor/Constants.h"
27+
#include "host_monitor/SystemInformationTools.h"
28+
#include "logger/Logger.h"
29+
30+
namespace logtail {
31+
32+
const std::string CPUCollector::sName = "cpu";
33+
const std::string kMetricLabelCPU = "cpu";
34+
const std::string kMetricLabelMode = "mode";
35+
36+
bool CPUCollector::Collect(const HostMonitorTimerEvent::CollectConfig& collectConfig, PipelineEventGroup* group) {
37+
if (group == nullptr) {
38+
return false;
39+
}
40+
std::vector<CPUStat> cpus;
41+
if (!GetHostSystemCPUStat(cpus)) {
42+
return false;
43+
}
44+
const time_t now = time(nullptr);
45+
constexpr struct MetricDef {
46+
const char* name;
47+
const char* mode;
48+
double CPUStat::*value;
49+
} metrics[] = {
50+
{"node_cpu_seconds_total", "user", &CPUStat::user},
51+
{"node_cpu_seconds_total", "nice", &CPUStat::nice},
52+
{"node_cpu_seconds_total", "system", &CPUStat::system},
53+
{"node_cpu_seconds_total", "idle", &CPUStat::idle},
54+
{"node_cpu_seconds_total", "iowait", &CPUStat::iowait},
55+
{"node_cpu_seconds_total", "irq", &CPUStat::irq},
56+
{"node_cpu_seconds_total", "softirq", &CPUStat::softirq},
57+
{"node_cpu_seconds_total", "steal", &CPUStat::steal},
58+
{"node_cpu_guest_seconds_total", "user", &CPUStat::guest},
59+
{"node_cpu_guest_seconds_total", "nice", &CPUStat::guestNice},
60+
};
61+
for (const auto& cpu : cpus) {
62+
if (cpu.index == -1) {
63+
continue;
64+
}
65+
for (const auto& def : metrics) {
66+
auto* metricEvent = group->AddMetricEvent(true);
67+
if (!metricEvent) {
68+
continue;
69+
}
70+
metricEvent->SetName(def.name);
71+
metricEvent->SetTimestamp(now, 0);
72+
metricEvent->SetValue<UntypedSingleValue>(cpu.*(def.value) / SYSTEM_HERTZ);
73+
metricEvent->SetTag(kMetricLabelCPU, std::to_string(cpu.index));
74+
metricEvent->SetTagNoCopy(kMetricLabelMode, def.mode);
75+
}
76+
}
77+
return true;
78+
}
79+
80+
bool CPUCollector::GetHostSystemCPUStat(std::vector<CPUStat>& cpus) {
81+
std::vector<std::string> cpuLines;
82+
std::string errorMessage;
83+
if (!GetHostSystemStat(cpuLines, errorMessage)) {
84+
if (mValidState) {
85+
LOG_WARNING(sLogger, ("failed to get system cpu", "invalid CPU collector")("error msg", errorMessage));
86+
mValidState = false;
87+
}
88+
return false;
89+
}
90+
mValidState = true;
91+
// cpu 1195061569 1728645 418424132 203670447952 14723544 0 773400 0 0 0
92+
// cpu0 14708487 14216 4613031 2108180843 57199 0 424744 0 0 0
93+
// ...
94+
cpus.clear();
95+
cpus.reserve(cpuLines.size());
96+
for (auto const& line : cpuLines) {
97+
std::vector<std::string> cpuMetric;
98+
boost::split(cpuMetric, line, boost::is_any_of(" "), boost::token_compress_on);
99+
if (cpuMetric.size() > 0 && cpuMetric[0].substr(0, 3) == "cpu") {
100+
CPUStat cpuStat{};
101+
if (cpuMetric[0] == "cpu") {
102+
cpuStat.index = -1;
103+
} else {
104+
try {
105+
cpuStat.index = StringTo<int32_t>(cpuMetric[0].substr(3));
106+
} catch (...) {
107+
LOG_ERROR(sLogger, ("failed to parse cpu index", "skip")("wrong cpu index", cpuMetric[0]));
108+
continue;
109+
}
110+
}
111+
cpuStat.user = ParseMetric(cpuMetric, EnumCpuKey::user);
112+
cpuStat.nice = ParseMetric(cpuMetric, EnumCpuKey::nice);
113+
cpuStat.system = ParseMetric(cpuMetric, EnumCpuKey::system);
114+
cpuStat.idle = ParseMetric(cpuMetric, EnumCpuKey::idle);
115+
cpuStat.iowait = ParseMetric(cpuMetric, EnumCpuKey::iowait);
116+
cpuStat.irq = ParseMetric(cpuMetric, EnumCpuKey::irq);
117+
cpuStat.softirq = ParseMetric(cpuMetric, EnumCpuKey::softirq);
118+
cpuStat.steal = ParseMetric(cpuMetric, EnumCpuKey::steal);
119+
cpuStat.guest = ParseMetric(cpuMetric, EnumCpuKey::guest);
120+
cpuStat.guestNice = ParseMetric(cpuMetric, EnumCpuKey::guest_nice);
121+
cpus.push_back(cpuStat);
122+
}
123+
}
124+
return true;
125+
}
126+
127+
double CPUCollector::ParseMetric(const std::vector<std::string>& cpuMetric, EnumCpuKey key) const {
128+
try {
129+
if (cpuMetric.size() <= static_cast<size_t>(key)) {
130+
return 0;
131+
}
132+
return StringTo<double>(cpuMetric[static_cast<size_t>(key)]);
133+
} catch (...) {
134+
return 0.0;
135+
}
136+
}
137+
138+
} // namespace logtail
+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright 2025 iLogtail Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <vector>
20+
21+
#include "host_monitor/collector/BaseCollector.h"
22+
23+
namespace logtail {
24+
25+
// man proc: https://man7.org/linux/man-pages/man5/proc.5.html
26+
// search key: /proc/stat
27+
enum class EnumCpuKey : int {
28+
user = 1,
29+
nice,
30+
system,
31+
idle,
32+
iowait, // since Linux 2.5.41
33+
irq, // since Linux 2.6.0
34+
softirq, // since Linux 2.6.0
35+
steal, // since Linux 2.6.11
36+
guest, // since Linux 2.6.24
37+
guest_nice, // since Linux 2.6.33
38+
};
39+
40+
struct CPUStat {
41+
int32_t index; // -1 means total cpu
42+
double user;
43+
double nice;
44+
double system;
45+
double idle;
46+
double iowait;
47+
double irq;
48+
double softirq;
49+
double steal;
50+
double guest;
51+
double guestNice;
52+
};
53+
54+
class CPUCollector : public BaseCollector {
55+
public:
56+
~CPUCollector() override = default;
57+
58+
bool Collect(const HostMonitorTimerEvent::CollectConfig& collectConfig, PipelineEventGroup* group) override;
59+
60+
static const std::string sName;
61+
const std::string& Name() const override { return sName; }
62+
63+
private:
64+
bool GetHostSystemCPUStat(std::vector<CPUStat>& cpus);
65+
double ParseMetric(const std::vector<std::string>& cpuMetric, EnumCpuKey key) const;
66+
};
67+
68+
} // namespace logtail

0 commit comments

Comments
 (0)