Skip to content

Commit 70edb93

Browse files
committed
Let the icinga check command also output the error when last reload failed
refs #7263
1 parent 2005e4b commit 70edb93

File tree

6 files changed

+86
-22
lines changed

6 files changed

+86
-22
lines changed

doc/10-icinga-template-library.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Name | Description
8484
-----------------------|---------------
8585
icinga\_min\_version | **Optional.** Required minimum Icinga 2 version, e.g. `2.8.0`. If not satisfied, the state changes to `Critical`. Release packages only.
8686
icinga\_perfdata | **Optional.** Only yield the given performance data. E.g. `[ "*_latency", "*_execution_time" ]`
87+
icinga\_verbose | **Optional.** If the last reload failed, also include the reason for the failure in the check output.
8788

8889
### cluster <a id="itl-icinga-cluster"></a>
8990

lib/base/application.cpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
#include "base/scriptglobal.hpp"
1616
#include "base/process.hpp"
1717
#include "base/tlsutility.hpp"
18+
#include <algorithm>
1819
#include <boost/algorithm/string/trim.hpp>
1920
#include <boost/exception/errinfo_api_function.hpp>
2021
#include <boost/exception/errinfo_errno.hpp>
2122
#include <boost/exception/errinfo_file_name.hpp>
23+
#include <cstring>
2224
#include <sstream>
2325
#include <iostream>
2426
#include <fstream>
@@ -56,11 +58,12 @@ double Application::m_StartTime;
5658
bool Application::m_ScriptDebuggerEnabled = false;
5759

5860
#ifdef _WIN32
59-
double Application::m_LastReloadFailed = 0;
61+
Application::LastFailedReload Application::m_LastReloadFailed;
62+
std::mutex Application::m_LastReloadFailedMutex;
6063
#else /* _WIN32 */
61-
std::atomic<double>* Application::m_LastReloadFailed = ([]() -> std::atomic<double>* {
64+
Application::LastFailedReload* Application::m_LastReloadFailed = ([]() -> Application::LastFailedReload* {
6265
auto memory (mmap(
63-
nullptr, sizeof(std::atomic<double>),
66+
nullptr, sizeof(Application::LastFailedReload),
6467
PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0
6568
));
6669
if (memory == MAP_FAILED) {
@@ -69,8 +72,11 @@ std::atomic<double>* Application::m_LastReloadFailed = ([]() -> std::atomic<doub
6972
<< boost::errinfo_errno(errno));
7073
}
7174

72-
auto lrf ((std::atomic<double>*)memory);
73-
lrf->store(0);
75+
auto lrf ((Application::LastFailedReload*)memory);
76+
77+
lrf->When.store(0);
78+
std::fill((volatile char*)lrf->Why, (volatile char*)lrf->Why + sizeof(lrf->Why), 0);
79+
7480
return lrf;
7581
})();
7682
#endif /* _WIN32 */
@@ -383,7 +389,7 @@ void Application::OnShutdown()
383389
static void ReloadProcessCallbackInternal(const ProcessResult& pr)
384390
{
385391
if (pr.ExitStatus != 0) {
386-
Application::SetLastReloadFailed(Utility::GetTime());
392+
Application::SetLastReloadFailed(Utility::GetTime(), pr.Output);
387393
Log(LogCritical, "Application", "Found error in config: reloading aborted");
388394
}
389395
#ifdef _WIN32
@@ -1188,21 +1194,30 @@ void Application::SetScriptDebuggerEnabled(bool enabled)
11881194
m_ScriptDebuggerEnabled = enabled;
11891195
}
11901196

1191-
double Application::GetLastReloadFailed()
1197+
std::pair<double, String> Application::GetLastReloadFailed()
11921198
{
11931199
#ifdef _WIN32
1194-
return m_LastReloadFailed;
1200+
std::unique_lock<std::mutex> lock (m_LastReloadFailedMutex);
1201+
return std::pair<double, String>(m_LastReloadFailed.When, m_LastReloadFailed.Why);
11951202
#else /* _WIN32 */
1196-
return m_LastReloadFailed->load();
1203+
char reason[sizeof(m_LastReloadFailed->Why)];
1204+
std::copy((volatile char*)m_LastReloadFailed->Why, (volatile char*)m_LastReloadFailed->Why + sizeof(m_LastReloadFailed->Why), (char*)reason);
1205+
1206+
return std::pair<double, String>(m_LastReloadFailed->When.load(), String((char*)reason));
11971207
#endif /* _WIN32 */
11981208
}
11991209

1200-
void Application::SetLastReloadFailed(double ts)
1210+
void Application::SetLastReloadFailed(double ts, const String& reason)
12011211
{
12021212
#ifdef _WIN32
1203-
m_LastReloadFailed = ts;
1213+
std::unique_lock<std::mutex> lock (m_LastReloadFailedMutex);
1214+
m_LastReloadFailed = LastFailedReload{ts, reason};
12041215
#else /* _WIN32 */
1205-
m_LastReloadFailed->store(ts);
1216+
char buf[sizeof(m_LastReloadFailed->Why)] = {0};
1217+
1218+
(void)strncpy((char*)buf, reason.CStr(), sizeof(buf));
1219+
std::copy((char*)buf, (char*)buf + sizeof(buf), (volatile char*)m_LastReloadFailed->Why);
1220+
m_LastReloadFailed->When.store(ts);
12061221
#endif /* _WIN32 */
12071222
}
12081223

lib/base/application.hpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "base/configuration.hpp"
1010
#include <atomic>
1111
#include <iosfwd>
12+
#include <mutex>
13+
#include <utility>
1214

1315
namespace icinga
1416
{
@@ -101,8 +103,8 @@ class Application : public ObjectImpl<Application> {
101103
static bool GetScriptDebuggerEnabled();
102104
static void SetScriptDebuggerEnabled(bool enabled);
103105

104-
static double GetLastReloadFailed();
105-
static void SetLastReloadFailed(double ts);
106+
static std::pair<double, String> GetLastReloadFailed();
107+
static void SetLastReloadFailed(double ts, const String& reason);
106108

107109
static void DisplayInfoMessage(std::ostream& os, bool skipVersion = false);
108110

@@ -138,10 +140,24 @@ class Application : public ObjectImpl<Application> {
138140
static double m_StartTime;
139141
static double m_MainTime;
140142
static bool m_ScriptDebuggerEnabled;
143+
141144
#ifdef _WIN32
142-
static double m_LastReloadFailed;
145+
struct LastFailedReload
146+
{
147+
double When = 0;
148+
String Why;
149+
};
150+
151+
static LastFailedReload m_LastReloadFailed;
152+
static std::mutex m_LastReloadFailedMutex;
143153
#else /* _WIN32 */
144-
static std::atomic<double> *m_LastReloadFailed;
154+
struct LastFailedReload
155+
{
156+
std::atomic<double> When;
157+
volatile char Why[4096];
158+
};
159+
160+
static LastFailedReload *m_LastReloadFailed;
145161
#endif /* _WIN32 */
146162

147163
#ifdef _WIN32

lib/base/logger.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ class Logger : public ObjectImpl<Logger>
7676

7777
void ValidateSeverity(const Lazy<String>& lvalue, const ValidationUtils& utils) final;
7878

79-
protected:
8079
void Start(bool runtimeCreated) override;
8180
void Stop(bool runtimeRemoved) override;
8281

lib/cli/daemoncommand.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "base/atomic.hpp"
1111
#include "base/defer.hpp"
1212
#include "base/logger.hpp"
13+
#include "base/streamlogger.hpp"
1314
#include "base/application.hpp"
1415
#include "base/timer.hpp"
1516
#include "base/utility.hpp"
@@ -23,6 +24,7 @@
2324
#include <boost/program_options.hpp>
2425
#include <iostream>
2526
#include <fstream>
27+
#include <sstream>
2628

2729
#ifdef _WIN32
2830
#include <windows.h>
@@ -233,16 +235,31 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
233235
}
234236
#endif /* I2_DEBUG */
235237

238+
std::ostringstream oss;
239+
StreamLogger::Ptr sl = new StreamLogger();
240+
241+
sl->BindStream(&oss, false);
242+
sl->Start(true);
243+
236244
Log(LogInformation, "cli", "Loading configuration file(s).");
237245

238246
{
239247
std::vector<ConfigItem::Ptr> newItems;
240248

241249
if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath)) {
242250
Log(LogCritical, "cli", "Config validation failed. Re-run with 'icinga2 daemon -C' after fixing the config.");
251+
252+
sl->Stop(true);
253+
sl = nullptr;
254+
Application::SetLastReloadFailed(Utility::GetTime(), oss.str());
255+
243256
return EXIT_FAILURE;
244257
}
245258

259+
sl->Stop(true);
260+
sl = nullptr;
261+
oss = decltype(oss)();
262+
246263
#ifndef _WIN32
247264
Log(LogNotice, "cli")
248265
<< "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success";
@@ -477,6 +494,7 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
477494
}
478495

479496
(void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr);
497+
Application::SetLastReloadFailed(Utility::GetTime(), "fork() failed");
480498
return -1;
481499

482500
case 0:
@@ -511,11 +529,13 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
511529
} catch (const std::exception& ex) {
512530
Log(LogCritical, "cli")
513531
<< "Failed to re-initialize thread pool after forking (child): " << DiagnosticInformation(ex);
532+
Application::SetLastReloadFailed(Utility::GetTime(), "Failed to re-initialize thread pool after forking (child)");
514533
_exit(EXIT_FAILURE);
515534
}
516535

517536
_exit(RunWorker(configs, closeConsoleLog, stderrFile));
518537
} catch (...) {
538+
Application::SetLastReloadFailed(Utility::GetTime(), "");
519539
_exit(EXIT_FAILURE);
520540
}
521541

@@ -767,12 +787,11 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin
767787

768788
if (nextWorker == -1) {
769789
Log(LogCritical, "Application", "Found error in config: reloading aborted");
770-
Application::SetLastReloadFailed(Utility::GetTime());
771790
} else {
772791
Log(LogInformation, "Application")
773792
<< "Reload done, old process shutting down. Child process with PID '" << nextWorker << "' is taking over.";
774793

775-
Application::SetLastReloadFailed(0);
794+
Application::SetLastReloadFailed(0, "");
776795
(void)kill(currentWorker, SIGTERM);
777796

778797
{

lib/methods/icingachecktask.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
5050
auto perfdataFilter (MacroProcessor::ResolveMacros("$icinga_perfdata$", resolvers, checkable->GetLastCheckResult(),
5151
nullptr, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros));
5252

53+
auto verbose (MacroProcessor::ResolveMacros("$icinga_verbose$", resolvers, checkable->GetLastCheckResult(),
54+
nullptr, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros));
55+
5356
if (resolvedMacros && !useResolvedMacros)
5457
return;
5558

@@ -163,10 +166,17 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
163166
". Version: " + appVersion;
164167

165168
/* Indicate a warning if the last reload failed. */
166-
double lastReloadFailed = Application::GetLastReloadFailed();
169+
auto lastReloadFailed (Application::GetLastReloadFailed());
170+
String verboseText;
171+
172+
if (lastReloadFailed.first > 0) {
173+
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed.first);
174+
175+
if (verbose.ToBool() && lastReloadFailed.second.GetLength()) {
176+
output += " (reason: see below)";
177+
verboseText = lastReloadFailed.second;
178+
}
167179

168-
if (lastReloadFailed > 0) {
169-
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed);
170180
state =ServiceWarning;
171181
}
172182

@@ -192,6 +202,10 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
192202
state = ServiceCritical;
193203
}
194204

205+
if (verboseText.GetLength()) {
206+
output += "\n\n" + verboseText;
207+
}
208+
195209
String commandName = command->GetName();
196210
if (Checkable::ExecuteCommandProcessFinishedHandler) {
197211
double now = Utility::GetTime();

0 commit comments

Comments
 (0)