Skip to content

Commit dc22a0d

Browse files
committed
Let the icinga check command also output the error when the last reload failed
refs #7263
1 parent 5c330e9 commit dc22a0d

File tree

6 files changed

+112
-29
lines changed

6 files changed

+112
-29
lines changed

doc/10-icinga-template-library.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ Custom variables passed as [command parameters](03-monitoring-basics.md#command-
8383
Name | Description
8484
-----------------------|---------------
8585
icinga\_min\_version | **Optional.** Required minimum Icinga 2 version, e.g. `2.8.0`. If not satisfied, the state changes to `Critical`. Release packages only.
86+
icinga\_verbose | **Optional.** If enabled and the last reload failed, the reason for the failure is appended to the check output.
8687

8788
### cluster <a id="itl-icinga-cluster"></a>
8889

lib/base/application.cpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <boost/exception/errinfo_errno.hpp>
2121
#include <boost/exception/errinfo_file_name.hpp>
2222
#include <boost/stacktrace.hpp>
23+
#include <cstring>
2324
#include <sstream>
2425
#include <iostream>
2526
#include <fstream>
@@ -30,6 +31,7 @@
3031
#ifdef _WIN32
3132
#include <windows.h>
3233
#else /* _WIN32 */
34+
#include "base/shared-memory.hpp"
3335
#include <signal.h>
3436
#endif /* _WIN32 */
3537

@@ -63,11 +65,18 @@ char **Application::m_ArgV;
6365
double Application::m_StartTime;
6466
bool Application::m_ScriptDebuggerEnabled = false;
6567

68+
Application::LastReloadFailed* Application::m_LastReloadFailed (Application::AllocLastReloadFailed());
69+
70+
/**
 * Provides the storage that m_LastReloadFailed points to.
 *
 * On Windows the record is a function-local static object. On all other
 * platforms it is the object held by a function-local static
 * SharedMemory<LastReloadFailed>.
 * NOTE(review): presumably SharedMemory makes the record visible to both the
 * umbrella process and forked workers - confirm in base/shared-memory.hpp.
 *
 * @returns Pointer to the single LastReloadFailed record.
 */
Application::LastReloadFailed* Application::AllocLastReloadFailed()
71+
{
6672
#ifdef _WIN32
67-
double Application::m_LastReloadFailed = 0;
73+
static LastReloadFailed lrf;
74+
return &lrf;
6875
#else /* _WIN32 */
69-
SharedMemory<Application::AtomicTs> Application::m_LastReloadFailed (0);
76+
static SharedMemory<LastReloadFailed> slrf;
77+
return &slrf.Get();
7078
#endif /* _WIN32 */
79+
}
7180

7281
#ifdef _WIN32
7382
static LPTOP_LEVEL_EXCEPTION_FILTER l_DefaultUnhandledExceptionFilter = nullptr;
@@ -379,7 +388,7 @@ void Application::OnShutdown()
379388
static void ReloadProcessCallbackInternal(const ProcessResult& pr)
380389
{
381390
if (pr.ExitStatus != 0) {
382-
Application::SetLastReloadFailed(Utility::GetTime());
391+
Application::SetLastReloadFailed(Utility::GetTime(), pr.Output);
383392
Log(LogCritical, "Application", "Found error in config: reloading aborted");
384393
}
385394
#ifdef _WIN32
@@ -1211,22 +1220,17 @@ void Application::SetScriptDebuggerEnabled(bool enabled)
12111220
m_ScriptDebuggerEnabled = enabled;
12121221
}
12131222

1214-
double Application::GetLastReloadFailed()
1223+
/**
 * Retrieves when and why the last reload failed.
 *
 * The record is read while holding a shared lock on its mutex.
 *
 * @returns A pair of the failure timestamp (the record's When field, which
 *          defaults to 0) and a copy of the error text (the Why field).
 */
std::pair<double, String> Application::GetLastReloadFailed()
12151224
{
1216-
#ifdef _WIN32
1217-
return m_LastReloadFailed;
1218-
#else /* _WIN32 */
1219-
return m_LastReloadFailed.Get().load();
1220-
#endif /* _WIN32 */
1225+
LastReloadFailed::SharedLock lock (m_LastReloadFailed->Mutex);
1226+
/* Why is read as a C string here, so it must be NUL-terminated within its buffer. */
return {m_LastReloadFailed->When, String(m_LastReloadFailed->Why)};
12211227
}
12221228

1223-
void Application::SetLastReloadFailed(double ts)
1229+
/**
 * Records when and why the last reload failed.
 *
 * The record is updated while holding an exclusive lock on its mutex.
 *
 * @param ts Timestamp of the failure.
 * @param error Error text. It is stored in the fixed-size Why buffer and is
 *              truncated if it does not fit, including the terminating NUL.
 */
void Application::SetLastReloadFailed(double ts, const String& error)
12241230
{
1225-
#ifdef _WIN32
1226-
m_LastReloadFailed = ts;
1227-
#else /* _WIN32 */
1228-
m_LastReloadFailed.Get().store(ts);
1229-
#endif /* _WIN32 */
1231+
LastReloadFailed::UniqueLock lock (m_LastReloadFailed->Mutex);
1232+
m_LastReloadFailed->When = ts;
1233+
/*
 * strncpy() does not write a terminating NUL if the source fills the whole
 * destination. Copy at most size - 1 bytes and terminate explicitly, so that
 * GetLastReloadFailed() can never read past the end of the buffer.
 * strncpy() zero-fills the rest, so no stale text from a previous error remains.
 */
strncpy(m_LastReloadFailed->Why, error.CStr(), sizeof(m_LastReloadFailed->Why) - 1);
m_LastReloadFailed->Why[sizeof(m_LastReloadFailed->Why) - 1] = '\0';
12301234
}
12311235

12321236
void Application::ValidateName(const Lazy<String>& lvalue, const ValidationUtils& utils)

lib/base/application.hpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,20 @@
44
#define APPLICATION_H
55

66
#include "base/i2-base.hpp"
7-
#include "base/atomic.hpp"
87
#include "base/application-ti.hpp"
98
#include "base/logger.hpp"
109
#include "base/configuration.hpp"
11-
#include "base/shared-memory.hpp"
1210
#include <iosfwd>
11+
#include <utility>
12+
13+
#ifdef _WIN32
14+
#include <mutex>
15+
#include <shared_mutex>
16+
#else /* _WIN32 */
17+
#include <boost/interprocess/sync/interprocess_sharable_mutex.hpp>
18+
#include <boost/interprocess/sync/scoped_lock.hpp>
19+
#include <boost/interprocess/sync/sharable_lock.hpp>
20+
#endif /* _WIN32 */
1321

1422
namespace icinga
1523
{
@@ -102,8 +110,8 @@ class Application : public ObjectImpl<Application> {
102110
static bool GetScriptDebuggerEnabled();
103111
static void SetScriptDebuggerEnabled(bool enabled);
104112

105-
static double GetLastReloadFailed();
106-
static void SetLastReloadFailed(double ts);
113+
static std::pair<double, String> GetLastReloadFailed();
114+
static void SetLastReloadFailed(double ts, const String& error);
107115

108116
static void DisplayInfoMessage(std::ostream& os, bool skipVersion = false);
109117

@@ -139,13 +147,26 @@ class Application : public ObjectImpl<Application> {
139147
static double m_StartTime;
140148
static double m_MainTime;
141149
static bool m_ScriptDebuggerEnabled;
150+
151+
/**
 * Record of the last failed reload: a timestamp, an error text and the mutex
 * guarding both.
 *
 * SharedLock is taken by Application::GetLastReloadFailed(), UniqueLock by
 * Application::SetLastReloadFailed(). On Windows these are the std:: shared
 * mutex types, elsewhere the boost::interprocess equivalents.
 */
struct LastReloadFailed
152+
{
142153
#ifdef _WIN32
143-
static double m_LastReloadFailed;
154+
typedef std::shared_lock<std::shared_mutex> SharedLock;
155+
typedef std::unique_lock<std::shared_mutex> UniqueLock;
156+
157+
std::shared_mutex Mutex;
144158
#else /* _WIN32 */
145-
typedef Atomic<double> AtomicTs;
146-
static_assert(AtomicTs::is_always_lock_free);
147-
static SharedMemory<AtomicTs> m_LastReloadFailed;
159+
typedef boost::interprocess::sharable_lock<boost::interprocess::interprocess_sharable_mutex> SharedLock;
160+
typedef boost::interprocess::scoped_lock<boost::interprocess::interprocess_sharable_mutex> UniqueLock;
161+
162+
boost::interprocess::interprocess_sharable_mutex Mutex;
148163
#endif /* _WIN32 */
164+
/* Timestamp of the last failed reload, 0 by default. */
double When = 0;
165+
/*
 * Error text of the last failed reload, as a C string in a fixed 16 KiB buffer.
 * NOTE(review): presumably a fixed-size array (not a String) so that the struct
 * can be placed in SharedMemory<> on non-Windows platforms - confirm.
 */
char Why[16 * 1024] = {0};
166+
};
167+
168+
static LastReloadFailed* m_LastReloadFailed;
169+
static LastReloadFailed* AllocLastReloadFailed();
149170

150171
#ifdef _WIN32
151172
static BOOL WINAPI CtrlHandler(DWORD type);

lib/base/logger.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ class Logger : public ObjectImpl<Logger>
8989
void SetSeverity(const String& value, bool suppress_events = false, const Value& cookie = Empty) override;
9090
void ValidateSeverity(const Lazy<String>& lvalue, const ValidationUtils& utils) final;
9191

92-
protected:
9392
void Start(bool runtimeCreated) override;
9493
void Stop(bool runtimeRemoved) override;
9594

lib/cli/daemoncommand.cpp

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "base/atomic.hpp"
1212
#include "base/defer.hpp"
1313
#include "base/logger.hpp"
14+
#include "base/streamlogger.hpp"
1415
#include "base/application.hpp"
1516
#include "base/process.hpp"
1617
#include "base/timer.hpp"
@@ -25,6 +26,7 @@
2526
#include <boost/program_options.hpp>
2627
#include <iostream>
2728
#include <fstream>
29+
#include <sstream>
2830

2931
#ifdef _WIN32
3032
#include <windows.h>
@@ -222,6 +224,10 @@ static double GetDebugWorkerDelay()
222224

223225
static String l_ObjectsPath;
224226

227+
#ifndef _WIN32
228+
static bool l_WorkerLoadedConfig = false;
229+
#endif /* _WIN32 */
230+
225231
/**
226232
* Do the actual work (config loading, ...)
227233
*
@@ -246,6 +252,13 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
246252
}
247253
#endif /* I2_DEBUG */
248254

255+
std::ostringstream oss;
256+
StreamLogger::Ptr sl = new StreamLogger();
257+
258+
sl->BindStream(&oss, false);
259+
sl->Start(true);
260+
sl->SetActive(true);
261+
249262
Log(LogInformation, "cli", "Loading configuration file(s).");
250263
NotifyStatus("Loading configuration file(s)...");
251264

@@ -255,14 +268,24 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
255268
if (!DaemonUtility::LoadConfigFiles(configs, newItems, l_ObjectsPath, Configuration::VarsPath)) {
256269
Log(LogCritical, "cli", "Config validation failed. Re-run with 'icinga2 daemon -C' after fixing the config.");
257270
NotifyStatus("Config validation failed.");
271+
272+
sl->Stop(true);
273+
sl = nullptr;
274+
Application::SetLastReloadFailed(Utility::GetTime(), oss.str());
275+
258276
return EXIT_FAILURE;
259277
}
260278

279+
sl->Stop(true);
280+
sl = nullptr;
281+
oss = decltype(oss)();
282+
261283
#ifndef _WIN32
262284
Log(LogNotice, "cli")
263285
<< "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success";
264286

265287
(void)kill(l_UmbrellaPid, SIGUSR2);
288+
l_WorkerLoadedConfig = true;
266289

267290
Log(LogNotice, "cli")
268291
<< "Waiting for the umbrella process to let us doing the actual work";
@@ -489,6 +512,7 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
489512
}
490513

491514
(void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr);
515+
Application::SetLastReloadFailed(Utility::GetTime(), "fork(2) failed");
492516
return -1;
493517

494518
case 0:
@@ -531,6 +555,12 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
531555
} catch (const std::exception& ex) {
532556
Log(LogCritical, "cli")
533557
<< "Failed to re-initialize thread pool after forking (child): " << DiagnosticInformation(ex);
558+
559+
Application::SetLastReloadFailed(
560+
Utility::GetTime(),
561+
"Failed to re-initialize thread pool after forking (child): " + DiagnosticInformation(ex)
562+
);
563+
534564
_exit(EXIT_FAILURE);
535565
}
536566

@@ -539,14 +569,29 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
539569
} catch (const std::exception& ex) {
540570
Log(LogCritical, "cli")
541571
<< "Failed to initialize process spawn helper after forking (child): " << DiagnosticInformation(ex);
572+
573+
Application::SetLastReloadFailed(
574+
Utility::GetTime(),
575+
"Failed to initialize process spawn helper after forking (child): " + DiagnosticInformation(ex)
576+
);
577+
542578
_exit(EXIT_FAILURE);
543579
}
544580

545581
_exit(RunWorker(configs, closeConsoleLog, stderrFile));
546582
} catch (const std::exception& ex) {
547583
Log(LogCritical, "cli") << "Exception in main process: " << DiagnosticInformation(ex);
584+
585+
if (!l_WorkerLoadedConfig) {
586+
Application::SetLastReloadFailed(Utility::GetTime(), "Exception in main process: " + DiagnosticInformation(ex));
587+
}
588+
548589
_exit(EXIT_FAILURE);
549590
} catch (...) {
591+
if (!l_WorkerLoadedConfig) {
592+
Application::SetLastReloadFailed(Utility::GetTime(), "Exception in main process");
593+
}
594+
550595
_exit(EXIT_FAILURE);
551596
}
552597

@@ -813,15 +858,14 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin
813858
break;
814859
case -2:
815860
Log(LogCritical, "Application", "Found error in config: reloading aborted");
816-
Application::SetLastReloadFailed(Utility::GetTime());
817861
break;
818862
default:
819863
Log(LogInformation, "Application")
820864
<< "Reload done, old process shutting down. Child process with PID '" << nextWorker << "' is taking over.";
821865

822866
NotifyStatus("Shutting down old instance...");
823867

824-
Application::SetLastReloadFailed(0);
868+
Application::SetLastReloadFailed(0, "");
825869
(void)kill(currentWorker, SIGTERM);
826870

827871
{

lib/methods/icingachecktask.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
4646
String icingaMinVersion = MacroProcessor::ResolveMacros("$icinga_min_version$", resolvers, checkable->GetLastCheckResult(),
4747
&missingIcingaMinVersion, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros);
4848

49+
auto verbose (MacroProcessor::ResolveMacros("$icinga_verbose$", resolvers, checkable->GetLastCheckResult(),
50+
nullptr, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros));
51+
4952
if (resolvedMacros && !useResolvedMacros)
5053
return;
5154

@@ -158,10 +161,17 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
158161
". Version: " + appVersion;
159162

160163
/* Indicate a warning if the last reload failed. */
161-
double lastReloadFailed = Application::GetLastReloadFailed();
164+
auto lastReloadFailed (Application::GetLastReloadFailed());
165+
String verboseText;
166+
167+
if (lastReloadFailed.first > 0) {
168+
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed.first);
169+
170+
if (verbose.ToBool() && lastReloadFailed.second.GetLength()) {
171+
output += ", see below";
172+
verboseText = lastReloadFailed.second;
173+
}
162174

163-
if (lastReloadFailed > 0) {
164-
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed);
165175
state =ServiceWarning;
166176
}
167177

@@ -187,6 +197,10 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
187197
state = ServiceCritical;
188198
}
189199

200+
if (verboseText.GetLength()) {
201+
output += "\n\n" + verboseText;
202+
}
203+
190204
String commandName = command->GetName();
191205

192206
if (Checkable::ExecuteCommandProcessFinishedHandler) {

0 commit comments

Comments
 (0)