diff --git a/doc/09-object-types.md b/doc/09-object-types.md index 67735e9865..5e5c1803f7 100644 --- a/doc/09-object-types.md +++ b/doc/09-object-types.md @@ -353,6 +353,7 @@ Configuration Attributes: check\_timeout | Duration | **Optional.** Check command timeout in seconds. Overrides the CheckCommand's `timeout` attribute. check\_interval | Duration | **Optional.** The check interval (in seconds). This interval is used for checks when the host is in a `HARD` state. Defaults to `5m`. retry\_interval | Duration | **Optional.** The retry interval (in seconds). This interval is used for checks when the host is in a `SOFT` state. Defaults to `1m`. Note: This does not affect the scheduling [after a passive check result](08-advanced-topics.md#check-result-freshness). + scheduler\_shuffle\_cap | Number | **Optional.** Maximum percentage by which Icinga may randomly shorten or lengthen the check interval in order to reduce load spikes, e.g. `20` lets the effective interval vary between 80% and 120% of the configured one. Defaults to 0. enable\_notifications | Boolean | **Optional.** Whether notifications are enabled. Defaults to true. enable\_active\_checks | Boolean | **Optional.** Whether active checks are enabled. Defaults to true. enable\_passive\_checks | Boolean | **Optional.** Whether passive checks are enabled. Defaults to true. @@ -719,6 +720,7 @@ Configuration Attributes: check\_timeout | Duration | **Optional.** Check command timeout in seconds. Overrides the CheckCommand's `timeout` attribute. check\_interval | Duration | **Optional.** The check interval (in seconds). This interval is used for checks when the service is in a `HARD` state. Defaults to `5m`. retry\_interval | Duration | **Optional.** The retry interval (in seconds). This interval is used for checks when the service is in a `SOFT` state. Defaults to `1m`. Note: This does not affect the scheduling [after a passive check result](08-advanced-topics.md#check-result-freshness). 
+ scheduler\_shuffle\_cap | Number | **Optional.** Maximum percentage by which Icinga may randomly shorten or lengthen the check interval in order to reduce load spikes, e.g. `20` lets the effective interval vary between 80% and 120% of the configured one. Defaults to 0. enable\_notifications | Boolean | **Optional.** Whether notifications are enabled. Defaults to `true`. enable\_active\_checks | Boolean | **Optional.** Whether active checks are enabled. Defaults to `true`. enable\_passive\_checks | Boolean | **Optional.** Whether passive checks are enabled. Defaults to `true`. diff --git a/lib/icinga/checkable-check.cpp b/lib/icinga/checkable-check.cpp index 2e96df5afc..7f4fb92b7d 100644 --- a/lib/icinga/checkable-check.cpp +++ b/lib/icinga/checkable-check.cpp @@ -14,6 +14,7 @@ #include "base/convert.hpp" #include "base/utility.hpp" #include "base/context.hpp" +#include using namespace icinga; @@ -67,7 +68,7 @@ void Checkable::UpdateNextCheck(const MessageOrigin::Ptr& origin) if (adj != 0.0) adj = std::min(0.5 + fmod(GetSchedulingOffset(), interval * 5) / 100.0, adj); - double nextCheck = now - adj + interval; + double nextCheck = now - adj + interval * GetIntervalShuffleFactor(); double lastCheck = GetLastCheck(); Log(LogDebug, "Checkable") @@ -384,7 +385,7 @@ Checkable::ProcessingResult Checkable::ProcessCheckResult(const CheckResult::Ptr if (ttl > 0) offset = ttl; else - offset = GetCheckInterval(); + offset = GetCheckInterval() * GetIntervalShuffleFactor(); SetNextCheck(Utility::GetTime() + offset); } @@ -425,7 +426,7 @@ Checkable::ProcessingResult Checkable::ProcessCheckResult(const CheckResult::Ptr if (!parent->GetEnableActiveChecks()) continue; - if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) { + if (parent->GetNextCheck() >= now + parent->GetRetryInterval() * parent->GetIntervalShuffleFactor()) { ObjectLock olock(parent); parent->SetNextCheck(now); } @@ -720,3 +721,20 @@ void Checkable::AquirePendingCheckSlot(int maxPendingChecks) m_PendingChecks++; } + +/** + * Returns a random factor derived from 
scheduler_shuffle_cap to multiply the check interval with. + * + * E.g. if scheduler_shuffle_cap is 20 (%), this function returns [0.8, 1.2]. + */ +double Checkable::GetIntervalShuffleFactor() +{ + if (!GetEnableActiveChecks()) { + // Active checks are disabled for this object, so leave the interval unscaled: scheduler_shuffle_cap doesn't influence external checkers. + return 1; + } + + return (GetSchedulerShuffleCap() / 100) // scheduler_shuffle_cap converted from percent to a fraction, e.g. 10 => 0.1 + * (rand() / (double)RAND_MAX * 2 - 1) // random number in [-1, 1] + + 1; // NOTE(review): the resulting factor can become negative if scheduler_shuffle_cap exceeds 100 -- confirm the attribute is range-checked elsewhere. +} diff --git a/lib/icinga/checkable.cpp b/lib/icinga/checkable.cpp index 13fd778a30..24dab46096 100644 --- a/lib/icinga/checkable.cpp +++ b/lib/icinga/checkable.cpp @@ -9,6 +9,7 @@ #include "base/exception.hpp" #include "base/timer.hpp" #include +#include using namespace icinga; @@ -93,9 +94,9 @@ void Checkable::Start(bool runtimeCreated) } if (GetNextCheck() < now + 60) { - double delta = std::min(GetCheckInterval(), 60.0); + double delta = std::min(GetCheckInterval() * GetIntervalShuffleFactor(), 60.0); delta *= (double)std::rand() / RAND_MAX; - SetNextCheck(now + delta); + SetNextCheck(now + delta + GetCheckInterval() * fabs(GetIntervalShuffleFactor() - 1)); } ObjectImpl::Start(runtimeCreated); diff --git a/lib/icinga/checkable.hpp b/lib/icinga/checkable.hpp index 98c015ed60..159087bdfa 100644 --- a/lib/icinga/checkable.hpp +++ b/lib/icinga/checkable.hpp @@ -204,6 +204,7 @@ class Checkable : public ObjectImpl bool NotificationReasonApplies(NotificationType type); bool NotificationReasonSuppressed(NotificationType type); bool IsLikelyToBeCheckedSoon(); + double GetIntervalShuffleFactor(); void FireSuppressedNotifications(); diff --git a/lib/icinga/checkable.ti b/lib/icinga/checkable.ti index 6f7a5daee4..aaf7d4951d 100644 --- a/lib/icinga/checkable.ti +++ b/lib/icinga/checkable.ti @@ -47,6 +47,9 @@ abstract class Checkable : CustomVarObject [config] double retry_interval { default {{{ return 60; }}} }; + [config] double scheduler_shuffle_cap { + default {{{ return 0.0; }}} + }; [config, 
navigation] name(EventCommand) event_command (EventCommandRaw) { navigate {{{ return EventCommand::GetByName(GetEventCommandRaw());