CpuBoundWork#CpuBoundWork(): don't spin on atomic int to acquire slot #9990


Open · wants to merge 7 commits into base: master

161 changes: 123 additions & 38 deletions lib/base/io-engine.cpp
@@ -16,60 +16,121 @@

using namespace icinga;

-CpuBoundWork::CpuBoundWork(boost::asio::yield_context yc)
/**
 * Acquires a slot for CPU-bound work.
 *
 * If and as long as the lock-free TryAcquireSlot() doesn't succeed,
 * subscribes to the slow path by waiting on a condition variable.
 * It is woken up by Done() which is called by the destructor.
 *
 * @param yc Needed to asynchronously wait for the condition variable.
 * @param strand Where to post the wake-up of the condition variable.
 */
CpuBoundWork::CpuBoundWork(boost::asio::yield_context yc, boost::asio::io_context::strand& strand)
	: m_Done(false)
{
-	auto& ioEngine (IoEngine::Get());
	VERIFY(strand.running_in_this_thread());

-	for (;;) {
-		auto availableSlots (ioEngine.m_CpuBoundSemaphore.fetch_sub(1));
	auto& ie (IoEngine::Get());
	Shared<AsioConditionVariable>::Ptr cv;

Review comment (Member): I would place this where it is actually utilized, just before the while loop down below.

	while (!TryAcquireSlot()) {
		if (!cv) {
			cv = Shared<AsioConditionVariable>::Make(ie.GetIoContext());

-		if (availableSlots < 1) {
-			ioEngine.m_CpuBoundSemaphore.fetch_add(1);
-			IoEngine::YieldCurrentCoroutine(yc);
-			continue;
			// The above line may take a little bit, so let's optimistically re-check
			if (TryAcquireSlot()) {
				break;
			}
		}

-		break;
		{
			std::unique_lock lock (ie.m_CpuBoundWaitingMutex);

			// The above line may take even longer, so let's check again.
			// Also mitigate lost wake-ups by re-checking during the lock:
			//
			// During our lock, Done() can't retrieve the subscribers to wake up,
			// so any ongoing wake-up is either done at this point or has not started yet.
			// If such a wake-up is done, it's a lost wake-up to us unless we re-check here
			// whether the slot being freed (just before the wake-up) is still available.
			if (TryAcquireSlot()) {
				break;
			}

			// If the (hypothetical) slot mentioned above was taken by another coroutine,
			// there are no free slots again, just as if no wake-ups happened just now.
			ie.m_CpuBoundWaiting.emplace_back(strand, cv);
		}

		cv->Wait(yc);
	}
}
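
A minimal usage sketch of the new constructor (illustrative only, not part of this diff; `myStrand` and `DoCpuHeavyWork()` are placeholders):

```cpp
IoEngine::SpawnCoroutine(myStrand, [&myStrand](boost::asio::yield_context yc) {
	// Returns immediately on the fast path, or suspends this coroutine
	// until a Done() elsewhere frees a slot (slow path).
	CpuBoundWork slot (yc, myStrand);

	DoCpuHeavyWork(); // placeholder for parsing, config validation, ...

	// ~CpuBoundWork() calls Done(): frees the slot, wakes up subscribers.
});
```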

-CpuBoundWork::~CpuBoundWork()
/**
 * Tries to acquire a slot for CPU-bound work.
 *
 * Specifically, decrements the number of free slots (semaphore) by one,
 * but only if it's currently greater than zero.
 * Not falling below zero requires an atomic#compare_exchange_weak() loop
 * instead of a simple atomic#fetch_sub() call, but it's also atomic.
 *
 * @return Whether a slot was acquired.
 */
bool CpuBoundWork::TryAcquireSlot()
{
-	if (!m_Done) {
-		IoEngine::Get().m_CpuBoundSemaphore.fetch_add(1);
	auto& ie (IoEngine::Get());
	auto freeSlots (ie.m_CpuBoundSemaphore.load());

	while (freeSlots > 0) {
		// If ie.m_CpuBoundSemaphore was changed after the last load,
		// compare_exchange_weak() will load its latest value into freeSlots for us to retry until...
		if (ie.m_CpuBoundSemaphore.compare_exchange_weak(freeSlots, freeSlots - 1)) {
			// ... either we successfully decrement ie.m_CpuBoundSemaphore by one, ...
			return true;
		}
	}

	// ... or it becomes zero due to another coroutine.
	return false;
}
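
The decrement-if-positive idiom in isolation, as a self-contained sketch (illustrative names, not from this PR):

```cpp
#include <atomic>

// Decrement freeSlots by one, but never let it fall below zero.
bool TryAcquire(std::atomic<int>& freeSlots)
{
	int observed = freeSlots.load();

	while (observed > 0) {
		// On failure (concurrent change or spurious CAS failure),
		// compare_exchange_weak() reloads the current value into
		// `observed`, so the next iteration re-checks it.
		if (freeSlots.compare_exchange_weak(observed, observed - 1)) {
			return true; // decremented without going negative
		}
	}

	return false; // the counter was (or just became) zero
}
```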

/**
 * Releases the own slot acquired by the constructor (TryAcquireSlot()) if not already done.
 *
 * Precisely, increments the number of free slots (semaphore) by one.
 * Also wakes up all waiting constructors (slow path) if necessary.
 */
void CpuBoundWork::Done()
{
	if (!m_Done) {
-		IoEngine::Get().m_CpuBoundSemaphore.fetch_add(1);
-
		m_Done = true;
-	}
-}
-
-IoBoundWorkSlot::IoBoundWorkSlot(boost::asio::yield_context yc)
-	: yc(yc)
-{
-	IoEngine::Get().m_CpuBoundSemaphore.fetch_add(1);
-}
-
-IoBoundWorkSlot::~IoBoundWorkSlot()
-{
-	auto& ioEngine (IoEngine::Get());
-
-	for (;;) {
-		auto availableSlots (ioEngine.m_CpuBoundSemaphore.fetch_sub(1));
-
-		if (availableSlots < 1) {
-			ioEngine.m_CpuBoundSemaphore.fetch_add(1);
-			IoEngine::YieldCurrentCoroutine(yc);
-			continue;
		auto& ie (IoEngine::Get());

		// The constructor takes the slow path only if the semaphore is full,
		// so we only have to wake up constructors if the semaphore was full.
		// This works because after fetch_add(), TryAcquireSlot() (fast path) will succeed.
		if (ie.m_CpuBoundSemaphore.fetch_add(1) < 1) {
			// So now there are only slow path subscribers from just before the fetch_add() to be woken up.
			// Precisely, only subscribers from just before the fetch_add() which turned 0 to 1.

			decltype(ie.m_CpuBoundWaiting) subscribers;

			{
				// Locking after fetch_add() is safe because a delayed wake-up is fine.
				// Wake-up of constructors which subscribed after the fetch_add() is also not a problem.
				// In worst case, they will just re-subscribe to the slow path.
				// Lost wake-ups are mitigated by the constructor, see its implementation comments.
				std::unique_lock lock (ie.m_CpuBoundWaitingMutex);
				std::swap(subscribers, ie.m_CpuBoundWaiting);
			}

			// Again, a delayed wake-up is fine, hence unlocked.
			for (auto& [strand, cv] : subscribers) {
				boost::asio::post(strand, [cv = std::move(cv)] { cv->NotifyOne(); });
			}
		}
-
-		break;
	}
}
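
The wake-up uses a common pattern: steal the subscriber list under the mutex, then notify outside of it. A generic sketch with simplified types (std::function callbacks standing in for the strand/condition-variable pairs):

```cpp
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

std::mutex waitingMutex;
std::vector<std::function<void()>> waiting; // registered wake-up callbacks

void WakeUpAll()
{
	std::vector<std::function<void()>> subscribers;

	{
		// O(1) under the lock: just swap the two vectors.
		std::lock_guard lock (waitingMutex);
		std::swap(subscribers, waiting);
	}

	// Invoke the callbacks unlocked, so a slow wake-up can't block
	// new subscribers from entering the (now empty) list meanwhile.
	for (auto& wakeUp : subscribers) {
		wakeUp();
	}
}
```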

@@ -124,18 +185,30 @@ void IoEngine::RunEventLoop()
}
}

-AsioConditionVariable::AsioConditionVariable(boost::asio::io_context& io, bool init)
AsioEvent::AsioEvent(boost::asio::io_context& io, bool init)
	: m_Timer(io)
{
	m_Timer.expires_at(init ? boost::posix_time::neg_infin : boost::posix_time::pos_infin);
}

-void AsioConditionVariable::Set()
void AsioEvent::Set()
{
	m_Timer.expires_at(boost::posix_time::neg_infin);
}

-void AsioConditionVariable::Clear()
void AsioEvent::Clear()
{
	m_Timer.expires_at(boost::posix_time::pos_infin);
}

void AsioEvent::Wait(boost::asio::yield_context yc)
{
boost::system::error_code ec;
m_Timer.async_wait(yc[ec]);
}

AsioConditionVariable::AsioConditionVariable(boost::asio::io_context& io)
: m_Timer(io)
{
m_Timer.expires_at(boost::posix_time::pos_infin);
}
@@ -146,6 +219,18 @@ void AsioConditionVariable::Wait(boost::asio::yield_context yc)
m_Timer.async_wait(yc[ec]);
}

bool AsioConditionVariable::NotifyOne()
{
boost::system::error_code ec;
return m_Timer.cancel_one(ec);
}

size_t AsioConditionVariable::NotifyAll()
{
boost::system::error_code ec;
return m_Timer.cancel(ec);
}
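
The renaming separates two semantics: AsioEvent is level-triggered (once Set(), every Wait() returns immediately until Clear()), while the new AsioConditionVariable is edge-triggered (Wait() always suspends until a notification arrives). A rough sketch, assuming Boost.Asio's spawn() overload taking an io_context:

```cpp
#include <boost/asio/io_context.hpp>
#include <boost/asio/spawn.hpp>
#include "base/io-engine.hpp"

using namespace icinga;

void Demo(boost::asio::io_context& io)
{
	boost::asio::spawn(io, [&io](boost::asio::yield_context yc) {
		AsioEvent event (io, true); // starts signaled
		event.Wait(yc);             // returns immediately: still signaled
		event.Clear();              // from now on, Wait() suspends until Set()

		AsioConditionVariable cv (io);
		// cv.Wait(yc) would suspend this coroutine until another one
		// calls cv.NotifyOne() or cv.NotifyAll().
	});
}
```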

/**
* Cancels any pending timeout callback.
*
39 changes: 24 additions & 15 deletions lib/base/io-engine.hpp
@@ -12,6 +12,7 @@
#include <atomic>
#include <exception>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
@@ -20,6 +21,7 @@
#include <boost/exception/all.hpp>
#include <boost/asio/deadline_timer.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/io_context_strand.hpp>
#include <boost/asio/spawn.hpp>

#if BOOST_VERSION >= 108700
@@ -37,36 +39,41 @@ namespace icinga
class CpuBoundWork
{
public:
-	CpuBoundWork(boost::asio::yield_context yc);
	CpuBoundWork(boost::asio::yield_context yc, boost::asio::io_context::strand&);
	CpuBoundWork(const CpuBoundWork&) = delete;
	CpuBoundWork(CpuBoundWork&&) = delete;
	CpuBoundWork& operator=(const CpuBoundWork&) = delete;
	CpuBoundWork& operator=(CpuBoundWork&&) = delete;
-	~CpuBoundWork();

	inline ~CpuBoundWork()
	{
		Done();
	}

	void Done();

private:
	static bool TryAcquireSlot();

	bool m_Done;
};
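
Since the destructor now merely delegates to Done(), a holder can release its slot early and let the destructor become a no-op via m_Done; this is what HttpServerConnection::StartStreaming() does further down. A hypothetical sketch (`DoCpuBoundParsing()` and `DoStreamingIo()` are placeholders):

```cpp
void HandleRequest(boost::asio::yield_context yc, boost::asio::io_context::strand& strand)
{
	CpuBoundWork work (yc, strand);

	DoCpuBoundParsing(); // placeholder for CPU-heavy work

	work.Done();         // release the slot before long-running I/O
	DoStreamingIo(yc);   // placeholder; runs without occupying a slot
}   // ~CpuBoundWork(): Done() is a no-op now, m_Done is already true
```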

/**
- * Scope break for CPU-bound work done in an I/O thread
 * Condition variable which doesn't block I/O threads
 *
 * @ingroup base
 */
-class IoBoundWorkSlot
class AsioConditionVariable
{
public:
-	IoBoundWorkSlot(boost::asio::yield_context yc);
-	IoBoundWorkSlot(const IoBoundWorkSlot&) = delete;
-	IoBoundWorkSlot(IoBoundWorkSlot&&) = delete;
-	IoBoundWorkSlot& operator=(const IoBoundWorkSlot&) = delete;
-	IoBoundWorkSlot& operator=(IoBoundWorkSlot&&) = delete;
-	~IoBoundWorkSlot();
	AsioConditionVariable(boost::asio::io_context& io);

	void Wait(boost::asio::yield_context yc);
	bool NotifyOne();
	size_t NotifyAll();

private:
-	boost::asio::yield_context yc;
	boost::asio::deadline_timer m_Timer;
};

/**
@@ -77,7 +84,6 @@ class IoBoundWorkSlot
class IoEngine
{
friend CpuBoundWork;
-	friend IoBoundWorkSlot;

public:
IoEngine(const IoEngine&) = delete;
@@ -150,22 +156,25 @@
boost::asio::executor_work_guard<boost::asio::io_context::executor_type> m_KeepAlive;
std::vector<std::thread> m_Threads;
boost::asio::deadline_timer m_AlreadyExpiredTimer;

std::atomic_int_fast32_t m_CpuBoundSemaphore;
std::mutex m_CpuBoundWaitingMutex;
std::vector<std::pair<boost::asio::io_context::strand, Shared<AsioConditionVariable>::Ptr>> m_CpuBoundWaiting;
};

class TerminateIoThread : public std::exception
{
};

/**
- * Condition variable which doesn't block I/O threads
 * Awaitable flag which doesn't block I/O threads, inspired by threading.Event from Python
 *
 * @ingroup base
 */
-class AsioConditionVariable
class AsioEvent
{
public:
-	AsioConditionVariable(boost::asio::io_context& io, bool init = false);
	AsioEvent(boost::asio::io_context& io, bool init = false);

void Set();
void Clear();
2 changes: 1 addition & 1 deletion lib/icingadb/redisconnection.hpp
@@ -262,7 +262,7 @@ namespace icinga
std::set<QueryPriority> m_SuppressedQueryKinds;

// Indicate that there's something to send/receive
-	AsioConditionVariable m_QueuedWrites, m_QueuedReads;
	AsioEvent m_QueuedWrites, m_QueuedReads;

std::function<void(boost::asio::yield_context& yc)> m_ConnectedCallback;

2 changes: 0 additions & 2 deletions lib/remote/eventshandler.cpp
@@ -105,8 +105,6 @@ bool EventsHandler::HandleRequest(
response.result(http::status::ok);
response.set(http::field::content_type, "application/json");

-	IoBoundWorkSlot dontLockTheIoThread (yc);
-
http::async_write(stream, response, yc);
stream.async_flush(yc);

15 changes: 11 additions & 4 deletions lib/remote/httpserverconnection.cpp
@@ -12,7 +12,6 @@
#include "base/configtype.hpp"
#include "base/defer.hpp"
#include "base/exception.hpp"
#include "base/io-engine.hpp"
#include "base/logger.hpp"
#include "base/objectlock.hpp"
#include "base/timer.hpp"
@@ -105,6 +104,9 @@ void HttpServerConnection::StartStreaming()

m_HasStartedStreaming = true;

VERIFY(m_HandlingRequest);
m_HandlingRequest->Done();

HttpServerConnection::Ptr keepAlive (this);

IoEngine::SpawnCoroutine(m_IoStrand, [this, keepAlive](asio::yield_context yc) {
@@ -418,19 +420,24 @@ bool ProcessRequest(
ApiUser::Ptr& authenticatedUser,
boost::beast::http::response<boost::beast::http::string_body>& response,
HttpServerConnection& server,
CpuBoundWork*& m_HandlingRequest,
bool& hasStartedStreaming,
std::chrono::steady_clock::duration& cpuBoundWorkTime,
-	boost::asio::yield_context& yc
boost::asio::yield_context& yc,
boost::asio::io_context::strand& strand
)
{
namespace http = boost::beast::http;

try {
// Cache the elapsed time to acquire a CPU semaphore used to detect extremely heavy workloads.
auto start (std::chrono::steady_clock::now());
-		CpuBoundWork handlingRequest (yc);
CpuBoundWork handlingRequest (yc, strand);
cpuBoundWorkTime = std::chrono::steady_clock::now() - start;

Defer resetHandlingRequest ([&m_HandlingRequest] { m_HandlingRequest = nullptr; });
m_HandlingRequest = &handlingRequest;

HttpHandler::ProcessRequest(stream, authenticatedUser, request, response, yc, server);
} catch (const std::exception& ex) {
if (hasStartedStreaming) {
@@ -548,7 +555,7 @@ void HttpServerConnection::ProcessMessages(boost::asio::yield_context yc)

m_Seen = std::numeric_limits<decltype(m_Seen)>::max();

-		if (!ProcessRequest(*m_Stream, request, authenticatedUser, response, *this, m_HasStartedStreaming, cpuBoundWorkTime, yc)) {
if (!ProcessRequest(*m_Stream, request, authenticatedUser, response, *this, m_HandlingRequest, m_HasStartedStreaming, cpuBoundWorkTime, yc, m_IoStrand)) {
break;
}
