Skip to content

Commit 7e7dfe6

Browse files
committed
Introduce zombie list for stopping blocks
This allows to remove blocks without having to stop the scheduler. Running blocks can't transition directly to stopped state, so we move them into a "zombie list". There's a maintenance period where we delete blocks that have stopped. Furthermore, deleted blocks are now removed from each thread's local block list. Signed-off-by: Sergio Martins <[email protected]>
1 parent 377ceac commit 7e7dfe6

File tree

3 files changed

+113
-6
lines changed

3 files changed

+113
-6
lines changed

core/include/gnuradio-4.0/Block.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,7 @@ class Block : public lifecycle::StateMachine<Derived> {
498498

499499
~Block() { // NOSONAR -- need to request the (potentially) running ioThread to stop
500500
if (lifecycle::isActive(this->state())) {
501+
// Only happens in artificial cases likes qa_Block test. In practice blocks stay in zombie list if active
501502
emitErrorMessageIfAny("~Block()", this->changeStateTo(lifecycle::State::REQUESTED_STOP));
502503
}
503504
if constexpr (blockingIO) {

core/include/gnuradio-4.0/Graph.hpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,9 @@ class Graph : public gr::Block<Graph> {
319319

320320
constexpr static block::Category blockCategory = block::Category::TransparentBlockGroup;
321321

322+
std::mutex _zombieBlocksMutex;
323+
std::vector<std::unique_ptr<BlockModel>> _zombieBlocks;
324+
322325
Graph(property_map settings = {}) : gr::Block<Graph>(std::move(settings)) {
323326
_blocks.reserve(100); // TODO: remove
324327

@@ -504,7 +507,7 @@ class Graph : public gr::Block<Graph> {
504507
return std::addressof(edge.sourceBlock()) == it->get() || std::addressof(edge.destinationBlock()) == it->get();
505508
});
506509

507-
_blocks.erase(it);
510+
makeZombie(it);
508511
}
509512

510513
gr::BlockModel* replaceBlock(const std::string& uniqueName, const std::string& type, const property_map& properties) {
@@ -531,11 +534,25 @@ class Graph : public gr::Block<Graph> {
531534
edge._destinationBlock = newBlockRaw;
532535
}
533536
}
534-
_blocks.erase(it);
537+
538+
makeZombie(it);
535539

536540
return newBlockRaw;
537541
}
538542

543+
void makeZombie(auto it) {
544+
if ((*it)->state() == lifecycle::State::PAUSED || (*it)->state() == lifecycle::State::RUNNING) {
545+
emitErrorMessageIfAny("makeZombie", (*it)->changeStateTo(lifecycle::State::REQUESTED_STOP));
546+
}
547+
548+
{
549+
std::lock_guard guard(_zombieBlocksMutex);
550+
_zombieBlocks.push_back(std::move(*it));
551+
}
552+
553+
_blocks.erase(it);
554+
}
555+
539556
void emplaceEdge(std::string_view sourceBlock, std::string sourcePort, std::string_view destinationBlock, //
540557
std::string destinationPort, [[maybe_unused]] const std::size_t minBufferSize, [[maybe_unused]] const std::int32_t weight, std::string_view edgeName) {
541558
auto sourceBlockIt = std::ranges::find_if(_blocks, [&sourceBlock](const auto& block) { return block->uniqueName() == sourceBlock; });

core/include/gnuradio-4.0/Scheduler.hpp

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,11 @@ class SchedulerBase : public Block<Derived> {
356356
if (runnerID == 0UZ || _nRunningJobs.load(std::memory_order_acquire) == 0UZ) {
357357
this->processScheduledMessages(); // execute the scheduler- and Graph-specific message handler only once globally
358358
}
359+
360+
// Zombies are cleaned per-thread, as we remove from the localBlockList as well.
361+
// Cleaning zombies has low priority, so uses process_stream_to_message_ratio (a different ratio could be introduced)
362+
cleanupZombieBlocks(localBlockList);
363+
359364
std::ranges::for_each(localBlockList, [](auto& block) { block->processScheduledMessages(); });
360365
activeState = this->state();
361366
msgToCount++;
@@ -376,10 +381,6 @@ class SchedulerBase : public Block<Derived> {
376381
break;
377382
}
378383
} else if (activeState == lifecycle::State::PAUSED) {
379-
if (_graph.hasTopologyChanged()) {
380-
// TODO: update localBlockList topology if needed
381-
_graph.ackTopologyChange();
382-
}
383384
std::this_thread::sleep_for(std::chrono::milliseconds(timeout_ms));
384385
msgToCount = 0UZ;
385386
} else { // other states
@@ -416,6 +417,7 @@ class SchedulerBase : public Block<Derived> {
416417
this->emitErrorMessageIfAny("forEachBlock -> stop() -> LifecycleState", block->changeStateTo(lifecycle::State::STOPPED));
417418
}
418419
});
420+
419421
this->emitErrorMessageIfAny("stop() -> LifecycleState ->STOPPED", this->changeStateTo(lifecycle::State::STOPPED));
420422
this->emitErrorMessageIfAny("stop() -> LifecycleState ->IDLE", this->changeStateTo(lifecycle::State::IDLE));
421423
}
@@ -502,6 +504,93 @@ class SchedulerBase : public Block<Derived> {
502504
return message;
503505
}
504506

507+
/*
508+
Zombie Tutorial:
509+
510+
Blocks can't be deleted unless stopped, but since stopping can take time (async) we move such blocks
511+
to the "zombie list" and disconnect them immediately from the graph. This allows them to stop and be deleted
512+
safely.
513+
514+
Periodically, we call cleanupZombieBlocks(), which iterates the zombie list and deletes the blocks that are now stopped.
515+
516+
cleanupZombieBlocks() is called *per-thread*, since we also need to update the localBlockList, i.e.: removing dangling block pointers
517+
from the localBlockList.
518+
519+
We also update the _jobLists member variable, but probably that member can be removed, seems unneeded and only used so unit-tests can
520+
query it.
521+
*/
522+
void cleanupZombieBlocks(std::vector<BlockModel*>& localBlockList) {
523+
if (localBlockList.empty()) {
524+
return;
525+
}
526+
527+
std::lock_guard guard(_graph._zombieBlocksMutex);
528+
529+
auto& zombieBlocks = _graph._zombieBlocks;
530+
auto it = zombieBlocks.begin();
531+
532+
while (it != zombieBlocks.end()) {
533+
auto localBlockIt = std::find(localBlockList.begin(), localBlockList.end(), it->get());
534+
if (localBlockIt == localBlockList.end()) {
535+
// we only care about the blocks local to our thread.
536+
++it;
537+
continue;
538+
}
539+
540+
bool shouldDelete = false;
541+
542+
switch ((*it)->state()) {
543+
case lifecycle::State::IDLE:
544+
case lifecycle::State::STOPPED:
545+
case lifecycle::State::INITIALISED:
546+
// This block can be deleted immediately
547+
shouldDelete = true;
548+
break;
549+
case lifecycle::State::ERROR:
550+
// Delete as well. (Separate case block, as better ideas welcome)
551+
shouldDelete = true;
552+
break;
553+
case lifecycle::State::REQUESTED_STOP:
554+
// This block will be deleted later
555+
break;
556+
case lifecycle::State::REQUESTED_PAUSE:
557+
// This block will be deleted later
558+
// There's no transition from REQUESTED_PAUSE to REQUESTED_STOP
559+
// Will be moved to REQUESTED_STOP as soon as it's possible
560+
break;
561+
case lifecycle::State::PAUSED:
562+
// This zombie was in REQUESTED_PAUSE and now finally in PAUSED. Can be stopped now.
563+
// Will be deleted in a next zombie maintenance period
564+
this->emitErrorMessageIfAny("cleanupZombieBlocks", (*it)->changeStateTo(lifecycle::State::REQUESTED_STOP));
565+
break;
566+
case lifecycle::State::RUNNING: assert(false && "Doesn't happen: zombie blocks are never running"); break;
567+
}
568+
569+
if (shouldDelete) {
570+
localBlockList.erase(localBlockIt);
571+
572+
BlockModel* zombieRaw = it->get();
573+
it = zombieBlocks.erase(it); // ~Block() runs here
574+
575+
// We need to remove zombieRaw from jobLists as well, in case Scheduler ever goes to INITIALIZED
576+
// again.
577+
// TODO: I'd argue we should remove _jobLists to minimize having to maintain state. Instead, a job list can be
578+
// calculated in start().
579+
std::lock_guard lock(_jobListsMutex);
580+
for (auto& jobList : *this->_jobLists) {
581+
auto job_it = std::remove(jobList.begin(), jobList.end(), zombieRaw);
582+
if (job_it != jobList.end()) {
583+
jobList.erase(job_it, jobList.end());
584+
break;
585+
}
586+
}
587+
588+
} else {
589+
++it;
590+
}
591+
}
592+
}
593+
505594
std::optional<Message> propertyCallbackReplaceBlock([[maybe_unused]] std::string_view propertyName, Message message) {
506595
assert(propertyName == scheduler::property::kReplaceBlock);
507596
using namespace std::string_literals;

0 commit comments

Comments
 (0)