Skip to content

Commit c53fc83

Browse files
author
Addisu Z. Taddese
committed
Improve RTF Stability and Precision (#3269)
* Fix delay calculation in each time step to achieve desired RTF
* Use a hybrid sleep/busy-loop for better precision
* Make Server_TEST more robust

---------

Signed-off-by: Addisu Z. Taddese <addisu@openrobotics.org>
(cherry picked from commit 6759b33)
Signed-off-by: Addisu Z. Taddese <addisu@openrobotics.org>
1 parent 8a60e1e commit c53fc83

3 files changed

Lines changed: 63 additions & 41 deletions

File tree

src/Server_TEST.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,9 @@ TEST_P(ServerFixture, RunNonBlockingPaused)
562562
while (*server.IterationCount() < 100)
563563
IGN_SLEEP_MS(100);
564564

565+
// Sleep one more time before checking because iterationCount might be updated
566+
// before the iteration is complete
567+
GZ_SLEEP_MS(100);
565568
EXPECT_EQ(100u, *server.IterationCount());
566569
EXPECT_FALSE(server.Running());
567570
EXPECT_FALSE(*server.Running(0));
@@ -582,6 +585,9 @@ TEST_P(ServerFixture, RunNonBlocking)
582585
while (*server.IterationCount() < 100)
583586
IGN_SLEEP_MS(100);
584587

588+
// Sleep one more time before checking because iterationCount might be updated
589+
// before the iteration is complete
590+
GZ_SLEEP_MS(100);
585591
EXPECT_EQ(100u, *server.IterationCount());
586592
EXPECT_FALSE(server.Running());
587593
EXPECT_FALSE(*server.Running(0));
@@ -698,6 +704,9 @@ TEST_P(ServerFixture, RunNonBlockingMultiple)
698704
while (*server.IterationCount() < 100)
699705
IGN_SLEEP_MS(100);
700706

707+
// Sleep one more time before checking because iterationCount might be updated
708+
// before the iteration is complete
709+
GZ_SLEEP_MS(100);
701710
EXPECT_EQ(100u, *server.IterationCount());
702711
EXPECT_FALSE(server.Running());
703712
EXPECT_FALSE(*server.Running(0));

src/SimulationRunner.cc

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -634,11 +634,6 @@ bool SimulationRunner::Run(const uint64_t _iterations)
634634
if (!this->currentInfo.paused)
635635
this->realTimeWatch.Start();
636636

637-
// Variables for time keeping.
638-
std::chrono::steady_clock::time_point startTime;
639-
std::chrono::steady_clock::duration sleepTime;
640-
std::chrono::steady_clock::duration actualSleep;
641-
642637
this->running = true;
643638

644639
// Create the world statistics publisher.
@@ -722,6 +717,7 @@ bool SimulationRunner::Run(const uint64_t _iterations)
722717

723718
// Execute all the systems until we are told to stop, or the number of
724719
// iterations is reached.
720+
auto nextUpdateTime = std::chrono::steady_clock::now() + this->updatePeriod;
725721
while (this->running && (_iterations == 0 ||
726722
processedIterations < _iterations))
727723
{
@@ -730,32 +726,6 @@ bool SimulationRunner::Run(const uint64_t _iterations)
730726
// Update the step size and desired rtf
731727
this->UpdatePhysicsParams();
732728

733-
// Compute the time to sleep in order to match, as closely as possible,
734-
// the update period.
735-
sleepTime = 0ns;
736-
actualSleep = 0ns;
737-
738-
sleepTime = std::max(0ns, this->prevUpdateRealTime +
739-
this->updatePeriod - std::chrono::steady_clock::now() -
740-
this->sleepOffset);
741-
742-
// Only sleep if needed.
743-
if (sleepTime > 0ns)
744-
{
745-
IGN_PROFILE("Sleep");
746-
// Get the current time, sleep for the duration needed to match the
747-
// updatePeriod, and then record the actual time slept.
748-
startTime = std::chrono::steady_clock::now();
749-
std::this_thread::sleep_for(sleepTime);
750-
actualSleep = std::chrono::steady_clock::now() - startTime;
751-
}
752-
753-
// Exponentially average out the difference between expected sleep time
754-
// and actual sleep time.
755-
this->sleepOffset =
756-
std::chrono::duration_cast<std::chrono::nanoseconds>(
757-
(actualSleep - sleepTime) * 0.01 + this->sleepOffset * 0.99);
758-
759729
// Update time information. This will update the iteration count, RTF,
760730
// and other values.
761731
this->UpdateCurrentInfo();
@@ -783,6 +753,59 @@ bool SimulationRunner::Run(const uint64_t _iterations)
783753
this->currentInfo.iterations++;
784754
this->blockingPausedStepPending = false;
785755
}
756+
757+
// Only sleep when not paused.
758+
if (!this->currentInfo.paused)
759+
{
760+
// A hybrid sleep/busy-wait strategy is used for precise timing. A simple
761+
// sleep can suffer from wake-up latency due to CPU power-saving states
762+
// (C-states), which causes RTF to undershoot. This strategy sleeps for
763+
// long waits but busy-waits for the final moments to ensure precision.
764+
// The threshold is a conservative value based on typical C-state
765+
// latencies.
766+
using namespace std::chrono_literals;
767+
768+
// Threshold at which we switch from sleeping to spinning. This should be
769+
// larger than the typical OS + CPU C-state latency.
770+
constexpr auto kSpinThreshold = 200us;
771+
772+
// If the scheduled update time is in the future...
773+
if (nextUpdateTime > std::chrono::steady_clock::now())
774+
{
775+
// ...sleep until we are close to the target time.
776+
auto sleepTarget = nextUpdateTime - kSpinThreshold;
777+
if (sleepTarget > std::chrono::steady_clock::now())
778+
{
779+
std::this_thread::sleep_until(sleepTarget);
780+
}
781+
782+
// ...then busy-wait for the final moments for precision.
783+
while (std::chrono::steady_clock::now() < nextUpdateTime)
784+
{
785+
// Spin.
786+
}
787+
}
788+
789+
// Schedule the next update time.
790+
auto now = std::chrono::steady_clock::now();
791+
nextUpdateTime += this->updatePeriod;
792+
if (nextUpdateTime < now)
793+
{
794+
nextUpdateTime = now + this->updatePeriod;
795+
}
796+
}
797+
else
798+
{
799+
// We still need a small sleep to prevent this loop from spinning
800+
// at 100% CPU when paused.
801+
using namespace std::chrono_literals;
802+
std::this_thread::sleep_for(1ms);
803+
804+
// When paused, pre-schedule the next update time for the near future.
805+
// This ensures that when the simulation is un-paused, the first step
806+
// has a valid schedule to meet, preventing a jump in RTF.
807+
nextUpdateTime = std::chrono::steady_clock::now() + this->updatePeriod;
808+
}
786809
}
787810

788811
this->running = false;
@@ -803,9 +826,6 @@ void SimulationRunner::Step(const UpdateInfo &_info)
803826
// Publish info
804827
this->PublishStats();
805828

806-
// Record when the update step starts.
807-
this->prevUpdateRealTime = std::chrono::steady_clock::now();
808-
809829
this->levelMgr->UpdateLevelsState();
810830

811831
// Handle pending systems

src/SimulationRunner.hh

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -413,13 +413,6 @@ namespace ignition
413413
/// \brief A pool of worker threads.
414414
private: common::WorkerPool workerPool{2};
415415

416-
/// \brief Wall time of the previous update.
417-
private: std::chrono::steady_clock::time_point prevUpdateRealTime;
418-
419-
/// \brief A duration used to account for inaccuracies associated with
420-
/// sleep durations.
421-
private: std::chrono::steady_clock::duration sleepOffset{0};
422-
423416
/// \brief This is the rate at which the systems are updated.
424417
/// The default update rate is 500hz, which is a period of 2ms.
425418
private: std::chrono::steady_clock::duration updatePeriod{2ms};

0 commit comments

Comments
 (0)