Skip to content

Commit 7dac4fb

Browse files
author
Addisu Z. Taddese
committed
Improve RTF Stability and Precision (#3269)
* Fix delay calculation in each time step to achieve desired RTF
* Use a hybrid sleep/busy-loop for better precision
* Make Server_TEST more robust

---------

Signed-off-by: Addisu Z. Taddese <addisu@openrobotics.org>

(cherry picked from commit 6759b33)
1 parent d139954 commit 7dac4fb

3 files changed

Lines changed: 63 additions & 41 deletions

File tree

src/Server_TEST.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,9 @@ TEST_P(ServerFixture, RunNonBlockingPaused)
590590
while (*server.IterationCount() < 100)
591591
GZ_SLEEP_MS(100);
592592

593+
// Sleep one more time before checking because iterationCount might be updated
594+
// before the iteration is complete
595+
GZ_SLEEP_MS(100);
593596
EXPECT_EQ(100u, *server.IterationCount());
594597
EXPECT_FALSE(server.Running());
595598
EXPECT_FALSE(*server.Running(0));
@@ -610,6 +613,9 @@ TEST_P(ServerFixture, RunNonBlocking)
610613
while (*server.IterationCount() < 100)
611614
GZ_SLEEP_MS(100);
612615

616+
// Sleep one more time before checking because iterationCount might be updated
617+
// before the iteration is complete
618+
GZ_SLEEP_MS(100);
613619
EXPECT_EQ(100u, *server.IterationCount());
614620
EXPECT_FALSE(server.Running());
615621
EXPECT_FALSE(*server.Running(0));
@@ -763,6 +769,9 @@ TEST_P(ServerFixture, RunNonBlockingMultiple)
763769
while (*server.IterationCount() < 100)
764770
GZ_SLEEP_MS(100);
765771

772+
// Sleep one more time before checking because iterationCount might be updated
773+
// before the iteration is complete
774+
GZ_SLEEP_MS(100);
766775
EXPECT_EQ(100u, *server.IterationCount());
767776
EXPECT_FALSE(server.Running());
768777
EXPECT_FALSE(*server.Running(0));

src/SimulationRunner.cc

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -733,11 +733,6 @@ bool SimulationRunner::Run(const uint64_t _iterations)
733733
if (!this->currentInfo.paused)
734734
this->realTimeWatch.Start();
735735

736-
// Variables for time keeping.
737-
std::chrono::steady_clock::time_point startTime;
738-
std::chrono::steady_clock::duration sleepTime;
739-
std::chrono::steady_clock::duration actualSleep;
740-
741736
this->running = true;
742737

743738
// Create the world statistics publisher.
@@ -844,6 +839,7 @@ bool SimulationRunner::Run(const uint64_t _iterations)
844839

845840
// Execute all the systems until we are told to stop, or the number of
846841
// iterations is reached.
842+
auto nextUpdateTime = std::chrono::steady_clock::now() + this->updatePeriod;
847843
while (this->running && (_iterations == 0 ||
848844
processedIterations < _iterations))
849845
{
@@ -859,32 +855,6 @@ bool SimulationRunner::Run(const uint64_t _iterations)
859855
// Update the step size and desired rtf
860856
this->UpdatePhysicsParams();
861857

862-
// Compute the time to sleep in order to match, as closely as possible,
863-
// the update period.
864-
sleepTime = 0ns;
865-
actualSleep = 0ns;
866-
867-
sleepTime = std::max(0ns, this->prevUpdateRealTime +
868-
this->updatePeriod - std::chrono::steady_clock::now() -
869-
this->sleepOffset);
870-
871-
// Only sleep if needed.
872-
if (sleepTime > 0ns)
873-
{
874-
GZ_PROFILE("Sleep");
875-
// Get the current time, sleep for the duration needed to match the
876-
// updatePeriod, and then record the actual time slept.
877-
startTime = std::chrono::steady_clock::now();
878-
std::this_thread::sleep_for(sleepTime);
879-
actualSleep = std::chrono::steady_clock::now() - startTime;
880-
}
881-
882-
// Exponentially average out the difference between expected sleep time
883-
// and actual sleep time.
884-
this->sleepOffset =
885-
std::chrono::duration_cast<std::chrono::nanoseconds>(
886-
(actualSleep - sleepTime) * 0.01 + this->sleepOffset * 0.99);
887-
888858
// Update time information. This will update the iteration count, RTF,
889859
// and other values.
890860
this->UpdateCurrentInfo();
@@ -919,6 +889,59 @@ bool SimulationRunner::Run(const uint64_t _iterations)
919889
}
920890

921891
this->resetInitiated = false;
892+
893+
// Only sleep when not paused.
894+
if (!this->currentInfo.paused)
895+
{
896+
// A hybrid sleep/busy-wait strategy is used for precise timing. A simple
897+
// sleep can suffer from wake-up latency due to CPU power-saving states
898+
// (C-states), which causes RTF to undershoot. This strategy sleeps for
899+
// long waits but busy-waits for the final moments to ensure precision.
900+
// The threshold is a conservative value based on typical C-state
901+
// latencies.
902+
using namespace std::chrono_literals;
903+
904+
// Threshold at which we switch from sleeping to spinning. This should be
905+
// larger than the typical OS + CPU C-state latency.
906+
constexpr auto kSpinThreshold = 200us;
907+
908+
// If the scheduled update time is in the future...
909+
if (nextUpdateTime > std::chrono::steady_clock::now())
910+
{
911+
// ...sleep until we are close to the target time.
912+
auto sleepTarget = nextUpdateTime - kSpinThreshold;
913+
if (sleepTarget > std::chrono::steady_clock::now())
914+
{
915+
std::this_thread::sleep_until(sleepTarget);
916+
}
917+
918+
// ...then busy-wait for the final moments for precision.
919+
while (std::chrono::steady_clock::now() < nextUpdateTime)
920+
{
921+
// Spin.
922+
}
923+
}
924+
925+
// Schedule the next update time.
926+
auto now = std::chrono::steady_clock::now();
927+
nextUpdateTime += this->updatePeriod;
928+
if (nextUpdateTime < now)
929+
{
930+
nextUpdateTime = now + this->updatePeriod;
931+
}
932+
}
933+
else
934+
{
935+
// We still need a small sleep to prevent this loop from spinning
936+
// at 100% CPU when paused.
937+
using namespace std::chrono_literals;
938+
std::this_thread::sleep_for(1ms);
939+
940+
// When paused, pre-schedule the next update time for the near future.
941+
// This ensures that when the simulation is un-paused, the first step
942+
// has a valid schedule to meet, preventing a jump in RTF.
943+
nextUpdateTime = std::chrono::steady_clock::now() + this->updatePeriod;
944+
}
922945
}
923946

924947
this->running = false;
@@ -939,9 +962,6 @@ void SimulationRunner::Step(const UpdateInfo &_info)
939962
// Publish info
940963
this->PublishStats();
941964

942-
// Record when the update step starts.
943-
this->prevUpdateRealTime = std::chrono::steady_clock::now();
944-
945965
this->levelMgr->UpdateLevelsState();
946966

947967
// Handle pending systems

src/SimulationRunner.hh

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -448,13 +448,6 @@ namespace gz
448448
/// \brief Manager of distributing/receiving network work.
449449
private: std::unique_ptr<NetworkManager> networkMgr{nullptr};
450450

451-
/// \brief Wall time of the previous update.
452-
private: std::chrono::steady_clock::time_point prevUpdateRealTime;
453-
454-
/// \brief A duration used to account for inaccuracies associated with
455-
/// sleep durations.
456-
private: std::chrono::steady_clock::duration sleepOffset{0};
457-
458451
/// \brief This is the rate at which the systems are updated.
459452
/// The default update rate is 500hz, which is a period of 2ms.
460453
private: std::chrono::steady_clock::duration updatePeriod{2ms};

0 commit comments

Comments
 (0)