Commit 5716d56

Fixes wait time in play.py by using env.step_dt (#2239)
# Description

When running `play.py` with `--real-time`, the dt used to pace the loop is incorrect. It currently uses `env.physics_dt`, which is `sim_dt`. However, if the decimation is >1, the effective dt is `env.step_dt`, which is `sim_dt * decimation`. We run one `env.step()` per loop iteration, so this should definitely be `env.step_dt`. This affects all `reinforcement_learning/<rl_library>/play.py` files; this commit updates all of them accordingly.

Fixes #2230

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature works (no test needed for a small change in a script)
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file (no changelog needed for scripts)
- [x] I have added my name to `CONTRIBUTORS.md` or my name already exists there

Signed-off-by: Tyler Lum <[email protected]>
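To illustrate why the pacing must use `env.step_dt`, here is a minimal sketch of a real-time play loop. The `MockEnv` class and its constructor arguments are hypothetical stand-ins for illustration only; real Isaac Lab environments expose `physics_dt` and `step_dt` with these semantics, but the loop below is a sketch, not the actual script code.

```python
import time

class MockEnv:
    """Hypothetical stand-in for an Isaac Lab environment (illustration only)."""

    def __init__(self, sim_dt, decimation):
        self.physics_dt = sim_dt            # duration of one physics substep
        self.step_dt = sim_dt * decimation  # sim time advanced by one env.step()

    def step(self, action):
        pass  # placeholder for stepping the simulation

env = MockEnv(sim_dt=1 / 200, decimation=4)

# Pace the loop by step_dt: each env.step() advances step_dt of simulated
# time, so each iteration should take that long in wall-clock time.
dt = env.step_dt  # 0.02 s; using physics_dt (0.005 s) would run 4x too fast

start = time.time()
env.step(None)
sleep_time = dt - (time.time() - start)
if sleep_time > 0:
    time.sleep(sleep_time)
```

With decimation 4, sleeping `physics_dt` per iteration would play back four times faster than real time, which is exactly the bug this commit fixes.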
Parent: 07e45a9 · Commit: 5716d56

File tree: 5 files changed, +7 −6 lines


CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -93,6 +93,7 @@ Guidelines for modifications:
 * Shafeef Omar
 * Shundo Kishi
 * Stephan Pleines
+* Tyler Lum
 * Victor Khaustov
 * Vladimir Fokow
 * Wei Yang
```

scripts/reinforcement_learning/rl_games/play.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -152,7 +152,7 @@ def main():
     agent.restore(resume_path)
     agent.reset()

-    dt = env.unwrapped.physics_dt
+    dt = env.unwrapped.step_dt

     # reset environment
     obs = env.reset()
```

scripts/reinforcement_learning/rsl_rl/play.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -140,7 +140,7 @@ def main():
         ppo_runner.alg.policy, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
     )

-    dt = env.unwrapped.physics_dt
+    dt = env.unwrapped.step_dt

     # reset environment
     obs, _ = env.get_observations()
```

scripts/reinforcement_learning/sb3/play.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -134,7 +134,7 @@ def main():
     print(f"Loading checkpoint from: {checkpoint_path}")
     agent = PPO.load(checkpoint_path, env, print_system_info=True)

-    dt = env.unwrapped.physics_dt
+    dt = env.unwrapped.step_dt

     # reset environment
     obs = env.reset()
```

scripts/reinforcement_learning/skrl/play.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -137,11 +137,11 @@ def main():
     if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
         env = multi_agent_to_single_agent(env)

-    # get environment (physics) dt for real-time evaluation
+    # get environment (step) dt for real-time evaluation
     try:
-        dt = env.physics_dt
+        dt = env.step_dt
     except AttributeError:
-        dt = env.unwrapped.physics_dt
+        dt = env.unwrapped.step_dt

     # wrap for video recording
     if args_cli.video:
```
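The skrl hunk above keeps a try/except fallback: a wrapper may not forward `step_dt`, in which case the attribute must be read from the underlying environment. A minimal sketch of that pattern, using simplified hypothetical classes (real gym-style wrappers forward `unwrapped` to the innermost env, which this mock only approximates):

```python
class BaseEnv:
    """Hypothetical underlying env that defines step_dt."""

    def __init__(self):
        self.step_dt = 0.02

class Wrapper:
    """Hypothetical wrapper that does NOT forward step_dt."""

    def __init__(self, env):
        self.unwrapped = env  # only exposes the underlying env

env = Wrapper(BaseEnv())

try:
    dt = env.step_dt            # raises AttributeError: the wrapper hides it
except AttributeError:
    dt = env.unwrapped.step_dt  # fall back to the underlying env
```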
