Enables sb3 to load checkpoint to continue training (isaac-sim#2954)

ooctipus · kellyguo11 · web-flow · commit 2f12bb9819b6 · 2025-07-18T08:45:55.000-07:00
# Description This PR extends `scripts/reinforcement_learning/sb3/train.py` with a feature to continue training by loading a checkpoint. ## Type of change  - New feature (non-breaking change which adds functionality) ## Screenshots Please attach before and after screenshots of the change if applicable.  ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format` - [ ] I have made corresponding changes to the documentation - [ ] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there  --------- Co-authored-by: Kelly Guo <kellyg@nvidia.com> Co-authored-by: Kelly Guo <kellyguo123@hotmail.com>
diff --git a/scripts/reinforcement_learning/sb3/train.py b/scripts/reinforcement_learning/sb3/train.py
@@ -25,6 +25,7 @@
 parser.add_argument("--task", type=str, default=None, help="Name of the task.")
 parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
 parser.add_argument("--log_interval", type=int, default=100_000, help="Log data every n timesteps.")
+parser.add_argument("--checkpoint", type=str, default=None, help="Continue the training from checkpoint.")
 parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
 parser.add_argument(
     "--keep_all_info",
@@ -179,6 +180,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create agent from stable baselines
     agent = PPO(policy_arch, env, verbose=1, tensorboard_log=log_dir, **agent_cfg)
+    if args_cli.checkpoint is not None:
+        agent = agent.load(args_cli.checkpoint, env, print_system_info=True)
 
     # callbacks for agent
     checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=log_dir, name_prefix="model", verbose=2)