-
Notifications
You must be signed in to change notification settings - Fork 146
Open
Labels
question — Further information is requested
Description
Required prerequisites
- I have read the documentation https://omnisafe.readthedocs.io.
- I have searched the Issue Tracker and Discussions to confirm that this hasn't already been reported. (+1 or comment there if it has.)
- Consider asking first in a Discussion.
Questions
Hello,
I’m currently testing the SAC Lagrangian implementation provided by OmniSafe.
I followed the README instructions by cloning the repository and installing it with `pip install -e .`. I then applied it to the SafetyPointGoal1 task from Safety Gymnasium.
I used the default configuration:
{
"seed": 0,
"train_cfgs": {
"device": "cpu",
"torch_threads": 1,
"vector_env_nums": 1,
"parallel": 1,
"total_steps": 10000000,
"eval_episodes": 1,
"epochs": 5000
},
"algo_cfgs": {
"steps_per_epoch": 2000,
"update_cycle": 1,
"update_iters": 1,
"size": 1000000,
"batch_size": 256,
"reward_normalize": false,
"cost_normalize": false,
"obs_normalize": false,
"max_grad_norm": 40,
"use_critic_norm": false,
"critic_norm_coeff": 0.001,
"polyak": 0.005,
"gamma": 0.99,
"start_learning_steps": 10000,
"policy_delay": 2,
"use_exploration_noise": false,
"exploration_noise": 0.1,
"policy_noise": 0.2,
"policy_noise_clip": 0.5,
"alpha": 1e-05,
"auto_alpha": false,
"use_cost": true,
"warmup_epochs": 100
},
"logger_cfgs": {
"use_wandb": false,
"wandb_project": "omnisafe",
"use_tensorboard": true,
"save_model_freq": 100,
"log_dir": "./runs",
"window_lens": 10
},
"model_cfgs": {
"weight_initialization_mode": "kaiming_uniform",
"actor_type": "gaussian_sac",
"linear_lr_decay": false,
"actor": {
"hidden_sizes": [
256,
256
],
"activation": "relu",
"lr": 5e-06
},
"critic": {
"num_critics": 2,
"hidden_sizes": [
256,
256
],
"activation": "relu",
"lr": 0.001
}
},
"lagrange_cfgs": {
"cost_limit": 25.0,
"lagrangian_multiplier_init": 0.0,
"lambda_lr": 5e-07,
"lambda_optimizer": "Adam"
},
"exp_increment_cfgs": {
"train_cfgs": {
"parallel": 1,
"total_steps": 10000000,
"device": "cpu",
"vector_env_nums": 1,
"torch_threads": 1
}
},
"exp_name": "SACLag-{SafetyPointGoal1-v0}",
"env_id": "SafetyPointGoal1-v0",
"algo": "SACLag"
}

However, the learning performance is quite different from the graphs shown on the OmniSafe benchmark page for off-policy algorithms.
Will running multiple seeds and computing the mean/variance help the results match those benchmark curves more closely?
Also, while PPO Lagrangian seems to reduce the cost in a stable manner, SAC Lagrangian does not. Why might that be the case?
Metadata
Metadata
Assignees
Labels
question — Further information is requested
