Skip to content

gSDE Hyperparams #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ jobs:
run: |
python -m pip install --upgrade pip
# cpu version of pytorch - faster to download
pip install torch==1.4.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip install torch==1.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
# Install gSDE branch
pip install git+https://github.com/DLR-RM/stable-baselines3@sde
# faster to install because pre-built wheel
pip install pybullet==2.8.4
pip install -r requirements.txt
Expand Down
3 changes: 3 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ variables:

type-check:
script:
# Install gSDE branch
- pip install git+https://github.com/DLR-RM/stable-baselines3@sde
- make type

pytest:
script:
- pip install git+https://github.com/DLR-RM/stable-baselines3@sde
# MKL_THREADING_LAYER=GNU to avoid MKL_THREADING_LAYER=INTEL incompatibility error
- MKL_THREADING_LAYER=GNU make pytest

Expand Down
163 changes: 84 additions & 79 deletions hyperparams/td3.yml
Original file line number Diff line number Diff line change
@@ -1,62 +1,68 @@
# Tuned
MountainCarContinuous-v0:
n_timesteps: 300000
normalize: True
n_timesteps: 30000
policy: 'MlpPolicy'
noise_type: 'ornstein-uhlenbeck'
noise_std: 0.5
learning_rate: !!float 3e-4
buffer_size: 50000
batch_size: 256
n_episodes_rollout: -1
gradient_steps: 8
train_freq: 8
learning_starts: 0
use_sde: True
policy_kwargs: "dict(log_std_init=0.0, net_arch=[64, 64])"

Pendulum-v0:
n_timesteps: 20000
policy: 'MlpPolicy'
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

LunarLanderContinuous-v2:
n_timesteps: !!float 3e5
policy: 'MlpPolicy'
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

BipedalWalker-v3:
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

# To be tuned
BipedalWalkerHardcore-v3:
n_timesteps: !!float 1e7
policy: 'MlpPolicy'
gamma: 0.98
buffer_size: 200000
buffer_size: 500000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3, use_expln=True)"
use_sde: True

# Tuned
HalfCheetahBulletEnv-v0:
Expand All @@ -66,12 +72,12 @@ HalfCheetahBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

AntBulletEnv-v0:
env_wrapper: utils.wrappers.TimeFeatureWrapper
Expand All @@ -80,12 +86,12 @@ AntBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

HopperBulletEnv-v0:
env_wrapper: utils.wrappers.TimeFeatureWrapper
Expand All @@ -94,12 +100,12 @@ HopperBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

Walker2DBulletEnv-v0:
env_wrapper: utils.wrappers.TimeFeatureWrapper
Expand All @@ -108,13 +114,12 @@ Walker2DBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"

train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

# To be tested
HumanoidBulletEnv-v0:
Expand All @@ -124,12 +129,12 @@ HumanoidBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

# Tuned
ReacherBulletEnv-v0:
Expand All @@ -139,12 +144,12 @@ ReacherBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

# Tuned
InvertedDoublePendulumBulletEnv-v0:
Expand All @@ -154,12 +159,12 @@ InvertedDoublePendulumBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

# Tuned
InvertedPendulumSwingupBulletEnv-v0:
Expand All @@ -169,12 +174,12 @@ InvertedPendulumSwingupBulletEnv-v0:
gamma: 0.98
buffer_size: 200000
learning_starts: 10000
noise_type: 'normal'
noise_std: 0.1
gradient_steps: -1
n_episodes_rollout: 1
learning_rate: !!float 1e-3
policy_kwargs: "dict(net_arch=[400, 300])"
train_freq: 64
gradient_steps: 64
n_episodes_rollout: -1
learning_rate: !!float 6e-4
policy_kwargs: "dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)"
use_sde: True

MinitaurBulletEnv-v0:
env_wrapper: utils.wrappers.TimeFeatureWrapper
Expand Down
Loading