Skip to content

Commit cd6288f

Browse files
committed
add additional configs with shorter unroll lengths
1 parent 09e98c6 commit cd6288f

File tree

6 files changed

+170
-25
lines changed

6 files changed

+170
-25
lines changed

experiment_code/mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-CEAA-T.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,6 @@
2424
"use_resnet": True,
2525
"model": "NetHackNetTtyrec",
2626
"unfreeze_actor_steps": 50_000_000,
27-
"actor_batch_size": 64,
28-
"batch_size": 32,
29-
"virtual_batch_size": 32,
30-
"ttyrec_batch_size": 128,
31-
"unroll_length": 80,
32-
"ttyrec_unroll_length": 80,
3327
}
3428

3529
# params different between exps
@@ -40,10 +34,32 @@
4034
"baseline_cost": [0.5],
4135
"reward_clip": [False],
4236
"adam_learning_rate": [0.0002],
37+
"actor_batch_size": [64],
38+
"batch_size": [32],
39+
"virtual_batch_size": [32],
40+
"ttyrec_batch_size": [128],
41+
"unroll_length": [80],
42+
"ttyrec_unroll_length": [80],
4343
},
4444
{
4545
"seed": list(range(5)),
4646
"adam_learning_rate": [0.001],
47+
"actor_batch_size": [64],
48+
"batch_size": [32],
49+
"virtual_batch_size": [32],
50+
"ttyrec_batch_size": [128],
51+
"unroll_length": [80],
52+
"ttyrec_unroll_length": [80],
53+
},
54+
{
55+
"seed": list(range(5)),
56+
"adam_learning_rate": [0.001],
57+
"actor_batch_size": [128],
58+
"batch_size": [64],
59+
"virtual_batch_size": [64],
60+
"ttyrec_batch_size": [256],
61+
"unroll_length": [32],
62+
"ttyrec_unroll_length": [32],
4763
},
4864
]
4965

experiment_code/mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KLAA-T.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,6 @@
2626
"use_resnet": True,
2727
"model": "NetHackNetTtyrec",
2828
"unfreeze_actor_steps": 50_000_000,
29-
"actor_batch_size": 64,
30-
"batch_size": 32,
31-
"virtual_batch_size": 32,
32-
"ttyrec_batch_size": 128,
33-
"unroll_length": 80,
34-
"ttyrec_unroll_length": 80,
3529
}
3630

3731
# params different between exps
@@ -42,10 +36,32 @@
4236
"baseline_cost": [0.5],
4337
"reward_clip": [False],
4438
"adam_learning_rate": [0.0002],
39+
"actor_batch_size": [64],
40+
"batch_size": [32],
41+
"virtual_batch_size": [32],
42+
"ttyrec_batch_size": [128],
43+
"unroll_length": [80],
44+
"ttyrec_unroll_length": [80],
4545
},
4646
{
4747
"seed": list(range(5)),
4848
"adam_learning_rate": [0.001],
49+
"actor_batch_size": [64],
50+
"batch_size": [32],
51+
"virtual_batch_size": [32],
52+
"ttyrec_batch_size": [128],
53+
"unroll_length": [80],
54+
"ttyrec_unroll_length": [80],
55+
},
56+
{
57+
"seed": list(range(5)),
58+
"adam_learning_rate": [0.001],
59+
"actor_batch_size": [128],
60+
"batch_size": [64],
61+
"virtual_batch_size": [64],
62+
"ttyrec_batch_size": [256],
63+
"unroll_length": [32],
64+
"ttyrec_unroll_length": [32],
4965
},
5066
]
5167

experiment_code/mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KLBC-T.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from random_word import RandomWords
2+
3+
from mrunner.helpers.specification_helper import (
4+
create_experiments_helper,
5+
get_combinations,
6+
)
7+
8+
name = globals()["script"][:-3]
9+
10+
# params for all exps
11+
config = {
12+
"exp_tags": [name],
13+
"connect": "0.0.0.0:4431",
14+
"exp_set": "2G",
15+
"exp_point": "monk-APPO-AMZN-KLBC",
16+
"num_actor_cpus": 20,
17+
"total_steps": 2_000_000_000,
18+
"group": "monk-APPO-AMZN-KLBC",
19+
"character": "mon-hum-neu-mal",
20+
"use_checkpoint_actor": True,
21+
"kickstarting_loss_bc": 0.2,
22+
"use_kickstarting_bc": True,
23+
"kickstarting_path": "/net/pr2/projects/plgrid/plgggmum_crl/bcupial/AMZN/checkpoint_v0",
24+
"model_checkpoint_path": "/net/pr2/projects/plgrid/plgggmum_crl/bcupial/AMZN/checkpoint_v0",
25+
"dataset": "amzn_bc",
26+
"use_resnet": True,
27+
"model": "NetHackNetTtyrec",
28+
"unfreeze_actor_steps": 50_000_000,
29+
}
30+
31+
# params different between exps
32+
params_grid = [
33+
{
34+
"seed": list(range(5)),
35+
"entropy_cost": [0.001],
36+
"baseline_cost": [0.5],
37+
"reward_clip": [False],
38+
"adam_learning_rate": [0.0002],
39+
"actor_batch_size": [64],
40+
"batch_size": [32],
41+
"virtual_batch_size": [32],
42+
"ttyrec_batch_size": [128],
43+
"unroll_length": [80],
44+
"ttyrec_unroll_length": [80],
45+
},
46+
{
47+
"seed": list(range(5)),
48+
"adam_learning_rate": [0.001],
49+
"actor_batch_size": [64],
50+
"batch_size": [32],
51+
"virtual_batch_size": [32],
52+
"ttyrec_batch_size": [128],
53+
"unroll_length": [80],
54+
"ttyrec_unroll_length": [80],
55+
},
56+
{
57+
"seed": list(range(5)),
58+
"adam_learning_rate": [0.001],
59+
"actor_batch_size": [128],
60+
"batch_size": [64],
61+
"virtual_batch_size": [64],
62+
"ttyrec_batch_size": [256],
63+
"unroll_length": [32],
64+
"ttyrec_unroll_length": [32],
65+
},
66+
]
67+
68+
params_configurations = get_combinations(params_grid)
69+
70+
final_grid = []
71+
for e, cfg in enumerate(params_configurations):
72+
cfg = {key: [value] for key, value in cfg.items()}
73+
r = RandomWords().get_random_word()
74+
cfg["group"] = [f"{name}_{e}_{r}"]
75+
final_grid.append(dict(cfg))
76+
77+
78+
experiments_list = create_experiments_helper(
79+
experiment_name=name,
80+
project_name="nle",
81+
with_neptune=False,
82+
script="python3 mrunner_run.py",
83+
python_path=".",
84+
tags=[name],
85+
exclude=["checkpoint"],
86+
base_config=config,
87+
params_grid=final_grid,
88+
)

experiment_code/mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KS-T.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@
2525
"use_resnet": True,
2626
"model": "NetHackNetTtyrec",
2727
"unfreeze_actor_steps": 50_000_000,
28-
"actor_batch_size": 64,
29-
"batch_size": 32,
30-
"virtual_batch_size": 32,
31-
"unroll_length": 80,
3228
}
3329

3430
# params different between exps
@@ -39,10 +35,26 @@
3935
"baseline_cost": [0.5],
4036
"reward_clip": [False],
4137
"adam_learning_rate": [0.0002],
38+
"actor_batch_size": [64],
39+
"batch_size": [32],
40+
"virtual_batch_size": [32],
41+
"unroll_length": [80],
4242
},
4343
{
4444
"seed": list(range(5)),
4545
"adam_learning_rate": [0.001],
46+
"actor_batch_size": [64],
47+
"batch_size": [32],
48+
"virtual_batch_size": [32],
49+
"unroll_length": [80],
50+
},
51+
{
52+
"seed": list(range(5)),
53+
"adam_learning_rate": [0.001],
54+
"actor_batch_size": [128],
55+
"batch_size": [64],
56+
"virtual_batch_size": [64],
57+
"unroll_length": [32],
4658
},
4759
]
4860

experiment_code/mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-T.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@
2222
"use_resnet": True,
2323
"model": "NetHackNetTtyrec",
2424
"unfreeze_actor_steps": 50_000_000,
25-
"actor_batch_size": 64,
26-
"batch_size": 32,
27-
"virtual_batch_size": 32,
28-
"unroll_length": 80,
2925
}
3026

3127
# params different between exps
@@ -36,10 +32,26 @@
3632
"baseline_cost": [0.5],
3733
"reward_clip": [False],
3834
"adam_learning_rate": [0.0002],
35+
"actor_batch_size": [64],
36+
"batch_size": [32],
37+
"virtual_batch_size": [32],
38+
"unroll_length": [80],
3939
},
4040
{
4141
"seed": list(range(5)),
4242
"adam_learning_rate": [0.001],
43+
"actor_batch_size": [64],
44+
"batch_size": [32],
45+
"virtual_batch_size": [32],
46+
"unroll_length": [80],
47+
},
48+
{
49+
"seed": list(range(5)),
50+
"adam_learning_rate": [0.001],
51+
"actor_batch_size": [128],
52+
"batch_size": [64],
53+
"virtual_batch_size": [64],
54+
"unroll_length": [32],
4355
},
4456
]
4557

experiment_code/mrunner_runs/iclr.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ ssh-add
99
# mrunner --config ~/.mrunner.yaml --context ares_nethack_gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KS-T.py
1010
# mrunner --config ~/.mrunner.yaml --context ares_nethack_gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-T.py
1111

12-
# mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-AA-BC.py
13-
# mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-CEAA-T.py
14-
# mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KLAA-T.py
15-
# mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KS-T.py
16-
# mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-T.py
12+
# mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-AA-BC.py
13+
mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-CEAA-T.py
14+
mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KLAA-T.py
15+
# mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KLBC-T.py # waiting for dataset
16+
mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-AA-KS-T.py
17+
mrunner --config ~/.mrunner.yaml --context athena_nethack_1gpu run mrunner_exps/ICLR_baselines/2023_20_09_monk-APPO-T.py

0 commit comments

Comments
 (0)