-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig_file.py
More file actions
128 lines (115 loc) · 3.88 KB
/
config_file.py
File metadata and controls
128 lines (115 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import platform
from enum import Enum
# Choose the root prefix for the current operating system. It is substituted
# for the 'C:' placeholder in the path strings defined further down this file.
if platform.system() == "Windows":
    base = r'C:'
else:
    base = r'/data/wjs'

# Environment names; the config below indexes into this list
# (e.g. for the project name).
env_list = ['RollerBall', '3DBall', 'Boat']

# Available reset configurations, selected by index: either nothing at all,
# or a dict carrying a 'copy' value of 10.
reset_config = [None, {'copy': 10}]
class algorithms(Enum):
    """Identifiers of the training algorithms that the config can select.

    Each member maps to a distinct integer. The trailing comments keep the
    original author's shorthand classification of every algorithm.
    """

    ppo_sep_ac = 1  # AC, stochastic
    ppo_com = 2  # AC, stochastic
    # Boundary: the members above and below this line differ in the way the
    # `discounted reward` is calculated.
    sac = 3  # AC+Q, stochastic, off-policy
    sac_no_v = 4
    ddpg = 5  # AC+Q, deterministic, off-policy
    td3 = 6  # AC+Q, deterministic, off-policy
# Unity build targets, addressed by list index (noted after each entry).
# Entries written with a 'C:' prefix are templates: the config swaps that
# prefix for the platform-specific root before use.
unity_file = [
    r'C:/UnityBuild/RollerBall/OneFloor/RollerBall-custom.exe',  # 0
    r'3dball',                                                   # 1
    r'C:/UnityBuild/Boat/first/BoatTrain.exe',                   # 2
    r'C:/UnityBuild/Boat/second/BoatTrain.exe',                  # 3
    r'C:/UnityBuild/Boat/interval1/BoatTrain.exe',               # 4
    r'C:/UnityBuild/Boat/no_border/BoatTrain.exe',               # 5
    r'C:/UnityBuild/Boat/no_border2/BoatTrain.exe',              # 6
]

# Upper limit on training length: counted in episodes or in steps, depending
# on whether updates happen per episode or per step.
max_episode = 50_000
# Master configuration dictionary. It is split into hyper parameters, training
# options and recording options, followed by a few free-standing entries.
# Path strings are written with a 'C:' placeholder that is replaced by `base`.
config = {
    'hyper parameters': {
        # SAC temperature, and whether it is adjusted automatically.
        'alpha': 0.2,
        'auto_adaption': True,
        'ployak': 0.995,  # ranges from 0. to 1.
        'epsilon': 0.2,  # controls the learning step size of clip-PPO
        'beta': 1.0e-3,  # coefficient of entropy regularization
        'lr': 5.0e-4,
        'actor_lr': 0.0001,
        'critic_lr': 0.0002,
        'tp_lr': 0.001,
        'reward_lr': 0.001,
        'gamma': 0.99,
        'lambda': 0.95,
        'action_bound': 1,
        'decay_rate': 0.7,
        'decay_steps': 100,
        'stair': False,
        'max_episode': max_episode,
        'base_sigma': 0.1,  # only takes effect for stochastic policies
        'assign_interval': 4,  # not used yet
    },
    'train config': {
        # Which algorithm to train with.
        'algorithm': algorithms.sac,
        'init_max_step': 300,
        # Used for both on-policy and off-policy training: caps the number
        # of steps within one episode.
        'max_step': 1000,
        'max_episode': max_episode,
        'max_sample_time': 20,
        'till_all_done': True,  # used for on-policy learning
        'start_continuous_done': False,
        # Run mode: .exe or unity-client, and train or inference.
        'train': True,
        'unity_mode': False,
        'unity_file': unity_file[0].replace('C:', base),
        'port': 5006,
        # Trick.
        'use_trick': True,
        # Excel recording.
        'excel_record': False,
        'excel_record_frequency': 10,
        # MongoDB recording.
        'mongo_record': False,
        'mongo_record_frequency': 10,
        'mongo_record_all': False,
        # Whether batches are shuffled.
        'random_batch': True,
        'batchsize': 100,
        'epoch': 1,
        # Checkpointing.
        'save_frequency': 20,
        # Number of agents and their control mode.
        'dynamic_allocation': True,
        'reset_config': reset_config[1],
        # Replay-buffer settings.
        'use_replay_buffer': True,  # on-policy or off-policy
        'use_priority': False,
        'buffer_size': 10000,
        'buffer_batch_size': 100,
        'max_learn_time': 20,
    },
    'record config': {
        'basic_dir': r'C:/RLData/'.replace('C:', base),
        'log_basic_dir': r'C:/RLData/logs/'.replace('C:', base),
        'excel_basic_dir': r'C:/RLData/excels/'.replace('C:', base),
        'checkpoint_basic_dir': r'C:/RLData/models/'.replace('C:', base),
        'config_basic_dir': r'C:/RLData/config/'.replace('C:', base),
        'project_name': env_list[0],
        'remark': r'sac_onefloor',
        'run_id': r'3',
        'logger2file': False,
    },
    'config_file': r"",
    'ps': r"time penalty=-0.01",
    'clean_list': [
        r'Boat\sac_off_no_border0',
        r'Boat\sac_off_no_border1',
        r'Boat\sac_off_no_border2',
        r'RollerBall\sac_onefloor0',
        r'RollerBall\sac_onefloor1',
        r'RollerBall\sac_onefloor2',
    ],
}