Skip to content

Commit 6e82505

Browse files
committed
🚀 [RofuncRL] Update trainers
1 parent cbc4309 commit 6e82505

16 files changed

+43
-31
lines changed

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
author = 'Junjia Liu'
2828

2929
# The full version, including alpha/beta/rc tags
30-
release = '0.0.2.1'
30+
release = '0.0.2.3'
3131

3232
# -- General configuration ---------------------------------------------------
3333

examples/learning_rl/example_Ant_RofuncRL.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ def train(custom_args):
4242
# Instantiate the RL trainer
4343
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
4444
env=env,
45-
device=cfg.rl_device)
45+
device=cfg.rl_device,
46+
env_name=custom_args.task)
4647

4748
# Start training
4849
trainer.train()
@@ -74,7 +75,8 @@ def inference(custom_args):
7475
# Instantiate the RL trainer
7576
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
7677
env=infer_env,
77-
device=cfg.rl_device)
78+
device=cfg.rl_device,
79+
env_name=custom_args.task)
7880
# load checkpoint
7981
if custom_args.ckpt_path is None:
8082
custom_args.ckpt_path = model_zoo(name="AntRofuncRLPPO.pt") # TODO: check

examples/learning_rl/example_CURICabinet_RofuncRL.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ def train(custom_args):
4242
# Instantiate the RL trainer
4343
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
4444
env=env,
45-
device=cfg.rl_device)
45+
device=cfg.rl_device,
46+
env_name=custom_args.task)
4647

4748
# Start training
4849
trainer.train()
@@ -74,7 +75,8 @@ def inference(custom_args):
7475
# Instantiate the RL trainer
7576
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
7677
env=infer_env,
77-
device=cfg.rl_device)
78+
device=cfg.rl_device,
79+
env_name=custom_args.task)
7880
# load checkpoint
7981
if custom_args.ckpt_path is None:
8082
custom_args.ckpt_path = model_zoo(name="CURICabinetRofuncRLPPO_left_arm.pt") # TODO: Check

examples/learning_rl/example_FrankaCabinet_RofuncRL.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ def train(custom_args):
4242
# Instantiate the RL trainer
4343
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
4444
env=env,
45-
device=cfg.rl_device)
45+
device=cfg.rl_device,
46+
env_name=custom_args.task)
4647

4748
# Start training
4849
trainer.train()
@@ -74,7 +75,8 @@ def inference(custom_args):
7475
# Instantiate the RL trainer
7576
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
7677
env=infer_env,
77-
device=cfg.rl_device)
78+
device=cfg.rl_device,
79+
env_name=custom_args.task)
7880
# load checkpoint
7981
if custom_args.ckpt_path is None:
8082
custom_args.ckpt_path = model_zoo(name=f"{custom_args.task}.pth") # TODO: Check

examples/learning_rl/example_GymTasks_RofuncRL.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ def train(custom_args):
3434
# Instantiate the RL trainer
3535
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
3636
env=env,
37-
device=cfg.rl_device)
37+
device=cfg.rl_device,
38+
env_name=custom_args.task)
3839

3940
# Start training
4041
trainer.train()

examples/learning_rl/example_HumanoidAMP_RofuncRL.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def train(custom_args):
4242
# Instantiate the RL trainer
4343
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
4444
env=env,
45-
device=cfg.rl_device)
46-
45+
device=cfg.rl_device,
46+
env_name=custom_args.task)
4747
# Start training
4848
trainer.train()
4949

@@ -75,7 +75,9 @@ def inference(custom_args):
7575
# Instantiate the RL trainer
7676
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
7777
env=infer_env,
78-
device=cfg.rl_device)
78+
device=cfg.rl_device,
79+
env_name=custom_args.task)
80+
7981
# load checkpoint
8082
if custom_args.ckpt_path is None:
8183
custom_args.ckpt_path = model_zoo(name=f"{custom_args.task}.pth")

examples/learning_rl/example_HumanoidASE_RofuncRL.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def train(custom_args):
4343
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
4444
env=env,
4545
device=cfg.rl_device,
46+
env_name=custom_args.task,
4647
hrl=hrl)
4748

4849
# Start training
@@ -78,6 +79,7 @@ def inference(custom_args):
7879
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
7980
env=infer_env,
8081
device=cfg.rl_device,
82+
env_name=custom_args.task,
8183
hrl=hrl)
8284
# load checkpoint
8385
if custom_args.ckpt_path is None:
@@ -89,7 +91,7 @@ def inference(custom_args):
8991

9092

9193
if __name__ == '__main__':
92-
gpu_id = 0
94+
gpu_id = 1
9395

9496
parser = argparse.ArgumentParser()
9597
# Available tasks and motion files:

examples/learning_rl/example_Humanoid_RofuncRL.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def train(custom_args):
4242
# Instantiate the RL trainer
4343
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
4444
env=env,
45-
device=cfg.rl_device)
46-
45+
device=cfg.rl_device,
46+
env_name=custom_args.task)
4747
# Start training
4848
trainer.train()
4949

@@ -74,7 +74,9 @@ def inference(custom_args):
7474
# Instantiate the RL trainer
7575
trainer = trainer_map[custom_args.agent](cfg=cfg.train,
7676
env=infer_env,
77-
device=cfg.rl_device)
77+
device=cfg.rl_device,
78+
env_name=custom_args.task)
79+
7880
# load checkpoint
7981
if custom_args.ckpt_path is None:
8082
custom_args.ckpt_path = model_zoo(name=f"{custom_args.task}.pth")

rofunc/learning/RofuncRL/trainers/a2c_trainer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020

2121

2222
class A2CTrainer(BaseTrainer):
23-
def __init__(self, cfg, env, device):
24-
super().__init__(cfg, env, device)
23+
def __init__(self, cfg, env, device, env_name):
24+
super().__init__(cfg, env, device, env_name)
2525
self.memory = RandomMemory(memory_size=cfg.Trainer.rollouts, num_envs=self.env.num_envs, device=device)
2626
self.agent = A2CAgent(cfg, self.env.observation_space, self.env.action_space, self.memory,
2727
device, self.exp_dir, self.rofunc_logger)

rofunc/learning/RofuncRL/trainers/amp_trainer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020

2121

2222
class AMPTrainer(BaseTrainer):
23-
def __init__(self, cfg, env, device):
24-
super().__init__(cfg, env, device)
23+
def __init__(self, cfg, env, device, env_name):
24+
super().__init__(cfg, env, device, env_name)
2525
self.memory = RandomMemory(memory_size=self.rollouts, num_envs=self.env.num_envs, device=device)
2626
self.motion_dataset = RandomMemory(memory_size=200000, device=device)
2727
self.replay_buffer = RandomMemory(memory_size=1000000, device=device)

0 commit comments

Comments (0)