sail-sg
diff --git a/benchmark/test_envpool.py b/benchmark/test_envpool.py
Lines changed: 1 addition & 3 deletions
diff --git a/benchmark/test_gym.py b/benchmark/test_gym.py
Lines changed: 2 additions & 3 deletions
diff --git a/envpool/atari/api_test.py b/envpool/atari/api_test.py
Lines changed: 3 additions & 9 deletions
diff --git a/envpool/atari/atari_env_test.cc b/envpool/atari/atari_env_test.cc
Lines changed: 28 additions & 41 deletions
diff --git a/envpool/box2d/box2d_correctness_test.py b/envpool/box2d/box2d_correctness_test.py
Lines changed: 1 addition & 3 deletions
--- a/benchmark/test_envpool.py
+++ b/benchmark/test_envpool.py
@@ -82,9 +82,7 @@
   env = envpool.make_gym(task_id, **kwargs)
   env.async_reset()
   env.action_space.seed(args.seed)
-  action = np.array(
-    [env.action_space.sample() for _ in range(args.batch_size)]
-  )
+  action = np.array([env.action_space.sample() for _ in range(args.batch_size)])
   t = time.time()
   for _ in tqdm.trange(args.total_step):
     info = env.recv()[-1]
 
--- a/benchmark/test_gym.py
+++ b/benchmark/test_gym.py
@@ -33,9 +33,8 @@ def run(env, num_envs, total_step, async_):
       )
     else:
       env = gym.vector.make(
-        task_id, num_envs, async_, lambda e: wrap_deepmind(
-          e, episode_life=False, clip_rewards=False, frame_stack=4
-        )
+        task_id, num_envs, async_, lambda e:
+        wrap_deepmind(e, episode_life=False, clip_rewards=False, frame_stack=4)
       )
   elif env == "mujoco":
     task_id = "Ant-v3"
 
--- a/envpool/atari/api_test.py
+++ b/envpool/atari/api_test.py
@@ -36,9 +36,7 @@ def test_spec(self) -> None:
       action_num = action_nums[task]
       spec = make_spec(task.capitalize() + "-v5")
       logging.info(spec)
-      self.assertEqual(
-        spec.action_array_spec["action"].maximum + 1, action_num
-      )
+      self.assertEqual(spec.action_array_spec["action"].maximum + 1, action_num)
       # check dm spec
       dm_obs_spec = spec.observation_spec().obs
       dm_act_spec = spec.action_spec()
@@ -126,9 +124,7 @@ def test_lowlevel_step(self) -> None:
     self.assertEqual(ts.observation.lives.dtype, np.int32)
     np.testing.assert_allclose(ts.observation.env_id, np.arange(num_envs))
     self.assertEqual(ts.observation.env_id.dtype, np.int32)
-    np.testing.assert_allclose(
-      ts.observation.players.env_id.shape, (num_envs,)
-    )
+    np.testing.assert_allclose(ts.observation.players.env_id.shape, (num_envs,))
     self.assertEqual(ts.observation.players.env_id.dtype, np.int32)
     action = {
       "env_id": np.arange(num_envs),
@@ -178,9 +174,7 @@ def test_highlevel_step(self) -> None:
     self.assertEqual(ts.observation.lives.dtype, np.int32)
     np.testing.assert_allclose(ts.observation.env_id, np.arange(num_envs))
     self.assertEqual(ts.observation.env_id.dtype, np.int32)
-    np.testing.assert_allclose(
-      ts.observation.players.env_id.shape, (num_envs,)
-    )
+    np.testing.assert_allclose(ts.observation.players.env_id.shape, (num_envs,))
     self.assertEqual(ts.observation.players.env_id.dtype, np.int32)
     action = {
       "env_id": np.arange(num_envs),
 
--- a/envpool/atari/atari_env_test.cc
+++ b/envpool/atari/atari_env_test.cc
@@ -35,9 +35,9 @@ TEST(AtariEnvTest, GrayScaleMaxPoolOrder) {
       ptr1[i * n + j] = j;
     }
   }
-  Array col0(Spec<uint8_t>({n, n, 3}));
-  Array col1(Spec<uint8_t>({n, n, 3}));
-  Array result(Spec<uint8_t>({n, n, 1}));
+  TArray col0(Spec<uint8_t>({n, n, 3}));
+  TArray col1(Spec<uint8_t>({n, n, 3}));
+  TArray result(Spec<uint8_t>({n, n, 1}));
   auto* col0_ptr = static_cast<uint8_t*>(col0.Data());
   auto* col1_ptr = static_cast<uint8_t*>(col1.Data());
   auto* result_ptr = static_cast<uint8_t*>(result.Data());
@@ -97,19 +97,17 @@ TEST(AtariEnvTest, Seed) {
   atari::AtariEnvSpec spec(config);
   atari::AtariEnvPool envpool0(spec);
   atari::AtariEnvPool envpool1(spec);
-  Array all_env_ids(Spec<int>({static_cast<int>(batch)}));
+  TArray all_env_ids(Spec<int>({static_cast<int>(batch)}));
   for (std::size_t i = 0; i < batch; ++i) {
     all_env_ids[i] = i;
   }
   envpool0.Reset(all_env_ids);
   envpool1.Reset(all_env_ids);
-  std::vector<Array> raw_action(3);
-  AtariAction action(&raw_action);
+
+  AtariAction action;
   for (int i = 0; i < total_iter; ++i) {
-    auto state_vec0 = envpool0.Recv();
-    auto state_vec1 = envpool1.Recv();
-    AtariState state0(&state_vec0);
-    AtariState state1(&state_vec1);
+    AtariState state0(envpool0.Recv());
+    AtariState state1(envpool1.Recv());
     EXPECT_EQ(state0["obs"_].Shape(),
               std::vector<std::size_t>({batch, 4, 84, 84}));
     EXPECT_EQ(state1["obs"_].Shape(),
@@ -128,7 +126,7 @@ TEST(AtariEnvTest, Seed) {
     }
     action["env_id"_] = state0["info:env_id"_];
     action["players.env_id"_] = state0["info:env_id"_];
-    action["action"_] = Array(Spec<int>({static_cast<int>(batch)}));
+    action["action"_] = TArray(Spec<int>({static_cast<int>(batch)}));
     for (std::size_t j = 0; j < batch; ++j) {
       action["action"_][j] = std::rand() % 6;
     }
@@ -149,17 +147,15 @@ TEST(AtariEnvTest, MaxEpisodeSteps) {
   int total_iter = 100;
   atari::AtariEnvSpec spec(config);
   atari::AtariEnvPool envpool(spec);
-  Array all_env_ids(Spec<int>({batch}));
+  TArray all_env_ids(Spec<int>({batch}));
   for (int i = 0; i < batch; ++i) {
     all_env_ids[i] = i;
   }
   envpool.Reset(all_env_ids);
-  std::vector<Array> raw_action(3);
-  AtariAction action(&raw_action);
+  AtariAction action;
   int count = 0;
   for (int i = 0; i < total_iter; ++i) {
-    auto state_vec = envpool.Recv();
-    AtariState state(&state_vec);
+    AtariState state(envpool.Recv());
     auto elapsed_step = state["elapsed_step"_];
     for (int j = 0; j < batch; ++j) {
       EXPECT_EQ(count, static_cast<int>(elapsed_step[j]));
@@ -169,7 +165,7 @@ TEST(AtariEnvTest, MaxEpisodeSteps) {
     }
     action["env_id"_] = state["info:env_id"_];
     action["players.env_id"_] = state["info:env_id"_];
-    action["action"_] = Array(Spec<int>({batch}));
+    action["action"_] = TArray(Spec<int>({batch}));
     for (int j = 0; j < batch; ++j) {
       action["action"_][j] = 0;
     }
@@ -188,18 +184,16 @@ TEST(AtariEnvTest, EpisodicLife) {
   config["task"_] = "pong";
   atari::AtariEnvSpec spec(config);
   atari::AtariEnvPool envpool(spec);
-  Array all_env_ids(Spec<int>({batch}));
+  TArray all_env_ids(Spec<int>({batch}));
   for (int i = 0; i < batch; ++i) {
     all_env_ids[i] = i;
   }
   envpool.Reset(all_env_ids);
-  std::vector<Array> raw_action(3);
-  AtariAction action(&raw_action);
+  AtariAction action;
   std::vector<bool> last_done(batch);
   std::vector<int> last_lives(batch);
   for (int i = 0; i < total_iter; ++i) {
-    auto state_vec = envpool.Recv();
-    AtariState state(&state_vec);
+    AtariState state(envpool.Recv());
     auto done = state["done"_];
     auto lives = state["info:lives"_];
     for (int j = 0; j < batch; ++j) {
@@ -211,7 +205,7 @@ TEST(AtariEnvTest, EpisodicLife) {
     }
     action["env_id"_] = state["info:env_id"_];
     action["players.env_id"_] = state["info:env_id"_];
-    action["action"_] = Array(Spec<int>({batch}));
+    action["action"_] = TArray(Spec<int>({batch}));
     for (int j = 0; j < batch; ++j) {
       action["action"_][j] = std::rand() % 6;
     }
@@ -225,8 +219,7 @@ TEST(AtariEnvTest, EpisodicLife) {
   last_lives = std::vector<int>(4);
   last_done = std::vector<bool>(4, true);
   for (int i = 0; i < total_iter; ++i) {
-    auto state_vec = envpool2.Recv();
-    AtariState state(&state_vec);
+    AtariState state(envpool2.Recv());
     auto done = state["done"_];
     auto lives = state["info:lives"_];
     for (int j = 0; j < batch; ++j) {
@@ -250,7 +243,7 @@ TEST(AtariEnvTest, EpisodicLife) {
     }
     action["env_id"_] = state["info:env_id"_];
     action["players.env_id"_] = state["info:env_id"_];
-    action["action"_] = Array(Spec<int>({batch}));
+    action["action"_] = TArray(Spec<int>({batch}));
     for (int j = 0; j < batch; ++j) {
       action["action"_][j] = i % 4;
     }
@@ -271,22 +264,18 @@ TEST(AtariEnvTest, ZeroDiscountOnLifeLoss) {
   config["zero_discount_on_life_loss"_] = true;
   atari::AtariEnvSpec spec2(config);
   atari::AtariEnvPool envpool2(spec2);
-  Array all_env_ids(Spec<int>({batch}));
+  TArray all_env_ids(Spec<int>({batch}));
   for (int i = 0; i < batch; ++i) {
     all_env_ids[i] = i;
   }
   envpool.Reset(all_env_ids);
   envpool2.Reset(all_env_ids);
-  std::vector<Array> raw_action(3);
-  AtariAction action(&raw_action);
+  AtariAction action;
   std::vector<bool> last_done(batch, true);
   std::vector<int> last_lives(batch);
   for (int i = 0; i < total_iter; ++i) {
-    auto state_vec = envpool.Recv();
-    auto state_vec2 = envpool2.Recv();
-    AtariState state(&state_vec);
-    AtariState state2(&state_vec2);
-
+    AtariState state(envpool.Recv());
+    AtariState state2(envpool2.Recv());
     auto done = state["done"_];
     auto lives = state["info:lives"_];
     auto discount = state["discount"_];
@@ -326,7 +315,7 @@ TEST(AtariEnvTest, ZeroDiscountOnLifeLoss) {
     }
     action["env_id"_] = state["info:env_id"_];
     action["players.env_id"_] = state["info:env_id"_];
-    action["action"_] = Array(Spec<int>({batch}));
+    action["action"_] = TArray(Spec<int>({batch}));
     for (int j = 0; j < batch; ++j) {
       action["action"_][j] = i % 4;
     }
@@ -351,22 +340,20 @@ TEST(AtariEnvSpeedTest, Benchmark) {
   config["thread_affinity_offset"_] = 0;
   atari::AtariEnvSpec spec(config);
   atari::AtariEnvPool envpool(spec);
-  Array all_env_ids(Spec<int>({num_envs}));
+  TArray all_env_ids(Spec<int>({num_envs}));
   for (int i = 0; i < num_envs; ++i) {
     all_env_ids[i] = i;
   }
   envpool.Reset(all_env_ids);
-  std::vector<Array> raw_action(3);
-  AtariAction action(&raw_action);
-  action["action"_] = Array(Spec<int>({batch}));
+  AtariAction action;
+  action["action"_] = TArray(Spec<int>({batch}));
   for (int j = 0; j < batch; ++j) {
     action["action"_][j] = 1;
   }
   auto start = std::chrono::system_clock::now();
   for (int i = 0; i < total_iter; ++i) {
     // recv
-    auto state_vec = envpool.Recv();
-    AtariState state(&state_vec);
+    AtariState state(envpool.Recv());
     auto env_id = state["info:env_id"_];
     // EXPECT_EQ(env_id.Shape(),
     // std::vector<std::size_t>({(std::size_t)batch}));
 
--- a/envpool/box2d/box2d_correctness_test.py
+++ b/envpool/box2d/box2d_correctness_test.py
@@ -262,9 +262,7 @@ def solve_bipedal_walker(
       env_id = env_id[~done]
       hs = hs[~done]
 
-      ah = [
-        self.heuristic_bipedal_walker_policy(s, h) for s, h in zip(obs, hs)
-      ]
+      ah = [self.heuristic_bipedal_walker_policy(s, h) for s, h in zip(obs, hs)]
       action = np.array([i[0] for i in ah])
       hs = np.array([i[1] for i in ah])
Original file line number	Diff line number	Diff line change
`@@ -33,9 +33,8 @@ def run(env, num_envs, total_step, async_):`
`33`	`33`	`)`
`34`	`34`	`else:`
`35`	`35`	`env = gym.vector.make(`
`36`		`- task_id, num_envs, async_, lambda e: wrap_deepmind(`
`37`		`- e, episode_life=False, clip_rewards=False, frame_stack=4`
`38`		`- )`
	`36`	`+ task_id, num_envs, async_, lambda e:`
	`37`	`+ wrap_deepmind(e, episode_life=False, clip_rewards=False, frame_stack=4)`
`39`	`38`	`)`
`40`	`39`	`elif env == "mujoco":`
`41`	`40`	`task_id = "Ant-v3"`