Add MessySMACliteEnv #1


Open · wants to merge 1 commit into master
18 changes: 16 additions & 2 deletions README.md
@@ -1,15 +1,20 @@
# SMAClite - Starcraft Multi-Agent Challenge lite

This is a repository for the SMAClite environment. It is a (nearly) pure Python reimplementation of the Starcraft Multi-Agent Challenge, using Numpy and OpenAI Gym.

## Features

The main features of this environment include:

* A fully functional Python implementation of the SMAC environment
* A JSON interface for defining units and scenarios
* Compatibility with the OpenAI Gym API
* (optional) a highly-performant [C++ implementation](https://github.com/micadam/SMAClite-Python-RVO2) of the collision avoidance algorithm

## Available units

The following units are available in this environment:

* baneling
* colossus
* marauder
@@ -19,8 +24,11 @@ The following units are available in this environment:
* stalker
* zealot
* zergling

## Available scenarios
The following scenarios are available in this environment:

The following scenarios are available in this environment (these scenarios are also available in the MessySMAClite version):

* 10m_vs_11m
* 27m_vs_30m
* 2c_vs_64zg
@@ -35,17 +43,23 @@ The following scenarios are available in this environment:
* mmm2

Note that further scenarios can easily be added by modifying or creating a scenario JSON file.

## Installation

Run

```
pip install .
```

in the SMAClite directory.

## Running

As far as we are aware, this project fully adheres to the [OpenAI Gym API](https://www.gymlibrary.dev/), so it can be used with any framework capable of interfacing with Gym environments. We recommend the [ePyMARL](https://github.com/uoe-agents/epymarl) framework, made available in our repository. EPyMARL uses `yaml` files to specify run configurations. To train a model in the `MMM2` scenario using the `MAPPO` algorithm, you can use this example command:

```
python3 src/main.py --config=mappo --env-config=gymma with seed=1 env_args.time_limit=120 env_args.key="smaclite:smaclite/MMM2-v0"
```

Note that to use the C++ version of the collision avoidance algorithm, you will have to add the line `use_cpp_rvo2: true` to the `yaml` config file you're referencing, since Sacred does not allow defining new config entries in the command itself.
2 changes: 2 additions & 0 deletions example.py
@@ -13,6 +13,8 @@ def main():
env = "MMM2"
env = gym.make(f"smaclite/{env}-v0",
use_cpp_rvo2=USE_CPP_RVO2)
# env = gym.make(f"messy-smaclite/{env}-v0",
# use_cpp_rvo2=USE_CPP_RVO2) # Messy SMAClite
episode_num = 20
total_time = 0
total_timesteps = 0
17 changes: 12 additions & 5 deletions smaclite/__init__.py
@@ -3,8 +3,15 @@

for preset in MapPreset:
map_info = preset.value
gym.register(f"smaclite/{map_info.name}-v0",
entry_point="smaclite.env:SMACliteEnv",
kwargs={"map_info": map_info})
gym.register("smaclite/custom-v0",
entry_point="smaclite.env:SMACliteEnv")
gym.register(
f"smaclite/{map_info.name}-v0",
entry_point="smaclite.env:SMACliteEnv",
kwargs={"map_info": map_info},
)
gym.register(
f"messy-smaclite/{map_info.name}-v0",
entry_point="smaclite.env:MessySMACliteEnv",
kwargs={"map_info": map_info},
)
gym.register("smaclite/custom-v0", entry_point="smaclite.env:SMACliteEnv")
gym.register("messy-smaclite/custom-v0", entry_point="smaclite.env:MessySMACliteEnv")
3 changes: 2 additions & 1 deletion smaclite/env/__init__.py
@@ -1,3 +1,4 @@
from smaclite.env.smaclite import SMACliteEnv
from smaclite.env.messy_smaclite import MessySMACliteEnv

__all__ = ["SMACliteEnv"]
__all__ = ["SMACliteEnv", "MessySMACliteEnv"]
79 changes: 79 additions & 0 deletions smaclite/env/messy_smaclite.py
@@ -0,0 +1,79 @@
from typing import List, Tuple
from smaclite.env.smaclite import SMACliteEnv
import numpy as np

from smaclite.env.maps.map import MapInfo
from smaclite.env.units.unit import Unit


class MessySMACliteEnv(SMACliteEnv):
"""Messy Version of SMAClite from https://arxiv.org/abs/2301.01649"""

def __init__(
self,
map_info: MapInfo = None,
map_file: str = None,
seed=None,
use_cpp_rvo2=False,
initial_random_steps=10,
failure_obs_prob=0.15,
failure_factor=-1.0,
**kwargs,
):
"""Initializes the environment. Note that one of map_info or map_file
is always required.

Args:
initial_random_steps (int, default=10): The number of steps the random walker takes at the start of an episode
failure_obs_prob (float, default=0.15): The probability of an observation failing
failure_factor (float, default=-1.0): The value the observation is multiplied by in case of failure
"""
# Unpack arguments from sacred
self.initial_random_steps = initial_random_steps
self.failure_obs_prob = failure_obs_prob
self.failure_factor = failure_factor
super().__init__(map_info, map_file, seed, use_cpp_rvo2, **kwargs)

def __get_agent_obs(
self, unit: Unit, visible_allies: List[Unit], visible_enemies: List[Unit]
):
obs = super().__get_agent_obs(unit, visible_allies, visible_enemies)

if np.random.rand() <= self.failure_obs_prob:
return self.failure_factor * obs

return obs

def random_walk(self, max_steps: int):
steps = 0

while steps < max_steps:
actions = []
for agent_avail_actions in self.get_avail_actions():
avail_actions_ind = np.nonzero(agent_avail_actions)[0]
action = int(np.random.choice(avail_actions_ind))
actions.append(action)

_, _, done, _ = self.step(actions)
steps += 1

# Report whether the episode terminated during the random walk
if done:
return True

return False

def reset(
self, seed=None, return_info=False, options=None
) -> Tuple[np.ndarray, dict]:
super().reset(seed, return_info, options)

done = self.random_walk(self.initial_random_steps)

# Fallback if episode has already terminated
if done:
return super().reset(seed, return_info, options)

if return_info:
return self.__get_obs(), self.__get_info()

return self.__get_obs()
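
As a usage sketch (not part of this diff), the messiness parameters documented in `__init__` could be tuned per experiment via `gym.make`, which forwards extra keyword arguments to the environment constructor; the values below are arbitrary examples:

```
import gym

import smaclite  # noqa: F401  # registers the messy-smaclite/* environment IDs

env = gym.make(
    "messy-smaclite/MMM2-v0",
    initial_random_steps=5,   # random-walk steps taken during reset
    failure_obs_prob=0.25,    # chance that an agent's observation "fails"
    failure_factor=-1.0,      # failed observations are multiplied by this factor
)

obs = env.reset()
```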