From 9c4e2a1ba1a4caeec098398f0f4d7cc6c80bd01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6lle?= Date: Wed, 10 May 2023 16:56:07 +0200 Subject: [PATCH] Add MessySMACliteEnv --- README.md | 18 +++++++- example.py | 2 + smaclite/__init__.py | 17 +++++--- smaclite/env/__init__.py | 3 +- smaclite/env/messy_smaclite.py | 79 ++++++++++++++++++++++++++++++++++ 5 files changed, 111 insertions(+), 8 deletions(-) create mode 100644 smaclite/env/messy_smaclite.py diff --git a/README.md b/README.md index a7b9512..8e53cc0 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,20 @@ # SMAClite - Starcraft Mulit-Agent Challenge lite + This is a repository for the SMAClite environment. It is a (nearly) pure Python reimplementation of the Starcraft Multi-Agent Challenge, using Numpy and OpenAI Gym. ## Features + The main features of this environment include: + * A fully functional Python implementation of the SMAC environment * A JSON interface for defining units and scenarios * Compatibility with the OpenAI Gym API * (optional) a highly-performant [C++ implementation](https://github.com/micadam/SMAClite-Python-RVO2) of the collision avoidance algorithm ## Available units + The following units are available in this environment: + * baneling * colossus * marauder @@ -19,8 +24,11 @@ The following units are available in this environment: * stalker * zealot * zergling + ## Available scenarios -The following scenarios are available in this environment: + +The following scenarios are available in this environment (Scenarios are also available in the MessySMAClite version): + * 10m_vs_11m * 27m_vs_30m * 2c_vs_64zg * 3s5z * 3s5z_vs_3s6z * 3s_vs_5z * bane_vs_bane * corridor * mmm * mmm2 Note that further scenarios can easily be added by modifying or creating a scenario JSON file. + ## Installation + Run + ``` pip install . 
``` + In the SMAClite directory ## Running + As far as we are aware, this project fully adheres to the [OpenAI Gym API](https://www.gymlibrary.dev/), so it can be used with any framework capable of interfacing with Gym-capable environments. We recommend the [ePyMARL](https://github.com/uoe-agents/epymarl) framework, made available in our repository. EPyMARL uses `yaml` files to specify run configurations. To train a model in the `MMM2` scenario using the `MAPPO` algorithm, you can use this example command: + ``` python3 src/main.py --config=mappo --env-config=gymma with seed=1 env_args.time_limit=120 env_args.key="smaclite:smaclite/MMM2-v0 ``` -Note that to use the C++ version of the collision avoidance algorithm, you will have to add the line `use_cpp_rvo2: true` to the `yaml` config file you're referencing, since Sacred does not allow defining new config entries in the command itself. \ No newline at end of file +Note that to use the C++ version of the collision avoidance algorithm, you will have to add the line `use_cpp_rvo2: true` to the `yaml` config file you're referencing, since Sacred does not allow defining new config entries in the command itself. 
diff --git a/example.py b/example.py index 9334ff2..13d9ad8 100644 --- a/example.py +++ b/example.py @@ -13,6 +13,8 @@ def main(): env = "MMM2" env = gym.make(f"smaclite/{env}-v0", use_cpp_rvo2=USE_CPP_RVO2) + # env = gym.make(f"messy-smaclite/{env}-v0", + # use_cpp_rvo2=USE_CPP_RVO2) # Messy SMAClite episode_num = 20 total_time = 0 total_timesteps = 0 diff --git a/smaclite/__init__.py b/smaclite/__init__.py index 1feea01..3fa5b80 100644 --- a/smaclite/__init__.py +++ b/smaclite/__init__.py @@ -3,8 +3,15 @@ for preset in MapPreset: map_info = preset.value - gym.register(f"smaclite/{map_info.name}-v0", - entry_point="smaclite.env:SMACliteEnv", - kwargs={"map_info": map_info}) -gym.register("smaclite/custom-v0", - entry_point="smaclite.env:SMACliteEnv") + gym.register( + f"smaclite/{map_info.name}-v0", + entry_point="smaclite.env:SMACliteEnv", + kwargs={"map_info": map_info}, + ) + gym.register( + f"messy-smaclite/{map_info.name}-v0", + entry_point="smaclite.env:MessySMACliteEnv", + kwargs={"map_info": map_info}, + ) +gym.register("smaclite/custom-v0", entry_point="smaclite.env:SMACliteEnv") +gym.register("messy-smaclite/custom-v0", entry_point="smaclite.env:MessySMACliteEnv") diff --git a/smaclite/env/__init__.py b/smaclite/env/__init__.py index 587eee4..f8d7e64 100644 --- a/smaclite/env/__init__.py +++ b/smaclite/env/__init__.py @@ -1,3 +1,4 @@ from smaclite.env.smaclite import SMACliteEnv +from smaclite.env.messy_smaclite import MessySMACliteEnv -__all__ = ["SMACliteEnv"] +__all__ = ["SMACliteEnv", "MessySMACliteEnv"] diff --git a/smaclite/env/messy_smaclite.py b/smaclite/env/messy_smaclite.py new file mode 100644 index 0000000..9583329 --- /dev/null +++ b/smaclite/env/messy_smaclite.py @@ -0,0 +1,79 @@ +from typing import List, Tuple +from smaclite.env.smaclite import SMACliteEnv +import numpy as np + +from smaclite.env.maps.map import MapInfo +from smaclite.env.units.unit import Unit + + +class MessySMACliteEnv(SMACliteEnv): + """Messy Version of SMAClite from 
https://arxiv.org/abs/2301.01649""" + + def __init__( + self, + map_info: MapInfo = None, + map_file: str = None, + seed=None, + use_cpp_rvo2=False, + initial_random_steps=10, + failure_obs_prob=0.15, + failure_factor=-1.0, + **kwargs, + ): + """Initializes the environment. Note that one of map_info or map_file + is always required. + + Args: + initial_random_steps (int, default=10): The amount of steps that the random walker takes at the start of the episode + failure_obs_prob (float, default=0.15): The probability of an observation failing + failure_factor (float, default=-1.0): The value that the observation is multiplied with, in case of failure + """ + # Unpack arguments from sacred + self.initial_random_steps = initial_random_steps + self.failure_obs_prob = failure_obs_prob + self.failure_factor = failure_factor + super().__init__(map_info, map_file, seed, use_cpp_rvo2, **kwargs) + + def __get_agent_obs( + self, unit: Unit, visible_allies: List[Unit], visible_enemies: List[Unit] + ): + obs = super().__get_agent_obs(unit, visible_allies, visible_enemies)  # NOTE(review): double-underscore name mangling makes this super() lookup resolve to _MessySMACliteEnv__get_agent_obs, which the parent cannot define; the parent likewise never dispatches to this override -- confirm, a single-underscore method name is likely intended + + if np.random.rand() <= self.failure_obs_prob:  # NOTE(review): uses NumPy's global RNG, so the seed passed to the env is ignored here -- confirm reproducibility is not required + return self.failure_factor * obs + + return obs + + def random_walk(self, max_steps: int):  # Returns True if all max_steps completed without the episode ending, False on early termination. + steps = 0 + + while steps < max_steps: + actions = [] + for agent_avail_actions in self.get_avail_actions(): + avail_actions_ind = np.nonzero(agent_avail_actions)[0] + action = int(np.random.choice(avail_actions_ind))  # NOTE(review): global NumPy RNG again -- see note above + actions.append(action) + + _, _, done, _ = self.step(actions) + steps += 1 + + if done: + return False + + return True + + def reset( + self, seed=None, return_info=False, options=None + ) -> Tuple[np.ndarray, dict]: + super().reset(seed, return_info, options) + + done = self.random_walk(self.initial_random_steps)  # NOTE(review): random_walk returns True on a FULL walk and False on early termination, so this flag reads inverted relative to the comment below -- confirm which case should trigger the fallback reset + + # Fallback if episode has already terminated + if done: + return super().reset(seed, return_info, options) + + if return_info: + return self.__get_obs, self.__get_info()  # NOTE(review): __get_obs lacks call parentheses, and both names mangle to _MessySMACliteEnv__get_obs / __get_info which this class does not define -- likely AttributeError at runtime, confirm + + return self.__get_obs()  # NOTE(review): same name-mangling issue as above -- confirm