From cce6986f8862c2f069c41b7a9186f683acc705c0 Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Mon, 10 Feb 2025 00:12:32 -0300 Subject: [PATCH 1/8] Add BipedalWalker initial state manager --- gymnasium/envs/box2d/bipedal_walker.py | 113 +++++++++++++++++++++---- 1 file changed, 97 insertions(+), 16 deletions(-) diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 94dda8bfcd..019c6eb255 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -1,6 +1,7 @@ __credits__ = ["Andrea PIERRÉ"] import math +from copy import deepcopy from typing import TYPE_CHECKING, List, Optional import numpy as np @@ -80,6 +81,51 @@ ) +class TerrainMetadata: + def __init__(self, metadata: dict = {}): + self._states, self._counters = [], [] # Control parameters + self._metadata = {0: [], 1: [], 2: [], 3: []} # Random values for terrain types + + if metadata: + self.__from_dict(metadata) # Copy values from existing metadata + else: + self.__generate = True # New values should be generated + + def get_dict(self): + return dict( + states=deepcopy(self._states), + counters=deepcopy(self._counters), + metadata=deepcopy(self._metadata), + ) + + def __from_dict(self, metadata: dict): + self._states: list = deepcopy(metadata.get("states")) + self._counters: list = deepcopy(metadata.get("counters")) + self._metadata: list = deepcopy(metadata.get("metadata")) + self.__generate = False + + def mode(self) -> bool: + return self.__generate + + def get_metadata(self, state: int) -> any: + return self._metadata[state].pop(0) + + def set_metadata(self, state: int, value: any): + self._metadata[state].append(value) + + def get_state(self) -> int: + return self._states.pop(0) + + def add_state(self, state: int): + self._states.append(state) + + def get_counter(self) -> int: + return self._counters.pop(0) + + def add_counter(self, counter: int): + self._counters.append(counter) + + class ContactDetector(contactListener): def __init__(self, env): contactListener.__init__(self) @@ -282,6 +328,8 @@ def _destroy(self): self.joints = [] def _generate_terrain(self, hardcore): + generate = self.terrain_metadata.mode() + GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) state = GRASS velocity = 0.0 @@ -299,13 +347,22 @@ def _generate_terrain(self, hardcore): self.terrain_x.append(x) if state == GRASS and not oneshot: - velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) - if i > TERRAIN_STARTPAD: - velocity += self.np_random.uniform(-1, 1) / SCALE # 1 + if generate: + velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) + if i > TERRAIN_STARTPAD: + velocity += self.np_random.uniform(-1, 1) / SCALE # 1 + self.terrain_metadata.set_metadata(state=GRASS, value=velocity) + else: + velocity = self.terrain_metadata.get_metadata(state=GRASS) y += velocity elif state == PIT and oneshot: - counter = self.np_random.integers(3, 5) + if generate: + counter = self.np_random.integers(3, 5) + self.terrain_metadata.set_metadata(state=PIT, value=counter) + else: + counter = self.terrain_metadata.get_metadata(state=PIT) + poly = [ (x, y), (x + TERRAIN_STEP, y), @@ -332,7 +389,12 @@ def _generate_terrain(self, hardcore): y -= 4 * TERRAIN_STEP elif state == STUMP and oneshot: - counter = self.np_random.integers(1, 3) + if generate: + counter = self.np_random.integers(1, 3) + self.terrain_metadata.set_metadata(state=STUMP, value=counter) + else: + counter = self.terrain_metadata.get_metadata(state=STUMP) + poly = [ (x, y), (x + counter * TERRAIN_STEP, y), @@ -345,9 +407,18 @@ def _generate_terrain(self, hardcore): self.terrain.append(t) elif state == STAIRS and oneshot: - stair_height = +1 if self.np_random.random() > 0.5 else -1 - stair_width = self.np_random.integers(4, 5) - stair_steps = self.np_random.integers(3, 5) + if generate: + stair_height = +1 if self.np_random.random() > 0.5 else -1 + stair_width = self.np_random.integers(4, 5) + stair_steps = self.np_random.integers(3, 5) + self.terrain_metadata.set_metadata( + state=STAIRS, value=(stair_height, stair_width, stair_steps) + ) + else: + stair_height, stair_width, stair_steps = ( + self.terrain_metadata.get_metadata(state=STAIRS) + ) + original_y = y for s in range(stair_steps): poly = [ @@ -383,9 +454,18 @@ def _generate_terrain(self, hardcore): self.terrain_y.append(y) counter -= 1 if counter == 0: - counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS) + if generate: + counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS) + self.terrain_metadata.add_counter(counter) + else: + counter = self.terrain_metadata.get_counter() + if state == GRASS and hardcore: - state = self.np_random.integers(1, _STATES_) + if generate: + state = self.np_random.integers(1, _STATES_) + self.terrain_metadata.add_state(state) + else: + state = self.terrain_metadata.get_state() oneshot = True else: state = GRASS @@ -429,12 +509,7 @@ def _generate_clouds(self): x2 = max(p[0] for p in poly) self.cloud_poly.append((poly, x1, x2)) - def reset( - self, - *, - seed: Optional[int] = None, - options: Optional[dict] = None, - ): + def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) self._destroy() self.world.contactListener_bug_workaround = ContactDetector(self) @@ -444,6 +519,12 @@ def reset( self.scroll = 0.0 self.lidar_render = 0 + if options and "metadata" in options.keys(): + metadata = options.get("metadata") + self.terrain_metadata = TerrainMetadata(metadata) + else: + self.terrain_metadata = TerrainMetadata() + self._generate_terrain(self.hardcore) self._generate_clouds() From d4e43a6356d3acc2bb5425dcb9192dc2120eed32 Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Mon, 10 Feb 2025 00:35:33 -0300 Subject: [PATCH 2/8] Add BipedalWalker interface for retrieving terrain metadata --- gymnasium/envs/box2d/bipedal_walker.py | 35 +++++++++++++++----------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 019c6eb255..f72d49ff5c 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -312,6 +312,7 @@ def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False): self.render_mode = render_mode self.screen: Optional[pygame.Surface] = None self.clock = None + self._terrain_metadata = None def _destroy(self): if not self.terrain: @@ -327,8 +328,12 @@ def _destroy(self): self.legs = [] self.joints = [] + def terrain_metadata(self): + if self._terrain_metadata: + return self._terrain_metadata.get_dict() + def _generate_terrain(self, hardcore): - generate = self.terrain_metadata.mode() + generate = self._terrain_metadata.mode() GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) state = GRASS @@ -351,17 +356,17 @@ def _generate_terrain(self, hardcore): velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1) / SCALE # 1 - self.terrain_metadata.set_metadata(state=GRASS, value=velocity) + self._terrain_metadata.set_metadata(state=GRASS, value=velocity) else: - velocity = self.terrain_metadata.get_metadata(state=GRASS) + velocity = self._terrain_metadata.get_metadata(state=GRASS) y += velocity elif state == PIT and oneshot: if generate: counter = self.np_random.integers(3, 5) - self.terrain_metadata.set_metadata(state=PIT, value=counter) + self._terrain_metadata.set_metadata(state=PIT, value=counter) else: - counter = self.terrain_metadata.get_metadata(state=PIT) + counter = self._terrain_metadata.get_metadata(state=PIT) poly = [ (x, y), @@ -391,9 +396,9 @@ def _generate_terrain(self, hardcore): elif state == STUMP and oneshot: if generate: counter = self.np_random.integers(1, 3) - self.terrain_metadata.set_metadata(state=STUMP, value=counter) + self._terrain_metadata.set_metadata(state=STUMP, value=counter) else: - counter = self.terrain_metadata.get_metadata(state=STUMP) + counter = self._terrain_metadata.get_metadata(state=STUMP) poly = [ (x, y), @@ -411,12 +416,12 @@ def _generate_terrain(self, hardcore): stair_height = +1 if self.np_random.random() > 0.5 else -1 stair_width = self.np_random.integers(4, 5) stair_steps = self.np_random.integers(3, 5) - self.terrain_metadata.set_metadata( + self._terrain_metadata.set_metadata( state=STAIRS, value=(stair_height, stair_width, stair_steps) ) else: stair_height, stair_width, stair_steps = ( - self.terrain_metadata.get_metadata(state=STAIRS) + self._terrain_metadata.get_metadata(state=STAIRS) ) original_y = y @@ -456,16 +461,16 @@ def _generate_terrain(self, hardcore): if counter == 0: if generate: counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS) - self.terrain_metadata.add_counter(counter) + self._terrain_metadata.add_counter(counter) else: - counter = self.terrain_metadata.get_counter() + counter = self._terrain_metadata.get_counter() if state == GRASS and hardcore: if generate: state = self.np_random.integers(1, _STATES_) - self.terrain_metadata.add_state(state) + self._terrain_metadata.add_state(state) else: - state = self.terrain_metadata.get_state() + state = self._terrain_metadata.get_state() oneshot = True else: state = GRASS @@ -521,9 +526,9 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): if options and "metadata" in options.keys(): metadata = options.get("metadata") - self.terrain_metadata = TerrainMetadata(metadata) + self._terrain_metadata = TerrainMetadata(metadata) else: - self.terrain_metadata = TerrainMetadata() + self._terrain_metadata = TerrainMetadata() self._generate_terrain(self.hardcore) self._generate_clouds() From 93884def7583ec60698245fa16f473148ffcf92e Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Tue, 11 Mar 2025 12:14:42 -0300 Subject: [PATCH 3/8] Add designed environment mapping --- gymnasium/envs/box2d/bipedal_walker.py | 95 +++++++++++++++++++------- 1 file changed, 69 insertions(+), 26 deletions(-) diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index f72d49ff5c..335b8d870b 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -82,27 +82,75 @@ class TerrainMetadata: + """ + ## Description + This is metadata object handler for the BipedalWalker environment. + + + + ## Credits + Created by Arthur Plautz Ventura + + """ + def __init__(self, metadata: dict = {}): - self._states, self._counters = [], [] # Control parameters - self._metadata = {0: [], 1: [], 2: [], 3: []} # Random values for terrain types + self._states = [] # Control parameters + self._metadata = {1: [], 2: [], 3: []} # Random values for terrain types + self._variations = True + self.__generate = False if metadata: self.__from_dict(metadata) # Copy values from existing metadata else: self.__generate = True # New values should be generated + @property + def grass_variations(self): + return self._variations + + def _pit_length(self, metadata=None): + return 4 + + def _stairs_length(self, metadata): + _, stair_width, stair_steps = metadata + return stair_width * stair_steps + + def _stump_length(self, metadata): + return metadata + + def get_obstacles_length(self): + total_length = 0 + state_length = { + 1: self._stump_length, + 2: self._stairs_length, + 3: self._pit_length, + } + + n_states = len(self._states) + metadata = deepcopy(self._metadata) + for state in self._states: + length_map = state_length[state] + total_length += length_map(metadata[state].pop(0)) + return total_length, n_states + def get_dict(self): return dict( states=deepcopy(self._states), - counters=deepcopy(self._counters), metadata=deepcopy(self._metadata), + variations=self._variations, ) def __from_dict(self, metadata: dict): - self._states: list = deepcopy(metadata.get("states")) - self._counters: list = deepcopy(metadata.get("counters")) - self._metadata: list = deepcopy(metadata.get("metadata")) - self.__generate = False + if metadata.get("designed", False): + self._variations = metadata.get("variations", True) + for state_obj in metadata.get("states", []): + state = state_obj["state"] + self._states.append(state) + self._metadata[state].append(state_obj["metadata"]) + else: + self._states = metadata.get("states", self._states) + self._metadata = metadata.get("metadata", self._metadata) + self._variations = metadata.get("variations", self._variations) def mode(self) -> bool: return self.__generate @@ -119,12 +167,6 @@ def get_state(self) -> int: def add_state(self, state: int): self._states.append(state) - def get_counter(self) -> int: - return self._counters.pop(0) - - def add_counter(self, counter: int): - self._counters.append(counter) - class ContactDetector(contactListener): def __init__(self, env): @@ -334,6 +376,13 @@ def terrain_metadata(self): def _generate_terrain(self, hardcore): generate = self._terrain_metadata.mode() + if not generate: + obstacles_length, n_obstacles = ( + self._terrain_metadata.get_obstacles_length() + ) + self.terrain_grass = (TERRAIN_LENGTH - obstacles_length) // n_obstacles + else: + self.terrain_grass = TERRAIN_GRASS GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) state = GRASS @@ -352,13 +401,9 @@ def _generate_terrain(self, hardcore): self.terrain_x.append(x) if state == GRASS and not oneshot: - if generate: - velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) - if i > TERRAIN_STARTPAD: - velocity += self.np_random.uniform(-1, 1) / SCALE # 1 - self._terrain_metadata.set_metadata(state=GRASS, value=velocity) - else: - velocity = self._terrain_metadata.get_metadata(state=GRASS) + velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) + if self._terrain_metadata.grass_variations and i > TERRAIN_STARTPAD: + velocity += self.np_random.uniform(-1, 1) / SCALE # 1 y += velocity elif state == PIT and oneshot: @@ -367,6 +412,8 @@ def _generate_terrain(self, hardcore): self._terrain_metadata.set_metadata(state=PIT, value=counter) else: counter = self._terrain_metadata.get_metadata(state=PIT) + if not counter: + counter = self.np_random.integers(3, 5) poly = [ (x, y), @@ -459,11 +506,7 @@ def _generate_terrain(self, hardcore): self.terrain_y.append(y) counter -= 1 if counter == 0: - if generate: - counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS) - self._terrain_metadata.add_counter(counter) - else: - counter = self._terrain_metadata.get_counter() + counter = self.terrain_grass if state == GRASS and hardcore: if generate: @@ -691,7 +734,7 @@ def step(self, action: np.ndarray): if self.game_over or pos[0] < 0: reward = -100 terminated = True - if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP: + if pos[0] > (TERRAIN_LENGTH - self.terrain_grass) * TERRAIN_STEP: terminated = True if self.render_mode == "human": From 821eb0d1988a81e11e23742cf4d5c9ff6772207a Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Tue, 11 Mar 2025 12:19:21 -0300 Subject: [PATCH 4/8] Add terrain metadata documentation --- gymnasium/envs/box2d/bipedal_walker.py | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 335b8d870b..8ab84d779f 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -86,6 +86,41 @@ class TerrainMetadata: ## Description This is metadata object handler for the BipedalWalker environment. + ## Raw Example + ```python + import gymnasium as gym + + env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human") + env.reset() + + bipedal_env = env.unwrapped + metadata = bipedal_env.terrain_metadata() + options = dict(metadata=metadata) + env.reset(options=options) + ``` + + ## Designed Example + ```python + import gymnasium as gym + + OBSTACLES = dict( + down_stairs=dict(state=2, metadata=(-1, 4, 2)), + up_stairs=dict(state=2, metadata=(1, 4, 2)), + small_stump=dict(state=1, metadata=1), + large_stump=dict(state=1, metadata=3), + hole=dict(state=3, metadata=2), + ) + + env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human") + metadata = dict( + designed=True, + states=[OBSTACLES["up_stairs"], OBSTACLES["hole"], OBSTACLES["large_stump"]], + variations=False, + ) + options = dict(metadata=metadata) + env.reset(options=options) + ``` + ## Credits From b6f6c3fd3b70d3ca2cfc81e78358ac9c5f4579d2 Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Thu, 13 Mar 2025 21:22:25 -0300 Subject: [PATCH 5/8] Add grass variation logic for grass length --- .gitignore | 2 ++ gymnasium/envs/box2d/bipedal_walker.py | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f11b0bec01..ffea89a1ef 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,5 @@ vizdoom.ini # Data generated from pytest save_videos*/ +scripts/ +.vscode diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 8ab84d779f..33a6cf32e2 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -541,7 +541,12 @@ def _generate_terrain(self, hardcore): self.terrain_y.append(y) counter -= 1 if counter == 0: - counter = self.terrain_grass + if self._terrain_metadata.grass_variations: + counter = self.np_random.integers( + self.terrain_grass / 2, self.terrain_grass + ) + else: + counter = self.terrain_grass if state == GRASS and hardcore: if generate: From 34bbd2efd99a795bb9004dc01df8a4e218060812 Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Mon, 24 Mar 2025 20:40:04 -0300 Subject: [PATCH 6/8] Splitted grass variations on x/y --- gymnasium/envs/box2d/bipedal_walker.py | 27 +++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 33a6cf32e2..bc2d4f839e 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -115,7 +115,8 @@ class TerrainMetadata: metadata = dict( designed=True, states=[OBSTACLES["up_stairs"], OBSTACLES["hole"], OBSTACLES["large_stump"]], - variations=False, + x_variations=False, + y_variations=False ) options = dict(metadata=metadata) env.reset(options=options) @@ -131,7 +132,8 @@ class TerrainMetadata: def __init__(self, metadata: dict = {}): self._states = [] # Control parameters self._metadata = {1: [], 2: [], 3: []} # Random values for terrain types - self._variations = True + self._y_variations = True + self._x_variations = False self.__generate = False if metadata: @@ -140,8 +142,12 @@ def __init__(self, metadata: dict = {}): self.__generate = True # New values should be generated @property - def grass_variations(self): - return self._variations + def grass_y_variations(self): + return self._y_variations + + @property + def grass_x_variations(self): + return self._x_variations def _pit_length(self, metadata=None): return 4 @@ -172,12 +178,14 @@ def get_dict(self): return dict( states=deepcopy(self._states), metadata=deepcopy(self._metadata), - variations=self._variations, + x_variations=self._x_variations, + y_variations=self._y_variations, ) def __from_dict(self, metadata: dict): if metadata.get("designed", False): - self._variations = metadata.get("variations", True) + self._y_variations = metadata.get("y_variations", False) + self._x_variations = metadata.get("x_variations", True) for state_obj in metadata.get("states", []): state = state_obj["state"] self._states.append(state) @@ -185,7 +193,8 @@ def __from_dict(self, metadata: dict): else: self._states = metadata.get("states", self._states) self._metadata = metadata.get("metadata", self._metadata) - self._variations = metadata.get("variations", self._variations) + self._y_variations = metadata.get("y_variations", self._y_variations) + self._x_variations = metadata.get("x_variations", self._x_variations) def mode(self) -> bool: return self.__generate @@ -437,7 +446,7 @@ def _generate_terrain(self, hardcore): if state == GRASS and not oneshot: velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) - if self._terrain_metadata.grass_variations and i > TERRAIN_STARTPAD: + if self._terrain_metadata.grass_y_variations and i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1) / SCALE # 1 y += velocity @@ -541,7 +550,7 @@ def _generate_terrain(self, hardcore): self.terrain_y.append(y) counter -= 1 if counter == 0: - if self._terrain_metadata.grass_variations: + if self._terrain_metadata.grass_x_variations: counter = self.np_random.integers( self.terrain_grass / 2, self.terrain_grass ) From d95581db50c73aa845c49886cf7d2bc2dc774216 Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Fri, 11 Apr 2025 09:53:33 -0300 Subject: [PATCH 7/8] Removing TerrainMetadata class - Refactored terrain metadata generation --- gymnasium/envs/box2d/bipedal_walker.py | 271 ++++++++----------------- 1 file changed, 89 insertions(+), 182 deletions(-) diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index bc2d4f839e..981bab2294 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -1,7 +1,6 @@ __credits__ = ["Andrea PIERRÉ"] import math -from copy import deepcopy from typing import TYPE_CHECKING, List, Optional import numpy as np @@ -81,137 +80,6 @@ ) -class TerrainMetadata: - """ - ## Description - This is metadata object handler for the BipedalWalker environment. - - ## Raw Example - ```python - import gymnasium as gym - - env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human") - env.reset() - - bipedal_env = env.unwrapped - metadata = bipedal_env.terrain_metadata() - options = dict(metadata=metadata) - env.reset(options=options) - ``` - - ## Designed Example - ```python - import gymnasium as gym - - OBSTACLES = dict( - down_stairs=dict(state=2, metadata=(-1, 4, 2)), - up_stairs=dict(state=2, metadata=(1, 4, 2)), - small_stump=dict(state=1, metadata=1), - large_stump=dict(state=1, metadata=3), - hole=dict(state=3, metadata=2), - ) - - env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human") - metadata = dict( - designed=True, - states=[OBSTACLES["up_stairs"], OBSTACLES["hole"], OBSTACLES["large_stump"]], - x_variations=False, - y_variations=False - ) - options = dict(metadata=metadata) - env.reset(options=options) - ``` - - - - ## Credits - Created by Arthur Plautz Ventura - - """ - - def __init__(self, metadata: dict = {}): - self._states = [] # Control parameters - self._metadata = {1: [], 2: [], 3: []} # Random values for terrain types - self._y_variations = True - self._x_variations = False - self.__generate = False - - if metadata: - self.__from_dict(metadata) # Copy values from existing metadata - else: - self.__generate = True # New values should be generated - - @property - def grass_y_variations(self): - return self._y_variations - - @property - def grass_x_variations(self): - return self._x_variations - - def _pit_length(self, metadata=None): - return 4 - - def _stairs_length(self, metadata): - _, stair_width, stair_steps = metadata - return stair_width * stair_steps - - def _stump_length(self, metadata): - return metadata - - def get_obstacles_length(self): - total_length = 0 - state_length = { - 1: self._stump_length, - 2: self._stairs_length, - 3: self._pit_length, - } - - n_states = len(self._states) - metadata = deepcopy(self._metadata) - for state in self._states: - length_map = state_length[state] - total_length += length_map(metadata[state].pop(0)) - return total_length, n_states - - def get_dict(self): - return dict( - states=deepcopy(self._states), - metadata=deepcopy(self._metadata), - x_variations=self._x_variations, - y_variations=self._y_variations, - ) - - def __from_dict(self, metadata: dict): - if metadata.get("designed", False): - self._y_variations = metadata.get("y_variations", False) - self._x_variations = metadata.get("x_variations", True) - for state_obj in metadata.get("states", []): - state = state_obj["state"] - self._states.append(state) - self._metadata[state].append(state_obj["metadata"]) - else: - self._states = metadata.get("states", self._states) - self._metadata = metadata.get("metadata", self._metadata) - self._y_variations = metadata.get("y_variations", self._y_variations) - self._x_variations = metadata.get("x_variations", self._x_variations) - - def mode(self) -> bool: - return self.__generate - - def get_metadata(self, state: int) -> any: - return self._metadata[state].pop(0) - - def set_metadata(self, state: int, value: any): - self._metadata[state].append(value) - - def get_state(self) -> int: - return self._states.pop(0) - - def add_state(self, state: int): - self._states.append(state) - - class ContactDetector(contactListener): def __init__(self, env): contactListener.__init__(self) @@ -306,9 +174,15 @@ class BipedalWalker(gym.Env, EzPickle): "render_fps": FPS, } - def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False): + def __init__( + self, + render_mode: Optional[str] = None, + hardcore: bool = False, + fall_down_penalty: bool = True, + ): EzPickle.__init__(self, render_mode, hardcore) self.isopen = True + self.fall_down_penaly = fall_down_penalty self.world = Box2D.b2World() self.terrain: List[Box2D.b2Body] = [] @@ -398,7 +272,7 @@ def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False): self.render_mode = render_mode self.screen: Optional[pygame.Surface] = None self.clock = None - self._terrain_metadata = None + self._terrain_metadata = {} def _destroy(self): if not self.terrain: @@ -414,19 +288,79 @@ def _destroy(self): self.legs = [] self.joints = [] - def terrain_metadata(self): - if self._terrain_metadata: - return self._terrain_metadata.get_dict() + def _process_terrain_metadata(self): + STATES = (1, 2, 3) + STUMP, STAIRS, PIT = STATES - def _generate_terrain(self, hardcore): - generate = self._terrain_metadata.mode() - if not generate: - obstacles_length, n_obstacles = ( - self._terrain_metadata.get_obstacles_length() + # Defines if the terrain should be saved or copied + self._predefined_terrain = bool(self._terrain_metadata) + # Defines if the length of the grass between obstacles should be randomly distributed + self._terrain_grass_x_variation = self._terrain_metadata.get( + "x_variation", False + ) + # Defines if the grass height should randomly vary + self._terrain_grass_y_variation = self._terrain_metadata.get( + "y_variation", False + ) + + if self._predefined_terrain: + states = self._terrain_metadata.get("states", []) + + obstacles_length = [] + for state_object in states: + state, metadata = state_object.values() + if state in STATES: + if state == STUMP: + obstacle_length = metadata # Stump metadata is the stump size + elif state == STAIRS: + _, stair_width, stair_steps = metadata + obstacle_length = ( + stair_width * stair_steps + ) # Stairs total length + elif state == PIT: + obstacle_length = 4 # Default pit x size + obstacles_length.append(obstacle_length) + + # Total grass portion of the terrain + self.terrain_grass = (TERRAIN_LENGTH - sum(obstacles_length)) // len( + obstacles_length ) - self.terrain_grass = (TERRAIN_LENGTH - obstacles_length) // n_obstacles else: self.terrain_grass = TERRAIN_GRASS + self._terrain_metadata = dict(states=[]) + + def _generate_terrain_state(self, state: int) -> any: + GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) + + if self._predefined_terrain: + if state == GRASS: + next_state = self._terrain_metadata["states"][0] + return next_state["state"] + else: + next_state = self._terrain_metadata["states"].pop(0) + state_metadata = next_state["metadata"] + + else: + if state == GRASS: + next_state = self.np_random.integers(1, _STATES_) + return next_state + elif state == STUMP: + state_metadata = self.np_random.integers(1, 3) + elif state == STAIRS: + stair_height = +1 if self.np_random.random() > 0.5 else -1 + stair_width = self.np_random.integers(4, 5) + stair_steps = self.np_random.integers(3, 5) + state_metadata = (stair_height, stair_width, stair_steps) + elif state == PIT: + state_metadata = self.np_random.integers(3, 5) + + state_object = dict(state=state, metadata=state_metadata) + self._terrain_metadata["states"].append(state_object) + + return state_metadata + + def _generate_terrain(self, hardcore): + self._process_terrain_metadata() GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) state = GRASS @@ -446,19 +380,12 @@ def _generate_terrain(self, hardcore): if state == GRASS and not oneshot: velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) - if self._terrain_metadata.grass_y_variations and i > TERRAIN_STARTPAD: + if self._terrain_grass_y_variation and i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1) / SCALE # 1 y += velocity elif state == PIT and oneshot: - if generate: - counter = self.np_random.integers(3, 5) - self._terrain_metadata.set_metadata(state=PIT, value=counter) - else: - counter = self._terrain_metadata.get_metadata(state=PIT) - if not counter: - counter = self.np_random.integers(3, 5) - + counter = self._generate_terrain_state(state) poly = [ (x, y), (x + TERRAIN_STEP, y), @@ -485,12 +412,7 @@ def _generate_terrain(self, hardcore): y -= 4 * TERRAIN_STEP elif state == STUMP and oneshot: - if generate: - counter = self.np_random.integers(1, 3) - self._terrain_metadata.set_metadata(state=STUMP, value=counter) - else: - counter = self._terrain_metadata.get_metadata(state=STUMP) - + counter = self._generate_terrain_state(state) poly = [ (x, y), (x + counter * TERRAIN_STEP, y), @@ -503,17 +425,9 @@ def _generate_terrain(self, hardcore): self.terrain.append(t) elif state == STAIRS and oneshot: - if generate: - stair_height = +1 if self.np_random.random() > 0.5 else -1 - stair_width = self.np_random.integers(4, 5) - stair_steps = self.np_random.integers(3, 5) - self._terrain_metadata.set_metadata( - state=STAIRS, value=(stair_height, stair_width, stair_steps) - ) - else: - stair_height, stair_width, stair_steps = ( - self._terrain_metadata.get_metadata(state=STAIRS) - ) + stair_height, stair_width, stair_steps = self._generate_terrain_state( + state + ) original_y = y for s in range(stair_steps): @@ -550,7 +464,7 @@ def _generate_terrain(self, hardcore): self.terrain_y.append(y) counter -= 1 if counter == 0: - if self._terrain_metadata.grass_x_variations: + if self._terrain_grass_x_variation: counter = self.np_random.integers( self.terrain_grass / 2, self.terrain_grass ) @@ -558,11 +472,7 @@ def _generate_terrain(self, hardcore): counter = self.terrain_grass if state == GRASS and hardcore: - if generate: - state = self.np_random.integers(1, _STATES_) - self._terrain_metadata.add_state(state) - else: - state = self._terrain_metadata.get_state() + state = self._generate_terrain_state(state) oneshot = True else: state = GRASS @@ -616,12 +526,8 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): self.scroll = 0.0 self.lidar_render = 0 - if options and "metadata" in options.keys(): - metadata = options.get("metadata") - self._terrain_metadata = TerrainMetadata(metadata) - else: - self._terrain_metadata = TerrainMetadata() - + if options: + self._terrain_metadata = options.get("metadata", {}) self._generate_terrain(self.hardcore) self._generate_clouds() @@ -781,7 +687,8 @@ def step(self, action: np.ndarray): terminated = False if self.game_over or pos[0] < 0: - reward = -100 + if self.fall_down_penaly: + reward = -100 terminated = True if pos[0] > (TERRAIN_LENGTH - self.terrain_grass) * TERRAIN_STEP: terminated = True From b48a77f50b3bcf7e8271a6f83abc0330826e98eb Mon Sep 17 00:00:00 2001 From: arthur-ventura-astro Date: Tue, 15 Apr 2025 21:59:03 -0300 Subject: [PATCH 8/8] Reset state of terrain metadata for every reset method call --- gymnasium/envs/box2d/bipedal_walker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 981bab2294..c9b36f6d7b 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -526,8 +526,7 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): self.scroll = 0.0 self.lidar_render = 0 - if options: - self._terrain_metadata = options.get("metadata", {}) + self._terrain_metadata = options.get("metadata", {}) if options else {} self._generate_terrain(self.hardcore) self._generate_clouds()