balrog-ai · BartekCupial · Aug 2, 2025 · Aug 2, 2025 · Aug 13, 2025 · Aug 13, 2025
diff --git a/balrog/config/config.yaml b/balrog/config/config.yaml
@@ -41,6 +41,7 @@ envs:
   names: babyai-babaisai-textworld-crafter-nle-minihack   # Environments to evaluate, separated by hyphens
   env_kwargs:
     seed: null                # Random seed; null means a random seed is used
+    render_mode: null
   nle_kwargs:
     character: "@"            # Character representing the agent in NLE
     max_episode_steps: 100_000  # Max steps per episode in NLE

diff --git a/balrog/environments/env_wrapper.py b/balrog/environments/env_wrapper.py
@@ -16,7 +16,7 @@ def __init__(self, env, env_name, task_name):
 
     @property
     def max_steps(self):
-        return self.env.max_steps
+        return int(self.env.max_steps)  # always hand callers a built-in int
 
     def reset(self, **kwargs):
         obs, info = self.env.reset(**kwargs)
@@ -55,7 +55,7 @@ def get_instruction_prompt(self, instructions=None):
         if self.env_name == "nle":
             from balrog.environments.nle import get_instruction_prompt
 
-            return get_instruction_prompt()
+            return get_instruction_prompt(self.env, self.task_name)
         elif self.env_name == "minihack":
             from balrog.environments.minihack import get_instruction_prompt
 

diff --git a/balrog/environments/minihack/__init__.py b/balrog/environments/minihack/__init__.py
@@ -1,51 +1,5 @@
 from nle.language_wrapper.wrappers.nle_language_wrapper import NLELanguageWrapper
 
-ACTIONS = {
-    "north": "move north",
-    "east": "move east",
-    "south": "move south",
-    "west": "move west",
-    "northeast": "move northeast",
-    "southeast": "move southeast",
-    "southwest": "move southwest",
-    "northwest": "move northwest",
-    "far north": "move far north",
-    "far east": "move far east",
-    "far south": "move far south",
-    "far west": "move far west",
-    "far northeast": "move far northeast",
-    "far southeast": "move far southeast",
-    "far southwest": "move far southwest",
-    "far northwest": "move far northwest",
-    "up": "go up the stairs",
-    "down": "go down the stairs",
-    "wait": "rest one move while doing nothing",
-    "more": "display more of the message",
-    "apply": "apply (use) a tool",
-    "close": "close an adjacent door",
-    "open": "open an adjacent door",
-    "eat": "eat something",
-    "force": "force a lock",
-    "kick": "kick an enemy or a locked door or chest",
-    "loot": "loot a box on the floor",
-    "pickup": "pick up things at the current location if there are any",
-    "pray": "pray to the gods for help",
-    "puton": "put on an accessory",
-    "quaff": "quaff (drink) something",
-    "search": "search for hidden doors and passages",
-    "zap": "zap a wand",
-}
-
-
-def get_available_actions(env):
-    available_actions = {}
-    for action in env.actions:
-        action_key = NLELanguageWrapper.all_nle_action_map[action][0]
-        if action_key not in ACTIONS:
-            continue
-        available_actions[action_key] = ACTIONS[action_key]
-    return available_actions
-
 
 def get_instruction_prompt(env, task="MiniHack-ExploreMaze-Hard-Mapped-v0"):
     if "corridor" in task.lower():
@@ -57,7 +11,7 @@ def get_instruction_prompt(env, task="MiniHack-ExploreMaze-Hard-Mapped-v0"):
     else:
         goal = "Your goal is to get as far as possible in the game."
 
-    available_actions = get_available_actions(env)
+    available_actions = env.action_str_desc_map
     action_strings = ",\n".join(f"{action}: {description}" for action, description in available_actions.items())
     instruction_prompt = f"""
 You are an agent playing MiniHack. The following are the possible actions you can take in the game, followed by a short description of each action:

diff --git a/balrog/environments/minihack/minihack_env.py b/balrog/environments/minihack/minihack_env.py
@@ -1,16 +1,12 @@
 from typing import Optional
 
-import gym
-import minihack  # NOQA: F401
+import gymnasium as gym
+from gymnasium import registry
 
+import minihack  # NOQA: F401
 from balrog.environments.nle import AutoMore, NLELanguageWrapper
-from balrog.environments.wrappers import GymV21CompatibilityV0, NLETimeLimit
 
-MINIHACK_ENVS = []
-for env_spec in gym.envs.registry.all():
-    id = env_spec.id
-    if id.split("-")[0] == "MiniHack":
-        MINIHACK_ENVS.append(id)
+MINIHACK_ENVS = [env_spec.id for env_spec in registry.values() if "MiniHack" in env_spec.id]
 
 
 def make_minihack_env(env_name, task, config, render_mode: Optional[str] = None):
@@ -29,14 +25,11 @@ def make_minihack_env(env_name, task, config, render_mode: Optional[str] = None)
             "tty_colors",
         ],
         **minihack_kwargs,
+        render_mode=render_mode,
     )
     if skip_more:
         env = AutoMore(env)
-    env = NLELanguageWrapper(env, vlm=vlm)
 
-    # wrap NLE with timeout
-    env = NLETimeLimit(env)
-
-    env = GymV21CompatibilityV0(env=env, render_mode=render_mode)
+    env = NLELanguageWrapper(env, vlm=vlm)
 
     return env
diff --git a/balrog/environments/nle/__init__.py b/balrog/environments/nle/__init__.py
@@ -20,92 +20,8 @@ class Role(enum.Enum):
     WIZARD = "wiz"
 
 
-ACTIONS = {
-    "north": "move north",
-    "east": "move east",
-    "south": "move south",
-    "west": "move west",
-    "northeast": "move northeast",
-    "southeast": "move southeast",
-    "southwest": "move southwest",
-    "northwest": "move northwest",
-    "far north": "move far north",
-    "far east": "move far east",
-    "far south": "move far south",
-    "far west": "move far west",
-    "far northeast": "move far northeast",
-    "far southeast": "move far southeast",
-    "far southwest": "move far southwest",
-    "far northwest": "move far northwest",
-    "up": "go up a staircase",
-    "down": "go down a staircase (tip: you can only go down if you are standing on the stairs)",
-    "wait": "rest one move while doing nothing",
-    "more": "display more of the message (tip: ONLY ever use when current message ends with --More--)",
-    "annotate": "leave a note about the level",
-    "apply": "apply (use) a tool",
-    "call": "name a monster or object, or add an annotation",
-    "cast": "cast a spell",
-    "close": "close an adjacent door",
-    "open": "open an adjacent door",
-    "dip": "dip an object into something",
-    "drop": "drop an item",
-    "droptype": "drop specific item types (specify in the next prompt)",
-    "eat": "eat something (tip: replenish food when hungry)",
-    "esc": "exit menu or message",
-    "engrave": "engrave writing on the floor (tip: Elbereth)",
-    "enhance": "advance or check weapons skills",
-    "fire": "fire ammunition from quiver",
-    "fight": "fight a monster (even if you only guess one is there)",
-    "force": "force a lock",
-    "inventory": "show your inventory",
-    "invoke": "invoke ",
-    "jump": "jump to a location",
-    "kick": "kick an enemy or a locked door or chest",
-    "look": "look at what is under you",
-    "loot": "loot a box on the floor",
-    "monster": "use a monster's special ability (when polymorphed)",
-    "offer": "offer a sacrifice to the gods (tip: on an aligned altar)",
-    "overview": "display an overview of the dungeon",
-    "pay": "pay your shopping bill",
-    "pickup": "pick up things at the current location",
-    "pray": "pray to the gods for help",
-    "puton": "put on an accessory",
-    "quaff": "quaff (drink) something",
-    "quiver": "select ammunition for quiver",
-    "read": "read a scroll or spellbook",
-    "remove": "remove an accessory",
-    "rub": "rub a lamp or a stone",
-    "search": "search for hidden doors and passages",
-    "swap": "swap wielded and secondary weapons",
-    "takeoff": "take off one piece of armor",
-    "takeoffall": "take off all armor",
-    "teleport": "teleport to another level (if you have the ability)",
-    "throw": "throw something (e.g. a dagger or dart)",
-    "travel": "travel to a specific location on the map (tip: in the next action, specify > or < for stairs, { for fountain, and _ for altar)",
-    "twoweapon": "toggle two-weapon combat",
-    "untrap": "untrap something",
-    "wear": "wear a piece of armor",
-    "wield": "wield a weapon",
-    "wipe": "wipe off your face",
-    "zap": "zap a wand",
-    "minus": "-",
-    "space": " ",
-    "apos": "'",
-    "0": "0",
-    "1": "1",
-    "2": "2",
-    "3": "3",
-    "4": "4",
-    "5": "5",
-    "6": "6",
-    "7": "7",
-    "8": "8",
-    "9": "9",
-}
-
-
-def get_instruction_prompt(task=None):
-    action_strings = ",\n".join(f"{action}: {description}" for action, description in ACTIONS.items())
+def get_instruction_prompt(env, task=None):
+    action_strings = ",\n".join(f"{action}: {description}" for action, description in env.action_str_desc_map.items())
     instruction_prompt = f"""
 You are an agent playing NetHack. The following are the possible actions you can take in the game, followed by a short description of each action:
 

diff --git a/balrog/environments/nle/auto_more.py b/balrog/environments/nle/auto_more.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 from nle import nle_language_obsv
 from nle.nethack import actions as A
 
@@ -9,25 +9,26 @@ def __init__(self, env):
         self.nle_language = nle_language_obsv.NLELanguageObsv()
 
     def reset(self, **kwargs):
-        obs = super().reset(**kwargs)
+        obs, info = self.env.reset(**kwargs)  # wrapped env's reset yields an (obs, info) pair
         obs["text_message"] = self.nle_language.text_message(obs["tty_chars"]).decode("latin-1")
 
-        return obs
+        return obs, info  # info is passed through untouched
 
     def step(self, action):
-        obs, reward, done, info = super().step(action)
+        obs, reward, term, trun, info = self.env.step(action)  # five-value step: two end flags
 
         message = self.nle_language.text_message(obs["tty_chars"]).decode("latin-1")
-
+        done = term or trun  # either flag must stop the --More-- loop below
         while "--More--" in message and not done:
             message = message.replace("--More--", "\n")
 
             action_index = self.env.actions.index(A.MiscAction.MORE)
-            obs, rew, done, info = super().step(action_index)
+            obs, rew, term, trun, info = self.env.step(action_index)
+            done = term or trun  # refresh so the loop exits once the episode ends
             add = self.nle_language.text_message(obs["tty_chars"]).decode("latin-1")
             message += add
             reward += rew
 
         obs["text_message"] = message
 
-        return obs, reward, done, info
+        return obs, reward, term, trun, info  # flags come from the last step actually taken