diff --git a/nle/env/__init__.py b/nle/env/__init__.py
index 46da389a7..59a30a1e3 100644
--- a/nle/env/__init__.py
+++ b/nle/env/__init__.py
@@ -16,5 +16,8 @@
 registration.register(id="NetHackGold-v0", entry_point="nle.env.tasks:NetHackGold")
 registration.register(id="NetHackEat-v0", entry_point="nle.env.tasks:NetHackEat")
 registration.register(id="NetHackScout-v0", entry_point="nle.env.tasks:NetHackScout")
+registration.register(
+    id="NetHackChallenge-v0", entry_point="nle.env.tasks:NetHackChallenge"
+)
 
 __all__ = ["NLE", "DUNGEON_SHAPE"]
diff --git a/nle/env/base.py b/nle/env/base.py
index b679d0e81..3658bee7d 100644
--- a/nle/env/base.py
+++ b/nle/env/base.py
@@ -94,7 +94,7 @@
     ),
     (
         "inv_strs",
-        gym.spaces.Box(low=0, high=127, **nethack.OBSERVATION_DESC["inv_strs"]),
+        gym.spaces.Box(low=0, high=255, **nethack.OBSERVATION_DESC["inv_strs"]),
     ),
     (
         "inv_letters",
@@ -116,13 +116,13 @@
     ),
     (
         "tty_chars",
-        gym.spaces.Box(low=0, high=127, **nethack.OBSERVATION_DESC["tty_chars"]),
+        gym.spaces.Box(low=0, high=255, **nethack.OBSERVATION_DESC["tty_chars"]),
     ),
     (
         "tty_colors",
         gym.spaces.Box(
-            low=-15,
-            high=15,
+            low=0,
+            high=31,
             **nethack.OBSERVATION_DESC["tty_colors"],
         ),
     ),
@@ -210,6 +210,7 @@ def __init__(
         options=None,
         wizard=False,
         allow_all_yn_questions=False,
+        allow_all_modes=False,
         space_dict=None,
     ):
         """Constructs a new NLE environment.
@@ -235,11 +236,17 @@ def __init__(
                 If set to True, no y/n questions in step() are declined.
                 If set to False, only elements of SKIP_EXCEPTIONS are not declined.
                 Defaults to False.
+            allow_all_modes (bool):
+                If set to True, do not decline menus, text input or auto 'MORE';
+                known steps are then only performed once the game is over.
+                If set to False, known steps are performed after every step.
+                Defaults to False.
         """
 
         self.character = character
         self._max_episode_steps = max_episode_steps
         self._allow_all_yn_questions = allow_all_yn_questions
+        self._allow_all_modes = allow_all_modes
 
         if actions is None:
             actions = FULL_ACTIONS
@@ -339,6 +346,10 @@ def print_action_meanings(self):
         for a_idx, a in enumerate(self._actions):
             print(a_idx, a)
 
+    def _check_abort(self, observation):
+        """Return True once the episode step limit has been reached."""
+        return self._steps >= self._max_episode_steps
+
     def step(self, action: int):
         """Steps the environment.
 
@@ -360,15 +371,17 @@ def step(self, action: int):
         last_observation = tuple(a.copy() for a in self.last_observation)
 
         observation, done = self.env.step(self._actions[action])
-        observation, done = self._perform_known_steps(
-            observation, done, exceptions=True
-        )
+        is_game_over = observation[self._program_state_index][0] == 1
+        if is_game_over or not self._allow_all_modes:
+            observation, done = self._perform_known_steps(
+                observation, done, exceptions=True
+            )
 
         self._steps += 1
 
         self.last_observation = observation
 
-        if self._steps >= self._max_episode_steps:
+        if self._check_abort(observation):
             end_status = self.StepStatus.ABORTED
         else:
             end_status = self._is_episode_end(observation)
diff --git a/nle/env/tasks.py b/nle/env/tasks.py
index 525b4162f..e64eaffc7 100644
--- a/nle/env/tasks.py
+++ b/nle/env/tasks.py
@@ -278,3 +278,84 @@ def _reward_fn(self, last_observation, observation, end_status):
         self.dungeon_explored[key] = explored
         time_penalty = self._get_time_penalty(last_observation, observation)
         return reward + time_penalty
+
+
+class NetHackChallenge(NetHackScore):
+    """Environment for the NetHack Challenge.
+
+    The task is an augmentation of the standard NLE task. This is the NLE Score Task
+    but with some subtle differences:
+        * the action space is fixed to include the full keyboard
+        * menus and "<More>" tokens are not skipped
+        * starting character is randomly assigned
+    """
+
+    def __init__(
+        self,
+        *args,
+        character="@",
+        allow_all_yn_questions=True,
+        allow_all_modes=True,
+        penalty_mode="constant",
+        penalty_step: float = -0.00,
+        penalty_time: float = -0.0,
+        max_episode_steps: int = 1_000_000,
+        observation_keys=(
+            "glyphs",
+            "chars",
+            "colors",
+            "specials",
+            "blstats",
+            "message",
+            "inv_glyphs",
+            "inv_strs",
+            "inv_letters",
+            "inv_oclasses",
+            "tty_chars",
+            "tty_colors",
+            "tty_cursor",
+        ),
+        no_progress_timeout: int = 10_000,
+        **kwargs,
+    ):
+        actions = nethack.ACTIONS
+        super().__init__(
+            *args,
+            actions=actions,
+            character=character,
+            allow_all_yn_questions=allow_all_yn_questions,
+            allow_all_modes=allow_all_modes,
+            penalty_mode=penalty_mode,
+            penalty_step=penalty_step,
+            penalty_time=penalty_time,
+            max_episode_steps=max_episode_steps,
+            observation_keys=observation_keys,
+            **kwargs,
+        )
+        # If the in-game turn count doesn't change for no_progress_timeout
+        # steps (10_000 by default), we abort.
+        self._turns = None
+        self._no_progress_count = 0
+        self.no_progress_timeout = no_progress_timeout
+
+    def reset(self, *args, **kwargs):
+        """Reset the env and clear the no-progress tracking of the last episode."""
+        self._turns = None
+        self._no_progress_count = 0
+        return super().reset(*args, **kwargs)
+
+    def _check_abort(self, observation):
+        """Check if the step limit is hit, or if the in-game turn counter has
+        not changed for long enough to trigger an abort."""
+
+        # blstats[20] is read as the in-game turn counter.
+        turns = observation[self._blstats_index][20]
+        if self._turns == turns:
+            self._no_progress_count += 1
+        else:
+            self._turns = turns
+            self._no_progress_count = 0
+        return (
+            self._steps >= self._max_episode_steps
+            or self._no_progress_count >= self.no_progress_timeout
+        )
diff --git a/nle/nethack/actions.py b/nle/nethack/actions.py
index d96284893..aa67ab58e 100644
--- a/nle/nethack/actions.py
+++ b/nle/nethack/actions.py
@@ -14,9 +14,22 @@ def C(c):
     return 0x1F & c
 
 
-# Missing here:
-# Some characters for text input (e.g., +).
-# General menu handling isn't part of this either.
+class TextCharacters(enum.IntEnum):
+    PLUS = ord("+")
+    MINUS = ord("-")
+    SPACE = ord(" ")
+    APOS = ord("'")
+    QUOTE = ord('"')
+    NUM_0 = ord("0")
+    NUM_1 = ord("1")
+    NUM_2 = ord("2")
+    NUM_3 = ord("3")
+    NUM_4 = ord("4")
+    NUM_5 = ord("5")
+    NUM_6 = ord("6")
+    NUM_7 = ord("7")
+    NUM_8 = ord("8")
+    NUM_9 = ord("9")
 
 
 class CompassCardinalDirection(enum.IntEnum):
@@ -76,6 +89,12 @@ class MiscAction(enum.IntEnum):
     MORE = ord("\r")  # read the next message
 
 
+class UnsafeActions(enum.IntEnum):
+    # currently these result in an error or undesirable behaviour
+    HELP = ord("?")  # give a help message
+    PREVMSG = C("p")  # view recent game messages
+
+
 class Command(enum.IntEnum):
     EXTCMD = ord("#")  # perform an extended command
     EXTLIST = M("?")  # list all extended commands
@@ -100,7 +119,6 @@ class Command(enum.IntEnum):
     FIGHT = ord("F")  # Prefix: force fight even if you don't see a monster
     FORCE = M("f")  # force a lock
     GLANCE = ord(";")  # show what type of thing a map symbol corresponds to
-    HELP = ord("?")  # give a help message
     HISTORY = ord("V")  # show long version and game history
     INVENTORY = ord("i")  # show your inventory
     INVENTTYPE = ord("I")  # inventory specific item types
@@ -121,7 +139,6 @@ class Command(enum.IntEnum):
     PAY = ord("p")  # pay your shopping bill
     PICKUP = ord(",")  # pick up things at the current location
     PRAY = M("p")  # pray to the gods for help
-    PREVMSG = C("p")  # view recent game messages
     PUTON = ord("P")  # put on an accessory (ring, amulet, etc)
     QUAFF = ord("q")  # quaff (drink) something
     QUIT = M("q")  # exit without saving current game
@@ -132,6 +149,7 @@ class Command(enum.IntEnum):
     RIDE = M("R")  # mount or dismount a saddled steed
     RUB = M("r")  # rub a lamp or a stone
     RUSH = ord("g")  # Prefix: rush until something interesting is seen
+    RUSH2 = ord("G")  # Prefix: rush until something interesting is seen
     SAVE = ord("S")  # save the game and exit
     SEARCH = ord("s")  # search for traps and secret doors
     SEEALL = ord("*")  # show all equipment in use
@@ -163,6 +181,7 @@ class Command(enum.IntEnum):
     + list(MiscDirection)
     + list(MiscAction)
     + list(Command)
+    + list(TextCharacters)
 )
 
 NON_RL_ACTIONS = (
@@ -172,13 +191,11 @@ class Command(enum.IntEnum):
     Command.EXTCMD,  # Potentially useful for some wizard actions.
     Command.EXTLIST,
     Command.GLANCE,
-    Command.HELP,
     Command.HISTORY,
     Command.KNOWN,  # Could potentially be useful.
     Command.KNOWNCLASS,  # Could potentially be useful.
     Command.OPTIONS,
     Command.OVERVIEW,  # Could potentially be useful.
-    Command.PREVMSG,  # Could potentially be useful.
     Command.TELEPORT,
     Command.QUIT,
     Command.REDRAW,
@@ -191,13 +208,15 @@ class Command(enum.IntEnum):
 )
 
 _USEFUL_ACTIONS = list(ACTIONS)
-for action in NON_RL_ACTIONS:
+for action in NON_RL_ACTIONS + tuple(TextCharacters):
     _USEFUL_ACTIONS.remove(action)
+_USEFUL_ACTIONS.append(TextCharacters.SPACE)
 USEFUL_ACTIONS = tuple(_USEFUL_ACTIONS)
 del _USEFUL_ACTIONS
 
 _ACTIONS_DICT = {}
 for enum_class in [
+    TextCharacters,
     CompassDirection,
     CompassDirectionLonger,
     MiscDirection,
diff --git a/nle/scripts/play.py b/nle/scripts/play.py
index 10ba22894..878c415f8 100644
--- a/nle/scripts/play.py
+++ b/nle/scripts/play.py
@@ -45,11 +45,12 @@ def get_action(env, action_mode, is_raw_env):
             action = env.action_space.sample()
         else:
             action = random.choice(_ACTIONS)
+            print(action)
     elif action_mode == "human":
         while True:
             with no_echo():
                 ch = ord(os.read(0, 1))
-            if ch in [nethack.C("c"), ord(b"q")]:
+            if ch == nethack.C("c"):
                 print("Received exit code {}. Aborting.".format(ch))
                 return None
             try:
@@ -67,7 +68,18 @@ def get_action(env, action_mode, is_raw_env):
     return action
 
 
-def play(env, mode, ngames, max_steps, seeds, savedir, no_render, render_mode, debug):
+def play(
+    env,
+    mode,
+    ngames,
+    max_steps,
+    seeds,
+    savedir,
+    no_render,
+    render_mode,
+    print_frames_separately,
+    **kwargs,
+):
     env_name = env
     is_raw_env = env_name == "raw"
 
@@ -100,10 +112,15 @@ def play(env, mode, ngames, max_steps, seeds, savedir, no_render, render_mode, d
     while True:
         if not no_render:
             if not is_raw_env:
-                print("Previous reward:", reward)
-                if action is not None:
-                    print("Previous action: %s" % repr(env._actions[action]))
+                print("--------")
+                print(f"Previous reward: {str(reward):64s}")
+                act_str = repr(env._actions[action]) if action is not None else ""
+                print(f"Previous action: {act_str:64s}")
+                print("--------")
                 env.render(render_mode)
+                print("--------")
+                if not print_frames_separately:
+                    print("\033[31A")  # Go up 31 lines.
             else:
                 print("Previous action:", action)
                 _, chars, _, _, blstats, message, *_ = obs
@@ -114,6 +131,7 @@ def play(env, mode, ngames, max_steps, seeds, savedir, no_render, render_mode, d
                 print(blstats)
 
         action = get_action(env, mode, is_raw_env)
+
         if action is None:
             break
 
@@ -194,7 +212,7 @@ def main():
     parser.add_argument(
         "--max-steps",
         type=int,
-        default=10000,
+        default=1_000_000,
         help="Number of maximum steps per episode.",
     )
     parser.add_argument(
@@ -219,6 +237,12 @@ def main():
         choices=["human", "full", "ansi"],
         help="Render mode. Defaults to 'human'.",
     )
+    parser.add_argument(
+        "--print-frames-separately",
+        "-p",
+        action="store_true",
+        help="Don't overwrite frames, print them all.",
+    )
     flags = parser.parse_args()
 
     if flags.debug:
diff --git a/nle/scripts/ttyplay.py b/nle/scripts/ttyplay.py
index 5d39f2fcc..5a44c60a3 100644
--- a/nle/scripts/ttyplay.py
+++ b/nle/scripts/ttyplay.py
@@ -24,6 +24,12 @@
     action="store_true",
     help="Ignore timestamp data and don't wait between frames",
 )
+parser.add_argument(
+    "-f",
+    "--fixed_frame_wait",
+    action="store_true",
+    help="Wait a fixed time between each frame.",
+)
 parser.add_argument(
     "-s", "--speed", default=1.0, type=float, help="Set playback speed multiplier"
 )
@@ -44,6 +50,9 @@ def wait(diff, speed, drift=0.0):
 
     if FLAGS.no_wait:
         return speed, drift, jump
+    if FLAGS.fixed_frame_wait:
+        time.sleep(0.2 / speed)
+        return speed, drift, jump
 
     start = time.time()
 
@@ -101,6 +110,7 @@ def read_header(f, peek=False, no_input=False):
 
 
 CLRCODE = re.compile(rb"\033\[2?J")  # https://stackoverflow.com/a/37778152/1136208
+INPUTS = ["KeyPress %i" % i for i in range(256)]
 
 
 def process(f):
@@ -115,7 +125,6 @@ def process(f):
     lastpos = 0
 
     frame = 0
-
     for timestamp, length, channel in read_header(
         f, peek=FLAGS.peek, no_input=FLAGS.no_input
     ):
@@ -126,6 +135,12 @@ def process(f):
             continue
 
         if channel == 1:  # Input channel.
+            os.write(
+                1, b"\033[s\033[26;0f\033[37;1mFrame %d:\033[0m " % frame
+            )  # Save Cursor & Jump to L26
+            os.write(1, INPUTS[ord(data)].encode("ascii"))
+            os.write(1, b" " * 32)
+            os.write(1, b" \033[u")  # Jump back Cursor
             continue
 
         if jump == 0 and prev is not None:
@@ -182,7 +197,6 @@ def main():
     # Set to unbuffered, no echo.
     new[3] &= ~(termios.ICANON | termios.ECHO | termios.ECHONL)  # lflags
     termios.tcsetattr(0, termios.TCSANOW, new)
-
     try:
         if FLAGS.peek:
             # Skip all previous data.
diff --git a/nle/scripts/ttyplay2.py b/nle/scripts/ttyplay2.py
new file mode 100644
index 000000000..70b31aa1d
--- /dev/null
+++ b/nle/scripts/ttyplay2.py
@@ -0,0 +1,6 @@
+from nle.nethack.actions import _ACTIONS_DICT
+import nle.scripts.ttyplay as ttyplay
+
+if __name__ == "__main__":
+    ttyplay.ACTIONS = _ACTIONS_DICT
+    ttyplay.main()
diff --git a/src/cmd.c b/src/cmd.c
index 6b285240d..657233570 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -3351,6 +3351,26 @@ int final;
     en_win = WIN_ERR;
 }
 
+int nle_dosave() {
+    pline("You get the feeling there's only one way to save yourself...");
+    return 1;
+}
+
+int nle_done2() {
+    pline("You can't quit now, you're having so much fun!");
+    return 1;
+}
+
+int nle_doset() {
+    pline("The options are already set perfectly for you!");
+    return 1;
+}
+
+int nle_noop() {
+    pline("Noop");
+    return 1;
+}
+
 /* ordered by command name */
 struct ext_func_tab extcmdlist[] = {
     { '#', "#", "perform an extended command",
@@ -3391,7 +3411,7 @@ struct ext_func_tab extcmdlist[] = {
     { '\0', "herecmdmenu", "show menu of commands you can do here",
             doherecmdmenu, IFBURIED },
     { 'V', "history", "show long version and game history",
-            dohistory, IFBURIED | GENERALCMD },
+            nle_noop /* dohistory */, IFBURIED | GENERALCMD },
     { 'i', "inventory", "show your inventory", ddoinv, IFBURIED },
     { 'I', "inventtype", "inventory specific item types",
             dotypeinv, IFBURIED },
@@ -3421,7 +3441,7 @@ struct ext_func_tab extcmdlist[] = {
             dosacrifice, AUTOCOMPLETE },
     { 'o', "open", "open a door", doopen },
     { 'O', "options", "show option settings, possibly change them",
-            doset, IFBURIED | GENERALCMD },
+            nle_doset /* doset */, IFBURIED | GENERALCMD },
     { C('o'), "overview", "show a summary of the explored dungeon",
             dooverview, IFBURIED | AUTOCOMPLETE },
     { '\0', "panic", "test panic routine (fatal to game)",
@@ -3433,7 +3453,7 @@ struct ext_func_tab extcmdlist[] = {
     { M('p'), "pray", "pray to the gods for help",
             dopray, IFBURIED | AUTOCOMPLETE },
     { C('p'), "prevmsg", "view recent game messages",
-            doprev_message, IFBURIED | GENERALCMD },
+            nle_noop /* doprev_message */, IFBURIED | GENERALCMD },
     { 'P', "puton", "put on an accessory (ring, amulet, etc)", doputon },
     { 'q', "quaff", "quaff (drink) something", dodrink },
     { M('q'), "quit", "exit without saving current game",
@@ -3445,7 +3465,7 @@ struct ext_func_tab extcmdlist[] = {
     { M('R'), "ride", "mount or dismount a saddled steed",
             doride, AUTOCOMPLETE },
     { M('r'), "rub", "rub a lamp or a stone", dorub, AUTOCOMPLETE },
-    { 'S', "save", "save the game and exit", dosave, IFBURIED | GENERALCMD },
+    { 'S', "save", "save the game and exit", nle_dosave /* dosave */, IFBURIED | GENERALCMD },
     { 's', "search", "search for traps and secret doors",
             dosearch, IFBURIED, "searching" },
     { '*', "seeall", "show all equipment in use", doprinuse, IFBURIED },