update system prompt

BartekCupial · BartekCupial · commit 308496737b5f · 2025-04-10T18:24:00.000+01:00
diff --git a/balrog/environments/battleships/__init__.py b/balrog/environments/battleships/__init__.py
@@ -33,15 +33,16 @@ def get_instruction_prompt(env, instruction):
 The enemy has the following ships:
 {ships_strings}
 
-In a moment I will present you an observation.
+In a moment I will present you an observation grid. This grid represents the current state of a Battleship game. The format uses the following notation:
+- O: Water (missed shot)
+- X: Hit (part of a ship that has been hit)
+- Z: Sunk (indicates that the entire ship has been sunk)
 
 Tips:
-- When you get a hit, explore adjacent cells to determine ship orientation
-- Avoid targeting cells adjacent to sunken ships
+- When you get a hit, try to sink the ship, as you get more reward for that.
+- Avoid targeting cells adjacent to sunken ships.
 
 IMPORTANT: Your response must be EXACTLY one coordinate in the format of a letter followed by a number (e.g., "E5", "A1", "J10"). Do not provide any explanation or reasoning in your response.
-Valid responses: "A1", "B3", "J10"
-Invalid responses: "A", "1", "Attack A1", "I choose A1"
 
 PLAY
 """.strip()
diff --git a/balrog/environments/battleships/base.py b/balrog/environments/battleships/base.py
@@ -43,11 +43,7 @@ def battleships_process_obsv(self, obs, reward, old_reward):
         text_observation = self.get_text_observation(dataframe)
         feedback = self.get_feedback(reward, old_reward)
 
-        prompt = (
-            f"Objects on the map:\n{text_observation}\n{feedback}"
-            if feedback
-            else f"Objects on the map:\n{text_observation}"
-        )
+        prompt = f"{text_observation}\n{feedback}" if feedback else f"Objects on the map:\n{text_observation}"
 
         obs = defaultdict(lambda: None)
 
@@ -114,16 +110,16 @@ def get_dataframe(self, obs):
         for i in self.sunk_ships:
             sunk_mask = np.logical_or(sunk_mask, self.ships == i)
 
-        board[obs[0] != 0] = "❌"
-        board[obs[1] != 0] = "⚫"
-        board[sunk_mask] = "💥"  # Sunk ships
+        board[obs[0] != 0] = "X"
+        board[obs[1] != 0] = "O"
+        board[sunk_mask] = "Z"  # Sunk ships
 
         num_rows, num_columns = board.shape
         columns = [chr(i) for i in range(ord("A"), ord("A") + num_columns)]
         index = [i + 1 for i in range(num_rows)]
 
         dataframe = pd.DataFrame(board, columns=columns, index=index)
-        dataframe = dataframe.replace([""], "⬜")
+        dataframe = dataframe.replace([""], " ")
 
         return dataframe