Skip to content

Commit 3084967

Browse files
committed
update system prompt
1 parent 706bdf8 commit 3084967

File tree

2 files changed

+11
-14
lines changed

2 files changed

+11
-14
lines changed

balrog/environments/battleships/__init__.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,16 @@ def get_instruction_prompt(env, instruction):
3333
The enemy has the following ships:
3434
{ships_strings}
3535
36-
In a moment I will present you an observation.
36+
In a moment I will present you with an observation grid. This grid represents the current state of a Battleship game. The format uses the following notation:
37+
- O: Water (missed shot)
38+
- X: Hit (part of a ship that has been hit)
39+
- Z: Sunk (indicates that the entire ship has been sunk)
3740
3841
Tips:
39-
- When you get a hit, explore adjacent cells to determine ship orientation
40-
- Avoid targeting cells adjacent to sunken ships
42+
- When you get a hit, try to sink the ship, as you get more reward for that.
43+
- Avoid targeting cells adjacent to sunken ships.
4144
4245
IMPORTANT: Your response must be EXACTLY one coordinate in the format of a letter followed by a number (e.g., "E5", "A1", "J10"). Do not provide any explanation or reasoning in your response.
43-
Valid responses: "A1", "B3", "J10"
44-
Invalid responses: "A", "1", "Attack A1", "I choose A1"
4546
4647
PLAY
4748
""".strip()

balrog/environments/battleships/base.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,7 @@ def battleships_process_obsv(self, obs, reward, old_reward):
4343
text_observation = self.get_text_observation(dataframe)
4444
feedback = self.get_feedback(reward, old_reward)
4545

46-
prompt = (
47-
f"Objects on the map:\n{text_observation}\n{feedback}"
48-
if feedback
49-
else f"Objects on the map:\n{text_observation}"
50-
)
46+
prompt = f"{text_observation}\n{feedback}" if feedback else f"Objects on the map:\n{text_observation}"
5147

5248
obs = defaultdict(lambda: None)
5349

@@ -114,16 +110,16 @@ def get_dataframe(self, obs):
114110
for i in self.sunk_ships:
115111
sunk_mask = np.logical_or(sunk_mask, self.ships == i)
116112

117-
board[obs[0] != 0] = ""
118-
board[obs[1] != 0] = ""
119-
board[sunk_mask] = "💥" # Sunk ships
113+
board[obs[0] != 0] = "X"
114+
board[obs[1] != 0] = "O"
115+
board[sunk_mask] = "Z" # Sunk ships
120116

121117
num_rows, num_columns = board.shape
122118
columns = [chr(i) for i in range(ord("A"), ord("A") + num_columns)]
123119
index = [i + 1 for i in range(num_rows)]
124120

125121
dataframe = pd.DataFrame(board, columns=columns, index=index)
126-
dataframe = dataframe.replace([""], "")
122+
dataframe = dataframe.replace([""], " ")
127123

128124
return dataframe
129125

0 commit comments

Comments
 (0)