update dependencies to battleships

BartekCupial · BartekCupial · commit 3702c0ce3d9e · 2025-04-10T18:24:00.000+01:00
diff --git a/balrog/agents/__init__.py b/balrog/agents/__init__.py
@@ -1,13 +1,14 @@
 from balrog.client import create_llm_client
 
 from ..prompt_builder import create_prompt_builder
+from .battleships_naive import NaiveAgent as BattleshipsNaive
 from .chain_of_thought import ChainOfThoughtAgent
 from .custom import CustomAgent
 from .dummy import DummyAgent
 from .few_shot import FewShotAgent
 from .naive import NaiveAgent
-from .robust_naive import RobustNaiveAgent
 from .robust_cot import RobustCoTAgent
+from .robust_naive import RobustNaiveAgent
 
 
 class AgentFactory:
@@ -44,6 +45,8 @@ def create_agent(self):
 
         if self.config.agent.type == "naive":
             return NaiveAgent(client_factory, prompt_builder)
+        if self.config.agent.type == "battleships_naive":
+            return BattleshipsNaive(client_factory, prompt_builder)
         elif self.config.agent.type == "cot":
             return ChainOfThoughtAgent(client_factory, prompt_builder, config=self.config)
         elif self.config.agent.type == "dummy":
diff --git a/balrog/agents/battleships_naive.py b/balrog/agents/battleships_naive.py
@@ -0,0 +1,61 @@
+import copy
+import re
+
+from balrog.agents.base import BaseAgent
+
+
+class NaiveAgent(BaseAgent):
+    """Battleships variant of the naive agent: asks the LLM for a coordinate directly."""
+
+    def __init__(self, client_factory, prompt_builder):
+        """Initialize the agent and create its LLM client by calling `client_factory`."""
+        super().__init__(client_factory, prompt_builder)
+        self.client = client_factory()
+
+    def act(self, obs, prev_action=None):
+        """Query the LLM for the next move given the observation and previous action.
+
+        Args:
+            obs (dict): The current observation in the environment.
+            prev_action (str, optional): The previous action taken; skipped when falsy.
+
+        Returns:
+            LLMResponse: The LLM response with its `completion` sanitized.
+        """
+        if prev_action:
+            self.prompt_builder.update_action(prev_action)
+
+        self.prompt_builder.update_observation(obs)
+
+        messages = self.prompt_builder.get_prompt()
+
+        naive_instruction = """
+It's your turn. What coordinate would you like to output?
+        """.strip()
+        # NOTE(review): edits the last message in place; confirm get_prompt() returns fresh objects.
+        if messages and messages[-1].role == "user":
+            messages[-1].content += "\n\n" + naive_instruction
+
+        response = self.client.generate(messages)
+
+        final_answer = self._extract_final_answer(response)
+
+        return final_answer
+
+    def _extract_final_answer(self, answer):
+        """Sanitize the final answer, keeping only letters, digits, whitespace and colons.
+
+        Args:
+            answer (LLMResponse): The response from the LLM.
+
+        Returns:
+            LLMResponse: A copy of the response whose `completion` has been filtered.
+        """
+        # Despite its name, the helper also keeps digits, whitespace and ":".
+        def filter_letters(input_string):
+            return re.sub(r"[^a-zA-Z0-9\s:]", "", input_string)
+
+        final_answer = copy.deepcopy(answer)
+        final_answer = final_answer._replace(completion=filter_letters(final_answer.completion))
+
+        return final_answer
diff --git a/balrog/environments/battleships/README.md b/balrog/environments/battleships/README.md
@@ -3,5 +3,6 @@
 ### Installation
 
 ```
+pip install jupyter scipy
 pip install git+https://github.com/thomashirtz/gym-battleship#egg=gym-battleship
 ```