Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions balrog/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .dummy import DummyAgent
from .few_shot import FewShotAgent
from .naive import NaiveAgent
from .robust_naive import RobustNaiveAgent


class AgentFactory:
Expand Down Expand Up @@ -50,6 +51,8 @@ def create_agent(self):
return CustomAgent(client_factory, prompt_builder)
elif self.config.agent.type == "few_shot":
return FewShotAgent(client_factory, prompt_builder, self.config.agent.max_icl_history)
elif self.config.agent.type == "robust_naive":
return RobustNaiveAgent(client_factory, prompt_builder)

else:
raise ValueError(f"Unknown agent type: {self.config.agent}")
69 changes: 69 additions & 0 deletions balrog/agents/robust_naive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import copy
import re

from balrog.agents.base import BaseAgent


class RobustNaiveAgent(BaseAgent):
    """Naive agent that requests the action in a strict, tagged output format.

    The agent appends a formatting instruction to the last user message asking
    the model to answer with ``<|ACTION|>...</|ACTION|>`` and nothing else, then
    extracts the text between those tags from the completion. No additional
    reasoning step is requested from the model.
    """

    _ACTION_OPEN = "<|ACTION|>"
    _ACTION_CLOSE = "</|ACTION|>"

    # Compiled once at class creation instead of on every call.
    # DOTALL lets the action span line breaks inside the tags.
    _ACTION_PATTERN = re.compile(r"<\|ACTION\|>(.*?)</\|ACTION\|>", re.DOTALL)

    # Instruction appended to the final user message on every step.
    _FORMAT_INSTRUCTION = (
        "You must choose exactly one of the listed actions and output it strictly in the following format:\n"
        "\n"
        "<|ACTION|>YOUR_CHOSEN_ACTION</|ACTION|>\n"
        "\n"
        "You must not output any other text before or after these tags. "
        "No explanation, no reasoning, just the action within these tags."
    )

    def __init__(self, client_factory, prompt_builder):
        """Initialize the RobustNaiveAgent with a client and prompt builder.

        Args:
            client_factory (callable): Zero-argument callable returning the LLM client.
            prompt_builder: Object used to record actions/observations and build the prompt.
        """
        super().__init__(client_factory, prompt_builder)
        self.client = client_factory()

    def act(self, obs, prev_action=None):
        """Generate the next action based on the observation and previous action.

        Args:
            obs (dict): The current observation in the environment.
            prev_action (str, optional): The previous action taken. Recorded in
                the prompt history only when truthy.

        Returns:
            LLMResponse: The client's response with ``completion`` replaced by
            the extracted action text.
        """
        if prev_action:
            self.prompt_builder.update_action(prev_action)

        self.prompt_builder.update_observation(obs)

        messages = self.prompt_builder.get_prompt()

        # The instruction is only attached when the prompt ends with a user
        # turn; otherwise the prompt is sent unchanged.
        if messages and messages[-1].role == "user":
            messages[-1].content += "\n\n" + self._FORMAT_INSTRUCTION

        response = self.client.generate(messages)
        return self._extract_final_answer(response)

    def _extract_final_answer(self, answer):
        """Extract the action from the completion's <|ACTION|> ... </|ACTION|> tags.

        Resolution order:
          1. The text inside the first well-formed tag pair.
          2. If the pair is malformed (only one of the two tags is present),
             the text after the opening tag with any stray closing tag removed,
             so tag markup never leaks into the returned action.
          3. Otherwise the whole completion.
        The result is always whitespace-stripped.

        Args:
            answer (LLMResponse): The response from the LLM.

        Returns:
            LLMResponse: A copy of ``answer`` whose ``completion`` holds just
            the extracted action.
        """
        # A missing completion is treated as empty text rather than crashing
        # in the regex search.
        completion_text = answer.completion or ""

        match = self._ACTION_PATTERN.search(completion_text)
        if match:
            extracted_action = match.group(1)
        else:
            # No well-formed pair: salvage what we can from partial markup.
            _, opener, tail = completion_text.partition(self._ACTION_OPEN)
            extracted_action = tail if opener else completion_text
            extracted_action = extracted_action.replace(self._ACTION_CLOSE, "")

        # deepcopy is kept so the returned response shares no state with the
        # input response.
        return copy.deepcopy(answer)._replace(completion=extracted_action.strip())
2 changes: 2 additions & 0 deletions docs/agents.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ BALROG ships with two pre-built agents:
| **Agent Type** | **Description** |
|-------------------------|---------------------------------------------------------------------------------|
| **naive** | Outputs actions based on the current action/observation history without any additional reasoning. |
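| **robust_naive** | Like **naive**, outputs actions based on the current action/observation history without any additional reasoning, but instructs the model to wrap its chosen action in explicit action tags so it can be extracted reliably from the response. |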
| **chain_of_thought** | Generates actions through step-by-step reasoning, providing a final action output. |


We encourage the community to open PRs to include more agents to BALROG.

## 🤖 Creating Custom Agents
Expand Down
Loading