Merged
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -7,10 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Changed

- Increased `max_tokens` to `4096` for `Anthropic` models.
- Removed fallback logic from all agents, as it's not needed with the new `OpenRouter` integration.
- Improved `PlanAndExecuteAgent`:
  - Introduced a `think` tool in both the planning and execution phases to improve the agent's reasoning capabilities, even when using models without native thinking support (https://www.anthropic.com/engineering/claude-think-tool).
  - Improved the planning prompt to focus on the actual state of the codebase and to design plans that are more self-contained and actionable.
  - Improved the execution prompt so that execution strictly follows the plan.
  - File paths included in the plan are now preloaded as messages for the execution phase, speeding up execution and reducing the number of tool calls.
- Improved `ReviewAddressorAgent`:
  - Improved the planning prompt to focus on the actual diff hunk without losing the context of the reviewer's comments.
- Improved `PipelineFixerAgent`:
  - Improved the troubleshooting prompt to focus on verifiable knowledge and to improve the quality of the remediation steps.
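
The `think` tool mentioned above is a no-op tool in the pattern described by the linked Anthropic article: it changes nothing and fetches nothing, it only gives the model an explicit place to reason before acting. A minimal sketch of that pattern (the names and schema here are illustrative, not the actual DAIV implementation):

```python
# A no-op "think" tool definition in Anthropic-style tool-schema form.
# Calling it has no side effects; it only gives the model a scratchpad.
THINK_TOOL = {
    "name": "think",
    "description": (
        "Use the tool to think about something. It will not obtain new "
        "information or change anything; it just logs the thought."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "thought": {"type": "string", "description": "A thought to think about."}
        },
        "required": ["thought"],
    },
}


def handle_think(thought: str) -> str:
    # Intentionally ignores the thought's content; a fixed acknowledgement
    # is enough for the model to continue with its next step.
    return "Thought logged."
```

Because the handler is a constant-time no-op, the tool works with any model, including ones without a native extended-thinking capability.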

### Added

- Support for `OpenRouter` integration, a unified API for LLM providers. **Breaking change: `OpenRouter` is now the default provider, as it is more reliable and offers more models.**
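
As the diff in `daiv/automation/agents/base.py` shows, model names now carry the provider as a prefix (e.g. `openrouter:anthropic/claude-3-5-haiku`), and the prefix is stripped with `split(":", 1)` before the remainder is handed to the OpenAI-compatible client. A rough sketch of that parsing (the helper name is illustrative):

```python
def split_provider(model_name: str) -> tuple[str, str]:
    """Split 'provider:vendor/model' into (provider, 'vendor/model')."""
    # Splitting only on the first ':' (what split(":", 1) does in the diff)
    # keeps the remainder intact even if it contains further separators.
    provider, _, model = model_name.partition(":")
    return provider, model
```

For example, `split_provider("openrouter:openai/gpt-4o-mini")` yields `("openrouter", "openai/gpt-4o-mini")`, which matches the OpenRouter naming used in `constants.py` below.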

### Fixed

- The `base_url` parameter of the `setup_webhooks` command was declared as required despite having a default value, forcing it to be passed on every call. It is now optional and falls back to the value of `DAIV_EXTERNAL_URL` when not provided.
- The date and time provided to prompts were evaluated at prompt compilation time, resulting in the same fixed date and time in every prompt. They are now resolved at runtime so each execution receives the correct date and time.
- `CodebaseChat` tool calls were shown outside the `<thinking>` tags in OpenWebUI, polluting the UI.
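
The date-and-time fix above is the classic import-time-evaluation pitfall: formatting the timestamp while the prompt module is loaded freezes it for the lifetime of the process. A minimal before/after sketch (hypothetical names, not the actual DAIV code):

```python
from datetime import datetime, timezone


def _now() -> str:
    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M")


# Buggy pattern: the f-string is evaluated once, when the module is imported,
# so every prompt rendered afterwards reuses the same frozen timestamp.
PROMPT_FROZEN = f"Current date: {_now()}."


def render_prompt() -> str:
    # Fixed pattern: defer the call, so the timestamp is resolved at
    # render time, once per execution.
    return f"Current date: {_now()}."
```

The fix is simply moving the interpolation from module scope into a function (or a template variable filled in at invocation time).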

## [0.1.0-beta.3] - 2025-03-23

### Added
8 changes: 4 additions & 4 deletions daiv/automation/agents/base.py
@@ -116,7 +116,7 @@ def get_model_kwargs(
# As stated in docs: https://docs.anthropic.com/en/api/rate-limits#updated-rate-limits
# the OTPM is calculated based on the max_tokens. We need to use a fair value to avoid rate limiting.
# If needed, we can increase this value using the configurable field.
_kwargs["max_tokens"] = 2_048
_kwargs["max_tokens"] = 4_096

if _kwargs["model"].startswith("claude-3-7-sonnet"):
# Enable token efficient tools to reduce the number of tokens used and
@@ -128,7 +128,7 @@ def get_model_kwargs(
_kwargs["reasoning_effort"] = thinking_level

elif model_provider == ModelProvider.OPENROUTER:
_kwargs["model"] = _kwargs["model"].split(":")[1]
_kwargs["model"] = _kwargs["model"].split(":", 1)[1]
# OpenRouter is OpenAI compatible, so we need to use the OpenAI model provider
_kwargs["model_provider"] = ModelProvider.OPENAI
_kwargs["model_kwargs"]["extra_headers"] = {
@@ -147,7 +147,7 @@

elif _kwargs["model"].startswith("anthropic"):
# Avoid rate limiting by setting a fair max_tokens value
_kwargs["max_tokens"] = 2_048
_kwargs["max_tokens"] = 4_096

return _kwargs

@@ -156,7 def _get_anthropic_thinking_tokens(self, *, thinking_level: ThinkingLevel) -> tu
Get the thinking tokens and max tokens for the model.
"""
if thinking_level == ThinkingLevel.LOW:
return 4_096, {"type": "enabled", "budget_tokens": 2_048}
return 8_192, {"type": "enabled", "budget_tokens": 4_096}
elif thinking_level == ThinkingLevel.MEDIUM:
return 32_768, {"type": "enabled", "budget_tokens": 25_600}
elif thinking_level == ThinkingLevel.HIGH:
90 changes: 45 additions & 45 deletions daiv/automation/agents/codebase_chat/prompts.py
@@ -1,61 +1,61 @@
from langchain_core.prompts import SystemMessagePromptTemplate

codebase_chat_system = SystemMessagePromptTemplate.from_template(
"""You're DAIV, an helpful assistant specialized on software development and codebases knowledge. Your main task is to reply to user's queries that are aligned with software development or with knowledge about the codebases that you can collect using available tools.

IMPORTANT: You don't need to mention the knowledge base in your replies, just reply directly to the user's query. The user don't have access to the system context, only you have access to it, so NEVER refer to it in your response.

Current date: {{ current_date_time }}.

# Instructions
1. Check if the user's query is related to software development or codebases. If not, just reply with a message indicating that you can only help with software development related queries. Otherwise, continue to the next step.
2. Open a <thinking> tag and wrap you thinking process inside it. **IMPORTANT:** Don't close it until the end of your reply to the user's query.
3. Analyse the user query using the rules "Query analysis rules".
4. Call the `{{ search_code_snippets_name }}` tool following the rules "Tool usage rules" to ground your reply. Be specific about the code snippets you're searching for.
5. Close the </thinking> tag. **IMPORTANT:** Only close it on the beginning of your reply to the user's query.
6. Reply to the user's query.

# Tone and style
- Communicate in the first person, as if speaking directly to the developer.
- Use a tone of a senior software developer who is confident and experienced.
- Don't reply with unnecessary preamble or postamble (such as explaining your query analysis or summarizing your action).

# Query analysis rules
- Specific programming languages, frameworks, or technologies mentioned or implied, with an example of how each might be used in code.
- Key search terms extracted from the query, prioritized based on relevance, with an example of how each might appear in code.
- A prioritized list of key concepts or topics extracted from the query, with a brief explanation of why each is important.
- Identification of multiple topics if present in the query, with an explanation of how they relate to each other. If multiple topics were identified in the query analysis, break down the plan for each topic.
- References to specific files or repositories in the query, with an example of how each might be used in code.
- Conversation history is important, as the user can follow-up queries. Use it to correlate the queries.

# Tool usage rules
Use the `{{ search_code_snippets_name }}` tool to search for code snippets in the following repositories. If the user's query is not related to the repositories below, you should not use the `{{ search_code_snippets_name }}` tool.
**IMPORTANT:** Make use of parallel tool calls if you intend to call the same tool multiple times.

# Output format
Divide your reply to the user's query into two parts:
- The first part is the reply to the user's query.
- The second part is quoting repository files from the code snippets that are used as the basis for replying to the user. Use the `external_link` field from the <CodeSnippet> tags to create the links. If you didn't quote any code snippets, just don't include the second part.
**IMPORTANT:** Only close the <thinking> tag on the beginning of your reply to the user's query.

Example output, the values in the [] are placeholders:
"""You're DAIV, a helpful assistant tasked with answering user queries related to software development or to the repositories you have access to. You have tools available to help you inspect the repositories related to the user's request.

The current date and time is {{ current_date_time }}.

When queried about the repositories, do not rely on your internal or prior knowledge. Instead, base all conclusions and recommendations strictly on verifiable, factual information from the repositories.

<tone_and_style>
When replying to the user, follow these guidelines:
* Always reply to the user in the same language they are using.
* You can use markdown formatting in your replies if helpful.
* The user doesn't have access to the system context; only you have access to it, so NEVER refer to it in your replies.
</tone_and_style>

<query_analysis_rules>
Here are the rules to analyze the user's query before replying and searching the repositories:
* Specific programming languages, frameworks, or technologies mentioned or implied, with an example of how each might be used in code.
* Key search terms extracted from the query, prioritized based on relevance, with an example of how each might appear in code.
* A prioritized list of key concepts or topics extracted from the query, with a brief explanation of why each is important.
* Identification of multiple topics if present in the query, with an explanation of how they relate to each other. If multiple topics were identified in the query analysis, break down the plan for each topic.
* References to specific files or repositories in the query, with an example of how each might be used in code.
* Conversation history is important, as the user can send follow-up queries. Use it to correlate the queries.
</query_analysis_rules>

<tool_calling>
You have tools at your disposal to search knowledge on the repositories. Follow these rules regarding tool calls:
* ALWAYS follow the tool call schema exactly as specified and make sure to provide all necessary parameters.
* Use the `{{ search_code_snippets_name }}` tool to search for code snippets in the repositories you have access to using the keywords extracted from the user's query. If the user's query is not related to the repositories you have access to, you should not use it.
</tool_calling>

<reply_output_format>
Divide your reply to the user's query into two sections:
- The first section is the reply to the user's query;
- The second section is the references to the repository files from the code snippets that are used as the basis for replying to the user. Use the `external_link` field from the <CodeSnippet> tags to create the links. If you didn't quote any code snippets, just don't include this section.

Example output format:
```markdown
<thinking>
[thinking process]
[tool calls]
</thinking>

[reply to the user's query]

**References:**
- [repository/path/to/file.py](https://github.com/user/repo/blob/branch/path/to/file.py)
```
</reply_output_format>

# Repositories
<repositories>
DAIV has access to the following repositories:
{% for repository in repositories %}
- {{ repository }}
{%- endfor %}
""", # noqa: E501
</repositories>

<searching_and_replying>
The user's query must be related to software development or the repositories. If not, simply reply with a message stating that you can only help with software-development-related queries. Otherwise, go ahead and analyze the user's request and inspect the repositories with the tools available to support your answer.
Finally, answer the user's question based on the information you have gathered from the repositories.
</searching_and_replying>

Reply to the user's query with grounded information.""", # noqa: E501
"jinja2",
)
8 changes: 2 additions & 6 deletions daiv/automation/agents/codebase_search/agent.py
@@ -44,9 +44,7 @@ def compile(self) -> Runnable:
llm=cast(
"BaseChatModel",
# this model shows better results for rephrasing
self.get_model(model=settings.REPHRASE_MODEL_NAME).with_fallbacks([
self.get_model(model=settings.REPHRASE_FALLBACK_MODEL_NAME)
]),
self.get_model(model=settings.REPHRASE_MODEL_NAME),
),
)
else:
@@ -57,9 +55,7 @@ def compile(self) -> Runnable:
llm=cast(
"BaseChatModel",
# this model shows better results for listwise reranking
self.get_model(model=settings.RERANKING_MODEL_NAME).with_fallbacks([
self.get_model(model=settings.RERANKING_FALLBACK_MODEL_NAME)
]),
self.get_model(model=settings.RERANKING_MODEL_NAME),
),
top_n=settings.TOP_N,
),
6 changes: 0 additions & 6 deletions daiv/automation/agents/codebase_search/conf.py
@@ -16,15 +16,9 @@ class CodebaseSearchSettings(BaseSettings):
REPHRASE_MODEL_NAME: ModelName = Field(
default=ModelName.GPT_4O_MINI, description="Model name to be used for codebase search."
)
REPHRASE_FALLBACK_MODEL_NAME: ModelName = Field(
default=ModelName.CLAUDE_3_5_HAIKU, description="Fallback model name to be used for codebase search."
)
RERANKING_MODEL_NAME: ModelName = Field(
default=ModelName.GPT_4O_MINI, description="Model name to be used for listwise reranking."
)
RERANKING_FALLBACK_MODEL_NAME: ModelName = Field(
default=ModelName.CLAUDE_3_5_HAIKU, description="Fallback model name to be used for listwise reranking."
)


settings = CodebaseSearchSettings() # type: ignore
2 changes: 1 addition & 1 deletion daiv/automation/agents/constants.py
@@ -13,7 +13,7 @@ class ModelName(StrEnum):
CLAUDE_3_5_HAIKU = "openrouter:anthropic/claude-3-5-haiku"
GPT_4O = "openrouter:openai/gpt-4o"
GPT_4O_MINI = "openrouter:openai/gpt-4o-mini"
O1 = "openrouter:openai/o1"
O3_MINI = "openrouter:openai/o3-mini"
GEMINI_2_0_FLASH = "openrouter:google/gemini-2.0-flash-001"
GEMINI_2_0_FLASH_LITE = "openrouter:google/gemini-2.0-flash-lite-001"
DEEPSEEK_CHAT_V3_0324 = "openrouter:deepseek/deepseek-chat-v3-0324"
4 changes: 3 additions & 1 deletion daiv/automation/agents/image_url_extractor/agent.py
@@ -41,6 +41,8 @@ def compile(self) -> Runnable:
prompt = ChatPromptTemplate.from_messages([system, human])
return (
prompt
| self.get_model(model=settings.MODEL_NAME).with_structured_output(ImageURLExtractorOutput)
| self.get_model(model=settings.MODEL_NAME).with_structured_output(
ImageURLExtractorOutput, method="function_calling"
)
| RunnableLambda(_post_process, name="post_process_extracted_images")
).with_config({"run_name": settings.NAME})
2 changes: 1 addition & 1 deletion daiv/automation/agents/image_url_extractor/conf.py
@@ -13,7 +13,7 @@ class ImageURLExtractorSettings(BaseSettings):

NAME: str = Field(default="ImageURLExtractor", description="Name of the image URL extractor agent.")
MODEL_NAME: ModelName = Field(
default=ModelName.GPT_4O_MINI, description="Model name to be used for image URL extractor."
default=ModelName.GEMINI_2_0_FLASH_LITE, description="Model name to be used for image URL extractor."
)


6 changes: 4 additions & 2 deletions daiv/automation/agents/image_url_extractor/schemas.py
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Literal
from typing import Literal, cast
from urllib.parse import urlparse

from pydantic import BaseModel, Field
@@ -50,7 +50,9 @@ def from_images(
and parsed_url.path.startswith("uploads/")
):
_repo_image_url = build_uri(f"{settings.GITLAB_URL}api/v4/projects/{project_id}/", image.url)
image_url = url_to_data_url(_repo_image_url, headers={"PRIVATE-TOKEN": settings.GITLAB_AUTH_TOKEN})
image_url = url_to_data_url(
_repo_image_url, headers={"PRIVATE-TOKEN": cast("str", settings.GITLAB_AUTH_TOKEN)}
)

if image_url:
image_templates.append(
11 changes: 5 additions & 6 deletions daiv/automation/agents/issue_addressor/agent.py
@@ -64,10 +64,8 @@ def assessment(self, state: OverallState) -> Command[Literal["prepare_data", "__
prompt = ChatPromptTemplate.from_messages([issue_assessment_system, issue_assessment_human])

evaluator = prompt | self.get_model(model=settings.ASSESSMENT_MODEL_NAME).with_structured_output(
IssueAssessment
).with_fallbacks([
self.get_model(model=settings.FALLBACK_ASSESSMENT_MODEL_NAME).with_structured_output(IssueAssessment)
])
IssueAssessment, method="function_calling"
)

response = cast(
"IssueAssessment",
@@ -105,13 +103,14 @@ def prepare_data(self, state: OverallState, config: RunnableConfig) -> Command[L
return Command(
goto="plan_and_execute",
update={
"image_templates": extracted_images,
"messages": HumanMessagePromptTemplate.from_template(
[issue_addressor_human, *extracted_images], "jinja2"
[issue_addressor_human] + extracted_images, "jinja2"
).format_messages(
issue_title=state["issue_title"],
issue_description=state["issue_description"],
project_description=repo_config.repository_description,
)
),
},
)

5 changes: 1 addition & 4 deletions daiv/automation/agents/issue_addressor/conf.py
@@ -14,10 +14,7 @@ class IssueAddressorSettings(BaseSettings):
NAME: str = Field(default="IssueAddressor", description="Name of the issue addressor agent.")
RECURSION_LIMIT: int = Field(default=50, description="Recursion limit for the issue addressor agent.")
ASSESSMENT_MODEL_NAME: ModelName = Field(
default=ModelName.GPT_4O_MINI, description="Model name to be used for issue assessment."
)
FALLBACK_ASSESSMENT_MODEL_NAME: ModelName = Field(
default=ModelName.CLAUDE_3_5_HAIKU, description="Fallback model name to be used for issue assessment."
default=ModelName.GEMINI_2_0_FLASH, description="Model name to be used for issue assessment."
)


5 changes: 2 additions & 3 deletions daiv/automation/agents/issue_addressor/prompts.py
@@ -95,9 +95,8 @@
"jinja2",
)

issue_addressor_human = """# Issue to implement
<issue_title>{{ issue_title }}</issue_title>
<issue_description>{{ issue_description }}</issue_description>
issue_addressor_human = """# {{ issue_title }}
{{ issue_description }}

{% if project_description -%}
# Project Context
5 changes: 5 additions & 0 deletions daiv/automation/agents/issue_addressor/state.py
@@ -16,3 +16,8 @@ class OverallState(MessagesState):
"""
Whether the issue is a request for changes.
"""

image_templates: list[dict]
"""
The image templates to be used in the issue addressor.
"""
11 changes: 6 additions & 5 deletions daiv/automation/agents/issue_addressor/templates.py
@@ -14,14 +14,15 @@
<details>
<summary>

{{ plan_task.title }} - `{{ plan_task.path }}`
Changes to apply {% if plan_task.file_path %}to `{{ plan_task.file_path }}`{% else %}to the repository{% endif %}

</summary>

**{{ plan_task.description }}**
{% for subtask in plan_task.subtasks %}
- [ ] {{ subtask }}{% endfor %}

{{ plan_task.details }}
{% if plan_task.relevant_files %}
Relevant files:{% for file in plan_task.relevant_files %}
- `{{ file }}`{% endfor %}
{% endif %}
---
</details>
{% endfor %}