Merge pull request #12 from divar-ir/fix-partial-analyze

miladosos · web-flow · commit 7c5a904a07ac · 2025-09-08T11:48:30.000+03:30
[FEAT] Fix intermittent cronjob failures with partial analysis tolerance and configurable retries
diff --git a/.env.sample b/.env.sample
@@ -1,35 +1,148 @@
+# ============================================================================
+# AI Doc Gen - Environment Configuration
+# ============================================================================
+# 
+# This file contains all configurable environment variables for the AI Doc Gen
+# application. Copy this file to `.env` and update the values as needed.
+#
+# 📋 Quick Setup Checklist:
+# 1. Copy this file: cp .env.sample .env
+# 2. Set your LLM API keys and base URLs
+# 3. Configure GitLab integration (for cronjob mode)
+# 4. Adjust retry/timeout settings based on your environment
+# 5. Enable observability tools if needed (Langfuse)
+#
+# 🚀 For rate-limited environments, increase retry values:
+#   - ANALYZER_AGENT_RETRIES=5
+#   - TOOL_FILE_READER_MAX_RETRIES=5
+#   - HTTP_RETRY_MAX_ATTEMPTS=10
+# ============================================================================
+
+# ------------- Core Application Settings ----------
 PYTHONPATH=src
-ENVIRONMENT=development
+ENVIRONMENT=development                  # Options: development, staging, production
 
-# ------------ Langfuse ------------
-ENABLE_LANGFUSE=false
-OTEL_SDK_DISABLED=false
+# ------------- Observability & Telemetry ----------
+# Langfuse provides LLM observability and analytics
+ENABLE_LANGFUSE=false                   # Set to 'true' to enable Langfuse tracking
+OTEL_SDK_DISABLED=false                 # Set to 'true' to disable the OpenTelemetry SDK
 
+# Langfuse Configuration (only needed if ENABLE_LANGFUSE=true)
 LANGFUSE_PUBLIC_KEY=YOUR_PUBLIC_KEY_HERE
 LANGFUSE_SECRET_KEY=YOUR_SECRET_KEY_HERE
 LANGFUSE_HOST=https://YOUR_LANGFUSE_HOST_HERE
 OTEL_EXPORTER_OTLP_ENDPOINT=YOUR_OTEL_ENDPOINT_HERE
 
-# ------------ LLM Agents ------------
+# ============================================================================
+# 🤖 LLM AGENTS CONFIGURATION
+# ============================================================================
+
+# ------------- Analyzer Agent (Code Analysis) ----------
+# The analyzer agent performs code structure, dependency, data flow, and API analysis
 ANALYZER_LLM_MODEL=claude-sonnet-4-20250514
-ANALYZER_LLM_BASE_URL=YOUR_ANALYZER_BASE_URL_HERE
-ANALYZER_LLM_API_KEY=YOUR_ANALYZER_API_KEY_HERE
-ANALYZER_PARALLEL_TOOL_CALLS=true
+ANALYZER_LLM_BASE_URL=YOUR_ANALYZER_BASE_URL_HERE     # e.g., https://api.anthropic.com
+ANALYZER_LLM_API_KEY=YOUR_ANALYZER_API_KEY_HERE       # Your LLM provider API key
+ANALYZER_PARALLEL_TOOL_CALLS=true                    # Enable parallel tool execution
+
+# Analyzer Agent Behavior Settings
+ANALYZER_AGENT_RETRIES=2                # Retries per analysis agent on failure
+ANALYZER_LLM_TIMEOUT=180                # Request timeout in seconds (3 minutes)
+ANALYZER_LLM_MAX_TOKENS=8192            # Maximum tokens in LLM responses
+ANALYZER_LLM_TEMPERATURE=0.0            # Response randomness (0.0=deterministic, 1.0=creative)
 
+# ------------- Documenter Agent (README Generation) ----------
+# The documenter agent generates comprehensive README.md files
 DOCUMENTER_LLM_MODEL=claude-sonnet-4-20250514
 DOCUMENTER_LLM_BASE_URL=YOUR_DOCUMENTER_BASE_URL_HERE
 DOCUMENTER_LLM_API_KEY=YOUR_DOCUMENTER_API_KEY_HERE
 DOCUMENTER_PARALLEL_TOOL_CALLS=true
 
-# ------------- Gitlab ----------
-GITLAB_API_URL=https://git.divar.cloud
-GITLAB_USER_NAME=AI Analyzer
-GITLAB_USER_USERNAME=agent_doc
-GITLAB_USER_EMAIL=YOUR_EMAIL_HERE
-GITLAB_OAUTH_TOKEN=YOUR_GITLAB_TOKEN_HERE
+# Documenter Agent Behavior Settings
+DOCUMENTER_AGENT_RETRIES=2              # Retries for documenter agent on failure
+DOCUMENTER_LLM_TIMEOUT=180              # Request timeout in seconds
+DOCUMENTER_LLM_MAX_TOKENS=8192          # Maximum tokens in LLM responses
+DOCUMENTER_LLM_TEMPERATURE=0.0          # Response randomness (0.0=deterministic)
+
+# ============================================================================
+# 🔧 RETRY & RESILIENCE CONFIGURATION
+# ============================================================================
+
+# ------------- Agent Tools Settings ----------
+# These settings control the retry behavior for internal tools used by agents
+TOOL_FILE_READER_MAX_RETRIES=2          # File reading tool retry attempts
+TOOL_LIST_FILES_MAX_RETRIES=2           # File listing tool retry attempts  
+
+# ------------- HTTP Retry Client ----------
+# Controls retry behavior for all HTTP requests to LLM providers
+# 💡 Increase these values if you encounter rate limiting issues
+HTTP_RETRY_MAX_ATTEMPTS=5               # Total HTTP retry attempts before failure
+HTTP_RETRY_MULTIPLIER=1                 # Exponential backoff multiplier (1s→2s→4s→8s)
+HTTP_RETRY_MAX_WAIT_PER_ATTEMPT=60      # Maximum wait between attempts (seconds)
+HTTP_RETRY_MAX_TOTAL_WAIT=300           # Maximum total wait time in seconds (5 minutes)
+
+# ⚡ Rate Limiting Solutions:
+# If you encounter "max retries exceeded" or "request limit" errors:
+# - Increase HTTP_RETRY_MAX_ATTEMPTS to 10+
+# - Increase ANALYZER_AGENT_RETRIES to 5+
+# - Increase timeout values (ANALYZER_LLM_TIMEOUT=300+)
+
+# ============================================================================
+# 🔗 GITLAB INTEGRATION (Required for Cronjob Mode)
+# ============================================================================
+
+# GitLab API Configuration
+GITLAB_API_URL=https://git.divar.cloud   # Your GitLab instance URL
+GITLAB_USER_NAME=AI Analyzer            # Display name for commits/MRs
+GITLAB_USER_USERNAME=agent_doc          # GitLab username for the bot
+GITLAB_USER_EMAIL=YOUR_EMAIL_HERE       # Email for Git commits
+GITLAB_OAUTH_TOKEN=YOUR_GITLAB_TOKEN_HERE # GitLab access token with repo permissions
+
+# 🔑 GitLab Token Permissions Required:
+# - api (full API access)
+# - read_repository (read repo contents)
+# - write_repository (create branches, commits)
+
+# ============================================================================
+# 📊 LOGGING & DEBUGGING
+# ============================================================================
+
+# Logging Configuration
+CONSOLE_LOG_LEVEL=WARNING               # Console output level
+FILE_LOG_LEVEL=INFO                     # File logging level
+
+# Available log levels (from most to least verbose):
+# DEBUG    - Detailed debugging information
+# INFO     - General information messages
+# WARNING  - Warning messages (console level set in this sample; the app falls back to DEBUG if CONSOLE_LOG_LEVEL is unset)
+# ERROR    - Error messages only
+# CRITICAL - Critical errors only
+
+# 🐛 For debugging issues:
+# Set CONSOLE_LOG_LEVEL=DEBUG to see detailed operation logs
+
+# ============================================================================
+# 🚀 DEPLOYMENT-SPECIFIC RECOMMENDATIONS
+# ============================================================================
+
+# Development Environment:
+# - Use DEBUG log levels for troubleshooting
+# - Lower retry counts for faster feedback
+# - Enable Langfuse for observability
+
+# Production Environment:
+# - Use WARNING/ERROR log levels
+# - Higher retry counts for resilience
+# - Monitor with Langfuse/OTEL
+# - Set appropriate timeout values based on your LLM provider
 
-# ------------- App ----------
-APP_VERSION=1.1.0
+# Rate-Limited Environments:
+# - ANALYZER_AGENT_RETRIES=5+
+# - HTTP_RETRY_MAX_ATTEMPTS=10+
+# - HTTP_RETRY_MAX_TOTAL_WAIT=600+
+# - ANALYZER_LLM_TIMEOUT=300+
 
-CONSOLE_LOG_LEVEL=WARNING # Options: DEBUG, INFO, WARNING, ERROR, CRITICAL
-FILE_LOG_LEVEL=INFO # Options: DEBUG, INFO, WARNING, ERROR, CRITICAL
+# High-Performance Environments:
+# - ANALYZER_PARALLEL_TOOL_CALLS=true
+# - DOCUMENTER_PARALLEL_TOOL_CALLS=true
+# - Higher ANALYZER_LLM_MAX_TOKENS values
+# - Lower timeout values for faster failures
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ai-doc-gen"
-version = "1.0.0"
+version = "1.2.0"
 description = "AI-powered code documentation generator that analyzes repositories and creates comprehensive documentation"
 authors = [{ name = "Milad Noroozi", email = "norooziosos@gmail.com" }]
 requires-python = ">=3.13,<3.14"
diff --git a/src/__init__.py b/src/__init__.py
@@ -1 +1 @@
-__version__ = "1.0.0"
+__version__ = "1.2.0"
diff --git a/src/agents/analyzer.py b/src/agents/analyzer.py
@@ -123,9 +123,14 @@ async def run(self):
         # Log results for each agent
         for i, result in enumerate(results):
             if isinstance(result, Exception):
-                Logger.error(f"Agent {i} failed: {result}")
+                Logger.error(
+                    f"Agent {tasks[i].agent.name} failed: {result}",
+                    exc_info=True,
+                )
             else:
-                Logger.info(f"Agent {i} completed successfully")
+                Logger.info(
+                    f"Agent {tasks[i].agent.name} completed successfully",
+                )
 
         self.validate_succession(analysis_files)
 
@@ -135,10 +140,23 @@ def validate_succession(self, analysis_files: List[Path]):
             if not file.exists():
                 missing_files.append(file)
 
-        if missing_files:
-            missing_files_str = ", ".join([str(file) for file in missing_files])
-            Logger.warning(f"Some analysis files not found: {missing_files_str}")
-            raise ValueError(f"Some analysis files not found: {missing_files_str}")
+        if not missing_files:
+            # All files exist - complete success
+            Logger.info(f"All {len(analysis_files)} analysis files generated successfully")
+            return
+
+        if len(missing_files) == len(analysis_files):
+            # ALL files missing - complete failure
+            Logger.error("Complete analysis failure: no analysis files were generated")
+            raise ValueError("Complete analysis failure: no analysis files were generated")
+
+        # SOME files missing - partial success, log warning but continue
+        missing_files_str = ", ".join([str(file) for file in missing_files])
+        successful_count = len(analysis_files) - len(missing_files)
+        Logger.warning(
+            f"Partial analysis success: {successful_count}/{len(analysis_files)} files generated. Missing: {missing_files_str}"
+        )
+        # Continue without raising error - partial results are better than no results
 
     async def _run_agent(self, agent: Agent, user_prompt: str, file_path: Path):
         trace.get_current_span().add_event(name=f"Running {agent.name}", attributes={"agent_name": agent.name})
@@ -194,9 +212,9 @@ def _llm_model(self) -> Tuple[Model, ModelSettings]:
         )
 
         settings = ModelSettings(
-            temperature=0.0,
-            max_tokens=8192,
-            timeout=180,
+            temperature=config.ANALYZER_LLM_TEMPERATURE,
+            max_tokens=config.ANALYZER_LLM_MAX_TOKENS,
+            timeout=config.ANALYZER_LLM_TIMEOUT,
             parallel_tool_calls=config.ANALYZER_PARALLEL_TOOL_CALLS,
         )
 
@@ -211,7 +229,7 @@ def _structure_analyzer_agent(self) -> Agent:
             model=model,
             model_settings=model_settings,
             output_type=AnalyzerResult,
-            retries=2,
+            retries=config.ANALYZER_AGENT_RETRIES,
             system_prompt=self._render_prompt("agents.structure_analyzer.system_prompt"),
             tools=[
                 FileReadTool().get_tool(),
@@ -229,7 +247,7 @@ def _data_flow_analyzer_agent(self) -> Agent:
             model=model,
             model_settings=model_settings,
             output_type=str,
-            retries=2,
+            retries=config.ANALYZER_AGENT_RETRIES,
             system_prompt=self._render_prompt("agents.data_flow_analyzer.system_prompt"),
             tools=[
                 FileReadTool().get_tool(),
@@ -247,7 +265,7 @@ def _dependency_analyzer_agent(self) -> Agent:
             model=model,
             model_settings=model_settings,
             output_type=str,
-            retries=2,
+            retries=config.ANALYZER_AGENT_RETRIES,
             system_prompt=self._render_prompt("agents.dependency_analyzer.system_prompt"),
             tools=[
                 FileReadTool().get_tool(),
@@ -265,7 +283,7 @@ def _request_flow_analyzer_agent(self) -> Agent:
             model=model,
             model_settings=model_settings,
             output_type=str,
-            retries=2,
+            retries=config.ANALYZER_AGENT_RETRIES,
             system_prompt=self._render_prompt("agents.request_flow_analyzer.system_prompt"),
             tools=[
                 FileReadTool().get_tool(),
@@ -283,7 +301,7 @@ def _api_analyzer_agent(self) -> Agent:
             model=model,
             model_settings=model_settings,
             output_type=AnalyzerResult,
-            retries=2,
+            retries=config.ANALYZER_AGENT_RETRIES,
             system_prompt=self._render_prompt("agents.api_analyzer.system_prompt"),
             tools=[
                 FileReadTool().get_tool(),
diff --git a/src/agents/documenter.py b/src/agents/documenter.py
@@ -151,9 +151,9 @@ def _llm_model(self) -> Tuple[Model, ModelSettings]:
             )
 
         settings = ModelSettings(
-            temperature=0.0,
-            max_tokens=8192,
-            timeout=180,
+            temperature=config.DOCUMENTER_LLM_TEMPERATURE,
+            max_tokens=config.DOCUMENTER_LLM_MAX_TOKENS,
+            timeout=config.DOCUMENTER_LLM_TIMEOUT,
             parallel_tool_calls=config.DOCUMENTER_PARALLEL_TOOL_CALLS,
         )
 
@@ -168,7 +168,7 @@ def _documenter_agent(self) -> Agent:
             model=model,
             model_settings=model_settings,
             output_type=DocumenterResult,
-            retries=2,
+            retries=config.DOCUMENTER_AGENT_RETRIES,
             system_prompt=self._render_prompt("agents.documenter.system_prompt"),
             tools=[
                 FileReadTool().get_tool(),
diff --git a/src/agents/tools/dir_tool/list_files.py b/src/agents/tools/dir_tool/list_files.py
@@ -6,6 +6,7 @@
 from opentelemetry import trace
 from pydantic_ai import Tool
 
+import config
 from utils import Logger
 
 
@@ -229,7 +230,7 @@ def __init__(
         self.ignored_extensions = ignored_extensions or []
 
     def get_tool(self):
-        return Tool(self._run, name="List-Files-Tool", takes_ctx=False, max_retries=2)
+        return Tool(self._run, name="List-Files-Tool", takes_ctx=False, max_retries=config.TOOL_LIST_FILES_MAX_RETRIES)
 
     def _run(self, directory: str) -> Any:
         """List files in a directory recursively, grouping them by directory.
diff --git a/src/agents/tools/file_tool/file_reader.py b/src/agents/tools/file_tool/file_reader.py
@@ -3,6 +3,7 @@
 from opentelemetry import trace
 from pydantic_ai import ModelRetry, Tool
 
+import config
 from utils import Logger
 
 
@@ -11,7 +12,7 @@ def __init__(self):
         pass
 
     def get_tool(self):
-        return Tool(self._run, name="Read-File", takes_ctx=False, max_retries=2)
+        return Tool(self._run, name="Read-File", takes_ctx=False, max_retries=config.TOOL_FILE_READER_MAX_RETRIES)
 
     def _run(self, file_path: str, line_number: int = 0, line_count: int = 200) -> str:
         """Read a file and return its contents.
diff --git a/src/config.py b/src/config.py
@@ -20,19 +20,32 @@ def str_to_bool(value: str) -> bool:
     else:
         raise ValueError(f"Invalid boolean value: {value}")
 
+VERSION = os.getenv("APP_VERSION", "1.2.0")
 
 # Analyzer
 ANALYZER_LLM_MODEL = os.environ["ANALYZER_LLM_MODEL"]
 ANALYZER_LLM_BASE_URL = os.environ["ANALYZER_LLM_BASE_URL"]
 ANALYZER_LLM_API_KEY = os.environ["ANALYZER_LLM_API_KEY"]
 ANALYZER_PARALLEL_TOOL_CALLS = str_to_bool(os.getenv("ANALYZER_PARALLEL_TOOL_CALLS", "true"))
 
+# Analyzer Agent Settings
+ANALYZER_AGENT_RETRIES = int(os.getenv("ANALYZER_AGENT_RETRIES", "2"))
+ANALYZER_LLM_TIMEOUT = int(os.getenv("ANALYZER_LLM_TIMEOUT", "180"))
+ANALYZER_LLM_MAX_TOKENS = int(os.getenv("ANALYZER_LLM_MAX_TOKENS", "8192"))
+ANALYZER_LLM_TEMPERATURE = float(os.getenv("ANALYZER_LLM_TEMPERATURE", "0.0"))
+
 # Documenter
 DOCUMENTER_LLM_MODEL = os.environ["DOCUMENTER_LLM_MODEL"]
 DOCUMENTER_LLM_BASE_URL = os.environ["DOCUMENTER_LLM_BASE_URL"]
 DOCUMENTER_LLM_API_KEY = os.environ["DOCUMENTER_LLM_API_KEY"]
 DOCUMENTER_PARALLEL_TOOL_CALLS = str_to_bool(os.getenv("DOCUMENTER_PARALLEL_TOOL_CALLS", "true"))
 
+# Documenter Agent Settings
+DOCUMENTER_AGENT_RETRIES = int(os.getenv("DOCUMENTER_AGENT_RETRIES", "2"))
+DOCUMENTER_LLM_TIMEOUT = int(os.getenv("DOCUMENTER_LLM_TIMEOUT", "180"))
+DOCUMENTER_LLM_MAX_TOKENS = int(os.getenv("DOCUMENTER_LLM_MAX_TOKENS", "8192"))
+DOCUMENTER_LLM_TEMPERATURE = float(os.getenv("DOCUMENTER_LLM_TEMPERATURE", "0.0"))
+
 # Langfuse
 ENABLE_LANGFUSE = str_to_bool(os.getenv("ENABLE_LANGFUSE", "false"))
 LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
@@ -48,11 +61,20 @@ def str_to_bool(value: str) -> bool:
 # General
 ENVIRONMENT = os.getenv("ENVIRONMENT", "development")
 
-VERSION = os.getenv("APP_VERSION", "1.0.0")
 
 CONSOLE_LOG_LEVEL = getattr(logging, os.getenv("CONSOLE_LOG_LEVEL", "DEBUG").upper())
 FILE_LOG_LEVEL = getattr(logging, os.getenv("FILE_LOG_LEVEL", "INFO").upper())
 
+# Agent Tools Settings
+TOOL_FILE_READER_MAX_RETRIES = int(os.getenv("TOOL_FILE_READER_MAX_RETRIES", "2"))
+TOOL_LIST_FILES_MAX_RETRIES = int(os.getenv("TOOL_LIST_FILES_MAX_RETRIES", "2"))
+
+# HTTP Retry Client Settings
+HTTP_RETRY_MAX_ATTEMPTS = int(os.getenv("HTTP_RETRY_MAX_ATTEMPTS", "5"))
+HTTP_RETRY_MULTIPLIER = int(os.getenv("HTTP_RETRY_MULTIPLIER", "1"))
+HTTP_RETRY_MAX_WAIT_PER_ATTEMPT = int(os.getenv("HTTP_RETRY_MAX_WAIT_PER_ATTEMPT", "60"))
+HTTP_RETRY_MAX_TOTAL_WAIT = int(os.getenv("HTTP_RETRY_MAX_TOTAL_WAIT", "300"))
+
 # --------------------------
 # Helper Function
 
diff --git a/src/utils/retry_client.py b/src/utils/retry_client.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "1.0.0"`
	`1`	`+__version__ = "1.2.0"`