# fix: implement token tracking in RouterAgent for cost observability #1190
```diff
@@ -10,7 +10,8 @@
 from typing import Dict, List, Optional, Any, Union
 from .agent import Agent
 from ..llm.model_router import ModelRouter
-from ..llm import LLM
+from ..llm import LLM, TokenUsage
+from ..trace.protocol import get_default_emitter

 logger = logging.getLogger(__name__)
```
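The hunk above imports `TokenUsage` from the package's `llm` module without showing its definition. Judging from the two members the rest of the diff relies on (`total_tokens` and `to_dict()`), a minimal sketch might look like the following; the prompt/completion split mirrors litellm's usage payload and is an assumption, not the project's actual class:

```python
from dataclasses import dataclass, asdict

@dataclass
class TokenUsage:
    """Sketch of the value object this PR imports from ..llm.

    Only total_tokens and to_dict() are exercised in the diff; the
    individual prompt/completion counters are assumed fields.
    """
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

    def to_dict(self) -> dict:
        # Serialize for embedding in trace-event metadata.
        return asdict(self)
```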
```diff
@@ -213,8 +214,8 @@ def _execute_with_model(
         full_prompt = f"{context}\n\n{prompt}"

         try:
-            # Execute with the selected model
-            response = llm_instance.get_response(
+            # Execute with the selected model, requesting token usage tracking
+            result = llm_instance.get_response(
                 prompt=full_prompt,
                 system_prompt=self._build_system_prompt(),
                 tools=tools,
```
@@ -225,16 +226,45 @@ def _execute_with_model( | |
| agent_role=self.role, | ||
| agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tools or [])], | ||
| execute_tool_fn=self.execute_tool if tools else None, | ||
| return_token_usage=True, # Request token usage information | ||
| **kwargs | ||
| ) | ||
|
|
||
| # Extract response and token usage | ||
| if isinstance(result, tuple): | ||
| response, token_usage = result | ||
| else: | ||
| # Fallback for backward compatibility | ||
| response = result | ||
| token_usage = TokenUsage() | ||
|
|
||
| # Update usage statistics | ||
| self.model_usage_stats[model_name]['calls'] += 1 | ||
| self.model_usage_stats[model_name]['tokens'] += token_usage.total_tokens | ||
|
|
||
| # Calculate and store cost estimate | ||
| model_info = self.model_router.get_model_info(model_name) | ||
| if model_info and token_usage.total_tokens > 0: | ||
| cost = self.model_router.estimate_cost(model_name, token_usage.total_tokens) | ||
| self.model_usage_stats[model_name]['cost'] += cost | ||
|
> **Contributor review comment on lines +245 to +249:** Emit per-decision cost in the trace event. As written, the `estimated_cost` field in the trace metadata reports the model's cumulative running total rather than the cost of this one routing decision. Suggested patch:
>
> ```diff
> -            model_info = self.model_router.get_model_info(model_name)
> -            if model_info and token_usage.total_tokens > 0:
> -                cost = self.model_router.estimate_cost(model_name, token_usage.total_tokens)
> +            cost = 0.0
> +            model_info = self.model_router.get_model_info(model_name)
> +            if model_info and token_usage.total_tokens > 0:
> +                cost = self.model_router.estimate_cost(model_name, token_usage.total_tokens)
>                  self.model_usage_stats[model_name]['cost'] += cost
> ```
>
> and in the trace metadata:
>
> ```diff
> -                        'estimated_cost': self.model_usage_stats[model_name]['cost'],
> +                        'estimated_cost': cost,
> +                        'cumulative_estimated_cost': self.model_usage_stats[model_name]['cost'],
> ```
>
> Also applies to: 251-263
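The distinction the reviewer is drawing can be shown with a self-contained sketch; the flat per-token rates below are hypothetical stand-ins for `ModelRouter.estimate_cost`, and the model names are illustrative:

```python
from collections import defaultdict

# Hypothetical flat per-token rates; in the real code these come from ModelRouter.
RATES = {"cheap-model": 0.5e-6, "premium-model": 5e-6}

usage_stats = defaultdict(lambda: {"calls": 0, "tokens": 0, "cost": 0.0})

def record_decision(model_name: str, total_tokens: int) -> dict:
    stats = usage_stats[model_name]
    stats["calls"] += 1
    stats["tokens"] += total_tokens

    cost = 0.0  # initialized up front so an unknown model still yields a valid event
    if model_name in RATES and total_tokens > 0:
        cost = RATES[model_name] * total_tokens
        stats["cost"] += cost

    # Per the review: emit the per-decision figure and the running total separately.
    return {
        "estimated_cost": cost,
        "cumulative_estimated_cost": stats["cost"],
    }

print(record_decision("cheap-model", 1_000))  # both figures equal on the first call
print(record_decision("cheap-model", 3_000))  # cumulative now exceeds per-decision
```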
```diff
-            # TODO: Implement token tracking when LLM.get_response() is updated to return token usage
-            # The LLM response currently returns only text, but litellm provides usage info in:
-            # response.get("usage") with prompt_tokens, completion_tokens, and total_tokens
-            # This would require modifying the LLM class to return both text and metadata
+            # Emit token usage via trace system for observability
+            try:
+                trace_emitter = get_default_emitter()
+                trace_emitter.output(
+                    content="RouterAgent routing decision completed",
+                    agent_name=self.name,
+                    metadata={
+                        'selected_model': model_name,
+                        'routing_strategy': self.routing_strategy,
+                        'token_usage': token_usage.to_dict(),
+                        'estimated_cost': self.model_usage_stats[model_name]['cost'],
+                        'total_calls': self.model_usage_stats[model_name]['calls'],
+                    }
+                )
+            except Exception as trace_error:
+                # Don't fail the request if tracing fails
+                logger.debug(f"Failed to emit trace event: {trace_error}")

             return response
```
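The tuple check earlier in this hunk keeps the agent compatible with `LLM` implementations that predate `return_token_usage`. A stub-based sketch of that contract, reusing the `TokenUsage` sketch from above (the stub classes are illustrations, not the project's real `LLM`):

```python
class LegacyLLM:
    """Pre-PR behavior: get_response returns only the text."""
    def get_response(self, prompt, **kwargs):
        return "routed answer"

class TrackingLLM:
    """Post-PR behavior: returns (text, TokenUsage) when tracking is requested."""
    def get_response(self, prompt, **kwargs):
        usage = TokenUsage(prompt_tokens=12, completion_tokens=8, total_tokens=20)
        return "routed answer", usage

def call_with_tracking(llm):
    result = llm.get_response("hello", return_token_usage=True)
    if isinstance(result, tuple):            # new contract: (text, usage)
        response, token_usage = result
    else:                                    # backward compatibility: bare text
        response, token_usage = result, TokenUsage()
    return response, token_usage

for llm in (LegacyLLM(), TrackingLLM()):
    text, usage = call_with_tracking(llm)
    print(text, usage.total_tokens)          # prints 0, then 20
```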
> **QualityAgent review comment (requirement gap):** 2. RouterAgent `token_usage` not persisted. The per-call `token_usage` feeds the in-memory `model_usage_stats` and the trace event, but nothing in the diff writes it to durable storage.
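One way to close the persistence gap flagged above (a hypothetical remediation; the PR itself stops at in-memory stats and trace events) is to snapshot `model_usage_stats` to disk after each update:

```python
import json
from pathlib import Path

STATS_PATH = Path("router_usage_stats.json")  # hypothetical location

def persist_usage_stats(stats: dict) -> None:
    """Write the per-model usage/cost stats to a JSON snapshot."""
    STATS_PATH.write_text(json.dumps(stats, indent=2, sort_keys=True))

def load_usage_stats() -> dict:
    """Restore stats on startup so cumulative costs survive restarts."""
    return json.loads(STATS_PATH.read_text()) if STATS_PATH.exists() else {}
```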