29 changes: 29 additions & 0 deletions src/praisonai/praisonai/cli/main.py
@@ -4,6 +4,7 @@
import argparse
import warnings
import os
import json

# Suppress Pydantic serialization warnings from LiteLLM BEFORE any imports
# These warnings occur when LiteLLM's response objects have field mismatches
@@ -963,6 +964,7 @@ def parse_args(self):

        # Metrics - token/cost tracking
        parser.add_argument("--metrics", action="store_true", help="Display token usage and cost metrics")
        parser.add_argument("--metrics-json", action="store_true", help="Output structured cost and token data as JSON")
Copilot AI, Apr 10, 2026:

Add a unit test for the new --metrics-json flag parsing and output shape (similar to existing --metrics tests) to prevent regressions, especially around key names and interactions with other output modes.

        # Image Description (Vision) - analyze existing images
        parser.add_argument("--image", type=str, help="Path to image file for vision-based description/analysis")
@@ -4753,6 +4755,33 @@ def level_based_approve(function_name, arguments, risk_level):
        if hasattr(self, 'args') and getattr(self.args, 'save', False):
            self._save_output(prompt, result)

        # Metrics JSON - Output structured cost data
        if hasattr(self, 'args') and getattr(self.args, 'metrics_json', False):
            try:
                from .features.metrics import MetricsHandler
                _mh = MetricsHandler(verbose=getattr(self.args, 'verbose', False))
                # Extract from final_agent if it was used, otherwise from the original agent
                active_agent = final_agent if 'final_agent' in locals() else agent
                agent_metrics = _mh.extract_metrics_from_agent(active_agent)
                # Resolve model name: prefer what the agent reported, fall back to config
                model_name = agent_metrics.get('model')
                if not model_name:
                    model_name = agent_config.get('llm', 'unknown')
                    if isinstance(model_name, dict):
                        model_name = model_name.get('model', 'unknown')
                metrics_out = {
                    "cost_usd": agent_metrics.get('cost', 0.0),
                    "tokens_in": agent_metrics.get('prompt_tokens', 0),
                    "tokens_out": agent_metrics.get('completion_tokens', 0),
                    "model": model_name or 'unknown',
                    "request_count": agent_metrics.get('llm_calls', 0),
                }
                print(json.dumps(metrics_out))
            except Exception as exc:
                print(f"[metrics-json] warning: could not extract metrics: {exc}", file=sys.stderr)
                # CRITICAL: Always emit JSON when --metrics-json is set, even when extraction fails
                print(json.dumps({"cost_usd": 0.0, "tokens_in": 0, "tokens_out": 0, "model": "unknown", "request_count": 0}))

        return result
    elif CREWAI_AVAILABLE:
        from crewai import Agent, Task, Crew
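For context on how the emitted line might be consumed, here is a minimal downstream sketch. It assumes a `praisonai` executable on PATH, that a prompt is passed as the positional argument, and that the metrics payload is the last line of stdout; the invocation shape is inferred from the diff, not confirmed against the full CLI.

import json
import subprocess

# Run the CLI with --metrics-json; the structured payload is printed to stdout
# as a single JSON line after the normal result output.
proc = subprocess.run(
    ["praisonai", "Summarise this repository", "--metrics-json"],
    capture_output=True,
    text=True,
    check=True,
)
metrics = json.loads(proc.stdout.strip().splitlines()[-1])
print(f"{metrics['model']}: ${metrics['cost_usd']:.6f} "
      f"({metrics['tokens_in']} in / {metrics['tokens_out']} out, "
      f"{metrics['request_count']} calls)")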
155 changes: 155 additions & 0 deletions src/praisonai/tests/unit/test_metrics_json_flag.py
@@ -0,0 +1,155 @@
#!/usr/bin/env python3
"""
Unit tests for --metrics-json CLI flag.

These tests are pure unit tests: no network, no LLM calls, no side effects.
They validate argument parsing and the JSON output assembly logic in
handle_direct_prompt().
"""

import argparse
import json
import pytest


def _get_metrics_json_parser():
    """Minimal argument parser that mirrors the relevant CLI args."""
    parser = argparse.ArgumentParser(description="praisonAI CLI")
    parser.add_argument("--metrics", action="store_true")
    parser.add_argument("--metrics-json", action="store_true")
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("command", nargs="?")
    return parser


def _build_metrics_out(agent_metrics, agent_config):
    """
    Mirror of the JSON-assembly block in handle_direct_prompt().

    Centralized here so tests can validate logic without importing main.py.
    """
    model_name = agent_metrics.get("model")
    if not model_name:
        model_name = agent_config.get("llm", "unknown")
        if isinstance(model_name, dict):
            model_name = model_name.get("model", "unknown")
    return {
        "cost_usd": agent_metrics.get("cost", 0.0),
        "tokens_in": agent_metrics.get("prompt_tokens", 0),
        "tokens_out": agent_metrics.get("completion_tokens", 0),
        "model": model_name or "unknown",
        "request_count": agent_metrics.get("llm_calls", 0),
    }
Contributor comment on lines +15 to +42:

⚠️ Potential issue | 🟠 Major

Test the real CLI implementation path, not local replicas

Line 15 and Line 25 reimplement parser/output logic inside the test file. That can pass while src/praisonai/praisonai/cli/main.py regresses (including --metrics-json behavior on exception/final-agent paths). Please switch these assertions to exercise the real parser and handle_direct_prompt() output with monkeypatched dependencies/captured stdout.

Also applies to: 79-137

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/praisonai/tests/unit/test_metrics_json_flag.py` around lines 15-42, the
test currently reimplements CLI parsing and JSON assembly in
_get_metrics_json_parser and _build_metrics_out; replace those local replicas by
invoking the real CLI parser and logic: import and call the parser/entrypoint
from src/praisonai/praisonai/cli/main.py (the real CLI parse function or the
main entry that wires args) and call handle_direct_prompt() (monkeypatching
external dependencies such as the LLM client, network I/O, and timing) so the
test asserts the actual --metrics-json behavior on success, exception, and
final-agent paths; capture stdout/stderr to parse the JSON output, and
remove or redirect uses of _get_metrics_json_parser and _build_metrics_out so the
tests validate the real implementation instead of duplicated logic.
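As a rough illustration of the pattern the reviewer is asking for, the sketch below asserts against captured stdout via pytest's capsys instead of a locally rebuilt dict. `emit_metrics_json` is a hypothetical stand-in, not part of the PR; in the real suite it would be replaced by the handle_direct_prompt() path with its LLM dependencies monkeypatched.

import json

def emit_metrics_json(agent_metrics, agent_config):
    # Hypothetical stand-in for the emission block inside handle_direct_prompt().
    model_name = agent_metrics.get("model")
    if not model_name:
        model_name = agent_config.get("llm", "unknown")
        if isinstance(model_name, dict):
            model_name = model_name.get("model", "unknown")
    print(json.dumps({
        "cost_usd": agent_metrics.get("cost", 0.0),
        "tokens_in": agent_metrics.get("prompt_tokens", 0),
        "tokens_out": agent_metrics.get("completion_tokens", 0),
        "model": model_name or "unknown",
        "request_count": agent_metrics.get("llm_calls", 0),
    }))

def test_emitted_line_is_valid_json(capsys):
    # Assert against what is actually printed, not a re-derived dict.
    emit_metrics_json({"prompt_tokens": 3, "completion_tokens": 1, "cost": 0.0}, {})
    payload = json.loads(capsys.readouterr().out.strip())
    assert payload["tokens_in"] == 3
    assert payload["tokens_out"] == 1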



class TestMetricsJsonArgParsing:
    """Argument-parsing behaviour for --metrics-json."""

    def test_flag_stored_as_metrics_json(self):
        """--metrics-json is stored as metrics_json (underscore) on the namespace."""
        args = _get_metrics_json_parser().parse_args(["--metrics-json", "task"])
        assert args.metrics_json is True
        assert args.command == "task"

    def test_default_false_when_absent(self):
        """--metrics-json defaults to False when not supplied."""
        args = _get_metrics_json_parser().parse_args(["task"])
        assert args.metrics_json is False

    def test_independent_from_metrics_flag(self):
        """--metrics and --metrics-json are independent boolean flags."""
        args = _get_metrics_json_parser().parse_args(["--metrics", "task"])
        assert args.metrics is True
        assert args.metrics_json is False

        args2 = _get_metrics_json_parser().parse_args(["--metrics-json", "task"])
        assert args2.metrics is False
        assert args2.metrics_json is True

    def test_both_flags_together(self):
        """Both --metrics and --metrics-json can be set simultaneously."""
        args = _get_metrics_json_parser().parse_args(["--metrics", "--metrics-json", "task"])
        assert args.metrics is True
        assert args.metrics_json is True


class TestMetricsJsonOutput:
    """JSON assembly logic for --metrics-json output."""

    def test_output_has_required_keys(self):
        """Emitted JSON contains exactly the five required keys."""
        payload = _build_metrics_out(
            {"prompt_tokens": 42, "completion_tokens": 17, "cost": 0.000123, "model": "test-model"},
            {"llm": "test-model"},
        )
        assert set(payload.keys()) == {"cost_usd", "tokens_in", "tokens_out", "model", "request_count"}

    def test_tokens_mapped_from_correct_keys(self):
        """prompt_tokens → tokens_in, completion_tokens → tokens_out."""
        payload = _build_metrics_out(
            {"prompt_tokens": 42, "completion_tokens": 17},
            {},
        )
        assert payload["tokens_in"] == 42
        assert payload["tokens_out"] == 17

    def test_cost_preserved(self):
        """cost value is preserved as cost_usd."""
        payload = _build_metrics_out({"cost": 0.000123}, {})
        assert abs(payload["cost_usd"] - 0.000123) < 1e-9

    def test_request_count_defaults_to_zero(self):
        """request_count defaults to 0 when llm_calls is absent."""
        payload = _build_metrics_out({}, {})
        assert payload["request_count"] == 0

    def test_request_count_from_llm_calls(self):
        """request_count is taken from llm_calls when present."""
        payload = _build_metrics_out({"llm_calls": 3}, {})
        assert payload["request_count"] == 3

    def test_model_from_agent_metrics(self):
        """Model is taken from agent_metrics['model'] when available."""
        payload = _build_metrics_out({"model": "test-model-from-agent"}, {"llm": "config-model"})
        assert payload["model"] == "test-model-from-agent"

    def test_model_fallback_to_config_string(self):
        """Falls back to agent_config['llm'] string when agent_metrics has no model."""
        payload = _build_metrics_out({}, {"llm": "config-llm-string"})
        assert payload["model"] == "config-llm-string"

    def test_model_fallback_to_config_dict(self):
        """When agent_config['llm'] is a dict, extracts the nested 'model' key."""
        payload = _build_metrics_out({}, {"llm": {"model": "nested-model", "temperature": 0.5}})
        assert payload["model"] == "nested-model"

    def test_model_unknown_when_no_info(self):
        """Falls back to 'unknown' when neither agent nor config provides a model."""
        payload = _build_metrics_out({}, {})
        assert payload["model"] == "unknown"

    def test_output_is_json_serialisable(self):
        """The output dict round-trips through JSON without error."""
        payload = _build_metrics_out(
            {"prompt_tokens": 10, "completion_tokens": 5, "cost": 0.00001},
            {"llm": "test-model"},
        )
        assert json.loads(json.dumps(payload)) == payload

    def test_regression_wrong_keys_produce_zeros(self):
        """
        Regression guard: the old code used 'input_tokens'/'output_tokens', which
        are never populated by MetricsHandler.extract_metrics_from_agent().
        Those keys should always be absent; the correct keys are
        prompt_tokens/completion_tokens.
        """
        agent_metrics = {"prompt_tokens": 100, "completion_tokens": 50}
        # Wrong keys (old bug):
        assert agent_metrics.get("input_tokens", 0) == 0
        assert agent_metrics.get("output_tokens", 0) == 0
        # Correct keys (fixed):
        assert agent_metrics.get("prompt_tokens", 0) == 100
        assert agent_metrics.get("completion_tokens", 0) == 50


if __name__ == "__main__":
    pytest.main([__file__, "-v"])