diff --git a/pyproject.toml b/pyproject.toml
index 4659b88..a3b30cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "agent-eval"
-version = "0.1.45"
+version = "0.1.46"
 description = "Agent evaluation toolkit"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src/agenteval/local_cost.py b/src/agenteval/local_cost.py
index 3c50554..5b7cafd 100644
--- a/src/agenteval/local_cost.py
+++ b/src/agenteval/local_cost.py
@@ -1,4 +1,5 @@
 from litellm.utils import CostPerToken
+from pydantic import BaseModel
 
 # even where these exist in https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
 # calling cost_per_token does not return a cost, perhaps due to the associated provider
@@ -33,3 +34,23 @@
         input_cost_per_token=1.8e-07, output_cost_per_token=1.8e-07
     ),
 }
+
+
+class CostPerTokenWithCache(BaseModel):
+    input_cost_per_token: float
+    output_cost_per_token: float
+    cache_read_input_token_cost: float
+
+
+# Like CUSTOM_PRICING, but for models that also have a cache read discount.
+# cost_per_token with usage_object doesn't work for these models in litellm 1.75.8,
+# so costs are computed manually in compute_model_cost.
+# key represents model name as found in inspect model_usage
+CUSTOM_PRICING_WITH_CACHE = {
+    # costs from https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart
+    "moonshotai/kimi-k2.5-0127": CostPerTokenWithCache(
+        input_cost_per_token=6e-07,
+        output_cost_per_token=3e-06,
+        cache_read_input_token_cost=1e-07,
+    ),
+}
diff --git a/src/agenteval/log.py b/src/agenteval/log.py
index 28fba7d..4f9cae5 100644
--- a/src/agenteval/log.py
+++ b/src/agenteval/log.py
@@ -15,7 +15,7 @@
 from litellm.types.utils import PromptTokensDetailsWrapper, Usage
 from pydantic import BaseModel
 
-from .local_cost import CUSTOM_PRICING
+from .local_cost import CUSTOM_PRICING, CUSTOM_PRICING_WITH_CACHE
 
 logger = getLogger(__name__)
 
@@ -113,6 +113,17 @@ def compute_model_cost(model_usages: list[ModelUsageWithName]) -> float | None:
                 custom_cost_per_token=CUSTOM_PRICING[model_usage.model],
             )
+        elif model_usage.model in CUSTOM_PRICING_WITH_CACHE:
+            # Cache-read prompt tokens are billed at the discounted rate; the
+            # remaining (uncached) prompt tokens are billed at the full input rate.
+            pricing = CUSTOM_PRICING_WITH_CACHE[model_usage.model]
+            cache_read_tokens = model_usage.usage.input_tokens_cache_read or 0
+            text_tokens = input_tokens - cache_read_tokens
+            prompt_cost = (
+                text_tokens * pricing.input_cost_per_token
+                + cache_read_tokens * pricing.cache_read_input_token_cost
+            )
+            completion_cost = output_tokens * pricing.output_cost_per_token
+
         else:
             total_tokens = model_usage.usage.total_tokens
 
 