@@ -107,6 +107,64 @@ async def _agent_fn(
107107 )
108108 if model_name :
109109 setattr (turn , "model_name" , model_name )
110+ # Surface usage + cache telemetry on the returned MessageTurn so the
111+ # LifeOpsBench runner can populate TurnResult.cache_read_input_tokens
112+ # / cache_creation_input_tokens / cache_hit_pct via getattr(). The
113+ # hermes-agent OpenAI-compat surface exposes:
114+ # * OpenAI / Cerebras shape: usage.prompt_tokens_details.cached_tokens
115+ # Anthropic-shaped responses (cache_read_input_tokens /
116+ # cache_creation_input_tokens) are forwarded verbatim when present.
117+ usage = resp .params .get ("usage" ) if isinstance (resp .params , dict ) else None
118+ if isinstance (usage , dict ):
119+ _attach_usage_cache_fields (turn , usage )
110120 return turn
111121
112122 return _agent_fn
123+
124+
def _attach_usage_cache_fields(turn: Any, usage: dict[str, Any]) -> None:
    """Copy token-usage and prompt-cache counters from ``usage`` onto ``turn``.

    Both OpenAI / Cerebras-shaped and Anthropic-shaped usage dicts are
    understood. For each counter the OpenAI / Cerebras key is tried first and
    the Anthropic key is used when the first is missing or unusable:

    * ``input_tokens``  <- ``prompt_tokens``, else ``input_tokens``
    * ``output_tokens`` <- ``completion_tokens``, else ``output_tokens``
    * ``cache_read_input_tokens`` <- ``prompt_tokens_details.cached_tokens``,
      else ``cache_read_input_tokens``
    * ``cache_creation_input_tokens`` <- ``cache_creation_input_tokens``

    Values are written with ``setattr`` so the LifeOpsBench runner can pick
    them up with ``getattr``. ``input_tokens`` / ``output_tokens`` are only
    set when a usable number was found. The two cache fields are always set
    and stay ``None`` when the provider does not report them — per AGENTS.md
    Cmd #8, no silent ``0`` fallback. ``cache_supported`` is always ``True``.

    A value is usable when it is a finite ``int`` or ``float`` (floats are
    truncated by ``int()``). Booleans, NaN, infinities and every other type
    are treated as "not reported", so malformed telemetry never raises.

    Args:
        turn: Object to annotate; mutated in place.
        usage: The ``usage`` mapping taken from the provider response.
    """

    def _token_count(raw: Any) -> "int | None":
        """Return ``raw`` as an int token count, or ``None`` if unusable."""
        # bool is a subclass of int, but True/False is never a real count.
        if isinstance(raw, bool) or not isinstance(raw, (int, float)):
            return None
        try:
            return int(raw)
        except (ValueError, OverflowError):
            # int(nan) raises ValueError, int(+-inf) raises OverflowError.
            return None

    # OpenAI / Cerebras keys first, Anthropic keys as the fallback.
    input_tokens = _token_count(usage.get("prompt_tokens"))
    if input_tokens is None:
        input_tokens = _token_count(usage.get("input_tokens"))
    output_tokens = _token_count(usage.get("completion_tokens"))
    if output_tokens is None:
        output_tokens = _token_count(usage.get("output_tokens"))

    if input_tokens is not None:
        setattr(turn, "input_tokens", input_tokens)
    if output_tokens is not None:
        setattr(turn, "output_tokens", output_tokens)

    # OpenAI / Cerebras: usage.prompt_tokens_details.cached_tokens
    prompt_details = usage.get("prompt_tokens_details")
    cache_read = (
        _token_count(prompt_details.get("cached_tokens"))
        if isinstance(prompt_details, dict)
        else None
    )
    # Anthropic: cache_read_input_tokens at the usage root. Falling back on
    # "unusable" rather than "absent" mirrors the prompt/completion handling.
    if cache_read is None:
        cache_read = _token_count(usage.get("cache_read_input_tokens"))
    cache_creation = _token_count(usage.get("cache_creation_input_tokens"))

    setattr(turn, "cache_read_input_tokens", cache_read)
    setattr(turn, "cache_creation_input_tokens", cache_creation)
    # Hermes-template servers fronting Cerebras gpt-oss-120b or Anthropic
    # support prompt caching; cache_supported is a hard-true here.
    setattr(turn, "cache_supported", True)
0 commit comments