Commit 935c279

RKest authored and copybara-github committed
feat(otel): add minimal generate_content {model.name} spans and logs for non-Gemini inference and when the opentelemetry-instrumentation-google-genai dependency is missing
Co-authored-by: Max Ind <maxind@google.com>
PiperOrigin-RevId: 859667045
1 parent 82fa10b commit 935c279

File tree

5 files changed, +420 −21 lines changed


pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -126,6 +126,7 @@ test = [
   "litellm>=1.75.5, <1.80.17", # For LiteLLM tests
   "llama-index-readers-file>=0.4.0", # For retrieval tests
   "openai>=1.100.2", # For LiteLLM
+  "opentelemetry-instrumentation-google-genai>=0.3b0, <1.0.0",
   "pytest-asyncio>=0.25.0",
   "pytest-mock>=3.14.0",
   "pytest-xdist>=3.6.1",

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 9 additions & 3 deletions
@@ -41,6 +41,7 @@
 from ...models.base_llm_connection import BaseLlmConnection
 from ...models.llm_request import LlmRequest
 from ...models.llm_response import LlmResponse
+from ...telemetry import tracing
 from ...telemetry.tracing import trace_call_llm
 from ...telemetry.tracing import trace_send_data
 from ...telemetry.tracing import tracer
@@ -771,7 +772,7 @@ async def _call_llm_async(
     llm = self.__get_llm(invocation_context)

     async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
-      with tracer.start_as_current_span('call_llm'):
+      with tracer.start_as_current_span('call_llm') as span:
         if invocation_context.run_config.support_cfc:
           invocation_context.live_request_queue = LiveRequestQueue()
           responses_generator = self.run_live(invocation_context)
@@ -822,6 +823,7 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
               model_response_event.id,
               llm_request,
               llm_response,
+              span,
           )
           # Runs after_model_callback if it exists.
           if altered_llm_response := await self._handle_after_model_callback(
@@ -1050,8 +1052,12 @@ async def _run_on_model_error_callbacks(

   try:
     async with Aclosing(response_generator) as agen:
-      async for response in agen:
-        yield response
+      with tracing.use_generate_content_span(
+          llm_request, invocation_context, model_response_event
+      ) as span:
+        async for llm_response in agen:
+          tracing.trace_generate_content_result(span, llm_response)
+          yield llm_response
   except Exception as model_error:
     callback_context = CallbackContext(
         invocation_context, event_actions=model_response_event.actions
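
The last hunk replaces a bare passthrough loop with a span that stays open for the whole stream and is annotated as each response arrives. A self-contained sketch of that control flow, with stand-in types instead of ADK's (use_span and the dict "span" are hypothetical placeholders for tracing.use_generate_content_span and an OTel Span, not the real API):

import asyncio
from contextlib import contextmanager
from typing import AsyncGenerator, Iterator

@contextmanager
def use_span(name: str) -> Iterator[dict]:
  # The span opens before the first chunk and closes only once the
  # generator is exhausted (or raises), mirroring the hunk above.
  span = {'name': name, 'attributes': {}}
  try:
    yield span
  finally:
    span['attributes']['ended'] = True

async def stream() -> AsyncGenerator[str, None]:
  for chunk in ('partial', 'final'):
    yield chunk

async def main() -> None:
  with use_span('generate_content demo-model') as span:
    async for response in stream():
      # Stand-in for tracing.trace_generate_content_result(span, response).
      span['attributes']['last_response'] = response
  print(span)

asyncio.run(main())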

src/google/adk/telemetry/tracing.py

Lines changed: 211 additions & 15 deletions
@@ -23,46 +23,77 @@

 from __future__ import annotations

+from collections.abc import Iterator
+from collections.abc import Mapping
+from contextlib import contextmanager
 import json
+import logging
 import os
 from typing import Any
-from typing import Optional
 from typing import TYPE_CHECKING

 from google.genai import types
+from google.genai.models import Models
+from opentelemetry import _logs
 from opentelemetry import trace
+from opentelemetry._logs import LogRecord
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_AGENT_DESCRIPTION
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_AGENT_NAME
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_CONVERSATION_ID
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_OPERATION_NAME
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_REQUEST_MODEL
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_FINISH_REASONS
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_SYSTEM
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_TOOL_CALL_ID
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_TOOL_DESCRIPTION
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_TOOL_NAME
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_TOOL_TYPE
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_USAGE_INPUT_TOKENS
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_USAGE_OUTPUT_TOKENS
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GenAiSystemValues
+from opentelemetry.semconv.schemas import Schemas
+from opentelemetry.trace import Span
+from opentelemetry.util.types import AnyValue
+from opentelemetry.util.types import AttributeValue
+from pydantic import BaseModel

 from .. import version
-from ..events.event import Event
+from ..utils.model_name_utils import is_gemini_model

 # By default some ADK spans include attributes with potential PII data.
 # This env, when set to false, allows to disable populating those attributes.
 ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS = 'ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS'
-# TODO: Replace with constant from opentelemetry.semconv when it reaches version 1.37 in g3.
-GEN_AI_AGENT_DESCRIPTION = 'gen_ai.agent.description'
-GEN_AI_AGENT_NAME = 'gen_ai.agent.name'
-GEN_AI_CONVERSATION_ID = 'gen_ai.conversation.id'
-GEN_AI_OPERATION_NAME = 'gen_ai.operation.name'
-GEN_AI_TOOL_CALL_ID = 'gen_ai.tool.call.id'
-GEN_AI_TOOL_DESCRIPTION = 'gen_ai.tool.description'
-GEN_AI_TOOL_NAME = 'gen_ai.tool.name'
-GEN_AI_TOOL_TYPE = 'gen_ai.tool.type'
+
+# Standard OTEL env variable to enable logging of prompt/response content.
+OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT = (
+    'OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT'
+)
+
+USER_CONTENT_ELIDED = '<elided>'

 # Needed to avoid circular imports
 if TYPE_CHECKING:
   from ..agents.base_agent import BaseAgent
   from ..agents.invocation_context import InvocationContext
+  from ..events.event import Event
   from ..models.llm_request import LlmRequest
   from ..models.llm_response import LlmResponse
   from ..tools.base_tool import BaseTool

 tracer = trace.get_tracer(
     instrumenting_module_name='gcp.vertex.agent',
     instrumenting_library_version=version.__version__,
-    # TODO: Replace with constant from opentelemetry.semconv when it reaches version 1.37 in g3.
-    schema_url='https://opentelemetry.io/schemas/1.37.0',
+    schema_url=Schemas.V1_36_0.value,
+)
+
+otel_logger = _logs.get_logger(
+    instrumenting_module_name='gcp.vertex.agent',
+    instrumenting_library_version=version.__version__,
+    schema_url=Schemas.V1_36_0.value,
 )

+logger = logging.getLogger('google_adk.' + __name__)
+

 def _safe_json_serialize(obj) -> str:
   """Convert any Python object to a JSON-serializable type or string.
@@ -119,7 +150,7 @@ def trace_agent_invocation(
 def trace_tool_call(
     tool: BaseTool,
     args: dict[str, Any],
-    function_response_event: Optional[Event],
+    function_response_event: Event | None,
 ):
   """Traces tool call.

@@ -234,6 +265,7 @@ def trace_call_llm(
     event_id: str,
     llm_request: LlmRequest,
     llm_response: LlmResponse,
+    span: Span | None = None,
 ):
   """Traces a call to the LLM.

@@ -246,7 +278,7 @@ def trace_call_llm(
     llm_request: The LLM request object.
     llm_response: The LLM response object.
   """
-  span = trace.get_current_span()
+  span = span or trace.get_current_span()
   # Special standard OpenTelemetry GenAI attributes that indicate
   # that this is a span related to a Generative AI system.
   span.set_attribute('gen_ai.system', 'gcp.vertex.agent')
@@ -390,3 +422,167 @@ def _should_add_request_response_to_spans() -> bool:
       ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS, 'true'
   ).lower() in ('false', '0')
   return not disabled_via_env_var
+
+
+@contextmanager
+def use_generate_content_span(
+    llm_request: LlmRequest,
+    invocation_context: InvocationContext,
+    model_response_event: Event,
+) -> Iterator[Span | None]:
+  """Context manager encompassing the `generate_content {model.name}` span.
+
+  When an external inference instrumentation library is installed (e.g.
+  opentelemetry-instrumentation-google-genai), span creation is delegated
+  to that library.
+  """
+
+  common_attributes = {
+      GEN_AI_CONVERSATION_ID: invocation_context.session.id,
+      'gcp.vertex.agent.event_id': model_response_event.id,
+  }
+  if (
+      _is_gemini_agent(invocation_context.agent)
+      and _instrumented_with_opentelemetry_instrumentation_google_genai()
+  ):
+    yield None
+  else:
+    with _use_native_generate_content_span(
+        llm_request=llm_request,
+        common_attributes=common_attributes,
+    ) as span:
+      yield span
+
+
+def _should_log_prompt_response_content() -> bool:
+  return os.getenv(
+      OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, ''
+  ).lower() in ('1', 'true')
+
+
+def _serialize_content(content: types.ContentUnion) -> AnyValue:
+  if isinstance(content, BaseModel):
+    return content.model_dump()
+  if isinstance(content, str):
+    return content
+  if isinstance(content, list):
+    return [_serialize_content(part) for part in content]
+  return _safe_json_serialize(content)
+
+
+def _serialize_content_with_elision(
+    content: types.ContentUnion | None,
+) -> AnyValue:
+  if not _should_log_prompt_response_content():
+    return USER_CONTENT_ELIDED
+  if content is None:
+    return None
+  return _serialize_content(content)
+
+
+def _instrumented_with_opentelemetry_instrumentation_google_genai() -> bool:
+  # Walk the chain of __wrapped__ attributes that decorators leave behind,
+  # looking for a wrapper defined in the instrumentation package.
+  maybe_wrapped_function = Models.generate_content
+  while wrapped := getattr(maybe_wrapped_function, '__wrapped__', None):
+    if (
+        'opentelemetry/instrumentation/google_genai'
+        in maybe_wrapped_function.__code__.co_filename
+    ):
+      return True
+    maybe_wrapped_function = wrapped  # pyright: ignore[reportAny]
+
+  return False
+
+
+def _is_gemini_agent(agent: BaseAgent) -> bool:
+  from ..agents.llm_agent import LlmAgent
+
+  if not isinstance(agent, LlmAgent):
+    return False
+
+  if isinstance(agent.model, str):
+    return is_gemini_model(agent.model)
+
+  from ..models.google_llm import Gemini
+
+  return isinstance(agent.model, Gemini)
+
+
+@contextmanager
+def _use_native_generate_content_span(
+    llm_request: LlmRequest,
+    common_attributes: Mapping[str, AttributeValue],
+) -> Iterator[Span]:
+  with tracer.start_as_current_span(
+      f"generate_content {llm_request.model or ''}"
+  ) as span:
+    span.set_attribute(GEN_AI_SYSTEM, _guess_gemini_system_name())
+    span.set_attribute(GEN_AI_OPERATION_NAME, 'generate_content')
+    span.set_attribute(GEN_AI_REQUEST_MODEL, llm_request.model or '')
+    span.set_attributes(common_attributes)
+
+    otel_logger.emit(
+        LogRecord(
+            event_name='gen_ai.system.message',
+            body={
+                'content': _serialize_content_with_elision(
+                    llm_request.config.system_instruction
+                )
+            },
+            attributes={GEN_AI_SYSTEM: _guess_gemini_system_name()},
+        )
+    )
+
+    for content in llm_request.contents:
+      otel_logger.emit(
+          LogRecord(
+              event_name='gen_ai.user.message',
+              body={'content': _serialize_content_with_elision(content)},
+              attributes={GEN_AI_SYSTEM: _guess_gemini_system_name()},
+          )
+      )
+
+    yield span
+
+
+def trace_generate_content_result(span: Span | None, llm_response: LlmResponse):
+  """Traces the result of the inference in the generate_content span."""
+
+  if span is None:
+    return
+
+  if llm_response.partial:
+    return
+
+  if finish_reason := llm_response.finish_reason:
+    span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason.lower()])
+  if usage_metadata := llm_response.usage_metadata:
+    if usage_metadata.prompt_token_count is not None:
+      span.set_attribute(
+          GEN_AI_USAGE_INPUT_TOKENS, usage_metadata.prompt_token_count
+      )
+    if usage_metadata.candidates_token_count is not None:
+      span.set_attribute(
+          GEN_AI_USAGE_OUTPUT_TOKENS, usage_metadata.candidates_token_count
+      )
+
+  otel_logger.emit(
+      LogRecord(
+          event_name='gen_ai.choice',
+          body={
+              'content': _serialize_content_with_elision(llm_response.content),
+              'index': 0,  # ADK always returns a single candidate.
+          } | (
+              {'finish_reason': llm_response.finish_reason.value}
+              if llm_response.finish_reason is not None
+              else {}
+          ),
+          attributes={GEN_AI_SYSTEM: _guess_gemini_system_name()},
+      )
+  )
+
+
+def _guess_gemini_system_name() -> str:
+  return (
+      GenAiSystemValues.VERTEX_AI.name.lower()
+      if os.getenv('GOOGLE_GENAI_USE_VERTEXAI', '').lower() in ('true', '1')
+      else GenAiSystemValues.GEMINI.name.lower()
+  )
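
To actually see the new span and its gen_ai.* log events, an application needs both a tracer provider and a logger provider configured before the agent runs, plus the capture env variable if message bodies should not be elided. A minimal sketch using the standard opentelemetry-sdk console exporters (this wiring is illustrative and not part of the commit):

import os

from opentelemetry import _logs
from opentelemetry import trace
from opentelemetry.sdk._logs import LoggerProvider
from opentelemetry.sdk._logs.export import ConsoleLogExporter
from opentelemetry.sdk._logs.export import SimpleLogRecordProcessor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Opt in to capturing prompt/response bodies; otherwise log bodies are
# replaced with '<elided>' by _serialize_content_with_elision.
os.environ['OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT'] = 'true'

# Providers must be installed before ADK's module-level tracer and
# otel_logger are first used.
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(tracer_provider)

logger_provider = LoggerProvider()
logger_provider.add_log_record_processor(
    SimpleLogRecordProcessor(ConsoleLogExporter())
)
_logs.set_logger_provider(logger_provider)

With this in place, a run against a non-Gemini model (or without opentelemetry-instrumentation-google-genai installed) should print a generate_content {model} span plus gen_ai.system.message, gen_ai.user.message, and gen_ai.choice log records.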
