fix: Convert unsupported inline artifact MIME types to text in LoadArtifactsTool

GWeale · copybara-github · commit fdc98d5c927b · 2026-01-14T16:01:06.000-08:00
The LoadArtifactsTool now checks whether an artifact's inline data MIME type is supported by Gemini. If it is not, the tool converts the artifact into a text Part: text-like content is decoded as UTF-8, and other binary content is replaced by a short description of the artifact.

Close #4028

Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 856404510
diff --git a/src/google/adk/tools/load_artifacts_tool.py b/src/google/adk/tools/load_artifacts_tool.py
@@ -14,6 +14,8 @@
 
 from __future__ import annotations
 
+import base64
+import binascii
 import json
 import logging
 from typing import Any
@@ -24,13 +26,99 @@
 
 from .base_tool import BaseTool
 
+# MIME types Gemini accepts as inline data in requests. A type is accepted
+# when it starts with one of the prefixes or equals one of the exact types.
+_GEMINI_SUPPORTED_INLINE_MIME_PREFIXES = ('image/', 'audio/', 'video/')
+_GEMINI_SUPPORTED_INLINE_MIME_TYPES = frozenset(['application/pdf'])
+
+# Types outside text/* whose payload is still text. They are not accepted as
+# inline data, so their bytes are decoded and sent as a text Part instead.
+_TEXT_LIKE_MIME_TYPES = frozenset([
+    'application/csv',
+    'application/json',
+    'application/xml',
+])
+
 if TYPE_CHECKING:
   from ..models.llm_request import LlmRequest
   from .tool_context import ToolContext
 
 logger = logging.getLogger('google_adk.' + __name__)
 
 
+def _normalize_mime_type(mime_type: str | None) -> str | None:
+  """Returns the lowercased MIME type without parameters, or None if empty."""
+  # MIME type names are case-insensitive (RFC 2045), so compare in lowercase.
+  base_type = (mime_type or '').split(';', 1)[0].strip().lower()
+  return base_type or None
+
+
+def _is_inline_mime_type_supported(mime_type: str | None) -> bool:
+  """Reports whether Gemini accepts `mime_type` for inline request data."""
+  base_type = _normalize_mime_type(mime_type)
+  if not base_type:
+    return False
+  if base_type in _GEMINI_SUPPORTED_INLINE_MIME_TYPES:
+    return True
+  return base_type.startswith(_GEMINI_SUPPORTED_INLINE_MIME_PREFIXES)
+
+
+def _maybe_base64_to_bytes(data: str) -> bytes | None:
+  """Strictly decodes std or urlsafe base64; returns None if it is neither."""
+  for altchars in (None, b'-_'):  # Standard alphabet first, then URL-safe.
+    try:
+      # validate=True: never silently drop non-alphabet chars from plain text.
+      return base64.b64decode(data, altchars=altchars, validate=True)
+    except (binascii.Error, ValueError):
+      continue
+  return None
+
+
+def _as_safe_part_for_llm(
+    artifact: types.Part, artifact_name: str
+) -> types.Part:
+  """Returns `artifact` itself or a text Part that stands in for it.
+
+  Parts with no inline data, or whose MIME type Gemini accepts inline, are
+  returned as the same object. Anything else becomes a new text Part: the
+  content itself for text-like types or a non-base64 string payload, else a
+  bracketed note describing the artifact.
+  """
+  blob = artifact.inline_data
+  if blob is None or _is_inline_mime_type_supported(blob.mime_type):
+    return artifact
+
+  # Fall back to a generic binary type when none is declared.
+  mime_type = _normalize_mime_type(blob.mime_type) or 'application/octet-stream'
+  payload = blob.data
+  if payload is None:
+    note = (
+        f'[Artifact: {artifact_name}, type: {mime_type}. '
+        'No inline data was provided.]'
+    )
+    return types.Part.from_text(text=note)
+
+  if isinstance(payload, str):
+    raw = _maybe_base64_to_bytes(payload)
+    if raw is None:
+      # Not base64, so the string is passed along as the text itself.
+      return types.Part.from_text(text=payload)
+    payload = raw
+
+  if mime_type.startswith('text/') or mime_type in _TEXT_LIKE_MIME_TYPES:
+    # errors='replace' equals strict decoding for valid UTF-8 and swaps
+    # U+FFFD in for invalid sequences, so no separate strict attempt is made.
+    return types.Part.from_text(text=payload.decode('utf-8', errors='replace'))
+
+  size_kb = len(payload) / 1024
+  summary = (
+      f'[Binary artifact: {artifact_name}, '
+      f'type: {mime_type}, size: {size_kb:.1f} KB. '
+      'Content cannot be displayed inline.]'
+  )
+  return types.Part.from_text(text=summary)
+
+
 class LoadArtifactsTool(BaseTool):
   """A tool that loads the artifacts and adds them to the session."""
 
@@ -108,7 +196,8 @@ async def _append_artifacts_to_llm_request(
     if llm_request.contents and llm_request.contents[-1].parts:
       function_response = llm_request.contents[-1].parts[0].function_response
       if function_response and function_response.name == 'load_artifacts':
-        artifact_names = function_response.response['artifact_names']
+        response = function_response.response or {}
+        artifact_names = response.get('artifact_names', [])
         for artifact_name in artifact_names:
           # Try session-scoped first (default behavior)
           artifact = await tool_context.load_artifact(artifact_name)
@@ -122,14 +211,26 @@ async def _append_artifacts_to_llm_request(
           if artifact is None:
             logger.warning('Artifact "%s" not found, skipping', artifact_name)
             continue
+
+          artifact_part = _as_safe_part_for_llm(artifact, artifact_name)
+          if artifact_part is not artifact:
+            mime_type = (
+                artifact.inline_data.mime_type if artifact.inline_data else None
+            )
+            logger.debug(
+                'Converted artifact "%s" (mime_type=%s) to text Part',
+                artifact_name,
+                mime_type,
+            )
+
           llm_request.contents.append(
               types.Content(
                   role='user',
                   parts=[
                       types.Part.from_text(
                           text=f'Artifact {artifact_name} is:'
                       ),
-                      artifact,
+                      artifact_part,
                   ],
               )
           )
diff --git a/tests/unittests/tools/test_load_artifacts_tool.py b/tests/unittests/tools/test_load_artifacts_tool.py
@@ -0,0 +1,162 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+
+from google.adk.models.llm_request import LlmRequest
+from google.adk.tools.load_artifacts_tool import _maybe_base64_to_bytes
+from google.adk.tools.load_artifacts_tool import load_artifacts_tool
+from google.genai import types
+from pytest import mark
+
+
+class _StubToolContext:
+  """In-memory stand-in for ToolContext serving a fixed set of artifacts."""
+
+  def __init__(self, artifacts_by_name: dict[str, types.Part]):
+    self._artifacts_by_name = artifacts_by_name
+
+  async def list_artifacts(self) -> list[str]:
+    return [*self._artifacts_by_name]
+
+  async def load_artifact(self, name: str) -> types.Part | None:
+    return self._artifacts_by_name.get(name)
+
+
+@mark.asyncio
+async def test_load_artifacts_converts_unsupported_mime_to_text():
+  """An application/csv artifact is appended as a text part with its content."""
+  artifact_name = 'test.csv'
+  csv_bytes = b'col1,col2\n1,2\n'
+  artifact = types.Part(
+      inline_data=types.Blob(data=csv_bytes, mime_type='application/csv')
+  )
+
+  tool_context = _StubToolContext({artifact_name: artifact})
+  llm_request = LlmRequest(
+      contents=[
+          types.Content(
+              role='user',
+              parts=[
+                  types.Part(
+                      function_response=types.FunctionResponse(
+                          name='load_artifacts',
+                          response={'artifact_names': [artifact_name]},
+                      )
+                  )
+              ],
+          )
+      ]
+  )
+
+  await load_artifacts_tool.process_llm_request(
+      tool_context=tool_context, llm_request=llm_request
+  )
+  # parts[0] is the "Artifact <name> is:" label; parts[1] carries the content.
+  assert llm_request.contents[-1].parts[0].text == (
+      f'Artifact {artifact_name} is:'
+  )
+  artifact_part = llm_request.contents[-1].parts[1]
+  assert artifact_part.inline_data is None
+  assert artifact_part.text == csv_bytes.decode('utf-8')
+
+
+@mark.asyncio
+async def test_load_artifacts_converts_base64_unsupported_mime_to_text():
+  """A base64 string payload of an unsupported type is decoded into text."""
+  artifact_name = 'test.csv'
+  csv_bytes = b'col1,col2\n1,2\n'
+  csv_base64 = base64.b64encode(csv_bytes).decode('ascii')
+  artifact = types.Part(
+      inline_data=types.Blob(data=csv_base64, mime_type='application/csv')
+  )
+
+  tool_context = _StubToolContext({artifact_name: artifact})
+  llm_request = LlmRequest(
+      contents=[
+          types.Content(
+              role='user',
+              parts=[
+                  types.Part(
+                      function_response=types.FunctionResponse(
+                          name='load_artifacts',
+                          response={'artifact_names': [artifact_name]},
+                      )
+                  )
+              ],
+          )
+      ]
+  )
+
+  await load_artifacts_tool.process_llm_request(
+      tool_context=tool_context, llm_request=llm_request
+  )
+  # The base64 string is expected to come back as the original CSV text.
+  artifact_part = llm_request.contents[-1].parts[1]
+  assert artifact_part.inline_data is None
+  assert artifact_part.text == csv_bytes.decode('utf-8')
+
+
+@mark.asyncio
+async def test_load_artifacts_keeps_supported_mime_types():
+  """An application/pdf artifact keeps its inline data and MIME type."""
+  artifact_name = 'test.pdf'
+  artifact = types.Part(
+      inline_data=types.Blob(data=b'%PDF-1.4', mime_type='application/pdf')
+  )
+
+  tool_context = _StubToolContext({artifact_name: artifact})
+  llm_request = LlmRequest(
+      contents=[
+          types.Content(
+              role='user',
+              parts=[
+                  types.Part(
+                      function_response=types.FunctionResponse(
+                          name='load_artifacts',
+                          response={'artifact_names': [artifact_name]},
+                      )
+                  )
+              ],
+          )
+      ]
+  )
+
+  await load_artifacts_tool.process_llm_request(
+      tool_context=tool_context, llm_request=llm_request
+  )
+  # A supported type must pass through as inline data, not as text.
+  artifact_part = llm_request.contents[-1].parts[1]
+  assert artifact_part.inline_data is not None
+  assert artifact_part.inline_data.mime_type == 'application/pdf'
+
+
+def test_maybe_base64_to_bytes_decodes_standard_base64():
+  """A standard-alphabet base64 string round-trips to its original bytes."""
+  payload = b'hello world'
+  std_encoded = base64.b64encode(payload).decode('ascii')
+  assert _maybe_base64_to_bytes(std_encoded) == payload
+
+
+def test_maybe_base64_to_bytes_decodes_urlsafe_base64():
+  """A URL-safe base64 string round-trips to its original bytes."""
+  payload = b'\xfb\xff\xfe'  # encodes to '+//+' in std but '-__-' in urlsafe
+  urlsafe_encoded = base64.urlsafe_b64encode(payload).decode('ascii')
+  assert _maybe_base64_to_bytes(urlsafe_encoded) == payload
+
+
+def test_maybe_base64_to_bytes_returns_none_for_invalid():
+  """A string that is not decodable base64 yields None."""
+  # A lone character has incorrect padding (length must be a multiple of 4).
+  assert _maybe_base64_to_bytes('x') is None