forked from pydantic/pydantic-ai
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmessages.py
More file actions
2455 lines (1913 loc) · 95.4 KB
/
messages.py
File metadata and controls
2455 lines (1913 loc) · 95.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import annotations as _annotations
import base64
import hashlib
import mimetypes
import os
import re
from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Sequence
from dataclasses import KW_ONLY, dataclass, field, replace
from datetime import datetime
from mimetypes import MimeTypes
from os import PathLike
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, Any, Literal, TypeAlias, TypeGuard, cast, overload
from urllib.parse import urlparse
import pydantic
import pydantic_core
from genai_prices import calc_price, types as genai_types
from opentelemetry._logs import LogRecord
from opentelemetry.util.types import AnyValue
from pydantic.dataclasses import dataclass as pydantic_dataclass
from typing_extensions import TypeAliasType, deprecated
from . import _otel_messages, _utils
from ._utils import generate_tool_call_id as _generate_tool_call_id, now_utc as _now_utc
from .exceptions import UnexpectedModelBehavior
from .usage import RequestUsage
if TYPE_CHECKING:
from .models.instrumented import InstrumentationSettings
# Key used to wrap malformed tool-call arguments so that they can still be round-tripped
# through a model API without crashing.
# The specific string 'INVALID_JSON' is the value recommended by the Anthropic docs for
# this situation:
# https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview#handling-tool-use-errors
INVALID_JSON_KEY = 'INVALID_JSON'
# Private MIME registry used for all media type inference in this module, so that the
# extra registrations below do not touch the process-wide `mimetypes` database.
_mime_types = MimeTypes()

# Replicate what is being done in `mimetypes.init()`
_mime_types.read_windows_registry()
for file in mimetypes.knownfiles:
    if not os.path.isfile(file):
        continue
    _mime_types.read(file)  # pragma: lax no cover

# TODO check for added mimetypes in Python 3.11 when dropping support for Python 3.10:
# Each entry is `(media_type, extension)`; entries are registered in the order listed.
_EXTRA_MIME_TYPES: tuple[tuple[str, str], ...] = (
    # Document types
    ('application/rtf', '.rtf'),
    ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', '.xlsx'),
    ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', '.docx'),
    ('text/markdown', '.mdx'),
    ('text/markdown', '.md'),
    ('text/x-asciidoc', '.asciidoc'),
    # Image types
    ('image/webp', '.webp'),
    # Video types
    ('video/3gpp', '.three_gp'),
    ('video/x-matroska', '.mkv'),
    ('video/x-ms-wmv', '.wmv'),
    ('video/x-flv', '.flv'),
    # Audio types
    # NOTE: aac is platform specific (linux: audio/x-aac, macos: audio/aac) but x-aac is deprecated
    # https://mimetype.io/audio/aac
    ('audio/aac', '.aac'),
    ('audio/aiff', '.aiff'),
    ('audio/flac', '.flac'),
    ('audio/ogg', '.oga'),
    ('audio/wav', '.wav'),
    # Text/data file types not recognized by default mimetypes
    # YAML: RFC 9512 (https://www.rfc-editor.org/rfc/rfc9512.html)
    ('application/yaml', '.yaml'),
    ('application/yaml', '.yml'),
    # TOML: RFC 9519 (https://www.rfc-editor.org/rfc/rfc9519.html)
    ('application/toml', '.toml'),
    # XML is recognized as `text/xml` on some systems, but it needs to be `application/xml`
    # per RFC 7303 (https://www.rfc-editor.org/rfc/rfc7303.html)
    ('application/xml', '.xml'),
)
for _extra_type, _extra_ext in _EXTRA_MIME_TYPES:
    _mime_types.add_type(_extra_type, _extra_ext)
del _extra_type, _extra_ext
# Literal media types (MIME strings), grouped by modality.
AudioMediaType: TypeAlias = Literal['audio/wav', 'audio/mpeg', 'audio/ogg', 'audio/flac', 'audio/aiff', 'audio/aac']
ImageMediaType: TypeAlias = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
DocumentMediaType: TypeAlias = Literal[
    'application/pdf',
    'text/plain',
    'text/csv',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'text/html',
    'text/markdown',
    'application/msword',
    'application/vnd.ms-excel',
]
VideoMediaType: TypeAlias = Literal[
    'video/x-matroska',
    'video/quicktime',
    'video/mp4',
    'video/webm',
    'video/x-flv',
    'video/mpeg',
    'video/x-ms-wmv',
    'video/3gpp',
]

# Short format names; these are the return types of the `format` properties on the
# URL classes defined later in this module.
AudioFormat: TypeAlias = Literal['wav', 'mp3', 'oga', 'flac', 'aiff', 'aac']
ImageFormat: TypeAlias = Literal['jpeg', 'png', 'gif', 'webp']
DocumentFormat: TypeAlias = Literal['csv', 'doc', 'docx', 'html', 'md', 'pdf', 'txt', 'xls', 'xlsx']
VideoFormat: TypeAlias = Literal['mkv', 'mov', 'mp4', 'webm', 'flv', 'mpeg', 'mpg', 'wmv', 'three_gp']

FinishReason: TypeAlias = Literal[
    'stop',
    'length',
    'content_filter',
    'tool_call',
    'error',
]
"""Reason the model finished generating the response, normalized to OpenTelemetry values."""

# Used as the type of the `force_download` field on `FileUrl`.
ForceDownloadMode: TypeAlias = bool | Literal['allow-local']
"""Type for the force_download parameter on FileUrl subclasses.
- `False`: The URL is sent directly to providers that support it. For providers that don't,
the file is downloaded with SSRF protection (blocks private IPs and cloud metadata).
- `True`: The file is always downloaded with SSRF protection (blocks private IPs and cloud metadata).
- `'allow-local'`: The file is always downloaded, allowing private IPs but still blocking cloud metadata.
"""

# Either a replacement dict, a callable that receives the current details (or `None`) and
# returns a dict, or `None`.
ProviderDetailsDelta: TypeAlias = dict[str, Any] | Callable[[dict[str, Any] | None], dict[str, Any]] | None
"""Type for provider_details input: can be a static dict, a callback to update existing details, or None."""
@dataclass(repr=False)
class SystemPromptPart:
    """A system prompt, generally written by the application developer.

    It gives the model context and guidance on how to respond.
    """

    content: str
    """The content of the prompt."""

    _: KW_ONLY

    timestamp: datetime = field(default_factory=_now_utc)
    """The timestamp of the prompt."""

    dynamic_ref: str | None = None
    """The ref of the dynamic system prompt function that generated this part.

    Only set if the system prompt is dynamic, see
    [`system_prompt`][pydantic_ai.agent.Agent.system_prompt] for more information.
    """

    part_kind: Literal['system-prompt'] = 'system-prompt'
    """Part type identifier, this is available on all parts as a discriminator."""

    def otel_event(self, settings: InstrumentationSettings) -> LogRecord:
        """Build the `gen_ai.system.message` log record for this part.

        The prompt text is only attached to the body when `settings.include_content` is truthy.
        """
        body: dict[str, Any] = {'role': 'system'}
        if settings.include_content:
            body['content'] = self.content
        return LogRecord(attributes={'event.name': 'gen_ai.system.message'}, body=body)

    def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_messages.MessagePart]:
        """Return this part as a single-element list holding one text message part.

        The `content` key is only passed when `settings.include_content` is truthy.
        """
        content_kwargs = {'content': self.content} if settings.include_content else {}
        return [_otel_messages.TextPart(type='text', **content_kwargs)]

    __repr__ = _utils.dataclasses_no_defaults_repr
def _multi_modal_content_identifier(identifier: str | bytes) -> str:
"""Generate stable identifier for multi-modal content to help LLM in finding a specific file in tool call responses."""
if isinstance(identifier, str):
identifier = identifier.encode('utf-8')
return hashlib.sha1(identifier).hexdigest()[:6]
@pydantic_dataclass(repr=False, config=pydantic.ConfigDict(validate_by_name=True))
class FileUrl(ABC):
    """Abstract base class for any URL-based file."""

    url: str
    """The URL of the file."""

    _: KW_ONLY

    force_download: ForceDownloadMode = False
    """Controls whether the file is downloaded and how SSRF protection is applied:

    * If `False`, the URL is sent directly to providers that support it. For providers that don't,
      the file is downloaded with SSRF protection (blocks private IPs and cloud metadata).
    * If `True`, the file is always downloaded with SSRF protection (blocks private IPs and cloud metadata).
    * If `'allow-local'`, the file is always downloaded, allowing private IPs but still blocking cloud metadata.
    """

    vendor_metadata: dict[str, Any] | None = None
    """Vendor-specific metadata for the file.

    Supported by:
    - `GoogleModel`: `VideoUrl.vendor_metadata` is used as `video_metadata`: https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing
    - `OpenAIChatModel`, `OpenAIResponsesModel`: `ImageUrl.vendor_metadata['detail']` is used as `detail` setting for images
    - `XaiModel`: `ImageUrl.vendor_metadata['detail']` is used as `detail` setting for images
    """

    _media_type: Annotated[str | None, pydantic.Field(alias='media_type', default=None, exclude=True)] = field(
        compare=False, default=None
    )

    _identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
        compare=False, default=None
    )

    # `pydantic_dataclass` replaces `__init__` so this method is never used.
    # The signature is kept so that pyright/IDE hints recognize the `media_type` and `identifier` aliases.
    def __init__(
        self,
        url: str,
        *,
        media_type: str | None = None,
        identifier: str | None = None,
        force_download: ForceDownloadMode = False,
        vendor_metadata: dict[str, Any] | None = None,
        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
        _media_type: str | None = None,
        _identifier: str | None = None,
    ) -> None: ...  # pragma: no cover

    @pydantic.computed_field
    @property
    def media_type(self) -> str:
        """Return the media type of the file: the provided `media_type` if set, otherwise one inferred from the URL."""
        explicit = self._media_type
        if explicit:
            return explicit
        return self._infer_media_type()

    @pydantic.computed_field
    @property
    def identifier(self) -> str:
        """The identifier of the file, such as a unique ID.

        This identifier can be provided to the model in a message to allow it to refer to this file in a tool call
        argument, and the tool can look up the file in question by iterating over the message history and finding
        the matching `FileUrl`.

        This identifier is only automatically passed to the model when the `FileUrl` is returned by a tool.
        If you're passing the `FileUrl` as a user message, it's up to you to include a separate text part with the
        identifier, e.g. "This is file <identifier>:" preceding the `FileUrl`.

        It's also included in inline-text delimiters for providers that require inlining text documents, so the
        model can distinguish multiple files.
        """
        explicit = self._identifier
        if explicit:
            return explicit
        # No identifier was provided: fall back to a short identifier derived from the URL.
        return _multi_modal_content_identifier(self.url)

    @abstractmethod
    def _infer_media_type(self) -> str:
        """Infer the media type of the file based on the URL."""
        raise NotImplementedError

    @property
    @abstractmethod
    def format(self) -> str:
        """The file format."""
        raise NotImplementedError

    __repr__ = _utils.dataclasses_no_defaults_repr
@pydantic_dataclass(repr=False, config=pydantic.ConfigDict(validate_by_name=True))
class VideoUrl(FileUrl):
    """A URL to a video."""

    url: str
    """The URL of the video."""

    _: KW_ONLY

    kind: Literal['video-url'] = 'video-url'
    """Type identifier, this is available on all parts as a discriminator."""

    # `pydantic_dataclass` replaces `__init__` so this method is never used.
    # The signature is kept so that pyright/IDE hints recognize the aliases for the `_media_type` and `_identifier` fields.
    def __init__(
        self,
        url: str,
        *,
        media_type: str | None = None,
        identifier: str | None = None,
        force_download: ForceDownloadMode = False,
        vendor_metadata: dict[str, Any] | None = None,
        kind: Literal['video-url'] = 'video-url',
        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
        _media_type: str | None = None,
        _identifier: str | None = None,
    ) -> None: ...  # pragma: no cover

    def _infer_media_type(self) -> str:
        """Return the media type of the video, based on the url.

        Raises:
            ValueError: If the URL is not a YouTube URL and no media type can be guessed from it.
        """
        # Assume that YouTube videos are mp4 because there would be no extension
        # to infer from. This should not be a problem, as Gemini disregards media
        # type for YouTube URLs.
        if self.is_youtube:
            return 'video/mp4'

        guessed = _mime_types.guess_type(self.url)[0]
        if guessed is not None:
            return guessed
        raise ValueError(
            f'Could not infer media type from video URL: {self.url}. Explicitly provide a `media_type` instead.'
        )

    @property
    def is_youtube(self) -> bool:
        """True if the URL has a YouTube domain."""
        return urlparse(self.url).hostname in ('youtu.be', 'youtube.com', 'www.youtube.com')

    @property
    def format(self) -> VideoFormat:
        """The file format of the video.

        The choice of supported formats were based on the Bedrock Converse API. Other APIs don't require to use a format.
        """
        return _video_format_lookup[self.media_type]
@pydantic_dataclass(repr=False, config=pydantic.ConfigDict(validate_by_name=True))
class AudioUrl(FileUrl):
    """A URL to an audio file."""

    url: str
    """The URL of the audio file."""

    _: KW_ONLY

    kind: Literal['audio-url'] = 'audio-url'
    """Type identifier, this is available on all parts as a discriminator."""

    # `pydantic_dataclass` replaces `__init__` so this method is never used.
    # The signature is kept so that pyright/IDE hints recognize the aliases for the `_media_type` and `_identifier` fields.
    def __init__(
        self,
        url: str,
        *,
        media_type: str | None = None,
        identifier: str | None = None,
        force_download: ForceDownloadMode = False,
        vendor_metadata: dict[str, Any] | None = None,
        kind: Literal['audio-url'] = 'audio-url',
        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
        _media_type: str | None = None,
        _identifier: str | None = None,
    ) -> None: ...  # pragma: no cover

    def _infer_media_type(self) -> str:
        """Return the media type of the audio file, based on the url.

        References:
        - Gemini: https://ai.google.dev/gemini-api/docs/audio#supported-formats

        Raises:
            ValueError: If no media type can be guessed from the URL.
        """
        guessed = _mime_types.guess_type(self.url)[0]
        if guessed is not None:
            return guessed
        raise ValueError(
            f'Could not infer media type from audio URL: {self.url}. Explicitly provide a `media_type` instead.'
        )

    @property
    def format(self) -> AudioFormat:
        """The file format of the audio file."""
        return _audio_format_lookup[self.media_type]
@pydantic_dataclass(repr=False, config=pydantic.ConfigDict(validate_by_name=True))
class ImageUrl(FileUrl):
    """A URL to an image."""

    url: str
    """The URL of the image."""

    _: KW_ONLY

    kind: Literal['image-url'] = 'image-url'
    """Type identifier, this is available on all parts as a discriminator."""

    # `pydantic_dataclass` replaces `__init__` so this method is never used.
    # The signature is kept so that pyright/IDE hints recognize the aliases for the `_media_type` and `_identifier` fields.
    def __init__(
        self,
        url: str,
        *,
        media_type: str | None = None,
        identifier: str | None = None,
        force_download: ForceDownloadMode = False,
        vendor_metadata: dict[str, Any] | None = None,
        kind: Literal['image-url'] = 'image-url',
        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
        _media_type: str | None = None,
        _identifier: str | None = None,
    ) -> None: ...  # pragma: no cover

    def _infer_media_type(self) -> str:
        """Return the media type of the image, based on the url.

        Raises:
            ValueError: If no media type can be guessed from the URL.
        """
        guessed = _mime_types.guess_type(self.url)[0]
        if guessed is not None:
            return guessed
        raise ValueError(
            f'Could not infer media type from image URL: {self.url}. Explicitly provide a `media_type` instead.'
        )

    @property
    def format(self) -> ImageFormat:
        """The file format of the image.

        The choice of supported formats were based on the Bedrock Converse API. Other APIs don't require to use a format.
        """
        return _image_format_lookup[self.media_type]
@pydantic_dataclass(repr=False, config=pydantic.ConfigDict(validate_by_name=True))
class DocumentUrl(FileUrl):
    """A URL to a document."""

    url: str
    """The URL of the document."""

    _: KW_ONLY

    kind: Literal['document-url'] = 'document-url'
    """Type identifier, this is available on all parts as a discriminator."""

    # `pydantic_dataclass` replaces `__init__` so this method is never used.
    # The signature is kept so that pyright/IDE hints recognize the aliases for the `_media_type` and `_identifier` fields.
    def __init__(
        self,
        url: str,
        *,
        media_type: str | None = None,
        identifier: str | None = None,
        force_download: ForceDownloadMode = False,
        vendor_metadata: dict[str, Any] | None = None,
        kind: Literal['document-url'] = 'document-url',
        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
        _media_type: str | None = None,
        _identifier: str | None = None,
    ) -> None: ...  # pragma: no cover

    def _infer_media_type(self) -> str:
        """Return the media type of the document, based on the url.

        Raises:
            ValueError: If no media type can be guessed from the URL.
        """
        guessed = _mime_types.guess_type(self.url)[0]
        if guessed is not None:
            return guessed
        raise ValueError(
            f'Could not infer media type from document URL: {self.url}. Explicitly provide a `media_type` instead.'
        )

    @property
    def format(self) -> DocumentFormat:
        """The file format of the document.

        The choice of supported formats were based on the Bedrock Converse API. Other APIs don't require to use a format.

        Raises:
            ValueError: If the media type has no entry in the document format lookup.
        """
        resolved = self.media_type
        try:
            document_format = _document_format_lookup[resolved]
        except KeyError as exc:
            raise ValueError(f'Unknown document media type: {resolved}') from exc
        return document_format
@pydantic_dataclass(
    repr=False,
    config=pydantic.ConfigDict(
        ser_json_bytes='base64',
        val_json_bytes='base64',
    ),
)
class BinaryContent:
    """Binary content, e.g. an audio or image file."""

    data: bytes
    """The binary file data.

    Use `.base64` to get the base64-encoded string.
    """

    _: KW_ONLY

    media_type: AudioMediaType | ImageMediaType | DocumentMediaType | str
    """The media type of the binary data."""

    vendor_metadata: dict[str, Any] | None = None
    """Vendor-specific metadata for the file.

    Supported by:
    - `GoogleModel`: `BinaryContent.vendor_metadata` is used as `video_metadata`: https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing
    - `OpenAIChatModel`, `OpenAIResponsesModel`: `BinaryContent.vendor_metadata['detail']` is used as `detail` setting for images
    - `XaiModel`: `BinaryContent.vendor_metadata['detail']` is used as `detail` setting for images
    """

    _identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
        compare=False, default=None
    )

    kind: Literal['binary'] = 'binary'
    """Type identifier, this is available on all parts as a discriminator."""

    # `pydantic_dataclass` replaces `__init__` so this method is never used.
    # The signature is kept so that pyright/IDE hints recognize the `identifier` alias for the `_identifier` field.
    def __init__(
        self,
        data: bytes,
        *,
        media_type: AudioMediaType | ImageMediaType | DocumentMediaType | str,
        identifier: str | None = None,
        vendor_metadata: dict[str, Any] | None = None,
        kind: Literal['binary'] = 'binary',
        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
        _identifier: str | None = None,
    ) -> None: ...  # pragma: no cover

    @staticmethod
    def narrow_type(bc: BinaryContent) -> BinaryContent | BinaryImage:
        """Narrow the type of the `BinaryContent` to `BinaryImage` if it's an image.

        Non-image content is returned unchanged (the same object).
        """
        if bc.is_image:
            return BinaryImage(
                data=bc.data,
                media_type=bc.media_type,
                identifier=bc.identifier,
                vendor_metadata=bc.vendor_metadata,
            )
        else:
            return bc

    @classmethod
    def from_data_uri(cls, data_uri: str) -> BinaryContent:
        """Create a `BinaryContent` from a data URI.

        Args:
            data_uri: A URI of the form `data:<media type>;base64,<base64 data>`.

        Raises:
            ValueError: If the URI does not start with `data:` or does not contain the `;base64,` marker.
        """
        prefix = 'data:'
        if not data_uri.startswith(prefix):
            raise ValueError('Data URI must start with "data:"')
        # Split on the first `;base64,` only; everything after it is the payload.
        media_type, separator, data = data_uri[len(prefix) :].partition(';base64,')
        if not separator:
            # Fail with an explicit message rather than an opaque tuple-unpacking error.
            raise ValueError('Data URI must contain ";base64," between the media type and the data')
        return cls.narrow_type(cls(data=base64.b64decode(data), media_type=media_type))

    @classmethod
    def from_path(cls, path: str | PathLike[str]) -> BinaryContent:
        """Create a `BinaryContent` from a path.

        Defaults to 'application/octet-stream' if the media type cannot be inferred.

        Args:
            path: Location of the file, as a string or any `os.PathLike` object.

        Raises:
            FileNotFoundError: if the file does not exist.
            PermissionError: if the file cannot be read.
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f'File not found: {path}')
        media_type, _ = _mime_types.guess_type(path)
        if media_type is None:
            media_type = 'application/octet-stream'

        return cls.narrow_type(cls(data=path.read_bytes(), media_type=media_type))

    @pydantic.computed_field
    @property
    def identifier(self) -> str:
        """Identifier for the binary content, such as a unique ID.

        This identifier can be provided to the model in a message to allow it to refer to this file in a tool call
        argument, and the tool can look up the file in question by iterating over the message history and finding
        the matching `BinaryContent`.

        This identifier is only automatically passed to the model when the `BinaryContent` is returned by a tool.
        If you're passing the `BinaryContent` as a user message, it's up to you to include a separate text part with
        the identifier, e.g. "This is file <identifier>:" preceding the `BinaryContent`.

        It's also included in inline-text delimiters for providers that require inlining text documents, so the
        model can distinguish multiple files.
        """
        # Without an explicit identifier, fall back to a short identifier derived from the data itself.
        return self._identifier or _multi_modal_content_identifier(self.data)

    @property
    def data_uri(self) -> str:
        """Convert the `BinaryContent` to a data URI."""
        return f'data:{self.media_type};base64,{self.base64}'

    @property
    def base64(self) -> str:
        """Return the binary data as a base64-encoded string. Default encoding is UTF-8."""
        return base64.b64encode(self.data).decode()

    @property
    def is_audio(self) -> bool:
        """Return `True` if the media type is an audio type."""
        return self.media_type.startswith('audio/')

    @property
    def is_image(self) -> bool:
        """Return `True` if the media type is an image type."""
        return self.media_type.startswith('image/')

    @property
    def is_video(self) -> bool:
        """Return `True` if the media type is a video type."""
        return self.media_type.startswith('video/')

    @property
    def is_document(self) -> bool:
        """Return `True` if the media type is a document type."""
        return self.media_type in _document_format_lookup

    @property
    def format(self) -> str:
        """The file format of the binary content.

        Raises:
            ValueError: If the media type has no entry in the format lookup for its category.
        """
        try:
            if self.is_audio:
                return _audio_format_lookup[self.media_type]
            elif self.is_image:
                return _image_format_lookup[self.media_type]
            elif self.is_video:
                return _video_format_lookup[self.media_type]
            else:
                return _document_format_lookup[self.media_type]
        except KeyError as e:
            raise ValueError(f'Unknown media type: {self.media_type}') from e

    __repr__ = _utils.dataclasses_no_defaults_repr
@pydantic_dataclass(
    repr=False,
    config=pydantic.ConfigDict(
        ser_json_bytes='base64',
        val_json_bytes='base64',
    ),
)
class BinaryImage(BinaryContent):
    """Binary content that's guaranteed to be an image."""

    # `pydantic_dataclass` replaces `__init__` so this method is never used.
    # The signature is kept so that pyright/IDE hints recognize the `identifier` alias for the `_identifier` field.
    def __init__(
        self,
        data: bytes,
        *,
        media_type: ImageMediaType | str,
        identifier: str | None = None,
        vendor_metadata: dict[str, Any] | None = None,
        kind: Literal['binary'] = 'binary',
        # Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
        _identifier: str | None = None,
    ) -> None: ...  # pragma: no cover

    def __post_init__(self):
        """Reject any media type that is not an image type.

        Raises:
            ValueError: If the media type does not start with `image/`.
        """
        if self.is_image:
            return
        raise ValueError('`BinaryImage` must have a media type that starts with "image/"')
@dataclass
class CachePoint:
    """A cache point marker for prompt caching.

    Can be inserted into UserPromptPart.content to mark cache boundaries.
    Models that don't support caching will filter these out.

    Supported by:

    - Anthropic
    - Amazon Bedrock (Converse API)
    """

    kind: Literal['cache-point'] = 'cache-point'
    """Type identifier, this is available on all parts as a discriminator."""

    ttl: Literal['5m', '1h'] = '5m'
    """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour).

    Supported by:

    * Anthropic (automatically omitted for Bedrock, as it does not support explicit TTL).
      See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.
    """
# Closed set of provider identifiers accepted for `UploadedFile.provider_name`.
UploadedFileProviderName: TypeAlias = Literal['anthropic', 'openai', 'google-gla', 'google-vertex', 'bedrock', 'xai']
"""Provider names supported by [`UploadedFile`][pydantic_ai.messages.UploadedFile]."""
@pydantic_dataclass(repr=False, config=pydantic.ConfigDict(validate_by_name=True))
class UploadedFile:
"""A reference to a file uploaded to a provider's file storage by ID.
This allows referencing files that have been uploaded via provider-specific file APIs
rather than providing the file content directly.
Supported by:
- [`AnthropicModel`][pydantic_ai.models.anthropic.AnthropicModel]
- [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel]
- [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel]
- [`BedrockConverseModel`][pydantic_ai.models.bedrock.BedrockConverseModel]
- [`GoogleModel`][pydantic_ai.models.google.GoogleModel] (GLA: [Files API](https://ai.google.dev/gemini-api/docs/files) URIs, Vertex: GCS `gs://` URIs)
- [`XaiModel`][pydantic_ai.models.xai.XaiModel]
"""
file_id: str
"""The provider-specific file identifier.
For most providers, this is the file ID returned by the provider's upload API.
For GoogleModel (Vertex), this must be a GCS URI (`gs://bucket/path`).
For GoogleModel (GLA), this must be a Google Files API URI (`https://generativelanguage.googleapis.com/...`).
For BedrockConverseModel, this must be an S3 URI (`s3://bucket/key`).
"""
provider_name: UploadedFileProviderName
"""The provider this file belongs to.
This is required because file IDs are not portable across providers, and using a file ID
with the wrong provider will always result in an error.
Tip: Use `model.system` to get the provider name dynamically.
"""
_: KW_ONLY
vendor_metadata: dict[str, Any] | None = None
"""Vendor-specific metadata for the file.
The expected shape of this dictionary depends on the provider:
Supported by:
- `GoogleModel`: used as `video_metadata` for video files
"""
_media_type: Annotated[str | None, pydantic.Field(alias='media_type', default=None, exclude=True)] = field(
compare=False, default=None
)
_identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
compare=False, default=None
)
kind: Literal['uploaded-file'] = 'uploaded-file'
"""Type identifier, this is available on all parts as a discriminator."""
# `pydantic_dataclass` replaces `__init__` so this method is never used.
# The signature is kept so that pyright/IDE hints recognize the `media_type` and `identifier` aliases.
def __init__(
self,
file_id: str,
provider_name: UploadedFileProviderName,
*,
media_type: str | None = None,
vendor_metadata: dict[str, Any] | None = None,
identifier: str | None = None,
kind: Literal['uploaded-file'] = 'uploaded-file',
# Required for inline-snapshot which expects all dataclass `__init__` methods to take all field names as kwargs.
_media_type: str | None = None,
_identifier: str | None = None,
) -> None: ... # pragma: no cover
@pydantic.computed_field
@property
def media_type(self) -> str:
"""Return the media type of the file, inferred from `file_id` if not explicitly provided.
Note: Inference relies on the file extension in `file_id`.
For opaque file IDs (e.g., `'file-abc123'`), the media type will default to `'application/octet-stream'`.
Inference relies on Python's `mimetypes` module, whose results may vary across platforms.
Required by some providers (e.g., Bedrock) for certain file types.
"""
if self._media_type is not None:
return self._media_type
parsed = urlparse(self.file_id)
mime_type, _ = _mime_types.guess_type(parsed.path)
return mime_type or 'application/octet-stream'
@pydantic.computed_field
@property
def identifier(self) -> str:
"""The identifier of the file, such as a unique ID.
This identifier can be provided to the model in a message to allow it to refer to this file in a tool call argument,
and the tool can look up the file in question by iterating over the message history and finding the matching `UploadedFile`.
This identifier is only automatically passed to the model when the `UploadedFile` is returned by a tool.
If you're passing the `UploadedFile` as a user message, it's up to you to include a separate text part with the identifier,
e.g. "This is file <identifier>:" preceding the `UploadedFile`.
"""
return self._identifier or _multi_modal_content_identifier(self.file_id)
@property
def format(self) -> str:
    """Return the short format name (e.g. `'png'` for `'image/png'`) for this file's media type.

    The lookup table is picked from the media type's prefix: `image/`, `video/` and `audio/`
    each have a dedicated table, and every other media type is resolved against the document
    table. Currently used by Bedrock, which requires explicit format strings.

    Raises:
        ValueError: If the media type has no entry in the selected table.
    """
    media_type = self.media_type

    # Pick the table first so that a single guarded lookup covers every category.
    if media_type.startswith('image/'):
        format_table = _image_format_lookup
    elif media_type.startswith('video/'):
        format_table = _video_format_lookup
    elif media_type.startswith('audio/'):
        format_table = _audio_format_lookup
    else:
        format_table = _document_format_lookup

    try:
        return format_table[media_type]
    except KeyError as e:
        raise ValueError(f'Unknown media type: {media_type}') from e
# Use the shared `_utils.dataclasses_no_defaults_repr` helper as this class's `__repr__`
# (presumably it leaves out fields that still hold their default value, going by the
# helper's name — confirm in `_utils`).
__repr__ = _utils.dataclasses_no_defaults_repr
# Tagged union of the multi-modal content classes; the `kind` field is declared as the
# Pydantic discriminator that selects the concrete member.
MultiModalContent = Annotated[
    ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent | UploadedFile, pydantic.Discriminator('kind')
]
"""Union of all multi-modal content types with a discriminator for Pydantic validation."""
# Explicit tuple for readability; validated against MultiModalContent in tests.
# This is the runtime form of the union members that `is_multi_modal_content` passes to
# `isinstance`, so it must list the same classes as `MultiModalContent`.
MULTI_MODAL_CONTENT_TYPES: tuple[type, ...] = (ImageUrl, AudioUrl, DocumentUrl, VideoUrl, BinaryContent, UploadedFile)
def is_multi_modal_content(obj: Any) -> TypeGuard[MultiModalContent]:
    """Return whether `obj` is an instance of one of the multi-modal content classes.

    Membership is tested against `MULTI_MODAL_CONTENT_TYPES`. The `TypeGuard` return type lets
    type checkers narrow `obj` to `MultiModalContent` in the branch where this returns `True`.
    """
    matches_known_type = isinstance(obj, MULTI_MODAL_CONTENT_TYPES)
    return matches_known_type
# One item of user-supplied content: plain text, a multi-modal item, or a `CachePoint`.
# Used as the element type of `UserPromptPart.content` and `ToolReturn.content`.
UserContent: TypeAlias = str | MultiModalContent | CachePoint
@dataclass(repr=False)
class ToolReturn:
    """Tool result wrapper that keeps the tool's return value apart from extra content for the model."""

    return_value: ToolReturnContent
    """The value used as the tool's response."""

    _: KW_ONLY

    content: str | Sequence[UserContent] | None = None
    """Extra content delivered to the model in its own `UserPromptPart`.

    Choose this for content that should appear outside the tool result message.
    Multimodal content that should travel natively inside the tool result belongs in
    `return_value` instead, or can be returned directly from the tool function.
    """

    metadata: Any = None
    """Application-side data that is available to your code but never sent to the LLM."""

    kind: Literal['tool-return'] = 'tool-return'

    __repr__ = _utils.dataclasses_no_defaults_repr
# Media type -> `DocumentFormat` string.
# The `format` property uses this table for every media type that does not start with
# `image/`, `video/` or `audio/`; a media type missing here makes that property raise
# `ValueError`.
_document_format_lookup: dict[str, DocumentFormat] = {
    'application/pdf': 'pdf',
    'text/plain': 'txt',
    'text/csv': 'csv',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
    'text/html': 'html',
    'text/markdown': 'md',
    'application/msword': 'doc',
    'application/vnd.ms-excel': 'xls',
}
# Media type -> `AudioFormat` string; consulted by the `format` property for `audio/*` types.
# NOTE(review): `audio/ogg` maps to `'oga'` rather than `'ogg'` — presumably the spelling
# allowed by `AudioFormat`; confirm against that type's definition.
_audio_format_lookup: dict[str, AudioFormat] = {
    'audio/mpeg': 'mp3',
    'audio/wav': 'wav',
    'audio/flac': 'flac',
    'audio/ogg': 'oga',
    'audio/aiff': 'aiff',
    'audio/aac': 'aac',
}
# Media type -> `ImageFormat` string; consulted by the `format` property for `image/*` types.
# Image media types not listed here make that property raise `ValueError`.
_image_format_lookup: dict[str, ImageFormat] = {
    'image/jpeg': 'jpeg',
    'image/png': 'png',
    'image/gif': 'gif',
    'image/webp': 'webp',
}
# Media type -> `VideoFormat` string; consulted by the `format` property for `video/*` types.
# NOTE(review): `video/3gpp` maps to `'three_gp'` — presumably the spelling allowed by
# `VideoFormat`; confirm against that type's definition.
_video_format_lookup: dict[str, VideoFormat] = {
    'video/x-matroska': 'mkv',
    'video/quicktime': 'mov',
    'video/mp4': 'mp4',
    'video/webm': 'webm',
    'video/x-flv': 'flv',
    'video/mpeg': 'mpeg',
    'video/x-ms-wmv': 'wmv',
    'video/3gpp': 'three_gp',
}
# `kind` of a URL content part -> modality label attached to the OTel `uri` part.
# `UserPromptPart.otel_message_parts` reads this with `.get()`, so a kind without an entry
# (there is none for document URLs) simply produces a part with no `modality` key.
_kind_to_modality_lookup: dict[str, Literal['image', 'audio', 'video']] = {
    'image-url': 'image',
    'audio-url': 'audio',
    'video-url': 'video',
}
def _infer_modality_from_media_type(media_type: str) -> Literal['image', 'audio', 'video'] | None:
"""Infer modality from media type for OTel GenAI semantic conventions."""
if media_type.startswith('image/'):
return 'image'
elif media_type.startswith('audio/'):
return 'audio'
elif media_type.startswith('video/'):
return 'video'
return None
def _convert_binary_to_otel_part(
    media_type: str, base64_content: Callable[[], str], settings: InstrumentationSettings
) -> _otel_messages.BlobPart | _otel_messages.BinaryDataPart:
    """Build the OTel message part for binary content in the shape required by `settings.version`.

    Args:
        media_type: Media type recorded on the part.
        base64_content: Zero-argument callable producing the base64 payload. It is only invoked
            when both `settings.include_content` and `settings.include_binary_content` are set.
        settings: Instrumentation settings that select the part shape and whether the payload
            is attached.

    Returns:
        A `BlobPart` (`type='blob'`) for version 4 and above, otherwise a `BinaryDataPart`
        (`type='binary'`).
    """
    attach_payload = settings.include_content and settings.include_binary_content

    if settings.version < 4:
        # Legacy shape: `media_type` key and no modality.
        legacy_part = _otel_messages.BinaryDataPart(type='binary', media_type=media_type)
        if attach_payload:
            legacy_part['content'] = base64_content()
        return legacy_part

    blob = _otel_messages.BlobPart(type='blob', mime_type=media_type)
    inferred_modality = _infer_modality_from_media_type(media_type)
    if inferred_modality is not None:
        blob['modality'] = inferred_modality
    if attach_payload:
        blob['content'] = base64_content()
    return blob
@dataclass(repr=False)
class UserPromptPart:
"""A user prompt, generally written by the end user.
Content comes from the `user_prompt` parameter of [`Agent.run`][pydantic_ai.agent.AbstractAgent.run],
[`Agent.run_sync`][pydantic_ai.agent.AbstractAgent.run_sync], and [`Agent.run_stream`][pydantic_ai.agent.AbstractAgent.run_stream].
"""
content: str | Sequence[UserContent]
"""The content of the prompt."""
_: KW_ONLY
timestamp: datetime = field(default_factory=_now_utc)
"""The timestamp of the prompt."""
part_kind: Literal['user-prompt'] = 'user-prompt'
"""Part type identifier, this is available on all parts as a discriminator."""
def otel_event(self, settings: InstrumentationSettings) -> LogRecord:
    """Build the `gen_ai.user.message` log record for this prompt.

    Each part from `otel_message_parts` is reshaped for the event body:

    * its `type` key is renamed to `kind` (placed first);
    * for `binary` parts, `content` is renamed to `binary_content`;
    * a text part consisting only of `kind` and `content` is collapsed to the bare string.

    If the result is a single content-less text part, or a single element equal to
    `self.content`, the list wrapper is dropped and that element becomes the body content.
    """
    entries: list[Any] = []
    for raw_part in self.otel_message_parts(settings):
        entry: Any = {'kind': raw_part.pop('type'), **raw_part}
        if entry['kind'] == 'binary' and 'content' in entry:
            entry['binary_content'] = entry.pop('content')
        # A text part without content never matches here (`.get` yields None), so it stays a dict.
        is_plain_text = entry == {'kind': 'text', 'content': entry.get('content')}
        entries.append(entry['content'] if is_plain_text else entry)

    body_content: Any = entries
    if entries == [{'kind': 'text'}] or entries == [self.content]:
        body_content = entries[0]

    return LogRecord(
        attributes={'event.name': 'gen_ai.user.message'},
        body={'content': body_content, 'role': 'user'},
    )
def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_messages.MessagePart]:
    """Convert this prompt's content into a list of OTel message parts.

    A plain string prompt is treated as a one-element sequence. Each item becomes:

    * `str` -> a `text` part;
    * URL content -> a `uri` part (version 4 and above) or a part typed by the item's `kind`;
    * `BinaryContent` -> whatever `_convert_binary_to_otel_part` returns;
    * `UploadedFile` -> a `file` part carrying a modality and mime type;
    * `CachePoint` -> nothing (it is a marker, not content).

    Text, URLs and file IDs are only included when `settings.include_content` is set.
    """
    prompt_content = self.content
    items: Sequence[UserContent] = [prompt_content] if isinstance(prompt_content, str) else prompt_content
    with_content = settings.include_content
    collected: list[_otel_messages.MessagePart] = []

    for item in items:
        if isinstance(item, str):
            text_part = _otel_messages.TextPart(type='text')
            if with_content:
                text_part['content'] = item
            collected.append(text_part)
            continue

        if isinstance(item, ImageUrl | AudioUrl | DocumentUrl | VideoUrl):
            if settings.version < 4:
                legacy_part = _otel_messages.MediaUrlPart(type=item.kind)
                if with_content:
                    legacy_part['url'] = item.url
                collected.append(legacy_part)
                continue

            uri_part = _otel_messages.UriPart(type='uri')
            known_modality = _kind_to_modality_lookup.get(item.kind)
            if known_modality is not None:
                uri_part['modality'] = known_modality
            try:  # don't fail the whole message if media type can't be inferred for some reason, just omit it
                uri_part['mime_type'] = item.media_type
            except ValueError:
                pass
            if with_content:
                uri_part['uri'] = item.url
            collected.append(uri_part)
            continue

        if isinstance(item, BinaryContent):
            # Bind `item` as a default so the deferred base64 encoding refers to this item.
            collected.append(_convert_binary_to_otel_part(item.media_type, lambda p=item: p.base64, settings))
            continue

        if isinstance(item, UploadedFile):
            # UploadedFile references provider-hosted files by file_id (OTel GenAI spec FilePart).
            # The modality is the media type's top-level category when it is image/audio/video;
            # anything else (PDFs, text, etc.) is reported as 'document'.
            mime = item.media_type
            top_level = mime.split('/', 1)[0]
            modality = top_level if top_level in ('image', 'audio', 'video') else 'document'
            file_part = _otel_messages.FilePart(type='file', modality=modality, mime_type=mime)
            if with_content:
                file_part['file_id'] = item.file_id
            collected.append(file_part)
            continue

        if isinstance(item, CachePoint):
            # CachePoint is a marker, not actual content - skip it for otel
            continue

        collected.append({'type': item.kind})  # pragma: no cover

    return collected