Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion docs/types/message.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,17 @@ Completions-related message types
Chat completions related message types
--------------------------------------

.. autotypeddict:: yandex_cloud_ml_sdk._chat.completions.message.ChatFunctionResultMessageDict
.. currentmodule:: yandex_cloud_ml_sdk._chat.completions.message

.. autotypeddict:: ChatFunctionResultMessageDict

.. autotypeddict:: MultimodalMessageDict

.. autotypeddict:: TextContent

.. autotypeddict:: ImageUrlContent

.. autotypeddict:: ImageUrlDict


Image generation messages
Expand Down
Binary file added examples/async/chat/example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
51 changes: 51 additions & 0 deletions examples/async/chat/multimodal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3

from __future__ import annotations

import asyncio
import base64
import pathlib

from yandex_cloud_ml_sdk import AsyncYCloudML


def get_image_base64() -> str:
    """Return the ``example.png`` stored next to this script, base64-encoded.

    The path is resolved relative to this file rather than the current
    working directory, so the example can be launched from anywhere.
    """
    image_path = pathlib.Path(__file__).parent / 'example.png'
    image_data = image_path.read_bytes()
    # b64encode() yields bytes; decode them to get a plain ``str``.
    return base64.b64encode(image_data).decode('utf-8')


async def main() -> None:
    """Send one multimodal (text + image) chat request and print the reply text."""
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    # At the moment this is the only model that supports image processing.
    model = sdk.chat.completions('gemma-3-27b-it')

    request = [
        # A special kind of multimodal message: ``content`` is a list of
        # typed parts, which lets you mix image and text data.
        {
            'role': 'user',
            'content': [
                {
                    'type': 'text',
                    'text': "What is depicted in the following image",
                },
                {
                    'type': 'image_url',
                    'image_url': {
                        # The image is sent inline as a base64 ``data:`` URL.
                        'url': f'data:image/png;base64,{get_image_base64()}',
                    },
                },
            ],
        },
    ]

    result = await model.run(request)

    print(result.text)


# Run the example only when executed as a script, not when imported.
if __name__ == '__main__':
    asyncio.run(main())
Binary file added examples/sync/chat/example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
50 changes: 50 additions & 0 deletions examples/sync/chat/multimodal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python3

from __future__ import annotations

import base64
import pathlib

from yandex_cloud_ml_sdk import YCloudML


def get_image_base64() -> str:
    """Return the ``example.png`` stored next to this script, base64-encoded.

    The path is resolved relative to this file rather than the current
    working directory, so the example can be launched from anywhere.
    """
    image_path = pathlib.Path(__file__).parent / 'example.png'
    image_data = image_path.read_bytes()
    # b64encode() yields bytes; decode them to get a plain ``str``.
    return base64.b64encode(image_data).decode('utf-8')


def main() -> None:
    """Send one multimodal (text + image) chat request and print the reply text."""
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    # At the moment this is the only model that supports image processing.
    model = sdk.chat.completions('gemma-3-27b-it')

    request = [
        # A special kind of multimodal message: ``content`` is a list of
        # typed parts, which lets you mix image and text data.
        {
            'role': 'user',
            'content': [
                {
                    'type': 'text',
                    'text': "What is depicted in the following image",
                },
                {
                    'type': 'image_url',
                    'image_url': {
                        # The image is sent inline as a base64 ``data:`` URL.
                        'url': f'data:image/png;base64,{get_image_base64()}',
                    },
                },
            ],
        },
    ]

    result = model.run(request)

    print(result.text)


# Run the example only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
48 changes: 36 additions & 12 deletions src/yandex_cloud_ml_sdk/_chat/completions/message.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from collections.abc import Iterable
from typing import TypedDict, Union, cast
from collections.abc import Iterable, Sequence
from typing import Literal, TypedDict, Union, cast

from typing_extensions import NotRequired, Required

Expand All @@ -20,7 +20,26 @@ class ChatFunctionResultMessageDict(TypedDict):
content: Required[str]


ChatCompletionsMessageType = Union[MessageType, ChatFunctionResultMessageDict, MessageInputType]
class ImageUrlDict(TypedDict):
    """Value of the ``image_url`` key of an image content part."""

    # Location of the image; the SDK examples pass an inline
    # ``data:image/png;base64,...`` URL here.
    url: str


class ImageUrlContent(TypedDict):
    """Image part of a multimodal message's ``content`` list."""

    # Discriminator that marks this part as an image.
    type: Literal['image_url']
    # Wrapper dict that carries the image URL.
    image_url: ImageUrlDict


class TextContent(TypedDict):
    """Text part of a multimodal message's ``content`` list."""

    # Discriminator that marks this part as plain text.
    type: Literal['text']
    # The text itself.
    text: str


class MultimodalMessageDict(TypedDict):
    """Chat message whose ``content`` is a sequence of typed parts.

    Unlike plain text messages, ``content`` here is not a string but a
    sequence that may mix text parts and image parts in one message.
    """

    # Optional; ``message_to_json`` falls back to ``'user'`` for this kind
    # of message when the role is missing.
    role: NotRequired[str]
    # Each item is a complete content part (with its ``type`` discriminator),
    # i.e. ``ImageUrlContent`` -- not the inner ``ImageUrlDict`` payload.
    content: Sequence[ImageUrlContent | TextContent]


# Union of the single-message shapes that ``message_to_json`` takes,
# including the multimodal dict form whose content is a sequence of parts.
ChatCompletionsMessageType = Union[MessageType, ChatFunctionResultMessageDict, MessageInputType, MultimodalMessageDict]
# Either one such message or an iterable of them.
ChatMessageInputType = Union[ChatCompletionsMessageType, Iterable[ChatCompletionsMessageType]]


Expand All @@ -43,41 +62,46 @@ def message_to_json(message: ChatCompletionsMessageType, tool_name_ids: dict[str
"content": message.text,
"role": message.role,
}

if isinstance(message, dict):
text = message.get('text') or message.get('content', '')
role: str | None = message.get('role')
content: Sequence | str | None = message.get('content') # type: ignore[assignment]
if isinstance(content, Sequence) and not isinstance(content, (str, bytes)):
return {
'role': role or 'user',
'content': list(content),
}

text: str | None = message.get('text') or content or '' # type: ignore[assignment]
assert isinstance(text, str)

if tool_call_id := message.get('tool_call_id'):
assert isinstance(tool_call_id, str)
message = cast(ChatFunctionResultMessageDict, message)
role = message.get('role', 'tool')
return {
'role': role,
'role': role or 'tool',
'content': text,
'tool_call_id': tool_call_id,
}

if tool_calls := message.get('tool_calls'):
tool_calls = cast(JsonObject, tool_calls)
role = message.get('role', 'assistant')
return {
'tool_calls': tool_calls,
'role': role,
'role': role or 'assistant',
}

if text:
message = cast(TextMessageDict, message)
role = message.get('role', 'user')
return {
'content': text,
'role': role
'role': role or 'user'
}

if tool_results := message.get('tool_results'):
assert isinstance(tool_results, list)
message = cast(FunctionResultMessageDict, message)

role = message.get('role', 'tool')
result: list[JsonObject] = []
for tool_result in tool_results:
tool_result = cast(ToolResultDictType, tool_result)
Expand All @@ -91,7 +115,7 @@ def message_to_json(message: ChatCompletionsMessageType, tool_name_ids: dict[str
)

result.append({
'role': role,
'role': role or 'tool',
'content': content,
'tool_call_id': id_,
})
Expand Down
56 changes: 56 additions & 0 deletions tests/chat/cassettes/test_completions/test_multimodal.yaml

Large diffs are not rendered by default.

Binary file added tests/chat/example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
29 changes: 29 additions & 0 deletions tests/chat/test_completions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import base64
import json
import pathlib
from typing import cast

import pytest
Expand Down Expand Up @@ -359,3 +361,30 @@ async def test_tool_choice(async_sdk: AsyncYCloudML, tool, schema) -> None:
model = model.configure(tool_choice=None) # type: ignore[arg-type]
result = await model.run(message)
assert result.status.name == 'TOOL_CALLS'


async def test_multimodal(async_sdk: AsyncYCloudML) -> None:
    """Check that a message mixing a text part and an inline PNG is answered."""
    model = async_sdk.chat.completions('gemma-3-27b-it')

    # Load the fixture image that sits next to this test module and encode
    # it so it can be embedded in a ``data:`` URL.
    image_path = pathlib.Path(__file__).parent / 'example.png'
    image_data = image_path.read_bytes()
    image_base64 = base64.b64encode(image_data)
    image = image_base64.decode('utf-8')

    request = [
        {
            'role': 'user',
            'content': [
                {
                    'type': 'text',
                    'text': "What is depicted in the following image",
                },
                {
                    'type': 'image_url',
                    'image_url': {
                        'url': f'data:image/png;base64,{image}',
                    },
                },
            ],
        },
    ]
    result = await model.run(request)
    # NOTE(review): presumably example.png shows bricks -- confirm against
    # the fixture image and the recorded cassette.
    assert 'bricks' in result.text