Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
('py:class', 'yandex_cloud_ml_sdk._types.schemas.JsonArray'),
('py:class', "'yandex_cloud_ml_sdk._types.schemas.JsonArray'"),
('py:class', 'JsonObject'),
('py:class', 'JsonArray'),
('py:class', 'JsonSchemaType'),
('py:class', 'ResponseType'),
}
Expand Down
18 changes: 18 additions & 0 deletions docs/types/other.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,21 @@ Miscellaneous types
:no-inherited-members:

.. py:class:: yandex_cloud_ml_sdk._tools.tool_call_list.HttpToolCallList


Completions-related types
~~~~~~~~~~~~~~~~~~~~~~~~~

.. py:class:: yandex_cloud_ml_sdk._chat.completions.config.ChatReasoningModeType

.. autodata:: yandex_cloud_ml_sdk._chat.completions.config.ChatReasoningModeType

.. py:class:: yandex_cloud_ml_sdk._chat.completions.config.QueryType

.. py:class:: yandex_cloud_ml_sdk._models.completions.config.CompletionTool

.. autodata:: yandex_cloud_ml_sdk._models.completions.config.CompletionTool

.. py:class:: yandex_cloud_ml_sdk._types.tools.tool_choice.ToolChoiceType

.. autodata:: yandex_cloud_ml_sdk._types.tools.tool_choice.ToolChoiceType
56 changes: 56 additions & 0 deletions examples/async/chat/extra_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python3

from __future__ import annotations

import asyncio

from yandex_cloud_ml_sdk import AsyncYCloudML


async def get_model(sdk: AsyncYCloudML):
    """Let the user interactively pick one of the available chat models.

    Prints a numbered list of the models returned by
    ``sdk.chat.completions.list()``, reads a number from stdin and returns
    the model at that position.

    :param sdk: SDK instance used to list the chat completion models.
    :returns: The model selected by the user.
    :raises IndexError: If there are no models to choose from, or the
        entered number is outside of the printed range.
    :raises ValueError: If the entered text is not an integer.
    """
    models = await sdk.chat.completions.list()
    if not models:
        # Fail before prompting: asking for a number "from 0 to 0" when
        # there is nothing to select would only mislead the user.
        raise IndexError('You have no access to any chat model')

    print('You have access to the following models:')
    for i, model in enumerate(models):
        print(f" [{i:2}] {model.uri}")

    last = len(models) - 1
    raw_number = input(f"Please, input model number from 0 to {last}: ")
    number = int(raw_number)
    if not 0 <= number <= last:
        # Negative numbers would otherwise silently index from the end
        # of the sequence, contradicting the range shown in the prompt.
        raise IndexError(f"Model number must be from 0 to {last}, got {number}")
    return models[number]


async def main() -> None:
    """Show how to pass extra request parameters via ``extra_query``."""
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    model = await get_model(sdk)

    # Arbitrary extra request parameters can be handed to the model
    # through the extra_query configuration parameter.
    model = model.configure(temperature=0.5, extra_query={'top_p': 0.2})

    # Configuring extra_query again replaces its value as a whole:
    # the previous and the new mappings are not merged.
    model = model.configure(extra_query={'top_k': 2})
    print(f"{model.config.extra_query=} {model.config.temperature=}")

    prompt = 'Say random number from 0 to 10'
    experiments = (
        ('deterministic', {'top_k': 2, 'top_p': 0.1}),
        ('another deterministic', {'top_k': 2, 'top_p': 0.1}),
        ('more random', {'top_k': 5, 'top_p': 1}),
        ('another more random', {'top_k': 5, 'top_p': 1}),
    )
    for label, params in experiments:
        model = model.configure(extra_query=params)
        answer = await model.run(prompt)
        print(f"{label} result: {answer.text}")

    # The client does not validate extra_query values in any way,
    model = model.configure(extra_query={'foo': 2})
    # so this request will not fail:
    await model.run(prompt)
    # Refer to the model's documentation to learn which extra
    # parameters it actually supports.


if __name__ == '__main__':
asyncio.run(main())
54 changes: 54 additions & 0 deletions examples/sync/chat/extra_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python3

from __future__ import annotations

from yandex_cloud_ml_sdk import YCloudML


def get_model(sdk: YCloudML):
    """Let the user interactively pick one of the available chat models.

    Prints a numbered list of the models returned by
    ``sdk.chat.completions.list()``, reads a number from stdin and returns
    the model at that position.

    :param sdk: SDK instance used to list the chat completion models.
    :returns: The model selected by the user.
    :raises IndexError: If there are no models to choose from, or the
        entered number is outside of the printed range.
    :raises ValueError: If the entered text is not an integer.
    """
    models = sdk.chat.completions.list()
    if not models:
        # Fail before prompting: asking for a number "from 0 to 0" when
        # there is nothing to select would only mislead the user.
        raise IndexError('You have no access to any chat model')

    print('You have access to the following models:')
    for i, model in enumerate(models):
        print(f" [{i:2}] {model.uri}")

    last = len(models) - 1
    raw_number = input(f"Please, input model number from 0 to {last}: ")
    number = int(raw_number)
    if not 0 <= number <= last:
        # Negative numbers would otherwise silently index from the end
        # of the sequence, contradicting the range shown in the prompt.
        raise IndexError(f"Model number must be from 0 to {last}, got {number}")
    return models[number]


def main() -> None:
    """Show how to pass extra request parameters via ``extra_query``."""
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    model = get_model(sdk)

    # Arbitrary extra request parameters can be handed to the model
    # through the extra_query configuration parameter.
    model = model.configure(temperature=0.5, extra_query={'top_p': 0.2})

    # Configuring extra_query again replaces its value as a whole:
    # the previous and the new mappings are not merged.
    model = model.configure(extra_query={'top_k': 2})
    print(f"{model.config.extra_query=} {model.config.temperature=}")

    prompt = 'Say random number from 0 to 10'
    experiments = (
        ('deterministic', {'top_k': 2, 'top_p': 0.1}),
        ('another deterministic', {'top_k': 2, 'top_p': 0.1}),
        ('more random', {'top_k': 5, 'top_p': 1}),
        ('another more random', {'top_k': 5, 'top_p': 1}),
    )
    for label, params in experiments:
        model = model.configure(extra_query=params)
        answer = model.run(prompt)
        print(f"{label} result: {answer.text}")

    # The client does not validate extra_query values in any way,
    model = model.configure(extra_query={'foo': 2})
    # so this request will not fail:
    model.run(prompt)
    # Refer to the model's documentation to learn which extra
    # parameters it actually supports.


if __name__ == '__main__':
main()
11 changes: 10 additions & 1 deletion src/yandex_cloud_ml_sdk/_chat/completions/config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from __future__ import annotations

from copy import deepcopy
from dataclasses import dataclass
from enum import Enum
from typing import Any, Union

from typing_extensions import Self
from typing_extensions import Self, TypeAlias

from yandex_cloud_ml_sdk._models.completions.config import CompletionTool, GPTModelConfig
from yandex_cloud_ml_sdk._tools.tool import BaseTool
from yandex_cloud_ml_sdk._types.schemas import JsonObject
from yandex_cloud_ml_sdk._utils.coerce import coerce_tuple


Expand All @@ -26,12 +28,14 @@ def _coerce(cls, value: ChatReasoningModeType) -> Self:


ChatReasoningModeType = Union[str, ChatReasoningMode]
QueryType: TypeAlias = JsonObject


@dataclass(frozen=True)
class ChatModelConfig(GPTModelConfig):
reasoning_mode: ChatReasoningMode | None = None
tools: tuple[CompletionTool, ...] | None = None
extra_query: QueryType | None = None

def _replace(self, **kwargs: Any) -> Self:
if reasoning_mode := kwargs.get('reasoning_mode'):
Expand All @@ -40,4 +44,9 @@ def _replace(self, **kwargs: Any) -> Self:
if tools := kwargs.get('tools'):
kwargs['tools'] = coerce_tuple(tools, BaseTool) # type: ignore[type-abstract]

extra_query: QueryType | None
if extra_query := kwargs.get('extra_query'):
assert isinstance(extra_query, dict)
kwargs['extra_query'] = deepcopy(extra_query)

return super()._replace(**kwargs)
8 changes: 7 additions & 1 deletion src/yandex_cloud_ml_sdk/_chat/completions/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from yandex_cloud_ml_sdk._types.tools.tool_choice import coerce_to_json as coerce_tool_choice_to_json
from yandex_cloud_ml_sdk._utils.sync import run_sync, run_sync_generator

from .config import ChatModelConfig, ChatReasoningModeType
from .config import ChatModelConfig, ChatReasoningModeType, QueryType
from .message import ChatMessageInputType, messages_to_json
from .result import ChatModelResult

Expand All @@ -40,6 +40,7 @@ def configure( # type: ignore[override]
tools: UndefinedOr[Sequence[CompletionTool] | CompletionTool] = UNDEFINED,
parallel_tool_calls: UndefinedOr[bool] = UNDEFINED,
tool_choice: UndefinedOr[ToolChoiceType] = UNDEFINED,
extra_query: UndefinedOr[QueryType] = UNDEFINED,
) -> Self:
return super().configure(
temperature=temperature,
Expand All @@ -49,6 +50,7 @@ def configure( # type: ignore[override]
tools=tools,
parallel_tool_calls=parallel_tool_calls,
tool_choice=tool_choice,
extra_query=extra_query,
)

def _build_request_json(self, messages: ChatMessageInputType, stream: bool) -> dict[str, Any]:
Expand Down Expand Up @@ -85,6 +87,10 @@ def _build_request_json(self, messages: ChatMessageInputType, stream: bool) -> d

if c.tool_choice is not None:
result['tool_choice'] = coerce_tool_choice_to_json(c.tool_choice)

if c.extra_query is not None:
result.update(c.extra_query)

return result

@override
Expand Down
92 changes: 92 additions & 0 deletions tests/chat/cassettes/test_completions/test_extra_query.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
interactions:
- request:
body: '{"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","messages":[{"role":"user","content":"Say
random number from 0 to 10"}],"stream":false}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '142'
content-type:
- application/json
host:
- llm.api.cloud.yandex.net
user-agent:
- yandex-cloud-ml-sdk/0.15.0 python/3.12
x-client-request-id:
- e39a16aa-9ecc-4769-a220-d99a47e9997c
method: POST
uri: https://llm.api.cloud.yandex.net/v1/chat/completions
response:
body:
string: '{"id":"09afef25-7de1-4a86-8cda-1cb673f2ddc1","object":"chat.completion","created":1758116486,"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","choices":[{"index":0,"message":{"role":"assistant","content":"7"},"finish_reason":"stop"}],"usage":{"prompt_tokens":20,"total_tokens":22,"completion_tokens":2}}

'
headers:
content-length:
- '309'
content-type:
- application/json
date:
- Wed, 17 Sep 2025 13:41:26 GMT
server:
- ycalb
x-client-request-id:
- e39a16aa-9ecc-4769-a220-d99a47e9997c
x-request-id:
- c3402ee1-93c6-4c99-a660-f978ec004b35
x-server-trace-id:
- 990d344b241074c2:295875c4dbdaafe7:990d344b241074c2:1
status:
code: 200
message: OK
- request:
body: '{"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","messages":[{"role":"user","content":"Say
random number from 0 to 10"}],"stream":false,"top_k":3}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '152'
content-type:
- application/json
host:
- llm.api.cloud.yandex.net
user-agent:
- yandex-cloud-ml-sdk/0.15.0 python/3.12
x-client-request-id:
- 5a710077-50b4-43ed-b269-78116ab15066
method: POST
uri: https://llm.api.cloud.yandex.net/v1/chat/completions
response:
body:
string: '{"id":"1ee562fa-00fa-4d56-84a5-4bcea60cd694","object":"chat.completion","created":1758116487,"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","choices":[{"index":0,"message":{"role":"assistant","content":"7"},"finish_reason":"stop"}],"usage":{"prompt_tokens":20,"total_tokens":22,"completion_tokens":2}}

'
headers:
content-length:
- '309'
content-type:
- application/json
date:
- Wed, 17 Sep 2025 13:41:27 GMT
server:
- ycalb
x-client-request-id:
- 5a710077-50b4-43ed-b269-78116ab15066
x-request-id:
- e3cb30bc-4cdb-4110-a4ba-90c9b2744aa9
x-server-trace-id:
- 16405981e4f1ec7d:64b3b33e662bcec:16405981e4f1ec7d:1
status:
code: 200
message: OK
version: 1
25 changes: 25 additions & 0 deletions tests/chat/test_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pathlib
from typing import cast

import httpx._client
import pytest

from yandex_cloud_ml_sdk import AsyncYCloudML
Expand Down Expand Up @@ -388,3 +389,27 @@ async def test_multimodal(async_sdk: AsyncYCloudML) -> None:
]
result = await model.run(request)
assert 'complex' in result.text


async def test_extra_query(async_sdk: AsyncYCloudML, monkeypatch) -> None:
    """Check that ``extra_query`` entries are sent in the request JSON."""
    captured = {'top_k': None}
    real_request = httpx._client.AsyncClient.request

    async def spying_request(*args, **kwargs):
        # Remember the top_k value of the outgoing JSON payload (if any),
        # then delegate to the unpatched implementation.
        payload = kwargs.get('json', {})
        captured['top_k'] = payload.get('top_k')
        return await real_request(*args, **kwargs)

    monkeypatch.setattr(httpx._client.AsyncClient, 'request', spying_request)

    prompt = "Say random number from 0 to 10"
    model = async_sdk.chat.completions('yandexgpt')

    # A model without extra_query must not send top_k.
    await model.run(prompt)
    assert not captured['top_k']

    # Once configured, the extra parameter must show up in the payload.
    model = model.configure(extra_query={'top_k': 3})
    await model.run(prompt)
    assert captured['top_k'] == 3