diff --git a/docs/conf.py b/docs/conf.py index 8135a723..f3db9244 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,6 +76,7 @@ ('py:class', 'yandex_cloud_ml_sdk._types.schemas.JsonArray'), ('py:class', "'yandex_cloud_ml_sdk._types.schemas.JsonArray'"), ('py:class', 'JsonObject'), + ('py:class', 'JsonArray'), ('py:class', 'JsonSchemaType'), ('py:class', 'ResponseType'), } diff --git a/docs/types/other.rst b/docs/types/other.rst index f9fff489..89a09fe0 100644 --- a/docs/types/other.rst +++ b/docs/types/other.rst @@ -84,3 +84,21 @@ Miscellaneous types :no-inherited-members: .. py:class:: yandex_cloud_ml_sdk._tools.tool_call_list.HttpToolCallList + + +Completions-related types +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. py:class:: yandex_cloud_ml_sdk._chat.completions.config.ChatReasoningModeType + +.. autodata:: yandex_cloud_ml_sdk._chat.completions.config.ChatReasoningModeType + +.. py:class:: yandex_cloud_ml_sdk._chat.completions.config.QueryType + +.. py:class:: yandex_cloud_ml_sdk._models.completions.config.CompletionTool + +.. autodata:: yandex_cloud_ml_sdk._models.completions.config.CompletionTool + +.. py:class:: yandex_cloud_ml_sdk._types.tools.tool_choice.ToolChoiceType + +.. 
autodata:: yandex_cloud_ml_sdk._types.tools.tool_choice.ToolChoiceType diff --git a/examples/async/chat/extra_query.py b/examples/async/chat/extra_query.py new file mode 100755 index 00000000..21619500 --- /dev/null +++ b/examples/async/chat/extra_query.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import asyncio + +from yandex_cloud_ml_sdk import AsyncYCloudML + + +async def get_model(sdk: AsyncYCloudML): + models = await sdk.chat.completions.list() + i = 0 + print('You have access to the following models:') + for i, model in enumerate(models): + print(f" [{i:2}] {model.uri}") + + raw_number = input(f"Please, input model number from 0 to {i}: ") + number = int(raw_number) + return models[number] + + +async def main() -> None: + sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64') + sdk.setup_default_logging() + + model = await get_model(sdk) + + # You could pass any extra query parameters to the model + # via extra_query configuration parameter + model = model.configure(temperature=0.5, extra_query={'top_p': 0.2}) + + # Note that reconfiguring extra_query will rewrite its value entirely + # without any merging + model = model.configure(extra_query={'top_k': 2}) + print(f"{model.config.extra_query=} {model.config.temperature=}") + + request = 'Say random number from 0 to 10' + for title, extra_query in ( + ('deterministic', {'top_k': 2, 'top_p': 0.1}), + ('another deterministic', {'top_k': 2, 'top_p': 0.1}), + ('more random', {'top_k': 5, 'top_p': 1}), + ('another more random', {'top_k': 5, 'top_p': 1}), + ): + model = model.configure(extra_query=extra_query) + result = await model.run(request) + print(f"{title} result: {result.text}") + + # Also note that there is no client validation about extra query value at all: + model = model.configure(extra_query={'foo': 2}) + # This will not fail: + await model.run(request) + # So, refer to models documentation to find out about extra model parameters + + +if __name__ == '__main__': 
+ asyncio.run(main()) diff --git a/examples/sync/chat/extra_query.py b/examples/sync/chat/extra_query.py new file mode 100755 index 00000000..cb466c31 --- /dev/null +++ b/examples/sync/chat/extra_query.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +from yandex_cloud_ml_sdk import YCloudML + + +def get_model(sdk: YCloudML): + models = sdk.chat.completions.list() + i = 0 + print('You have access to the following models:') + for i, model in enumerate(models): + print(f" [{i:2}] {model.uri}") + + raw_number = input(f"Please, input model number from 0 to {i}: ") + number = int(raw_number) + return models[number] + + +def main() -> None: + sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64') + sdk.setup_default_logging() + + model = get_model(sdk) + + # You could pass any extra query parameters to the model + # via extra_query configuration parameter + model = model.configure(temperature=0.5, extra_query={'top_p': 0.2}) + + # Note that reconfiguring extra_query will rewrite its value entirely + # without any merging + model = model.configure(extra_query={'top_k': 2}) + print(f"{model.config.extra_query=} {model.config.temperature=}") + + request = 'Say random number from 0 to 10' + for title, extra_query in ( + ('deterministic', {'top_k': 2, 'top_p': 0.1}), + ('another deterministic', {'top_k': 2, 'top_p': 0.1}), + ('more random', {'top_k': 5, 'top_p': 1}), + ('another more random', {'top_k': 5, 'top_p': 1}), + ): + model = model.configure(extra_query=extra_query) + result = model.run(request) + print(f"{title} result: {result.text}") + + # Also note that there is no client validation about extra query value at all: + model = model.configure(extra_query={'foo': 2}) + # This will not fail: + model.run(request) + # So, refer to models documentation to find out about extra model parameters + + +if __name__ == '__main__': + main() diff --git a/src/yandex_cloud_ml_sdk/_chat/completions/config.py 
b/src/yandex_cloud_ml_sdk/_chat/completions/config.py index 7450ad2f..394e0870 100644 --- a/src/yandex_cloud_ml_sdk/_chat/completions/config.py +++ b/src/yandex_cloud_ml_sdk/_chat/completions/config.py @@ -1,13 +1,15 @@ from __future__ import annotations +from copy import deepcopy from dataclasses import dataclass from enum import Enum from typing import Any, Union -from typing_extensions import Self +from typing_extensions import Self, TypeAlias from yandex_cloud_ml_sdk._models.completions.config import CompletionTool, GPTModelConfig from yandex_cloud_ml_sdk._tools.tool import BaseTool +from yandex_cloud_ml_sdk._types.schemas import JsonObject from yandex_cloud_ml_sdk._utils.coerce import coerce_tuple @@ -26,12 +28,14 @@ def _coerce(cls, value: ChatReasoningModeType) -> Self: ChatReasoningModeType = Union[str, ChatReasoningMode] +QueryType: TypeAlias = JsonObject @dataclass(frozen=True) class ChatModelConfig(GPTModelConfig): reasoning_mode: ChatReasoningMode | None = None tools: tuple[CompletionTool, ...] 
| None = None + extra_query: QueryType | None = None def _replace(self, **kwargs: Any) -> Self: if reasoning_mode := kwargs.get('reasoning_mode'): @@ -40,4 +44,9 @@ def _replace(self, **kwargs: Any) -> Self: if tools := kwargs.get('tools'): kwargs['tools'] = coerce_tuple(tools, BaseTool) # type: ignore[type-abstract] + extra_query: QueryType | None + if extra_query := kwargs.get('extra_query'): + assert isinstance(extra_query, dict) + kwargs['extra_query'] = deepcopy(extra_query) + return super()._replace(**kwargs) diff --git a/src/yandex_cloud_ml_sdk/_chat/completions/model.py b/src/yandex_cloud_ml_sdk/_chat/completions/model.py index 52582ffc..d4192b78 100644 --- a/src/yandex_cloud_ml_sdk/_chat/completions/model.py +++ b/src/yandex_cloud_ml_sdk/_chat/completions/model.py @@ -15,7 +15,7 @@ from yandex_cloud_ml_sdk._types.tools.tool_choice import coerce_to_json as coerce_tool_choice_to_json from yandex_cloud_ml_sdk._utils.sync import run_sync, run_sync_generator -from .config import ChatModelConfig, ChatReasoningModeType +from .config import ChatModelConfig, ChatReasoningModeType, QueryType from .message import ChatMessageInputType, messages_to_json from .result import ChatModelResult @@ -40,6 +40,7 @@ def configure( # type: ignore[override] tools: UndefinedOr[Sequence[CompletionTool] | CompletionTool] = UNDEFINED, parallel_tool_calls: UndefinedOr[bool] = UNDEFINED, tool_choice: UndefinedOr[ToolChoiceType] = UNDEFINED, + extra_query: UndefinedOr[QueryType] = UNDEFINED, ) -> Self: return super().configure( temperature=temperature, @@ -49,6 +50,7 @@ def configure( # type: ignore[override] tools=tools, parallel_tool_calls=parallel_tool_calls, tool_choice=tool_choice, + extra_query=extra_query, ) def _build_request_json(self, messages: ChatMessageInputType, stream: bool) -> dict[str, Any]: @@ -85,6 +87,10 @@ def _build_request_json(self, messages: ChatMessageInputType, stream: bool) -> d if c.tool_choice is not None: result['tool_choice'] = 
coerce_tool_choice_to_json(c.tool_choice) + + if c.extra_query is not None: + result.update(c.extra_query) + return result @override diff --git a/tests/chat/cassettes/test_completions/test_extra_query.yaml b/tests/chat/cassettes/test_completions/test_extra_query.yaml new file mode 100644 index 00000000..9bcad66c --- /dev/null +++ b/tests/chat/cassettes/test_completions/test_extra_query.yaml @@ -0,0 +1,92 @@ +interactions: +- request: + body: '{"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","messages":[{"role":"user","content":"Say + random number from 0 to 10"}],"stream":false}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '142' + content-type: + - application/json + host: + - llm.api.cloud.yandex.net + user-agent: + - yandex-cloud-ml-sdk/0.15.0 python/3.12 + x-client-request-id: + - e39a16aa-9ecc-4769-a220-d99a47e9997c + method: POST + uri: https://llm.api.cloud.yandex.net/v1/chat/completions + response: + body: + string: '{"id":"09afef25-7de1-4a86-8cda-1cb673f2ddc1","object":"chat.completion","created":1758116486,"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","choices":[{"index":0,"message":{"role":"assistant","content":"7"},"finish_reason":"stop"}],"usage":{"prompt_tokens":20,"total_tokens":22,"completion_tokens":2}} + + ' + headers: + content-length: + - '309' + content-type: + - application/json + date: + - Wed, 17 Sep 2025 13:41:26 GMT + server: + - ycalb + x-client-request-id: + - e39a16aa-9ecc-4769-a220-d99a47e9997c + x-request-id: + - c3402ee1-93c6-4c99-a660-f978ec004b35 + x-server-trace-id: + - 990d344b241074c2:295875c4dbdaafe7:990d344b241074c2:1 + status: + code: 200 + message: OK +- request: + body: '{"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","messages":[{"role":"user","content":"Say + random number from 0 to 10"}],"stream":false,"top_k":3}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + 
content-length: + - '152' + content-type: + - application/json + host: + - llm.api.cloud.yandex.net + user-agent: + - yandex-cloud-ml-sdk/0.15.0 python/3.12 + x-client-request-id: + - 5a710077-50b4-43ed-b269-78116ab15066 + method: POST + uri: https://llm.api.cloud.yandex.net/v1/chat/completions + response: + body: + string: '{"id":"1ee562fa-00fa-4d56-84a5-4bcea60cd694","object":"chat.completion","created":1758116487,"model":"gpt://b1ghsjum2v37c2un8h64/yandexgpt/latest","choices":[{"index":0,"message":{"role":"assistant","content":"7"},"finish_reason":"stop"}],"usage":{"prompt_tokens":20,"total_tokens":22,"completion_tokens":2}} + + ' + headers: + content-length: + - '309' + content-type: + - application/json + date: + - Wed, 17 Sep 2025 13:41:27 GMT + server: + - ycalb + x-client-request-id: + - 5a710077-50b4-43ed-b269-78116ab15066 + x-request-id: + - e3cb30bc-4cdb-4110-a4ba-90c9b2744aa9 + x-server-trace-id: + - 16405981e4f1ec7d:64b3b33e662bcec:16405981e4f1ec7d:1 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/chat/test_completions.py b/tests/chat/test_completions.py index 96483451..30a2fcc6 100644 --- a/tests/chat/test_completions.py +++ b/tests/chat/test_completions.py @@ -5,6 +5,7 @@ import pathlib from typing import cast +import httpx._client import pytest from yandex_cloud_ml_sdk import AsyncYCloudML @@ -388,3 +389,27 @@ async def test_multimodal(async_sdk: AsyncYCloudML) -> None: ] result = await model.run(request) assert 'complex' in result.text + + +async def test_extra_query(async_sdk: AsyncYCloudML, monkeypatch) -> None: + top_k = None + + original = httpx._client.AsyncClient.request + + async def patched_request(*args, **kwargs): + nonlocal top_k + top_k = kwargs.get('json', {}).get('top_k') + return await original(*args, **kwargs) + + monkeypatch.setattr("httpx._client.AsyncClient.request", patched_request) + + query = "Say random number from 0 to 10" + + model = async_sdk.chat.completions('yandexgpt') + + await model.run(query) + 
assert not top_k + + model = model.configure(extra_query={'top_k': 3}) + await model.run(query) + assert top_k == 3