Skip to content

Commit ef23b5d

Browse files
authored
* Support for prompt truncation strategy (#82)
1 parent 9247b6d commit ef23b5d

24 files changed

+3920
-994
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
import asyncio
6+
7+
from yandex_cloud_ml_sdk import AsyncYCloudML
8+
from yandex_cloud_ml_sdk.assistants import AutoPromptTruncationStrategy, LastMessagesPromptTruncationStrategy
9+
10+
# Labels attached to every entity this example creates, so that the cleanup
# step at the end can find and delete exactly the entities made by this script.
LABEL_KEY = 'yc-ml-sdk-example'
LABEL_VALUE = 'prompt-truncation-options'
12+
13+
14+
async def new_thread(sdk):
    """Create a labeled thread and pre-fill it with two user messages.

    The label marks the thread for later cleanup by ``delete_labeled_entities``.
    """
    created = await sdk.threads.create(labels={LABEL_KEY: LABEL_VALUE})
    for message in ('hey, how are you?', 'what is your name?'):
        await created.write(message)
    return created
19+
20+
21+
async def delete_labeled_entities(iterator):
    """Delete every entity yielded by *iterator* that carries this example's label."""
    async for entity in iterator:
        labels = entity.labels
        # Skip anything not created by this example (guard clause form).
        if not labels or labels.get(LABEL_KEY) != LABEL_VALUE:
            continue
        print(f'deleting {entity.__class__.__name__} with id={entity.id!r}')
        await entity.delete()
26+
27+
28+
async def main() -> None:
    """Demonstrate assistant prompt-truncation options and their per-run overrides.

    Creates an assistant with explicit ``max_prompt_tokens`` and a
    ``LastMessagesPromptTruncationStrategy``, then shows how ``custom_*``
    parameters of ``run()`` override those settings for a single run.
    Cleans up all entities it created at the end.
    """
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    assistant = await sdk.assistants.create(
        'yandexgpt',
        labels={LABEL_KEY: LABEL_VALUE},
        # you could choose value for max_prompt_tokens, default value
        # is 7000 by the time I'm making this example
        max_prompt_tokens=500,
        # default prompt truncation strategy is AutoPromptTruncationStrategy, you could
        # change it as well
        prompt_truncation_strategy=LastMessagesPromptTruncationStrategy(num_messages=10),
    )

    thread = await new_thread(sdk)
    # You could also override prompt truncation options via custom_* run() parameters:
    run = await assistant.run(
        thread,
        custom_max_prompt_tokens=1,
        custom_prompt_truncation_strategy=AutoPromptTruncationStrategy()
    )
    result = await run
    # This run should fail because of custom_max_prompt_tokens=1
    assert result.is_failed
    print(f'{result.error=}')

    thread = await new_thread(sdk)
    run = await assistant.run(
        thread,
        custom_prompt_truncation_strategy=LastMessagesPromptTruncationStrategy(num_messages=1)
    )
    result = await run
    assert result.usage
    one_message_input_tokens = result.usage.input_text_tokens

    thread = await new_thread(sdk)
    # NB: 'auto' is a shortcut for AutoPromptTruncationStrategy
    run = await assistant.run(thread, custom_prompt_truncation_strategy='auto')
    result = await run
    assert result.usage
    two_message_input_tokens = result.usage.input_text_tokens

    print('Input tokens used with LastMessagesPromptTruncationStrategy(1) < AutoPromptTruncationStrategy():')
    print(f'    {one_message_input_tokens} < {two_message_input_tokens}')

    # Remove everything this example created so the folder stays clean.
    await delete_labeled_entities(sdk.assistants.list())
    await delete_labeled_entities(sdk.threads.list())
76+
77+
78+
# Script entry point: run the async example inside a fresh event loop.
if __name__ == '__main__':
    asyncio.run(main())
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
from yandex_cloud_ml_sdk import YCloudML
6+
from yandex_cloud_ml_sdk.assistants import AutoPromptTruncationStrategy, LastMessagesPromptTruncationStrategy
7+
8+
# Labels attached to every entity this example creates, so that the cleanup
# step at the end can find and delete exactly the entities made by this script.
LABEL_KEY = 'yc-ml-sdk-example'
LABEL_VALUE = 'prompt-truncation-options'
10+
11+
12+
def new_thread(sdk):
    """Create a labeled thread and pre-fill it with two user messages.

    The label marks the thread for later cleanup by ``delete_labeled_entities``.
    """
    created = sdk.threads.create(labels={LABEL_KEY: LABEL_VALUE})
    for message in ('hey, how are you?', 'what is your name?'):
        created.write(message)
    return created
17+
18+
19+
def delete_labeled_entities(iterator):
    """Delete every entity yielded by *iterator* that carries this example's label."""
    for entity in iterator:
        labels = entity.labels
        # Skip anything not created by this example (guard clause form).
        if not labels or labels.get(LABEL_KEY) != LABEL_VALUE:
            continue
        print(f'deleting {entity.__class__.__name__} with id={entity.id!r}')
        entity.delete()
24+
25+
26+
def main() -> None:
    """Demonstrate assistant prompt-truncation options and their per-run overrides.

    Creates an assistant with explicit ``max_prompt_tokens`` and a
    ``LastMessagesPromptTruncationStrategy``, then shows how ``custom_*``
    parameters of ``run()`` override those settings for a single run.
    Cleans up all entities it created at the end.
    """
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    assistant = sdk.assistants.create(
        'yandexgpt',
        labels={LABEL_KEY: LABEL_VALUE},
        # you could choose value for max_prompt_tokens, default value
        # is 7000 by the time I'm making this example
        max_prompt_tokens=500,
        # default prompt truncation strategy is AutoPromptTruncationStrategy, you could
        # change it as well
        prompt_truncation_strategy=LastMessagesPromptTruncationStrategy(num_messages=10),
    )

    thread = new_thread(sdk)
    # You could also override prompt truncation options via custom_* run() parameters:
    run = assistant.run(
        thread,
        custom_max_prompt_tokens=1,
        custom_prompt_truncation_strategy=AutoPromptTruncationStrategy()
    )
    result = run.wait()
    # This run should fail because of custom_max_prompt_tokens=1
    assert result.is_failed
    print(f'{result.error=}')

    thread = new_thread(sdk)
    run = assistant.run(
        thread,
        custom_prompt_truncation_strategy=LastMessagesPromptTruncationStrategy(num_messages=1)
    )
    result = run.wait()
    assert result.usage
    one_message_input_tokens = result.usage.input_text_tokens

    thread = new_thread(sdk)
    # NB: 'auto' is a shortcut for AutoPromptTruncationStrategy
    run = assistant.run(thread, custom_prompt_truncation_strategy='auto')
    result = run.wait()
    assert result.usage
    two_message_input_tokens = result.usage.input_text_tokens

    print('Input tokens used with LastMessagesPromptTruncationStrategy(1) < AutoPromptTruncationStrategy():')
    print(f'    {one_message_input_tokens} < {two_message_input_tokens}')

    # Remove everything this example created so the folder stays clean.
    delete_labeled_entities(sdk.assistants.list())
    delete_labeled_entities(sdk.threads.list())
74+
75+
76+
# Script entry point: run the synchronous example.
if __name__ == '__main__':
    main()

src/yandex_cloud_ml_sdk/_assistants/assistant.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
from yandex_cloud_ml_sdk._utils.coerce import coerce_tuple
2626
from yandex_cloud_ml_sdk._utils.sync import run_sync_generator_impl, run_sync_impl
2727

28-
from .utils import get_completion_options, get_prompt_trunctation_options
28+
from .prompt_truncation_options import PromptTruncationOptions, PromptTruncationStrategyType
29+
from .utils import get_completion_options
2930

3031
if TYPE_CHECKING:
3132
from yandex_cloud_ml_sdk._sdk import BaseSDK
@@ -36,9 +37,13 @@ class BaseAssistant(ExpirableResource, Generic[RunTypeT, ThreadTypeT]):
3637
expiration_config: ExpirationConfig
3738
model: BaseGPTModel
3839
instruction: str | None
39-
max_prompt_tokens: int | None
40+
prompt_truncation_options: PromptTruncationOptions
4041
tools: tuple[BaseTool, ...]
4142

43+
@property
44+
def max_prompt_tokens(self) -> int | None:
45+
return self.prompt_truncation_options.max_prompt_tokens
46+
4247
@classmethod
4348
def _kwargs_from_message(cls, proto: ProtoAssistant, sdk: BaseSDK) -> dict[str, Any]: # type: ignore[override]
4449
kwargs = super()._kwargs_from_message(proto, sdk=sdk)
@@ -55,9 +60,10 @@ def _kwargs_from_message(cls, proto: ProtoAssistant, sdk: BaseSDK) -> dict[str,
5560
BaseTool._from_upper_proto(tool, sdk=sdk)
5661
for tool in proto.tools
5762
)
58-
59-
if max_prompt_tokens := proto.prompt_truncation_options.max_prompt_tokens.value:
60-
kwargs['max_prompt_tokens'] = max_prompt_tokens
63+
kwargs['prompt_truncation_options'] = PromptTruncationOptions._from_proto(
64+
proto=proto.prompt_truncation_options,
65+
sdk=sdk
66+
)
6167

6268
return kwargs
6369

@@ -71,6 +77,7 @@ async def _update(
7177
max_tokens: UndefinedOr[int] = UNDEFINED,
7278
instruction: UndefinedOr[str] = UNDEFINED,
7379
max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
80+
prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
7481
name: UndefinedOr[str] = UNDEFINED,
7582
description: UndefinedOr[str] = UNDEFINED,
7683
labels: UndefinedOr[dict[str, str]] = UNDEFINED,
@@ -104,16 +111,20 @@ async def _update(
104111
else:
105112
raise TypeError('model argument must be str, GPTModel object either undefined')
106113

114+
prompt_truncation_options = PromptTruncationOptions._coerce(
115+
max_prompt_tokens=max_prompt_tokens,
116+
strategy=prompt_truncation_strategy
117+
)
118+
proto_prompt_trunction_options = prompt_truncation_options._to_proto()
119+
107120
request = UpdateAssistantRequest(
108121
assistant_id=self.id,
109122
name=get_defined_value(name, ''),
110123
description=get_defined_value(description, ''),
111124
labels=get_defined_value(labels, {}),
112125
instruction=get_defined_value(instruction, ''),
113126
expiration_config=expiration_config.to_proto(),
114-
prompt_truncation_options=get_prompt_trunctation_options(
115-
max_prompt_tokens=get_defined_value(max_prompt_tokens, None)
116-
),
127+
prompt_truncation_options=proto_prompt_trunction_options,
117128
completion_options=get_completion_options(
118129
temperature=temperature,
119130
max_tokens=max_tokens,
@@ -135,9 +146,8 @@ async def _update(
135146
'model_uri': model_uri,
136147
'completion_options.temperature': temperature,
137148
'completion_options.max_tokens': max_tokens,
138-
'prompt_truncation_options.max_prompt_tokens': max_prompt_tokens,
139149
'tools': tools,
140-
}
150+
} | prompt_truncation_options._get_update_paths()
141151
)
142152

143153
async with self._client.get_service_stub(AssistantServiceStub, timeout=timeout) as stub:
@@ -215,6 +225,7 @@ async def _run_impl(
215225
custom_temperature: UndefinedOr[float] = UNDEFINED,
216226
custom_max_tokens: UndefinedOr[int] = UNDEFINED,
217227
custom_max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
228+
custom_prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
218229
timeout: float = 60,
219230
) -> RunTypeT:
220231
return await self._sdk.runs._create(
@@ -224,6 +235,7 @@ async def _run_impl(
224235
custom_temperature=custom_temperature,
225236
custom_max_tokens=custom_max_tokens,
226237
custom_max_prompt_tokens=custom_max_prompt_tokens,
238+
custom_prompt_truncation_strategy=custom_prompt_truncation_strategy,
227239
timeout=timeout,
228240
)
229241

@@ -234,6 +246,7 @@ async def _run(
234246
custom_temperature: UndefinedOr[float] = UNDEFINED,
235247
custom_max_tokens: UndefinedOr[int] = UNDEFINED,
236248
custom_max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
249+
custom_prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
237250
timeout: float = 60,
238251
) -> RunTypeT:
239252
return await self._run_impl(
@@ -242,6 +255,7 @@ async def _run(
242255
custom_temperature=custom_temperature,
243256
custom_max_tokens=custom_max_tokens,
244257
custom_max_prompt_tokens=custom_max_prompt_tokens,
258+
custom_prompt_truncation_strategy=custom_prompt_truncation_strategy,
245259
timeout=timeout,
246260
)
247261

@@ -252,6 +266,7 @@ async def _run_stream(
252266
custom_temperature: UndefinedOr[float] = UNDEFINED,
253267
custom_max_tokens: UndefinedOr[int] = UNDEFINED,
254268
custom_max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
269+
custom_prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
255270
timeout: float = 60,
256271
) -> RunTypeT:
257272
return await self._run_impl(
@@ -260,6 +275,7 @@ async def _run_stream(
260275
custom_temperature=custom_temperature,
261276
custom_max_tokens=custom_max_tokens,
262277
custom_max_prompt_tokens=custom_max_prompt_tokens,
278+
custom_prompt_truncation_strategy=custom_prompt_truncation_strategy,
263279
timeout=timeout,
264280
)
265281

@@ -293,6 +309,7 @@ async def update(
293309
max_tokens: UndefinedOr[int] = UNDEFINED,
294310
instruction: UndefinedOr[str] = UNDEFINED,
295311
max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
312+
prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
296313
name: UndefinedOr[str] = UNDEFINED,
297314
description: UndefinedOr[str] = UNDEFINED,
298315
labels: UndefinedOr[dict[str, str]] = UNDEFINED,
@@ -307,6 +324,7 @@ async def update(
307324
max_tokens=max_tokens,
308325
instruction=instruction,
309326
max_prompt_tokens=max_prompt_tokens,
327+
prompt_truncation_strategy=prompt_truncation_strategy,
310328
name=name,
311329
description=description,
312330
labels=labels,
@@ -343,13 +361,15 @@ async def run(
343361
custom_temperature: UndefinedOr[float] = UNDEFINED,
344362
custom_max_tokens: UndefinedOr[int] = UNDEFINED,
345363
custom_max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
364+
custom_prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
346365
timeout: float = 60,
347366
) -> AsyncRun:
348367
return await self._run(
349368
thread=thread,
350369
custom_temperature=custom_temperature,
351370
custom_max_tokens=custom_max_tokens,
352371
custom_max_prompt_tokens=custom_max_prompt_tokens,
372+
custom_prompt_truncation_strategy=custom_prompt_truncation_strategy,
353373
timeout=timeout
354374
)
355375

@@ -360,13 +380,15 @@ async def run_stream(
360380
custom_temperature: UndefinedOr[float] = UNDEFINED,
361381
custom_max_tokens: UndefinedOr[int] = UNDEFINED,
362382
custom_max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
383+
custom_prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
363384
timeout: float = 60,
364385
) -> AsyncRun:
365386
return await self._run_stream(
366387
thread=thread,
367388
custom_temperature=custom_temperature,
368389
custom_max_tokens=custom_max_tokens,
369390
custom_max_prompt_tokens=custom_max_prompt_tokens,
391+
custom_prompt_truncation_strategy=custom_prompt_truncation_strategy,
370392
timeout=timeout
371393
)
372394

@@ -380,6 +402,7 @@ def update(
380402
max_tokens: UndefinedOr[int] = UNDEFINED,
381403
instruction: UndefinedOr[str] = UNDEFINED,
382404
max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
405+
prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
383406
name: UndefinedOr[str] = UNDEFINED,
384407
description: UndefinedOr[str] = UNDEFINED,
385408
labels: UndefinedOr[dict[str, str]] = UNDEFINED,
@@ -394,6 +417,7 @@ def update(
394417
max_tokens=max_tokens,
395418
instruction=instruction,
396419
max_prompt_tokens=max_prompt_tokens,
420+
prompt_truncation_strategy=prompt_truncation_strategy,
397421
name=name,
398422
description=description,
399423
labels=labels,
@@ -432,13 +456,15 @@ def run(
432456
custom_temperature: UndefinedOr[float] = UNDEFINED,
433457
custom_max_tokens: UndefinedOr[int] = UNDEFINED,
434458
custom_max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
459+
custom_prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
435460
timeout: float = 60,
436461
) -> Run:
437462
return run_sync_impl(self._run(
438463
thread=thread,
439464
custom_temperature=custom_temperature,
440465
custom_max_tokens=custom_max_tokens,
441466
custom_max_prompt_tokens=custom_max_prompt_tokens,
467+
custom_prompt_truncation_strategy=custom_prompt_truncation_strategy,
442468
timeout=timeout
443469
), self._sdk)
444470

@@ -449,13 +475,15 @@ def run_stream(
449475
custom_temperature: UndefinedOr[float] = UNDEFINED,
450476
custom_max_tokens: UndefinedOr[int] = UNDEFINED,
451477
custom_max_prompt_tokens: UndefinedOr[int] = UNDEFINED,
478+
custom_prompt_truncation_strategy: UndefinedOr[PromptTruncationStrategyType] = UNDEFINED,
452479
timeout: float = 60,
453480
) -> Run:
454481
return run_sync_impl(self._run_stream(
455482
thread=thread,
456483
custom_temperature=custom_temperature,
457484
custom_max_tokens=custom_max_tokens,
458485
custom_max_prompt_tokens=custom_max_prompt_tokens,
486+
custom_prompt_truncation_strategy=custom_prompt_truncation_strategy,
459487
timeout=timeout
460488
), self._sdk)
461489

0 commit comments

Comments
 (0)