yandex-cloud
diff --git a/src/yandex_cloud_ml_sdk/_assistants/assistant.py b/src/yandex_cloud_ml_sdk/_assistants/assistant.py
Lines changed: 111 additions & 3 deletions
@@ -22,6 +22,7 @@
 from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr, get_defined_value
 from yandex_cloud_ml_sdk._types.resource import ExpirableResource, safe_on_delete
 from yandex_cloud_ml_sdk._types.schemas import ResponseType
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 from yandex_cloud_ml_sdk._utils.proto import proto_to_dict
 from yandex_cloud_ml_sdk._utils.sync import run_sync_generator_impl, run_sync_impl
 
@@ -33,15 +34,24 @@
 
 @dataclasses.dataclass(frozen=True)
 class BaseAssistant(ExpirableResource[ProtoAssistant], Generic[RunTypeT, ThreadTypeT]):
+    #: Expiration configuration for the assistant.
     expiration_config: ExpirationConfig
+    #: The GPT model used by the assistant.
     model: BaseGPTModel
+    #: Instructions or guidelines that the assistant should follow. These instructions guide the assistant's behavior and responses.
     instruction: str | None
+    #: Options for truncating thread messages. Controls how messages are truncated when forming the prompt.
     prompt_truncation_options: PromptTruncationOptions
+    #: Tools available to the assistant, stored as a tuple. Each tool must implement the BaseTool interface.
     tools: tuple[BaseTool, ...]
+    #: The format of the response returned by the model. Could be a JsonSchema, a JSON string, or a pydantic model; may be None.
     response_format: ResponseType | None
 
     @property
     def max_prompt_tokens(self) -> int | None:
+        """
+        Return the maximum number of prompt tokens taken from the prompt truncation options (may be None).
+        """
         return self.prompt_truncation_options.max_prompt_tokens
 
     @classmethod
@@ -99,6 +109,30 @@ async def _update(
         response_format: UndefinedOr[ResponseType] = UNDEFINED,
         timeout: float = 60,
     ) -> Self:
+        """
+        Update the assistant's configuration with new parameters.
+
+        This method sends an update request to Yandex Cloud ML API to modify the assistant's
+        configuration. Only specified parameters will be updated, others remain unchanged.
+
+        :param model: New model URI or BaseGPTModel instance to use
+        :param temperature: A sampling temperature to use; higher values mean more random results. Should be a floating-point number between 0 and 1, both inclusive.
+        :param max_tokens: Maximum number of tokens to generate
+        :param instruction: New instructions for the assistant
+        :param max_prompt_tokens: Maximum tokens allowed in the prompt
+        :param prompt_truncation_strategy: Strategy for truncating long prompts
+        :param name: New name for the assistant
+        :param description: New description for the assistant
+        :param labels: New key-value labels for the assistant
+        :param ttl_days: Time-to-live in days before automatic deletion
+        :param tools: Tools to use for completion. Can be a sequence or a single tool.
+        :param expiration_policy: Policy for handling expiration
+        :param response_format: A format of the response returned by the model. Could be a JsonSchema, a JSON string, or a pydantic model.
+            Read more about possible response formats in the
+            `structured output documentation <https://yandex.cloud/docs/foundation-models/concepts/yandexgpt/#structured-output>`_.
+        :param timeout: The timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         # pylint: disable=too-many-locals
         prompt_truncation_options = PromptTruncationOptions._coerce(
             max_prompt_tokens=max_prompt_tokens,
@@ -161,6 +195,15 @@ async def _delete(
         *,
         timeout: float = 60,
     ) -> None:
+        """
+        Delete the assistant from Yandex Cloud ML.
+
+        Sends a delete request to the Yandex Cloud ML API to remove the assistant.
+        After successful deletion, marks the assistant as deleted internally.
+
+        :param timeout: The timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         request = DeleteAssistantRequest(assistant_id=self.id)
 
         async with self._client.get_service_stub(AssistantServiceStub, timeout=timeout) as stub:
@@ -178,6 +221,16 @@ async def _list_versions(
         page_token: UndefinedOr[str] = UNDEFINED,
         timeout: float = 60
     ) -> AsyncIterator[AssistantVersion]:
+        """
+        List all versions of the assistant.
+
+        This method retrieves historical versions of the assistant in a paginated manner.
+
+        :param page_size: Maximum number of versions to return per page
+        :param page_token: Token for pagination
+        :param timeout: The timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         page_token_ = get_defined_value(page_token, '')
         page_size_ = get_defined_value(page_size, 0)
 
@@ -246,6 +299,18 @@ async def _run(
         custom_response_format: UndefinedOr[ResponseType] = UNDEFINED,
         timeout: float = 60,
     ) -> RunTypeT:
+        """
+        Execute a non-streaming run with the assistant on the given thread.
+
+        :param thread: Thread ID or Thread object to run on
+        :param custom_temperature: Override for model temperature
+        :param custom_max_tokens: Override for max tokens to generate
+        :param custom_max_prompt_tokens: Override for max prompt tokens
+        :param custom_prompt_truncation_strategy: Override for prompt truncation strategy
+        :param custom_response_format: Override for response format
+        :param timeout: The timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._run_impl(
             thread=thread,
             stream=False,
@@ -268,6 +333,18 @@ async def _run_stream(
         custom_response_format: UndefinedOr[ResponseType] = UNDEFINED,
         timeout: float = 60,
     ) -> RunTypeT:
+        """
+        Execute a streaming run with the assistant on the given thread.
+
+        :param thread: Thread ID or Thread object to run on
+        :param custom_temperature: Override for model temperature
+        :param custom_max_tokens: Override for max tokens to generate
+        :param custom_max_prompt_tokens: Override for max prompt tokens
+        :param custom_prompt_truncation_strategy: Override for prompt truncation strategy
+        :param custom_response_format: Override for response format
+        :param timeout: The timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._run_impl(
             thread=thread,
             stream=True,
@@ -283,24 +360,46 @@ async def _run_stream(
 
 @dataclasses.dataclass(frozen=True)
 class ReadOnlyAssistant(BaseAssistant[RunTypeT, ThreadTypeT]):
+    """
+    Base class providing read-only access to Yandex Cloud ML Assistant configuration and metadata.
+
+    This class implements the core interface for interacting with Yandex Cloud ML Assistant API
+    in a read-only manner. It serves as the parent class for both synchronous (Assistant)
+    and asynchronous (AsyncAssistant) implementations.
+    """
+
+    #: The name of the assistant.
     name: str | None
+    #: The description of the assistant.
     description: str | None
+    #: The identifier of the user who created the assistant.
     created_by: str
+    #: The timestamp when the assistant was created.
     created_at: datetime
+    #: The identifier of the user who last updated the assistant.
     updated_by: str
+    #: The timestamp when the assistant was last updated.
     updated_at: datetime
+    #: The timestamp when the assistant will expire.
     expires_at: datetime
+    #: Additional labels associated with the assistant.
     labels: dict[str, str] | None
 
-
 @dataclasses.dataclass(frozen=True)
 class AssistantVersion:
+    """
+    Represents a specific version of an Assistant.
+    """
+    #: ID of the assistant version.
     id: str
+    #: The assistant instance for this version.
     assistant: ReadOnlyAssistant
+    #: Mask specifying which fields were updated in this version. The mask also has a custom JSON encoding.
     update_mask: tuple[str, ...]
 
-
+@doc_from(ReadOnlyAssistant)
 class AsyncAssistant(ReadOnlyAssistant[AsyncRun, AsyncThread]):
+    @doc_from(ReadOnlyAssistant._update)
     async def update(
         self,
         *,
@@ -336,13 +435,15 @@ async def update(
             timeout=timeout
         )
 
+    @doc_from(ReadOnlyAssistant._delete)
     async def delete(
         self,
         *,
         timeout: float = 60,
     ) -> None:
         await self._delete(timeout=timeout)
 
+    @doc_from(ReadOnlyAssistant._list_versions)
     async def list_versions(
         self,
         page_size: UndefinedOr[int] = UNDEFINED,
@@ -356,6 +457,7 @@ async def list_versions(
         ):
             yield version
 
+    @doc_from(ReadOnlyAssistant._run)
     async def run(
         self,
         thread: str | AsyncThread,
@@ -377,6 +479,7 @@ async def run(
             timeout=timeout
         )
 
+    @doc_from(ReadOnlyAssistant._run_stream)
     async def run_stream(
         self,
         thread: str | AsyncThread,
@@ -398,8 +501,9 @@ async def run_stream(
             timeout=timeout
         )
 
-
+@doc_from(ReadOnlyAssistant)
 class Assistant(ReadOnlyAssistant[Run, Thread]):
+    @doc_from(ReadOnlyAssistant._update)
     def update(
         self,
         *,
@@ -435,13 +539,15 @@ def update(
             timeout=timeout
         ), self._sdk)
 
+    @doc_from(ReadOnlyAssistant._delete)
     def delete(
         self,
         *,
         timeout: float = 60,
     ) -> None:
         run_sync_impl(self._delete(timeout=timeout), self._sdk)
 
+    @doc_from(ReadOnlyAssistant._list_versions)
     def list_versions(
         self,
         page_size: UndefinedOr[int] = UNDEFINED,
@@ -457,6 +563,7 @@ def list_versions(
             self._sdk
         )
 
+    @doc_from(ReadOnlyAssistant._run)
     def run(
         self,
         thread: str | Thread,
@@ -478,6 +585,7 @@ def run(
             timeout=timeout
         ), self._sdk)
 
+    @doc_from(ReadOnlyAssistant._run_stream)
     def run_stream(
         self,
         thread: str | Thread,