-
Notifications
You must be signed in to change notification settings - Fork 27
Add docstrings for _models/completions #124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
7d3dc17
Add docstrings for _models/completions/function.py - v1
Mandzhi 724c952
Add docstrings for _models/completions/tune_params.py - v1
Mandzhi 4c85fc2
Add docstrings for _models/completions/token.py - v1
Mandzhi 26d83aa
Add docstrings for _models/completions/model.py - v1
Mandzhi 34cc92b
Add docstrings for _models/completions/model.py - v2
Mandzhi 55b4edc
Add docstrings for _models/completions/message.py - v1
Mandzhi 283abee
Add docstrings for _models/completions/result.py - v1
Mandzhi 4a5d4fa
Add docstrings for _models/completions/langchain.py - v1
Mandzhi 1316a8d
Add docstrings for _models/completions/function.py - v2
Mandzhi 1019f27
[pre-commit.ci lite] apply automatic fixes
pre-commit-ci-lite[bot] 26ae98c
Add docstrings for _models/completions/function.py - v3
Mandzhi fd60330
Add docstrings for _models/completions/function.py - v4
Mandzhi 37a537f
Add docstrings for _models/completions/function.py - v5
Mandzhi b8d2196
Add docstrings for _models/completions/function.py - v6
Mandzhi 2f7de07
Merge branch 'master' into new-branch-7
vhaldemar f56c6c8
Add docstrings for _models/completions/function.py - v7
Mandzhi 0b49743
[pre-commit.ci lite] apply automatic fixes
pre-commit-ci-lite[bot] b83ac65
Add docstrings for _models/completions/function.py - v8
Mandzhi 7d2e22c
Add docstrings for _models/completions/langchain.py - v2
Mandzhi 4950c32
Add docstrings for _models/completions/model.py - v3
Mandzhi cf7ae28
Add docstrings for _models/completions/result.py - v2
Mandzhi 082e74b
Merge branch 'yandex-cloud:master' into new-branch-7
Mandzhi 7cfe2f5
Add docstrings for _models/completions/model.py - v4
Mandzhi 37f4c43
Add docstrings for _models/completions/config.py - v1
Mandzhi File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,7 @@ | |
| from yandex_cloud_ml_sdk._types.tuning.schedulers import BaseScheduler | ||
| from yandex_cloud_ml_sdk._types.tuning.tuning_types import BaseTuningType | ||
| from yandex_cloud_ml_sdk._utils.coerce import coerce_tuple | ||
| from yandex_cloud_ml_sdk._utils.doc import doc_from | ||
| from yandex_cloud_ml_sdk._utils.sync import run_sync, run_sync_generator | ||
|
|
||
| from .config import CompletionTool, GPTModelConfig, ReasoningMode, ReasoningModeType | ||
|
|
@@ -54,6 +55,10 @@ class BaseGPTModel( | |
| ModelTuneMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], GPTModelTuneParams, TuningTaskTypeT], | ||
| BaseModelBatchMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], BatchSubdomainTypeT], | ||
| ): | ||
| """ | ||
| A class for GPT models providing various functionalities | ||
| including tuning, and batch processing. | ||
| """ | ||
| _config_type = GPTModelConfig | ||
| _result_type: type[GPTModelResult[ToolCallTypeT]] | ||
| _operation_type: type[OperationTypeT] | ||
|
|
@@ -67,6 +72,14 @@ class BaseGPTModel( | |
| _batch_proto_metadata_type = BatchCompletionMetadata | ||
|
|
||
| def langchain(self, model_type: Literal["chat"] = "chat", timeout: int = 60) -> BaseYandexLanguageModel: | ||
| """ | ||
| Initializes a langchain model based on the specified model type. | ||
|
|
||
| :param model_type: the type of langchain model to initialize. | ||
| Defaults to ``"chat"``. | ||
| :param timeout: the timeout which sets the default for the langchain model object. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| from .langchain import ChatYandexGPT # pylint: disable=import-outside-toplevel | ||
|
|
||
| if model_type == "chat": | ||
|
|
@@ -86,6 +99,19 @@ def configure( # type: ignore[override] | |
| tools: UndefinedOr[Sequence[CompletionTool] | CompletionTool] = UNDEFINED, | ||
| parallel_tool_calls: UndefinedOr[bool] = UNDEFINED, | ||
| ) -> Self: | ||
| """ | ||
| Configures the model with specified parameters. | ||
|
|
||
| :param temperature: a sampling temperature to use - higher values mean more random results. Should be a double number between 0 (inclusive) and 1 (inclusive). | ||
| :param max_tokens: a maximum number of tokens to generate in the response. | ||
| :param reasoning_mode: the mode of reasoning to apply during generation, allowing the model to perform internal reasoning before responding. | ||
| Read more about possible modes in the `documentation <https://yandex.cloud/docs/foundation-models/text-generation/api-ref/TextGeneration/completion#yandex.cloud.ai.foundation_models.v1.ReasoningOptions>`_. | ||
| :param response_format: a format of the response returned by the model. Could be a JsonSchema, a JSON string, or a pydantic model. | ||
| Read more about possible response formats in the `documentation <https://yandex.cloud/docs/foundation-models/concepts/yandexgpt/#structured-output>`_. | ||
| :param tools: tools to use for completion. Can be a sequence or a single tool. | ||
| :param parallel_tool_calls: whether to allow parallel calls to tools during completion. | ||
vhaldemar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Defaults to ``true``. | ||
| """ | ||
| return super().configure( | ||
| temperature=temperature, | ||
| max_tokens=max_tokens, | ||
|
|
@@ -259,7 +285,7 @@ async def _tokenize( | |
| ) | ||
| return tuple(Token._from_proto(t) for t in response.tokens) | ||
|
|
||
|
|
||
| @doc_from(BaseGPTModel) | ||
| class AsyncGPTModel( | ||
| BaseGPTModel[ | ||
| AsyncOperation[GPTModelResult[AsyncToolCall]], | ||
|
|
@@ -279,6 +305,14 @@ async def run( | |
| *, | ||
| timeout=60, | ||
| ) -> GPTModelResult[AsyncToolCall]: | ||
| """ | ||
| Executes the model with the provided messages. | ||
|
|
||
| :param messages: the input messages to process. Could be a string, a dictionary, or a result object. | ||
| Read more about other possible message types in the `documentation <https://yandex.cloud/docs/foundation-models/text-generation/api-ref/TextGeneration/completion#yandex.cloud.ai.foundation_models.v1.Message>`_. | ||
|
||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._run( | ||
| messages=messages, | ||
| timeout=timeout | ||
|
|
@@ -290,6 +324,14 @@ async def run_stream( | |
| *, | ||
| timeout=60, | ||
| ) -> AsyncIterator[GPTModelResult[AsyncToolCall]]: | ||
| """ | ||
| Executes the model with the provided messages | ||
| and yields partial results as they become available. | ||
|
|
||
| :param messages: the input messages to process. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| async for result in self._run_stream( | ||
| messages=messages, | ||
| timeout=timeout | ||
|
|
@@ -302,12 +344,26 @@ async def run_deferred( | |
| *, | ||
| timeout=60 | ||
| ) -> AsyncOperation[GPTModelResult[AsyncToolCall]]: | ||
| """ | ||
| Initiates a deferred execution of the model with the provided messages. | ||
|
|
||
| :param messages: the input messages to process. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._run_deferred( | ||
| messages=messages, | ||
| timeout=timeout, | ||
| ) | ||
|
|
||
| async def attach_deferred(self, operation_id: str, timeout: float = 60) -> AsyncOperation[GPTModelResult[AsyncToolCall]]: | ||
| """ | ||
| Attaches to an ongoing deferred operation using its operation id. | ||
|
|
||
| :param operation_id: the id of the deferred operation to attach to. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._attach_deferred(operation_id=operation_id, timeout=timeout) | ||
|
|
||
| async def tokenize( | ||
|
|
@@ -316,6 +372,13 @@ async def tokenize( | |
| *, | ||
| timeout=60 | ||
| ) -> tuple[Token, ...]: | ||
| """ | ||
| Tokenizes the provided messages into a tuple of tokens. | ||
|
|
||
| :param messages: the input messages to tokenize. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._tokenize( | ||
| messages=messages, | ||
| timeout=timeout | ||
|
|
@@ -339,6 +402,23 @@ async def tune_deferred( | |
| optimizer: UndefinedOr[BaseOptimizer] = UNDEFINED, | ||
| timeout: float = 60, | ||
| ) -> AsyncTuningTask['AsyncGPTModel']: | ||
| """Initiate a deferred tuning process for the model. | ||
|
|
||
| :param train_datasets: the dataset objects and/or dataset ids used for training of the model. | ||
| :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model. | ||
| :param name: the name of the tuning task. | ||
| :param description: the description of the tuning task. | ||
| :param labels: labels for the tuning task. | ||
| :param seed: a random seed for reproducibility. | ||
| :param lr: a learning rate for tuning. | ||
| :param n_samples: a number of samples for tuning. | ||
| :param additional_arguments: additional arguments for tuning. | ||
| :param tuning_type: a type of tuning to be applied. | ||
| :param scheduler: a scheduler for tuning. | ||
| :param optimizer: an optimizer for tuning. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._tune_deferred( | ||
| train_datasets=train_datasets, | ||
| validation_datasets=validation_datasets, | ||
|
|
@@ -375,6 +455,27 @@ async def tune( | |
| poll_timeout: int = 72 * 60 * 60, | ||
| poll_interval: float = 60, | ||
| ) -> Self: | ||
| """Tune the model with the specified training datasets and parameters. | ||
|
|
||
| :param train_datasets: the dataset objects and/or dataset ids used for training of the model. | ||
| :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model. | ||
| :param name: the name of the tuning task. | ||
| :param description: the description of the tuning task. | ||
| :param labels: labels for the tuning task. | ||
| :param seed: a random seed for reproducibility. | ||
| :param lr: a learning rate for tuning. | ||
| :param n_samples: a number of samples for tuning. | ||
| :param additional_arguments: additional arguments for tuning. | ||
| :param tuning_type: a type of tuning to be applied. | ||
| :param scheduler: a scheduler for tuning. | ||
| :param optimizer: an optimizer for tuning. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| :param poll_timeout: the maximum time to wait while polling for completion of the tuning task. | ||
| Defaults to 259200 seconds (72 hours). | ||
| :param poll_interval: the interval between polling attempts during the tuning process. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._tune( | ||
| train_datasets=train_datasets, | ||
| validation_datasets=validation_datasets, | ||
|
|
@@ -394,9 +495,15 @@ async def tune( | |
| ) | ||
|
|
||
| async def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> AsyncTuningTask['AsyncGPTModel']: | ||
| return await self._attach_tune_deferred(task_id=task_id, timeout=timeout) | ||
| """Attach a deferred tuning task using its task id. | ||
|
|
||
| :param task_id: the id of the deferred tuning task to attach to. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._attach_tune_deferred(task_id=task_id, timeout=timeout) | ||
|
|
||
| @doc_from(BaseGPTModel) | ||
| class GPTModel( | ||
| BaseGPTModel[ | ||
| Operation[GPTModelResult[ToolCall]], | ||
|
|
@@ -418,6 +525,7 @@ class GPTModel( | |
| __tune = run_sync(BaseGPTModel._tune) | ||
| __attach_tune_deferred = run_sync(BaseGPTModel._attach_tune_deferred) | ||
|
|
||
| @doc_from(AsyncGPTModel.run) | ||
| def run( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -429,6 +537,7 @@ def run( | |
| timeout=timeout | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.run_stream) | ||
| def run_stream( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -440,6 +549,7 @@ def run_stream( | |
| timeout=timeout | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.run_deferred) | ||
| def run_deferred( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -451,12 +561,14 @@ def run_deferred( | |
| timeout=timeout, | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.attach_deferred) | ||
| def attach_deferred(self, operation_id: str, timeout: float = 60) -> Operation[GPTModelResult[ToolCall]]: | ||
| return cast( | ||
| Operation[GPTModelResult[ToolCall]], | ||
| self.__attach_deferred(operation_id=operation_id, timeout=timeout) | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.tokenize) | ||
| def tokenize( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -469,6 +581,7 @@ def tokenize( | |
| ) | ||
|
|
||
| # pylint: disable=too-many-locals | ||
| @doc_from(AsyncGPTModel.tune_deferred) | ||
| def tune_deferred( | ||
| self, | ||
| train_datasets: TuningDatasetsType, | ||
|
|
@@ -504,6 +617,7 @@ def tune_deferred( | |
| return cast(TuningTask[GPTModel], result) | ||
|
|
||
| # pylint: disable=too-many-locals | ||
| @doc_from(AsyncGPTModel.tune) | ||
| def tune( | ||
| self, | ||
| train_datasets: TuningDatasetsType, | ||
|
|
@@ -541,6 +655,7 @@ def tune( | |
| poll_interval=poll_interval, | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.attach_tune_deferred) | ||
| def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> TuningTask[GPTModel]: | ||
| return cast( | ||
| TuningTask[GPTModel], | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.