diff --git a/src/yandex_cloud_ml_sdk/_models/completions/config.py b/src/yandex_cloud_ml_sdk/_models/completions/config.py
index cbed5b1d..7214f397 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/config.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/config.py
@@ -19,21 +19,42 @@
 
 
 class ReasoningMode(ProtoEnumBase, Enum):
+    """Enumeration for reasoning modes.
+
+    This class defines the various modes of reasoning that can be used
+    in the model's configurations.
+    """
+    #: indicates that the reasoning mode is unspecified
     REASONING_MODE_UNSPECIFIED = _m.REASONING_MODE_UNSPECIFIED
+    #: indicates that reasoning is disabled
     DISABLED = _m.DISABLED
+    #: indicates that reasoning is enabled but hidden
     ENABLED_HIDDEN = _m.ENABLED_HIDDEN
 
-
+#: type alias for reasoning mode representation
 ReasoningModeType = Union[int, str, ReasoningMode]
+#: type alias for completion tools
 CompletionTool: TypeAlias = FunctionTool
 
 
 @dataclass(frozen=True)
 class GPTModelConfig(BaseModelConfig):
+    """Configuration for the GPT model.
+
+    It holds the configuration settings for the GPT model,
+    including parameters for generation and tool usage.
+    """
+    #: a sampling temperature to use - higher values mean more random results; should be a floating-point number between 0 (inclusive) and 1 (inclusive)
     temperature: float | None = None
+    #: a maximum number of tokens to generate in the response
     max_tokens: int | None = None
+    #: the mode of reasoning to apply during generation, allowing the model to perform internal reasoning before responding
     reasoning_mode: ReasoningModeType | None = None
+    #: a format of the response returned by the model. Could be a JsonSchema, a JSON string, or a pydantic model
     response_format: ResponseType | None = None
+    #: tools to use for completion. Can be a sequence or a single tool
     tools: Sequence[CompletionTool] | CompletionTool | None = None
+    #: whether to allow parallel calls to tools during completion; defaults to 'true'
     parallel_tool_calls: bool | None = None
+    #: the strategy for choosing tools: depending on this parameter, the model can always call some tool, call a specific tool, or not call any tool.
     tool_choice: ToolChoiceType | None = None
diff --git a/src/yandex_cloud_ml_sdk/_models/completions/function.py b/src/yandex_cloud_ml_sdk/_models/completions/function.py
index 25ee7388..097a0cbd 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/function.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/function.py
@@ -3,11 +3,18 @@
 from typing_extensions import override
 
 from yandex_cloud_ml_sdk._types.function import BaseModelFunction, ModelTypeT
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 
 from .model import AsyncGPTModel, GPTModel
 
 
 class BaseCompletions(BaseModelFunction[ModelTypeT]):
+    """
+    A class for handling completions models.
+
+    It defines the core functionality for calling a model
+    to generate completions based on the provided model name and version.
+    """
     @override
     def __call__(
         self,
@@ -15,6 +22,19 @@ def __call__(
         *,
         model_version: str = 'latest',
     ) -> ModelTypeT:
+        """
+        Create a model object to call for generating completions.
+
+        This method constructs the URI for the model based on the provided
+        name and version. If the name contains ``://``, it is
+        treated as a full URI. Otherwise, it looks up the model name in
+        the well-known names dictionary. Whatever that lookup yields,
+        a URI of the form ``gpt://<folder_id>/<model>/<version>`` is then constructed.
+
+        :param model_name: the name or URI of the model to call.
+        :param model_version: the version of the model to use.
+            Defaults to 'latest'.
+        """
         if '://' in model_name:
             uri = model_name
         else:
@@ -26,10 +46,10 @@ def __call__(
             uri=uri,
         )
 
-
+@doc_from(BaseCompletions)
 class Completions(BaseCompletions[GPTModel]):
     _model_type = GPTModel
 
-
+@doc_from(BaseCompletions)
 class AsyncCompletions(BaseCompletions[AsyncGPTModel]):
     _model_type = AsyncGPTModel
diff --git a/src/yandex_cloud_ml_sdk/_models/completions/langchain.py b/src/yandex_cloud_ml_sdk/_models/completions/langchain.py
index b3e17e7a..d63cf2ab 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/langchain.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/langchain.py
@@ -51,6 +51,8 @@ def _transform_messages(history: list[BaseMessage]) -> list[TextMessageDict]:
 
 
 class ChatYandexGPT(BaseYandexLanguageModel[BaseGPTModel], BaseChatModel):
+    """Chat model for Yandex GPT integration.
+    This class provides integration with the `LangChain <https://python.langchain.com/docs/introduction/>`_ library."""
     class Config:
         arbitrary_types_allowed = True
 
diff --git a/src/yandex_cloud_ml_sdk/_models/completions/message.py b/src/yandex_cloud_ml_sdk/_models/completions/message.py
index 71469091..254d5b2d 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/message.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/message.py
@@ -17,10 +17,18 @@
 
 @runtime_checkable
 class TextMessageWithToolCallsProtocol(TextMessageProtocol, Protocol):
+    """
+    A protocol that defines a text message structure with associated tool calls.
+    The protocol extends the TextMessageProtocol and requires a list of tool calls.
+    """
     tool_calls: ToolCallList
 
 
 class FunctionResultMessageDict(TypedDict):
+    """
+    A TypedDict representing the structure of a function result message.
+    The dictionary contains the role of the message sender and the results of tool calls.
+    """
     role: NotRequired[str]
     tool_results: Required[Iterable[ToolResultDictType]]
 
@@ -31,12 +39,14 @@ class _ProtoMessageKwargs(TypedDict):
     tool_result_list: NotRequired[ProtoCompletionsToolResultList]
     tool_call_list: NotRequired[ProtoCompletionsToolCallList]
 
-
+#: a type alias for a message that can either be a standard message or a function result message.
 CompletionsMessageType = Union[MessageType, FunctionResultMessageDict]
+#: a type alias for input that can be either a single completion message or a collection (i.e. an iterable) of completion messages.
 MessageInputType = Union[CompletionsMessageType, Iterable[CompletionsMessageType]]
 
 
 def messages_to_proto(messages: MessageInputType) -> list[ProtoMessage]:
+    """:meta private:"""
     msgs: tuple[CompletionsMessageType, ...] = coerce_tuple(
         messages,
         (dict, str, TextMessageProtocol),  # type: ignore[arg-type]
diff --git a/src/yandex_cloud_ml_sdk/_models/completions/model.py b/src/yandex_cloud_ml_sdk/_models/completions/model.py
index affe7e72..7a7b6f2c 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/model.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/model.py
@@ -37,6 +37,7 @@
 from yandex_cloud_ml_sdk._types.tuning.schedulers import BaseScheduler
 from yandex_cloud_ml_sdk._types.tuning.tuning_types import BaseTuningType
 from yandex_cloud_ml_sdk._utils.coerce import coerce_tuple
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 from yandex_cloud_ml_sdk._utils.sync import run_sync, run_sync_generator
 
 from .config import CompletionTool, GPTModelConfig, ReasoningMode, ReasoningModeType
@@ -57,6 +58,10 @@ class BaseGPTModel(
     ModelTuneMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], GPTModelTuneParams, TuningTaskTypeT],
     BaseModelBatchMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], BatchSubdomainTypeT],
 ):
+    """
+    A class for GPT models providing various functionalities
+    including tuning and batch processing.
+    """
     _config_type = GPTModelConfig
     _result_type: type[GPTModelResult[ToolCallTypeT]]
     _operation_type: type[OperationTypeT]
@@ -70,6 +75,14 @@ class BaseGPTModel(
     _batch_proto_metadata_type = BatchCompletionMetadata
 
     def langchain(self, model_type: Literal["chat"] = "chat", timeout: int = 60) -> BaseYandexLanguageModel:
+        """
+        Initializes a langchain model based on the specified model type.
+
+        :param model_type: the type of langchain model to initialize.
+            Defaults to ``"chat"``.
+        :param timeout: the timeout which sets the default for the langchain model object.
+            Defaults to 60 seconds.
+        """
         from .langchain import ChatYandexGPT  # pylint: disable=import-outside-toplevel
 
         if model_type == "chat":
@@ -90,6 +103,24 @@ def configure(  # type: ignore[override]
         parallel_tool_calls: UndefinedOr[bool] = UNDEFINED,
         tool_choice: UndefinedOr[ToolChoiceType] = UNDEFINED,
     ) -> Self:
+        """
+        Configures the model with specified parameters.
+
+        :param temperature: a sampling temperature to use - higher values mean more random results. Should be a floating-point number between 0 (inclusive) and 1 (inclusive).
+        :param max_tokens: a maximum number of tokens to generate in the response.
+        :param reasoning_mode: the mode of reasoning to apply during generation, allowing the model to perform internal reasoning before responding.
+            Read more about possible modes in the `documentation <https://yandex.cloud/docs/foundation-models/text-generation/api-ref/TextGeneration/completion#yandex.cloud.ai.foundation_models.v1.ReasoningOptions>`_.
+        :param response_format: a format of the response returned by the model. Could be a JsonSchema, a JSON string, or a pydantic model.
+            Read more about possible response formats in the `documentation <https://yandex.cloud/docs/foundation-models/concepts/yandexgpt/#structured-output>`_.
+        :param tools: tools to use for completion. Can be a sequence or a single tool.
+        :param parallel_tool_calls: whether to allow parallel calls to tools during completion.
+            Defaults to ``true``.
+        :param tool_choice: the strategy for choosing tools.
+            There are several ways to configure ``tool_choice`` for query processing:
+            - no tools to call (``tool_choice='none'``);
+            - required to call any tool (``tool_choice='required'``);
+            - call a specific tool (``tool_choice={'type': 'function', 'function': {'name': 'another_calculator'}}`` or directly passing a tool object).
+        """
         return super().configure(
             temperature=temperature,
             max_tokens=max_tokens,
@@ -269,7 +300,7 @@ async def _tokenize(
             )
             return tuple(Token._from_proto(t) for t in response.tokens)
 
-
+@doc_from(BaseGPTModel)
 class AsyncGPTModel(
     BaseGPTModel[
         AsyncOperation[GPTModelResult[AsyncToolCall]],
@@ -289,6 +320,14 @@ async def run(
         *,
         timeout=60,
     ) -> GPTModelResult[AsyncToolCall]:
+        """
+        Executes the model with the provided messages.
+
+        :param messages: the input messages to process. Could be a string, a dictionary, or a result object.
+            Read more about other possible message types in the `documentation <https://yandex.cloud/docs/foundation-models/sdk/#usage>`_.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._run(
             messages=messages,
             timeout=timeout
@@ -300,6 +339,14 @@ async def run_stream(
         *,
         timeout=60,
     ) -> AsyncIterator[GPTModelResult[AsyncToolCall]]:
+        """
+        Executes the model with the provided messages
+        and yields partial results as they become available.
+
+        :param messages: the input messages to process.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         async for result in self._run_stream(
             messages=messages,
             timeout=timeout
@@ -312,12 +359,26 @@ async def run_deferred(
         *,
         timeout=60
     ) -> AsyncOperation[GPTModelResult[AsyncToolCall]]:
+        """
+        Initiates a deferred execution of the model with the provided messages.
+
+        :param messages: the input messages to process.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._run_deferred(
             messages=messages,
             timeout=timeout,
         )
 
     async def attach_deferred(self, operation_id: str, timeout: float = 60) -> AsyncOperation[GPTModelResult[AsyncToolCall]]:
+        """
+        Attaches to an ongoing deferred operation using its operation id.
+
+        :param operation_id: the id of the deferred operation to attach to.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._attach_deferred(operation_id=operation_id, timeout=timeout)
 
     async def tokenize(
@@ -326,6 +387,13 @@ async def tokenize(
         *,
         timeout=60
     ) -> tuple[Token, ...]:
+        """
+        Tokenizes the provided messages into a tuple of tokens.
+
+        :param messages: the input messages to tokenize.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._tokenize(
             messages=messages,
             timeout=timeout
@@ -349,6 +417,23 @@ async def tune_deferred(
         optimizer: UndefinedOr[BaseOptimizer] = UNDEFINED,
         timeout: float = 60,
     ) -> AsyncTuningTask['AsyncGPTModel']:
+        """Initiate a deferred tuning process for the model.
+
+        :param train_datasets: the dataset objects and/or dataset ids used for training of the model.
+        :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model.
+        :param name: the name of the tuning task.
+        :param description: the description of the tuning task.
+        :param labels: labels for the tuning task.
+        :param seed: a random seed for reproducibility.
+        :param lr: a learning rate for tuning.
+        :param n_samples: a number of samples for tuning.
+        :param additional_arguments: additional arguments for tuning.
+        :param tuning_type: a type of tuning to be applied.
+        :param scheduler: a scheduler for tuning.
+        :param optimizer: an optimizer for tuning.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._tune_deferred(
             train_datasets=train_datasets,
             validation_datasets=validation_datasets,
@@ -385,6 +470,27 @@ async def tune(
         poll_timeout: int = 72 * 60 * 60,
         poll_interval: float = 60,
     ) -> Self:
+        """Tune the model with the specified training datasets and parameters.
+
+        :param train_datasets: the dataset objects and/or dataset ids used for training of the model.
+        :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model.
+        :param name: the name of the tuning task.
+        :param description: the description of the tuning task.
+        :param labels: labels for the tuning task.
+        :param seed: a random seed for reproducibility.
+        :param lr: a learning rate for tuning.
+        :param n_samples: a number of samples for tuning.
+        :param additional_arguments: additional arguments for tuning.
+        :param tuning_type: a type of tuning to be applied.
+        :param scheduler: a scheduler for tuning.
+        :param optimizer: an optimizer for tuning.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        :param poll_timeout: the maximum time to wait while polling for completion of the tuning task.
+            Defaults to 259200 seconds (72 hours).
+        :param poll_interval: the interval between polling attempts during the tuning process.
+            Defaults to 60 seconds.
+        """
         return await self._tune(
             train_datasets=train_datasets,
             validation_datasets=validation_datasets,
@@ -404,9 +510,15 @@ async def tune(
         )
 
     async def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> AsyncTuningTask['AsyncGPTModel']:
-        return await self._attach_tune_deferred(task_id=task_id, timeout=timeout)
+        """Attach to a deferred tuning task using its task id.
 
+        :param task_id: the id of the deferred tuning task to attach to.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
+        return await self._attach_tune_deferred(task_id=task_id, timeout=timeout)
 
+@doc_from(BaseGPTModel)
 class GPTModel(
     BaseGPTModel[
         Operation[GPTModelResult[ToolCall]],
@@ -428,6 +540,7 @@ class GPTModel(
     __tune = run_sync(BaseGPTModel._tune)
     __attach_tune_deferred = run_sync(BaseGPTModel._attach_tune_deferred)
 
+    @doc_from(AsyncGPTModel.run)
     def run(
         self,
         messages: MessageInputType,
@@ -439,6 +552,7 @@ def run(
             timeout=timeout
         )
 
+    @doc_from(AsyncGPTModel.run_stream)
     def run_stream(
         self,
         messages: MessageInputType,
@@ -450,6 +564,7 @@ def run_stream(
             timeout=timeout
         )
 
+    @doc_from(AsyncGPTModel.run_deferred)
     def run_deferred(
         self,
         messages: MessageInputType,
@@ -461,12 +576,14 @@ def run_deferred(
             timeout=timeout,
         )
 
+    @doc_from(AsyncGPTModel.attach_deferred)
     def attach_deferred(self, operation_id: str, timeout: float = 60) -> Operation[GPTModelResult[ToolCall]]:
         return cast(
             Operation[GPTModelResult[ToolCall]],
             self.__attach_deferred(operation_id=operation_id, timeout=timeout)
         )
 
+    @doc_from(AsyncGPTModel.tokenize)
     def tokenize(
         self,
         messages: MessageInputType,
@@ -479,6 +596,7 @@ def tokenize(
         )
 
     # pylint: disable=too-many-locals
+    @doc_from(AsyncGPTModel.tune_deferred)
     def tune_deferred(
         self,
         train_datasets: TuningDatasetsType,
@@ -514,6 +632,7 @@ def tune_deferred(
         return cast(TuningTask[GPTModel], result)
 
     # pylint: disable=too-many-locals
+    @doc_from(AsyncGPTModel.tune)
     def tune(
         self,
         train_datasets: TuningDatasetsType,
@@ -551,6 +670,7 @@ def tune(
             poll_interval=poll_interval,
         )
 
+    @doc_from(AsyncGPTModel.attach_tune_deferred)
     def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> TuningTask[GPTModel]:
         return cast(
             TuningTask[GPTModel],
diff --git a/src/yandex_cloud_ml_sdk/_models/completions/result.py b/src/yandex_cloud_ml_sdk/_models/completions/result.py
index 16778d28..51ae378a 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/result.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/result.py
@@ -18,13 +18,23 @@
 
 @dataclass(frozen=True)
 class Usage:
+    """A class representing usage statistics for a completion request."""
+    #: the number of tokens in the input text
     input_text_tokens: int
+    #: the number of tokens generated in the completion
     completion_tokens: int
+    #: the total number of tokens used (input + completion)
     total_tokens: int
 
 
 @dataclass(frozen=True)
 class CompletionUsage(Usage, ProtoBased[ProtoUsage]):
+    """
+    A class representing detailed usage statistics for a completion request,
+    including reasoning tokens.
+    Inherits from :class:`.Usage` and includes additional information about reasoning tokens.
+    """
+    #: the number of tokens used for reasoning in the completion
     reasoning_tokens: int
 
     @classmethod
@@ -40,13 +50,23 @@ def _from_proto(cls, *, proto: ProtoUsage, **_) -> CompletionUsage:
 _s = ProtoAlternative
 
 class AlternativeStatus(int, Enum):
+    """
+    An enumeration representing the status of an alternative.
+    This enum defines various statuses that an alternative can have during processing.
+    """
+    #: the status is not specified
     UNSPECIFIED = _s.ALTERNATIVE_STATUS_UNSPECIFIED
+    #: the alternative is partially complete
     PARTIAL = _s.ALTERNATIVE_STATUS_PARTIAL
+    #: the alternative is truncated but considered final
     TRUNCATED_FINAL = _s.ALTERNATIVE_STATUS_TRUNCATED_FINAL
+    #: the alternative is complete and final
     FINAL = _s.ALTERNATIVE_STATUS_FINAL
+    #: the alternative has been filtered for content
     CONTENT_FILTER = _s.ALTERNATIVE_STATUS_CONTENT_FILTER
+    #: the alternative involves tool calls
     TOOL_CALLS = _s.ALTERNATIVE_STATUS_TOOL_CALLS
-
+    #: represents an unknown status (-1)
     UNKNOWN = -1
 
     @classmethod
@@ -59,7 +79,10 @@ def _from_proto(cls, status: int):
 
 @dataclass(frozen=True)
 class Alternative(TextMessage, ProtoBased[ProtoAlternative], HaveToolCalls[ToolCallTypeT]):
+    """A class representing one of the generated completion alternatives, including its content and generation status."""
+    #: the status of the alternative
     status: AlternativeStatus
+    #: a list of tool calls associated with this alternative, or None if no tool calls are present
     tool_calls: ToolCallList[ProtoCompletionsToolCallList, ToolCallTypeT] | None
 
     @classmethod
@@ -87,8 +110,12 @@ def _from_proto(cls, *, proto: ProtoAlternative, sdk: SDKType) -> Alternative:
 
 @dataclass(frozen=True)
 class GPTModelResult(BaseResult[CompletionResponse], Sequence, HaveToolCalls[ToolCallTypeT]):
+    """A class representing the result of a GPT model completion request."""
+    #: a tuple of alternatives generated by the model
     alternatives: tuple[Alternative[ToolCallTypeT], ...]
+    #: usage statistics related to the completion request
     usage: CompletionUsage
+    #: the version of the GPT model used for generating the result
     model_version: str
 
     @classmethod
diff --git a/src/yandex_cloud_ml_sdk/_models/completions/token.py b/src/yandex_cloud_ml_sdk/_models/completions/token.py
index bd08396e..94a14b89 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/token.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/token.py
@@ -9,8 +9,13 @@
 
 @dataclass(frozen=True)
 class Token:
+    """This class encapsulates the properties of a token
+    and represents it in a text processing context."""
+    #: a unique identifier for the token
     id: int
+    #: a flag indicating if the token is a special one
     special: bool
+    #: the textual representation of the token
     text: str
 
     @classmethod
diff --git a/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py b/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py
index 05499536..cdd945f5 100644
--- a/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py
+++ b/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py
@@ -10,6 +10,8 @@
 
 @dataclass(frozen=True)
 class GPTModelTuneParams(BaseTuningParams):
+    """This class encapsulates the parameters used for tuning a GPT model
+    in a text-to-text completion context."""
     @property
     def _proto_tuning_params_type(self):
         return TextToTextCompletionTuningParams
@@ -18,7 +20,11 @@ def _proto_tuning_params_type(self):
     def _proto_tuning_argument_name(self):
         return 'text_to_text_completion'
 
+    #: random seed for reproducibility
     seed: int | None = None
+    #: a learning rate for the tuning process
     lr: float | None = None
+    #: a number of samples to use for tuning
     n_samples: int | None = None
+    #: any additional arguments required for tuning
     additional_arguments: str | None = None