diff --git a/src/yandex_cloud_ml_sdk/_models/completions/config.py b/src/yandex_cloud_ml_sdk/_models/completions/config.py index cbed5b1d..7214f397 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/config.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/config.py @@ -19,21 +19,42 @@ class ReasoningMode(ProtoEnumBase, Enum): + """Enumeration for reasoning modes. + + This class defines the various modes of reasoning that can be used + in the model's configurations. + """ + #: indicates that the reasoning mode is unspecified REASONING_MODE_UNSPECIFIED = _m.REASONING_MODE_UNSPECIFIED + #: indicates that reasoning is disabled DISABLED = _m.DISABLED + #: indicates that reasoning is enabled but hidden ENABLED_HIDDEN = _m.ENABLED_HIDDEN - +#: type alias for reasoning mode representation ReasoningModeType = Union[int, str, ReasoningMode] +#: type alias for completion tools CompletionTool: TypeAlias = FunctionTool @dataclass(frozen=True) class GPTModelConfig(BaseModelConfig): + """Configuration for the GPT model. + + It holds the configuration settings for the GPT model, + including parameters for generation and tool usage. + """ + #: a sampling temperature to use - higher values mean more random results; should be a double number between 0 (inclusive) and 1 (inclusive) temperature: float | None = None + #: a maximum number of tokens to generate in the response max_tokens: int | None = None + #: the mode of reasoning to apply during generation, allowing the model to perform internal reasoning before responding reasoning_mode: ReasoningModeType | None = None + #: a format of the response returned by the model. Could be a JsonSchema, a JSON string, or a pydantic model response_format: ResponseType | None = None + #: tools to use for completion. 
Can be a sequence or a single tool tools: Sequence[CompletionTool] | CompletionTool | None = None + #: whether to allow parallel calls to tools during completion; defaults to 'true' parallel_tool_calls: bool | None = None + #: the strategy for choosing tools: depending on this parameter, the model can always call some tool, call a specific tool, or not call any tool. tool_choice: ToolChoiceType | None = None diff --git a/src/yandex_cloud_ml_sdk/_models/completions/function.py b/src/yandex_cloud_ml_sdk/_models/completions/function.py index 25ee7388..097a0cbd 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/function.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/function.py @@ -3,11 +3,18 @@ from typing_extensions import override from yandex_cloud_ml_sdk._types.function import BaseModelFunction, ModelTypeT +from yandex_cloud_ml_sdk._utils.doc import doc_from from .model import AsyncGPTModel, GPTModel class BaseCompletions(BaseModelFunction[ModelTypeT]): + """ + A class for handling completions models. + + It defines the core functionality for calling a model + to generate completions based on the provided model name and version. + """ @override def __call__( self, @@ -15,6 +22,19 @@ def __call__( *, model_version: str = 'latest', ) -> ModelTypeT: + """ + Create a model object to call for generating completions. + + This method constructs the URI for the model based on the provided + name and version. If the name contains ``://``, it is + treated as a full URI. Otherwise, it looks up the model name in + the well-known names dictionary and then constructs a URI + in the form ``gpt://<folder_id>/<model_name>/<model_version>``. + + :param model_name: the name or URI of the model to call. + :param model_version: the version of the model to use. + Defaults to 'latest'.
+ """ if '://' in model_name: uri = model_name else: @@ -26,10 +46,10 @@ def __call__( uri=uri, ) - +@doc_from(BaseCompletions) class Completions(BaseCompletions[GPTModel]): _model_type = GPTModel - +@doc_from(BaseCompletions) class AsyncCompletions(BaseCompletions[AsyncGPTModel]): _model_type = AsyncGPTModel diff --git a/src/yandex_cloud_ml_sdk/_models/completions/langchain.py b/src/yandex_cloud_ml_sdk/_models/completions/langchain.py index b3e17e7a..d63cf2ab 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/langchain.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/langchain.py @@ -51,6 +51,8 @@ def _transform_messages(history: list[BaseMessage]) -> list[TextMessageDict]: class ChatYandexGPT(BaseYandexLanguageModel[BaseGPTModel], BaseChatModel): + """Chat model for Yandex GPT integration. + This class provides integration with the LangChain library.""" class Config: arbitrary_types_allowed = True diff --git a/src/yandex_cloud_ml_sdk/_models/completions/message.py b/src/yandex_cloud_ml_sdk/_models/completions/message.py index 71469091..254d5b2d 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/message.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/message.py @@ -17,10 +17,18 @@ @runtime_checkable class TextMessageWithToolCallsProtocol(TextMessageProtocol, Protocol): + """ + A protocol that defines a text message structure with associated tool calls. + The protocol extends the TextMessageProtocol and requires a list of tool calls. + """ tool_calls: ToolCallList class FunctionResultMessageDict(TypedDict): + """ + A TypedDict representing the structure of a function result message. + The dictionary contains the role of the message sender and the results of tool calls.
+ """ role: NotRequired[str] tool_results: Required[Iterable[ToolResultDictType]] @@ -31,12 +39,14 @@ class _ProtoMessageKwargs(TypedDict): tool_result_list: NotRequired[ProtoCompletionsToolResultList] tool_call_list: NotRequired[ProtoCompletionsToolCallList] - +#: a type alias for a message that can either be a standard message or a function result message. CompletionsMessageType = Union[MessageType, FunctionResultMessageDict] +#: a type alias for input that can be either a single completion message or a collection (i.e. an iterable) of completion messages. MessageInputType = Union[CompletionsMessageType, Iterable[CompletionsMessageType]] def messages_to_proto(messages: MessageInputType) -> list[ProtoMessage]: + """:meta private:""" msgs: tuple[CompletionsMessageType, ...] = coerce_tuple( messages, (dict, str, TextMessageProtocol), # type: ignore[arg-type] diff --git a/src/yandex_cloud_ml_sdk/_models/completions/model.py b/src/yandex_cloud_ml_sdk/_models/completions/model.py index affe7e72..7a7b6f2c 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/model.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/model.py @@ -37,6 +37,7 @@ from yandex_cloud_ml_sdk._types.tuning.schedulers import BaseScheduler from yandex_cloud_ml_sdk._types.tuning.tuning_types import BaseTuningType from yandex_cloud_ml_sdk._utils.coerce import coerce_tuple +from yandex_cloud_ml_sdk._utils.doc import doc_from from yandex_cloud_ml_sdk._utils.sync import run_sync, run_sync_generator from .config import CompletionTool, GPTModelConfig, ReasoningMode, ReasoningModeType @@ -57,6 +58,10 @@ class BaseGPTModel( ModelTuneMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], GPTModelTuneParams, TuningTaskTypeT], BaseModelBatchMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], BatchSubdomainTypeT], ): + """ + A class for GPT models providing various functionalities + including tuning, and batch processing. 
+ """ _config_type = GPTModelConfig _result_type: type[GPTModelResult[ToolCallTypeT]] _operation_type: type[OperationTypeT] @@ -70,6 +75,14 @@ class BaseGPTModel( _batch_proto_metadata_type = BatchCompletionMetadata def langchain(self, model_type: Literal["chat"] = "chat", timeout: int = 60) -> BaseYandexLanguageModel: + """ + Initializes a langchain model based on the specified model type. + + :param model_type: the type of langchain model to initialize. + Defaults to ``"chat"``. + :param timeout: the default timeout for the langchain model object. + Defaults to 60 seconds. + """ from .langchain import ChatYandexGPT # pylint: disable=import-outside-toplevel if model_type == "chat": @@ -90,6 +103,24 @@ def configure( # type: ignore[override] parallel_tool_calls: UndefinedOr[bool] = UNDEFINED, tool_choice: UndefinedOr[ToolChoiceType] = UNDEFINED, ) -> Self: + """ + Configures the model with specified parameters. + + :param temperature: a sampling temperature to use - higher values mean more random results. Should be a double number between 0 (inclusive) and 1 (inclusive). + :param max_tokens: a maximum number of tokens to generate in the response. + :param reasoning_mode: the mode of reasoning to apply during generation, allowing the model to perform internal reasoning before responding. + Read more about possible modes in the documentation. + :param response_format: a format of the response returned by the model. Could be a JsonSchema, a JSON string, or a pydantic model. + Read more about possible response formats in the documentation. + :param tools: tools to use for completion. Can be a sequence or a single tool. + :param parallel_tool_calls: whether to allow parallel calls to tools during completion. + Defaults to ``true``. + :param tool_choice: the strategy for choosing tools.
+ There are several ways to configure ``tool_choice`` for query processing: + - no tools to call (tool_choice=``'none'``); + - required to call any tool (tool_choice=``'required'``); + - call a specific tool (tool_choice=``{'type': 'function', 'function': {'name': 'another_calculator'}}`` or by directly passing a tool object). + """ return super().configure( temperature=temperature, max_tokens=max_tokens, @@ -269,7 +300,7 @@ async def _tokenize( ) return tuple(Token._from_proto(t) for t in response.tokens) - +@doc_from(BaseGPTModel) class AsyncGPTModel( BaseGPTModel[ AsyncOperation[GPTModelResult[AsyncToolCall]], @@ -289,6 +320,14 @@ async def run( *, timeout=60, ) -> GPTModelResult[AsyncToolCall]: + """ + Executes the model with the provided messages. + + :param messages: the input messages to process. Could be a string, a dictionary, or a result object. + Read more about other possible message types in the documentation. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. + Defaults to 60 seconds. + """ return await self._run( messages=messages, timeout=timeout @@ -300,6 +339,14 @@ async def run_stream( *, timeout=60, ) -> AsyncIterator[GPTModelResult[AsyncToolCall]]: + """ + Executes the model with the provided messages + and yields partial results as they become available. + + :param messages: the input messages to process. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. + Defaults to 60 seconds. + """ async for result in self._run_stream( messages=messages, timeout=timeout @@ -312,12 +359,26 @@ async def run_deferred( *, timeout=60 ) -> AsyncOperation[GPTModelResult[AsyncToolCall]]: + """ + Initiates a deferred execution of the model with the provided messages. + + :param messages: the input messages to process. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. + Defaults to 60 seconds.
+ """ return await self._run_deferred( messages=messages, timeout=timeout, ) async def attach_deferred(self, operation_id: str, timeout: float = 60) -> AsyncOperation[GPTModelResult[AsyncToolCall]]: + """ + Attaches to an ongoing deferred operation using its operation id. + + :param operation_id: the id of the deferred operation to attach to. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. + Defaults to 60 seconds. + """ return await self._attach_deferred(operation_id=operation_id, timeout=timeout) async def tokenize( @@ -326,6 +387,13 @@ async def tokenize( *, timeout=60 ) -> tuple[Token, ...]: + """ + Tokenizes the provided messages into a tuple of tokens. + + :param messages: the input messages to tokenize. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. + Defaults to 60 seconds. + """ return await self._tokenize( messages=messages, timeout=timeout @@ -349,6 +417,23 @@ async def tune_deferred( optimizer: UndefinedOr[BaseOptimizer] = UNDEFINED, timeout: float = 60, ) -> AsyncTuningTask['AsyncGPTModel']: + """Initiate a deferred tuning process for the model. + + :param train_datasets: the dataset objects and/or dataset ids used for training of the model. + :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model. + :param name: the name of the tuning task. + :param description: the description of the tuning task. + :param labels: labels for the tuning task. + :param seed: a random seed for reproducibility. + :param lr: a learning rate for tuning. + :param n_samples: a number of samples for tuning. + :param additional_arguments: additional arguments for tuning. + :param tuning_type: a type of tuning to be applied. + :param scheduler: a scheduler for tuning. + :param optimizer: an optimizer for tuning. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. 
+ Defaults to 60 seconds. + """ return await self._tune_deferred( train_datasets=train_datasets, validation_datasets=validation_datasets, @@ -385,6 +470,27 @@ async def tune( poll_timeout: int = 72 * 60 * 60, poll_interval: float = 60, ) -> Self: + """Tune the model with the specified training datasets and parameters. + + :param train_datasets: the dataset objects and/or dataset ids used for training of the model. + :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model. + :param name: the name of the tuning task. + :param description: the description of the tuning task. + :param labels: labels for the tuning task. + :param seed: a random seed for reproducibility. + :param lr: a learning rate for tuning. + :param n_samples: a number of samples for tuning. + :param additional_arguments: additional arguments for tuning. + :param tuning_type: a type of tuning to be applied. + :param scheduler: a scheduler for tuning. + :param optimizer: an optimizer for tuning. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. + Defaults to 60 seconds. + :param poll_timeout: the maximum time to wait while polling for completion of the tuning task. + Defaults to 259200 seconds (72 hours). + :param poll_interval: the interval between polling attempts during the tuning process. + Defaults to 60 seconds. + """ return await self._tune( train_datasets=train_datasets, validation_datasets=validation_datasets, @@ -404,9 +510,15 @@ async def tune( ) async def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> AsyncTuningTask['AsyncGPTModel']: - return await self._attach_tune_deferred(task_id=task_id, timeout=timeout) + """Attach a deferred tuning task using its task id. + :param task_id: the id of the deferred tuning task to attach to. + :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. + Defaults to 60 seconds. 
+ """ + return await self._attach_tune_deferred(task_id=task_id, timeout=timeout) +@doc_from(BaseGPTModel) class GPTModel( BaseGPTModel[ Operation[GPTModelResult[ToolCall]], @@ -428,6 +540,7 @@ class GPTModel( __tune = run_sync(BaseGPTModel._tune) __attach_tune_deferred = run_sync(BaseGPTModel._attach_tune_deferred) + @doc_from(AsyncGPTModel.run) def run( self, messages: MessageInputType, @@ -439,6 +552,7 @@ def run( timeout=timeout ) + @doc_from(AsyncGPTModel.run_stream) def run_stream( self, messages: MessageInputType, @@ -450,6 +564,7 @@ def run_stream( timeout=timeout ) + @doc_from(AsyncGPTModel.run_deferred) def run_deferred( self, messages: MessageInputType, @@ -461,12 +576,14 @@ def run_deferred( timeout=timeout, ) + @doc_from(AsyncGPTModel.attach_deferred) def attach_deferred(self, operation_id: str, timeout: float = 60) -> Operation[GPTModelResult[ToolCall]]: return cast( Operation[GPTModelResult[ToolCall]], self.__attach_deferred(operation_id=operation_id, timeout=timeout) ) + @doc_from(AsyncGPTModel.tokenize) def tokenize( self, messages: MessageInputType, @@ -479,6 +596,7 @@ def tokenize( ) # pylint: disable=too-many-locals + @doc_from(AsyncGPTModel.tune_deferred) def tune_deferred( self, train_datasets: TuningDatasetsType, @@ -514,6 +632,7 @@ def tune_deferred( return cast(TuningTask[GPTModel], result) # pylint: disable=too-many-locals + @doc_from(AsyncGPTModel.tune) def tune( self, train_datasets: TuningDatasetsType, @@ -551,6 +670,7 @@ def tune( poll_interval=poll_interval, ) + @doc_from(AsyncGPTModel.attach_tune_deferred) def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> TuningTask[GPTModel]: return cast( TuningTask[GPTModel], diff --git a/src/yandex_cloud_ml_sdk/_models/completions/result.py b/src/yandex_cloud_ml_sdk/_models/completions/result.py index 16778d28..51ae378a 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/result.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/result.py @@ -18,13 +18,23 @@ 
@dataclass(frozen=True) class Usage: + """A class representing usage statistics for a completion request.""" + #: the number of tokens in the input text input_text_tokens: int + #: the number of tokens generated in the completion completion_tokens: int + #: the total number of tokens used (input + completion) total_tokens: int @dataclass(frozen=True) class CompletionUsage(Usage, ProtoBased[ProtoUsage]): + """ + A class representing detailed usage statistics for a completion request, + including reasoning tokens. + Inherits from :class:`.Usage` and includes additional information about reasoning tokens. + """ + #: the number of tokens used for reasoning in the completion reasoning_tokens: int @classmethod @@ -40,13 +50,23 @@ def _from_proto(cls, *, proto: ProtoUsage, **_) -> CompletionUsage: _s = ProtoAlternative class AlternativeStatus(int, Enum): + """ + A class with an enumeration representing the status of an alternative. + This enum defines various statuses that an alternative can have during processing. 
+ """ + #: the status is not specified UNSPECIFIED = _s.ALTERNATIVE_STATUS_UNSPECIFIED + #: the alternative is partially complete PARTIAL = _s.ALTERNATIVE_STATUS_PARTIAL + #: the alternative is truncated but considered final TRUNCATED_FINAL = _s.ALTERNATIVE_STATUS_TRUNCATED_FINAL + #: the alternative is complete and final FINAL = _s.ALTERNATIVE_STATUS_FINAL + #: the alternative has been filtered for content CONTENT_FILTER = _s.ALTERNATIVE_STATUS_CONTENT_FILTER + #: the alternative involves tool calls TOOL_CALLS = _s.ALTERNATIVE_STATUS_TOOL_CALLS - + #: represents an unknown status (-1) UNKNOWN = -1 @classmethod @@ -59,7 +79,10 @@ def _from_proto(cls, status: int): @dataclass(frozen=True) class Alternative(TextMessage, ProtoBased[ProtoAlternative], HaveToolCalls[ToolCallTypeT]): + """A class representing one of the generated completion alternatives, including its content and generation status.""" + #: the status of the alternative status: AlternativeStatus + #: a list of tool calls associated with this alternative, or None if no tool calls are present tool_calls: ToolCallList[ProtoCompletionsToolCallList, ToolCallTypeT] | None @classmethod @@ -87,8 +110,12 @@ def _from_proto(cls, *, proto: ProtoAlternative, sdk: SDKType) -> Alternative: @dataclass(frozen=True) class GPTModelResult(BaseResult[CompletionResponse], Sequence, HaveToolCalls[ToolCallTypeT]): + """A class representing the result of a GPT model completion request.""" + #: a tuple of alternatives generated by the model alternatives: tuple[Alternative[ToolCallTypeT], ...] 
+ #: usage statistics related to the completion request usage: CompletionUsage + #: the version of the GPT model used for generating the result model_version: str @classmethod diff --git a/src/yandex_cloud_ml_sdk/_models/completions/token.py b/src/yandex_cloud_ml_sdk/_models/completions/token.py index bd08396e..94a14b89 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/token.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/token.py @@ -9,8 +9,13 @@ @dataclass(frozen=True) class Token: + """This class encapsulates the properties of a token + and represents it in a text processing context.""" + #: a unique identifier for the token id: int + #: a flag indicating if the token is a special one special: bool + #: the textual representation of the token text: str @classmethod diff --git a/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py b/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py index 05499536..cdd945f5 100644 --- a/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py +++ b/src/yandex_cloud_ml_sdk/_models/completions/tune_params.py @@ -10,6 +10,8 @@ @dataclass(frozen=True) class GPTModelTuneParams(BaseTuningParams): + """This class encapsulates the parameters used for tuning a GPT model + in a text-to-text completion context.""" @property def _proto_tuning_params_type(self): return TextToTextCompletionTuningParams @@ -18,7 +20,11 @@ def _proto_tuning_argument_name(self): return 'text_to_text_completion' + #: a random seed for reproducibility seed: int | None = None + #: a learning rate for the tuning process lr: float | None = None + #: a number of samples to use for tuning n_samples: int | None = None + #: any additional arguments required for tuning additional_arguments: str | None = None