-
Notifications
You must be signed in to change notification settings - Fork 27
Add docstrings for _models/completions #124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
7d3dc17
724c952
4c85fc2
26d83aa
34cc92b
55b4edc
283abee
4a5d4fa
1316a8d
1019f27
26ae98c
fd60330
37a537f
b8d2196
2f7de07
f56c6c8
0b49743
b83ac65
7d2e22c
4950c32
cf7ae28
082e74b
7cfe2f5
37f4c43
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,18 +3,38 @@ | |
| from typing_extensions import override | ||
|
|
||
| from yandex_cloud_ml_sdk._types.function import BaseModelFunction, ModelTypeT | ||
| from yandex_cloud_ml_sdk._utils.doc import doc_from | ||
|
|
||
| from .model import AsyncGPTModel, GPTModel | ||
|
|
||
|
|
||
| class BaseCompletions(BaseModelFunction[ModelTypeT]): | ||
| """ | ||
| A class for handling model completions. | ||
|
|
||
| It defines the core functionality for calling a model | ||
| to generate completions based on the provided model name and version. | ||
| """ | ||
| @override | ||
| def __call__( | ||
| self, | ||
| model_name: str, | ||
| *, | ||
| model_version: str = 'latest', | ||
| ) -> ModelTypeT: | ||
| """ | ||
| Call the specified model to generate completions. | ||
|
||
|
|
||
| This method constructs the URI for the model based on the provided | ||
| name and version. If the name contains ``://``, it is | ||
| treated as a full URI. Otherwise, the model name is looked up in | ||
| the well-known names dictionary and, whether or not it is found there, | ||
| a URI of the form ``gpt://<folder_id>/<model>/<version>`` is constructed. | ||
|
|
||
| :param model_name: the name or URI of the model to call. | ||
| :param model_version: the version of the model to use. | ||
| Defaults to 'latest'. | ||
| """ | ||
| if '://' in model_name: | ||
| uri = model_name | ||
| else: | ||
|
|
@@ -26,10 +46,10 @@ def __call__( | |
| uri=uri, | ||
| ) | ||
|
|
||
|
|
||
| @doc_from(BaseCompletions) | ||
| class Completions(BaseCompletions[GPTModel]): | ||
| _model_type = GPTModel | ||
|
|
||
|
|
||
| @doc_from(BaseCompletions) | ||
| class AsyncCompletions(BaseCompletions[AsyncGPTModel]): | ||
| _model_type = AsyncGPTModel | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,7 @@ | |
| from yandex_cloud_ml_sdk._types.tuning.schedulers import BaseScheduler | ||
| from yandex_cloud_ml_sdk._types.tuning.tuning_types import BaseTuningType | ||
| from yandex_cloud_ml_sdk._utils.coerce import coerce_tuple | ||
| from yandex_cloud_ml_sdk._utils.doc import doc_from | ||
| from yandex_cloud_ml_sdk._utils.sync import run_sync, run_sync_generator | ||
|
|
||
| from .config import CompletionTool, GPTModelConfig, ReasoningMode, ReasoningModeType | ||
|
|
@@ -54,6 +55,10 @@ class BaseGPTModel( | |
| ModelTuneMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], GPTModelTuneParams, TuningTaskTypeT], | ||
| BaseModelBatchMixin[GPTModelConfig, GPTModelResult[ToolCallTypeT], BatchSubdomainTypeT], | ||
| ): | ||
| """ | ||
| A class for GPT models providing various functionalities, | ||
| including tuning and batch processing. | ||
| """ | ||
| _config_type = GPTModelConfig | ||
| _result_type: type[GPTModelResult[ToolCallTypeT]] | ||
| _operation_type: type[OperationTypeT] | ||
|
|
@@ -67,6 +72,14 @@ class BaseGPTModel( | |
| _batch_proto_metadata_type = BatchCompletionMetadata | ||
|
|
||
| def langchain(self, model_type: Literal["chat"] = "chat", timeout: int = 60) -> BaseYandexLanguageModel: | ||
| """ | ||
| Initializes a langchain model based on the specified model type. | ||
|
|
||
| :param model_type: the type of langchain model to initialize. | ||
| Defaults to "chat". | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
|
||
| Defaults to 60 seconds. | ||
| """ | ||
| from .langchain import ChatYandexGPT # pylint: disable=import-outside-toplevel | ||
|
|
||
| if model_type == "chat": | ||
|
|
@@ -86,6 +99,16 @@ def configure( # type: ignore[override] | |
| tools: UndefinedOr[Sequence[CompletionTool] | CompletionTool] = UNDEFINED, | ||
| parallel_tool_calls: UndefinedOr[bool] = UNDEFINED, | ||
| ) -> Self: | ||
| """ | ||
| Configures the model with specified parameters. | ||
|
|
||
| :param temperature: a sampling temperature to use - higher values mean more random results. | ||
|
||
| :param max_tokens: a maximum number of tokens to generate in the response. | ||
| :param reasoning_mode: the mode of reasoning to apply during generation. | ||
|
||
| :param response_format: a format of the response returned by the model. | ||
|
||
| :param tools: tools to use for completion. Can be a sequence or a single tool. | ||
| :param parallel_tool_calls: whether to allow parallel calls to tools during completion. | ||
vhaldemar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| """ | ||
| return super().configure( | ||
| temperature=temperature, | ||
| max_tokens=max_tokens, | ||
|
|
@@ -259,7 +282,7 @@ async def _tokenize( | |
| ) | ||
| return tuple(Token._from_proto(t) for t in response.tokens) | ||
|
|
||
|
|
||
| @doc_from(BaseGPTModel) | ||
| class AsyncGPTModel( | ||
| BaseGPTModel[ | ||
| AsyncOperation[GPTModelResult[AsyncToolCall]], | ||
|
|
@@ -279,6 +302,13 @@ async def run( | |
| *, | ||
| timeout=60, | ||
| ) -> GPTModelResult[AsyncToolCall]: | ||
| """ | ||
| Executes the model with the provided messages. | ||
|
|
||
| :param messages: the input messages to process. | ||
|
||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._run( | ||
| messages=messages, | ||
| timeout=timeout | ||
|
|
@@ -290,6 +320,14 @@ async def run_stream( | |
| *, | ||
| timeout=60, | ||
| ) -> AsyncIterator[GPTModelResult[AsyncToolCall]]: | ||
| """ | ||
| Executes the model with the provided messages | ||
| and yields results as they become available. | ||
|
||
|
|
||
| :param messages: the input messages to process. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| async for result in self._run_stream( | ||
| messages=messages, | ||
| timeout=timeout | ||
|
|
@@ -302,12 +340,26 @@ async def run_deferred( | |
| *, | ||
| timeout=60 | ||
| ) -> AsyncOperation[GPTModelResult[AsyncToolCall]]: | ||
| """ | ||
| Initiates a deferred execution of the model with the provided messages. | ||
|
|
||
| :param messages: the input messages to process. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._run_deferred( | ||
| messages=messages, | ||
| timeout=timeout, | ||
| ) | ||
|
|
||
| async def attach_deferred(self, operation_id: str, timeout: float = 60) -> AsyncOperation[GPTModelResult[AsyncToolCall]]: | ||
| """ | ||
| Attaches to an ongoing deferred operation using its operation id. | ||
|
|
||
| :param operation_id: the id of the deferred operation to attach to. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._attach_deferred(operation_id=operation_id, timeout=timeout) | ||
|
|
||
| async def tokenize( | ||
|
|
@@ -316,6 +368,13 @@ async def tokenize( | |
| *, | ||
| timeout=60 | ||
| ) -> tuple[Token, ...]: | ||
| """ | ||
| Tokenizes the provided messages into a tuple of tokens. | ||
|
|
||
| :param messages: the input messages to tokenize. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._tokenize( | ||
| messages=messages, | ||
| timeout=timeout | ||
|
|
@@ -339,6 +398,23 @@ async def tune_deferred( | |
| optimizer: UndefinedOr[BaseOptimizer] = UNDEFINED, | ||
| timeout: float = 60, | ||
| ) -> AsyncTuningTask['AsyncGPTModel']: | ||
| """Initiate a deferred tuning process for the model. | ||
|
|
||
| :param train_datasets: the dataset objects and/or dataset ids used for training of the model. | ||
| :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model. | ||
| :param name: the name of the tuning task. | ||
| :param description: the description of the tuning task. | ||
| :param labels: labels for the tuning task. | ||
| :param seed: a random seed for reproducibility. | ||
| :param lr: a learning rate for tuning. | ||
| :param n_samples: a number of samples for tuning. | ||
| :param additional_arguments: additional arguments for tuning. | ||
| :param tuning_type: a type of tuning to be applied. | ||
| :param scheduler: a scheduler for tuning. | ||
| :param optimizer: an optimizer for tuning. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._tune_deferred( | ||
| train_datasets=train_datasets, | ||
| validation_datasets=validation_datasets, | ||
|
|
@@ -375,6 +451,27 @@ async def tune( | |
| poll_timeout: int = 72 * 60 * 60, | ||
| poll_interval: float = 60, | ||
| ) -> Self: | ||
| """Tune the model with the specified training datasets and parameters. | ||
|
|
||
| :param train_datasets: the dataset objects and/or dataset ids used for training of the model. | ||
| :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model. | ||
| :param name: the name of the tuning task. | ||
| :param description: the description of the tuning task. | ||
| :param labels: labels for the tuning task. | ||
| :param seed: a random seed for reproducibility. | ||
| :param lr: a learning rate for tuning. | ||
| :param n_samples: a number of samples for tuning. | ||
| :param additional_arguments: additional arguments for tuning. | ||
| :param tuning_type: a type of tuning to be applied. | ||
| :param scheduler: a scheduler for tuning. | ||
| :param optimizer: an optimizer for tuning. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| :param poll_timeout: the maximum time to wait while polling for completion of the tuning task. | ||
| Defaults to 259200 seconds (72 hours). | ||
| :param poll_interval: the interval between polling attempts during the tuning process. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._tune( | ||
| train_datasets=train_datasets, | ||
| validation_datasets=validation_datasets, | ||
|
|
@@ -394,9 +491,15 @@ async def tune( | |
| ) | ||
|
|
||
| async def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> AsyncTuningTask['AsyncGPTModel']: | ||
| return await self._attach_tune_deferred(task_id=task_id, timeout=timeout) | ||
| """Attach a deferred tuning task using its task id. | ||
|
|
||
| :param task_id: the id of the deferred tuning task to attach to. | ||
| :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds. | ||
| Defaults to 60 seconds. | ||
| """ | ||
| return await self._attach_tune_deferred(task_id=task_id, timeout=timeout) | ||
|
|
||
| @doc_from(BaseGPTModel) | ||
| class GPTModel( | ||
| BaseGPTModel[ | ||
| Operation[GPTModelResult[ToolCall]], | ||
|
|
@@ -418,6 +521,7 @@ class GPTModel( | |
| __tune = run_sync(BaseGPTModel._tune) | ||
| __attach_tune_deferred = run_sync(BaseGPTModel._attach_tune_deferred) | ||
|
|
||
| @doc_from(AsyncGPTModel.run) | ||
| def run( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -429,6 +533,7 @@ def run( | |
| timeout=timeout | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.run_stream) | ||
| def run_stream( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -440,6 +545,7 @@ def run_stream( | |
| timeout=timeout | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.run_deferred) | ||
| def run_deferred( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -451,12 +557,14 @@ def run_deferred( | |
| timeout=timeout, | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.attach_deferred) | ||
| def attach_deferred(self, operation_id: str, timeout: float = 60) -> Operation[GPTModelResult[ToolCall]]: | ||
| return cast( | ||
| Operation[GPTModelResult[ToolCall]], | ||
| self.__attach_deferred(operation_id=operation_id, timeout=timeout) | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.tokenize) | ||
| def tokenize( | ||
| self, | ||
| messages: MessageInputType, | ||
|
|
@@ -469,6 +577,7 @@ def tokenize( | |
| ) | ||
|
|
||
| # pylint: disable=too-many-locals | ||
| @doc_from(AsyncGPTModel.tune_deferred) | ||
| def tune_deferred( | ||
| self, | ||
| train_datasets: TuningDatasetsType, | ||
|
|
@@ -504,6 +613,7 @@ def tune_deferred( | |
| return cast(TuningTask[GPTModel], result) | ||
|
|
||
| # pylint: disable=too-many-locals | ||
| @doc_from(AsyncGPTModel.tune) | ||
| def tune( | ||
| self, | ||
| train_datasets: TuningDatasetsType, | ||
|
|
@@ -541,6 +651,7 @@ def tune( | |
| poll_interval=poll_interval, | ||
| ) | ||
|
|
||
| @doc_from(AsyncGPTModel.attach_tune_deferred) | ||
| def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> TuningTask[GPTModel]: | ||
| return cast( | ||
| TuningTask[GPTModel], | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
completions models, наверное