Add docstrings for _models/text_classifiers (#120)

Mandzhi · web-flow · commit 43bdeb00a563 · 2025-07-10T14:05:44.000+02:00
diff --git a/src/yandex_cloud_ml_sdk/_models/text_classifiers/function.py b/src/yandex_cloud_ml_sdk/_models/text_classifiers/function.py
@@ -3,18 +3,36 @@
 from typing_extensions import override
 
 from yandex_cloud_ml_sdk._types.function import BaseModelFunction, ModelTypeT
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 
 from .model import AsyncTextClassifiersModel, TextClassifiersModel
 
 
 class BaseTextClassifiers(BaseModelFunction[ModelTypeT]):
+    """A class for text classifiers.
+
+    It provides a common interface for text classification models and
+    constructs the model URI based on the provided model name and version.
+    """
     @override
     def __call__(
         self,
         model_name: str,
         *,
         model_version: str = 'latest',
     ):
+        """Call the text classification model.
+
+        Constructs the URI for the model based on the provided model name
+        and version. If the name contains ``://``, it is treated as a
+        complete URI and used as is. Otherwise, the name is first looked up
+        in the well-known names dictionary, and then, whether or not it was found,
+        a URI of the form ``cls://<folder_id>/<model>/<version>`` is constructed.
+
+        :param model_name: the name or URI of the model to call.
+        :param model_version: the version of the model to be used.
+            Defaults to 'latest'.
+        """
         if '://' in model_name:
             uri = model_name
         else:
@@ -27,9 +45,10 @@ def __call__(
         )
 
 
+@doc_from(BaseTextClassifiers)
 class TextClassifiers(BaseTextClassifiers):
     _model_type = TextClassifiersModel
 
-
+@doc_from(BaseTextClassifiers)
 class AsyncTextClassifiers(BaseTextClassifiers):
     _model_type = AsyncTextClassifiersModel
diff --git a/src/yandex_cloud_ml_sdk/_models/text_classifiers/model.py b/src/yandex_cloud_ml_sdk/_models/text_classifiers/model.py
@@ -23,6 +23,7 @@
 from yandex_cloud_ml_sdk._types.tuning.optimizers import BaseOptimizer
 from yandex_cloud_ml_sdk._types.tuning.schedulers import BaseScheduler
 from yandex_cloud_ml_sdk._types.tuning.tuning_types import BaseTuningType
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 from yandex_cloud_ml_sdk._utils.sync import run_sync
 
 from .config import TextClassifiersModelConfig
@@ -40,6 +41,11 @@ class BaseTextClassifiersModel(
         TuningTaskTypeT
     ],
 ):
+    """
+    A class for text classifier models.
+    It provides the foundational structure for building text classification models,
+    including configuration and execution of classification tasks.
+    """
     _config_type = TextClassifiersModelConfig
     _result_type = TextClassifiersModelResultBase
     _tuning_params_type = TextClassifiersModelTuneParams
@@ -134,6 +140,7 @@ async def _run_few_shot(
             return FewShotTextClassifiersModelResult._from_proto(proto=response, sdk=self._sdk)
 
 
+@doc_from(BaseTextClassifiersModel)
 class AsyncTextClassifiersModel(BaseTextClassifiersModel[AsyncTuningTask['AsyncTextClassifiersModel']]):
     _tune_operation_type = AsyncTuningTask['AsyncTextClassifiersModel']
 
@@ -143,6 +150,18 @@ async def run(
         *,
         timeout: float = 60,
     ) -> TextClassifiersModelResultBase:
+        """Execute the text classification on the provided input text.
+
+        If only labels are specified, a zero-shot classifier is applied.
+        If samples are also specified, a few-shot classifier is applied.
+        If neither is specified, the classify method is used; it is only available for pre-trained models.
+
+        Read more about the classifiers in `the documentation <https://yandex.cloud/docs/foundation-models/concepts/classifier/>`_.
+
+        :param text: the input text to classify.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._run(
             text=text,
             timeout=timeout
@@ -167,6 +186,24 @@ async def tune_deferred(
         optimizer: UndefinedOr[BaseOptimizer] = UNDEFINED,
         timeout: float = 60,
     ) -> AsyncTuningTask['AsyncTextClassifiersModel']:
+        """Initiate a deferred tuning process for the model.
+
+        :param train_datasets: the dataset objects and/or dataset ids used for training of the model.
+        :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model.
+        :param classification_type: the type of classification to perform during tuning (multilabel, multiclass, or binary).
+        :param name: the name of the tuning task.
+        :param description: the description of the tuning task.
+        :param labels: labels for the tuning task.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        :param seed: a random seed for reproducibility.
+        :param lr: a learning rate for tuning.
+        :param n_samples: a number of samples for tuning.
+        :param additional_arguments: additional arguments for tuning.
+        :param tuning_type: a type of tuning to be applied.
+        :param scheduler: a scheduler for tuning.
+        :param optimizer: an optimizer for tuning.
+        """
         return await self._tune_deferred(
             train_datasets=train_datasets,
             validation_datasets=validation_datasets,
@@ -205,6 +242,28 @@ async def tune(
         poll_timeout: int = 72 * 60 * 60,
         poll_interval: float = 60,
     ) -> Self:
+        """Tune the model with the specified training datasets and parameters.
+
+        :param train_datasets: the dataset objects and/or dataset ids used for training of the model.
+        :param validation_datasets: the dataset objects and/or dataset ids used for validation of the model.
+        :param classification_type: the type of classification to perform during tuning (multilabel, multiclass, or binary).
+        :param name: the name of the tuning task.
+        :param description: the description of the tuning task.
+        :param labels: labels for the tuning task.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        :param seed: a random seed for reproducibility.
+        :param lr: a learning rate for tuning.
+        :param n_samples: a number of samples for tuning.
+        :param additional_arguments: additional arguments for tuning.
+        :param tuning_type: a type of tuning to be applied.
+        :param scheduler: a scheduler for tuning.
+        :param optimizer: an optimizer for tuning.
+        :param poll_timeout: the maximum time to wait while polling for completion of the tuning task.
+            Defaults to 259200 seconds (72 hours).
+        :param poll_interval: the interval between polling attempts during the tuning process.
+            Defaults to 60 seconds.
+        """
         return await self._tune(
             train_datasets=train_datasets,
             validation_datasets=validation_datasets,
@@ -230,16 +289,24 @@ async def attach_tune_deferred(
         *,
         timeout: float = 60
     ) -> AsyncTuningTask['AsyncTextClassifiersModel']:
+        """Attach to a deferred tuning task using its task ID.
+
+        :param task_id: the ID of the deferred tuning task to attach to.
+        :param timeout: the timeout, or the maximum time to wait for the request to complete in seconds.
+            Defaults to 60 seconds.
+        """
         return await self._attach_tune_deferred(task_id=task_id, timeout=timeout)
 
 
+@doc_from(BaseTextClassifiersModel)
 class TextClassifiersModel(BaseTextClassifiersModel[TuningTask['TextClassifiersModel']]):
     _tune_operation_type = TuningTask['TextClassifiersModel']
     __run = run_sync(BaseTextClassifiersModel._run)
     __tune_deferred = run_sync(BaseTextClassifiersModel._tune_deferred)
     __tune = run_sync(BaseTextClassifiersModel._tune)
     __attach_tune_deferred = run_sync(BaseTextClassifiersModel._attach_tune_deferred)
 
+    @doc_from(AsyncTextClassifiersModel.run)
     def run(
         self,
         text: str,
@@ -252,6 +319,7 @@ def run(
         )
 
     # pylint: disable=too-many-locals
+    @doc_from(AsyncTextClassifiersModel.tune_deferred)
     def tune_deferred(
         self,
         train_datasets: TuningDatasetsType,
@@ -289,6 +357,7 @@ def tune_deferred(
         return cast(TuningTask[TextClassifiersModel], result)
 
     # pylint: disable=too-many-locals
+    @doc_from(AsyncTextClassifiersModel.tune)
     def tune(
         self,
         train_datasets: TuningDatasetsType,
@@ -328,6 +397,7 @@ def tune(
             poll_interval=poll_interval,
         )
 
+    @doc_from(AsyncTextClassifiersModel.attach_tune_deferred)
     def attach_tune_deferred(self, task_id: str, *, timeout: float = 60) -> TuningTask[TextClassifiersModel]:
         return cast(
             TuningTask[TextClassifiersModel],
diff --git a/src/yandex_cloud_ml_sdk/_models/text_classifiers/result.py b/src/yandex_cloud_ml_sdk/_models/text_classifiers/result.py
@@ -26,7 +26,12 @@
 
 @dataclass(frozen=True)
 class TextClassifiersModelResultBase(BaseResult, Sequence, Generic[TextClassificationResponseT]):
+    """A class for text classifier model results.
+    It represents the common structure for the results returned by text classification models.
+    """
+    #: A tuple containing the predicted labels.
     predictions: tuple[TextClassificationLabel, ...]
+    #: The version of the model used for prediction.
     model_version: str
     #: Number of input tokens provided to the model.
     input_tokens: int
diff --git a/src/yandex_cloud_ml_sdk/_models/text_classifiers/tune_params.py b/src/yandex_cloud_ml_sdk/_models/text_classifiers/tune_params.py
@@ -15,6 +15,9 @@
 
 @dataclass(frozen=True)
 class TextClassifiersModelTuneParams(BaseTuningParams):
+    """This class encapsulates the parameters used for tuning text classification models,
+    supporting the multilabel, multiclass, and binary classification types.
+    """
     @property
     def _proto_tuning_params_type(
         self
@@ -43,8 +46,13 @@ def __post_init__(self):
                 f'classification_type must be {ClassificationTuningTypes}, got {self.classification_type}'
             )
 
+    #: the type of classification to be used (should be one of 'multilabel', 'multiclass', or 'binary')
     classification_type: ClassificationTuningTypes | None = None
+    #: random seed for reproducibility
     seed: int | None = None
+    #: a learning rate for the tuning process
     lr: float | None = None
+    #: a number of samples to use for tuning
     n_samples: int | None = None
+    #: any additional arguments required for tuning
     additional_arguments: str | None = None
diff --git a/src/yandex_cloud_ml_sdk/_models/text_classifiers/types.py b/src/yandex_cloud_ml_sdk/_models/text_classifiers/types.py
@@ -6,7 +6,12 @@
 
 @dataclass(frozen=True)
 class TextClassificationLabel(Mapping):
+    """This class represents a label for text classification
+    with an associated confidence score.
+    """
+    #: the label for the classification
     label: str
+    #: the confidence score associated with the label
     confidence: float
 
     def __getitem__(self, key):
@@ -20,5 +25,8 @@ def __len__(self):
 
 
 class TextClassificationSample(TypedDict):
+    """This class represents a sample of text for classification."""
+    #: the text to be classified
     text: str
+    #: the expected label for the classification
     label: str