Add docstrings for _search_indexes (#140)

Mandzhi · web-flow · commit 18a2c8a6865e · 2025-08-22T17:41:48.000+02:00
diff --git a/src/yandex_cloud_ml_sdk/_search_indexes/chunking_strategy.py b/src/yandex_cloud_ml_sdk/_search_indexes/chunking_strategy.py
@@ -12,8 +12,8 @@
     from yandex_cloud_ml_sdk._sdk import BaseSDK
 
 
-
 class BaseIndexChunkingStrategy(abc.ABC):
+    """Abstract base class for index chunking strategies; all concrete strategies inherit from it."""
     @classmethod
     @abc.abstractmethod
     def _from_proto(cls, proto: Any, sdk: BaseSDK) -> BaseIndexChunkingStrategy:
@@ -35,8 +35,14 @@ def _from_upper_proto(cls, proto: ProtoChunkingStrategy, sdk: BaseSDK) -> BaseIn
 
 @dataclass(frozen=True)
 class StaticIndexChunkingStrategy(BaseIndexChunkingStrategy):
-    max_chunk_size_tokens: int
+    """
+    A static chunking strategy, i.e. one whose chunking parameters are set explicitly and stay fixed.
 
+    It is characterized by maximum chunk size and overlap in tokens.
+    """
+    #: the maximum size of each chunk in tokens
+    max_chunk_size_tokens: int
+    #: the number of overlapping tokens between consecutive chunks
     chunk_overlap_tokens: int
 
     @classmethod
diff --git a/src/yandex_cloud_ml_sdk/_search_indexes/domain.py b/src/yandex_cloud_ml_sdk/_search_indexes/domain.py
@@ -16,13 +16,18 @@
 from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr, get_defined_value, is_defined
 from yandex_cloud_ml_sdk._types.operation import AsyncOperation, Operation, OperationTypeT
 from yandex_cloud_ml_sdk._utils.coerce import ResourceType, coerce_resource_ids
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 from yandex_cloud_ml_sdk._utils.sync import run_sync, run_sync_generator
 
 from .index_type import BaseSearchIndexType
 from .search_index import AsyncSearchIndex, SearchIndex, SearchIndexTypeT
 
 
 class BaseSearchIndexes(BaseDomain, Generic[SearchIndexTypeT, OperationTypeT]):
+    """
+    A class for search indexes. It is part of the Assistants API
+    and provides the foundation for creating and managing search indexes.
+    """
     _impl: type[SearchIndexTypeT]
     _operation_type: type[OperationTypeT]
 
@@ -39,6 +44,22 @@ async def _create_deferred(
         expiration_policy: UndefinedOr[ExpirationPolicyAlias] = UNDEFINED,
         timeout: float = 60,
     ) -> OperationTypeT:
+        """
+        Create a deferred search index.
+
+        It returns an operation that can be used to track the creation process.
+
+        :param files: the files to be indexed.
+        :param index_type: the type of the search index.
+        :param name: the name of the search index.
+        :param description: a description for the search index.
+        :param labels: a set of labels for the search index.
+        :param ttl_days: time-to-live in days for the search index.
+        :param expiration_policy: expiration policy for the search index.
+            Accepts the ``static`` or ``since_last_active`` strings. Must be defined if ``ttl_days`` is defined; otherwise both parameters must be left undefined.
+        :param timeout: the time to wait for the operation to complete.
+            Defaults to 60 seconds.
+        """
         if is_defined(ttl_days) != is_defined(expiration_policy):
             raise ValueError("ttl_days and expiration policy must be both defined either undefined")
 
@@ -83,6 +104,14 @@ async def _get(
         *,
         timeout: float = 60,
     ) -> SearchIndexTypeT:
+        """Retrieve a search index by its id.
+
+        This method fetches an already created search index using its unique identifier.
+
+        :param search_index_id: the unique identifier of the search index to retrieve.
+        :param timeout: the time to wait for the operation to complete.
+            Defaults to 60 seconds.
+        """
         # TODO: we need a global per-sdk cache on ids to rule out
         # possibility we have two SearchIndexs with same ids but different fields
         request = GetSearchIndexRequest(search_index_id=search_index_id)
@@ -103,6 +132,15 @@ async def _list(
         page_size: UndefinedOr[int] = UNDEFINED,
         timeout: float = 60
     ) -> AsyncIterator[SearchIndexTypeT]:
+        """List search indexes in the specified folder.
+
+        This method retrieves a list of search indexes. It continues
+        to fetch search indexes until there are no more available.
+
+        :param page_size: the maximum number of search indexes to return per page.
+        :param timeout: the time to wait for the operation to complete.
+            Defaults to 60 seconds.
+        """
         page_token_ = ''
         page_size_ = get_defined_value(page_size, 0)
 
@@ -129,10 +167,12 @@ async def _list(
                 page_token_ = response.next_page_token
 
 
+@doc_from(BaseSearchIndexes)
 class AsyncSearchIndexes(BaseSearchIndexes[AsyncSearchIndex, AsyncOperation[AsyncSearchIndex]]):
     _impl = AsyncSearchIndex
     _operation_type = AsyncOperation[AsyncSearchIndex]
 
+    @doc_from(BaseSearchIndexes._create_deferred)
     async def create_deferred(
         self,
         files: ResourceType[BaseFile],
@@ -156,6 +196,7 @@ async def create_deferred(
             timeout=timeout
         )
 
+    @doc_from(BaseSearchIndexes._get)
     async def get(
         self,
         search_index_id: str,
@@ -167,6 +208,7 @@ async def get(
             timeout=timeout,
         )
 
+    @doc_from(BaseSearchIndexes._list)
     async def list(
         self,
         *,
@@ -180,6 +222,7 @@ async def list(
             yield search_index
 
 
+@doc_from(BaseSearchIndexes)
 class SearchIndexes(BaseSearchIndexes[SearchIndex, Operation[SearchIndex]]):
     _impl = SearchIndex
     _operation_type = Operation[SearchIndex]
@@ -188,6 +231,7 @@ class SearchIndexes(BaseSearchIndexes[SearchIndex, Operation[SearchIndex]]):
     __create_deferred = run_sync(BaseSearchIndexes._create_deferred)
     __list = run_sync_generator(BaseSearchIndexes._list)
 
+    @doc_from(BaseSearchIndexes._create_deferred)
     def create_deferred(
         self,
         files: ResourceType[BaseFile],
@@ -211,6 +255,7 @@ def create_deferred(
             timeout=timeout
         )
 
+    @doc_from(BaseSearchIndexes._get)
     def get(
         self,
         search_index_id: str,
@@ -222,6 +267,7 @@ def get(
             timeout=timeout,
         )
 
+    @doc_from(BaseSearchIndexes._list)
     def list(
         self,
         *,
diff --git a/src/yandex_cloud_ml_sdk/_search_indexes/file.py b/src/yandex_cloud_ml_sdk/_search_indexes/file.py
@@ -1,16 +1,17 @@
-# pylint: disable=no-name-in-module
 from __future__ import annotations
 
 from dataclasses import dataclass
 from datetime import datetime
 
-from yandex.cloud.ai.assistants.v1.searchindex.search_index_file_pb2 import SearchIndexFile as ProtoSearchIndexFile
-
 from yandex_cloud_ml_sdk._types.resource import BaseResource
 
 
 @dataclass(frozen=True)
-class SearchIndexFile(BaseResource[ProtoSearchIndexFile]):
+class SearchIndexFile(BaseResource):
+    """This class represents a file associated with a search index."""
+    #: the unique identifier for the search index
     search_index_id: str
+    #: the identifier of the user or system that created the file
     created_by: str
+    #: the timestamp when the file was created
     created_at: datetime
diff --git a/src/yandex_cloud_ml_sdk/_search_indexes/normalization_strategy.py b/src/yandex_cloud_ml_sdk/_search_indexes/normalization_strategy.py
@@ -9,6 +9,15 @@
 
 
 class IndexNormalizationStrategy(ProtoEnumBase, enum.IntEnum):
+    """
+    Enumeration for index normalization strategies.
+
+    This class defines the various normalization strategies that can be applied
+    to an index.
+    """
+    #: indicates that no normalization strategy has been specified
     NORMALIZATION_STRATEGY_UNSPECIFIED = NormalizationStrategy.NORMALIZATION_STRATEGY_UNSPECIFIED
+    #: represents the Min-Max normalization strategy
     MIN_MAX = NormalizationStrategy.MIN_MAX
+    #: represents the L2 normalization strategy
     L2 = NormalizationStrategy.L2