Add generative search docs (#103)

vhaldemar · web-flow · commit 616e2abe38d8 · 2025-06-23T19:22:35.000+02:00
diff --git a/conftest.py b/conftest.py
@@ -3,6 +3,10 @@
 import pathlib
 import sys
 
+import pytest
+
+import yandex_cloud_ml_sdk
+
 pytest_plugins = [
     'pytest_asyncio',
     'pytest_recording',
@@ -28,3 +32,8 @@ def pytest_ignore_collect(collection_path, path, config):  # pylint: disable=unu
             return True
 
     return None
+
+
+@pytest.fixture(autouse=True)
+def add_np(doctest_namespace):
+    doctest_namespace["sdk"] = yandex_cloud_ml_sdk.YCloudML(folder_id='<doctest>', auth='<none>')
diff --git a/src/yandex_cloud_ml_sdk/_sdk.py b/src/yandex_cloud_ml_sdk/_sdk.py
@@ -39,6 +39,7 @@ class BaseSDK:
     files: BaseFiles
     assistants: BaseAssistants
     runs: BaseRuns
+    #: API for `Yandex Search API <https://yandex.cloud/docs/search-api>`_
     search_api: BaseSearchAPIDomain
     search_indexes: BaseSearchIndexes
     datasets: BaseDatasets
diff --git a/src/yandex_cloud_ml_sdk/_search_api/domain.py b/src/yandex_cloud_ml_sdk/_search_api/domain.py
@@ -1,17 +1,25 @@
 from __future__ import annotations
 
 from yandex_cloud_ml_sdk._types.domain import DomainWithFunctions
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 
 from .generative.function import AsyncGenerativeSearchFunction, BaseGenerativeSearchFunction, GenerativeSearchFunction
 
 
 class BaseSearchAPIDomain(DomainWithFunctions):
+    """
+    Domain for working with `Yandex Search API <https://yandex.cloud/docs/search-api>`_ services.
+    """
+
+    #: API for `generative response <https://yandex.cloud/docs/search-api/concepts/generative-response>`_ service
     generative: BaseGenerativeSearchFunction
 
 
+@doc_from(BaseSearchAPIDomain)
 class AsyncSearchAPIDomain(BaseSearchAPIDomain):
     generative: AsyncGenerativeSearchFunction
 
 
+@doc_from(BaseSearchAPIDomain)
 class SearchAPIDomain(BaseSearchAPIDomain):
     generative: GenerativeSearchFunction
diff --git a/src/yandex_cloud_ml_sdk/_search_api/generative/config.py b/src/yandex_cloud_ml_sdk/_search_api/generative/config.py
@@ -14,14 +14,35 @@
 
 
 class DateFilterType(TypedDict):
+    """Date filter dict type for generative search.
+
+    Example:
+
+    >>> filter_ = {'date': '>20240125'}
+    """
+
     date: str
 
 
 class FormatFilterType(TypedDict):
+    """Format filter dict type for generative search.
+
+    Example:
+
+    >>> filter_ = {'format': 'xlsx'}
+    """
+
     format: str
 
 
 class LangFilterType(TypedDict):
+    """Language filter dict type for generative search.
+
+    Example:
+
+    >>> filter_ = {'lang': 'ru'}
+    """
+
     lang: str
 
 
diff --git a/src/yandex_cloud_ml_sdk/_search_api/generative/function.py b/src/yandex_cloud_ml_sdk/_search_api/generative/function.py
@@ -5,12 +5,17 @@
 from yandex_cloud_ml_sdk._types.function import BaseModelFunction
 from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr
 from yandex_cloud_ml_sdk._types.string import SmartStringSequence
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 
 from .config import AVAILABLE_FORMATS, SmartFilterSequence
 from .generative import AsyncGenerativeSearch, GenerativeSearch, GenerativeSearchTypeT
 
 
 class BaseGenerativeSearchFunction(BaseModelFunction[GenerativeSearchTypeT]):
+    """Generative search function for creating search object which provides
+    methods for invoking generative search.
+    """
+
     @override
     def __call__(
         self,
@@ -22,6 +27,31 @@ def __call__(
         enable_nrfm_docs: UndefinedOr[bool] = UNDEFINED,
         search_filters: UndefinedOr[SmartFilterSequence] = UNDEFINED
     ) -> GenerativeSearchTypeT:
+        """
+        Creates generative search object which provides methods for invoking generative search.
+
+        To learn more about parameters and their formats and possible values,
+        refer to
+        `generative search documentation <https://yandex.cloud/docs/search-api/concepts/generative-response#body>`_
+
+        NB: All of the ``site``, ``host``, ``url`` parameters are mutually exclusive
+        and using one of them is mandatory.
+
+        :param site: parameter for limiting search to specific location or list of sites.
+        :param host: parameter for limiting search to specific location or list of hosts.
+        :param url: parameter for limiting search to specific location or list of URLs.
+        :param fix_misspell: tells the backend whether or not to fix misspellings in queries.
+        :param enable_nrfm_docs: tells the backend whether or not to include in the search
+            results pages which are not available via direct clicks from the given
+            sites/hosts/urls.
+        :param search_filters: allows limiting search results with additional filters.
+
+            >>> date_filter = {'date': '<20250101'}
+            >>> format_filter = {'format': 'doc'}
+            >>> lang_filter = {'lang': 'ru'}
+            >>> search = sdk.search_api.generative(search_filters=[date_filter, format_filter, lang_filter])
+
+        """
         search_api = self._model_type(sdk=self._sdk, uri='<search_api>')
 
         return search_api.configure(
@@ -38,9 +68,11 @@ def available_formats(self):
         return AVAILABLE_FORMATS
 
 
+@doc_from(BaseGenerativeSearchFunction)
 class GenerativeSearchFunction(BaseGenerativeSearchFunction[GenerativeSearch]):
     _model_type = GenerativeSearch
 
 
+@doc_from(BaseGenerativeSearchFunction)
 class AsyncGenerativeSearchFunction(BaseGenerativeSearchFunction[AsyncGenerativeSearch]):
     _model_type = AsyncGenerativeSearch
diff --git a/src/yandex_cloud_ml_sdk/_search_api/generative/generative.py b/src/yandex_cloud_ml_sdk/_search_api/generative/generative.py
@@ -11,6 +11,7 @@
 from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr
 from yandex_cloud_ml_sdk._types.model import ModelSyncMixin
 from yandex_cloud_ml_sdk._types.string import SmartStringSequence
+from yandex_cloud_ml_sdk._utils.doc import doc_from
 from yandex_cloud_ml_sdk._utils.sync import run_sync
 
 from .config import GenerativeSearchConfig, SmartFilterSequence, format_to_proto
@@ -21,6 +22,10 @@
 
 
 class BaseGenerativeSearch(ModelSyncMixin[GenerativeSearchConfig, GenerativeSearchResult]):
+    """Generative search class which provides concrete methods for working with Search API
+    and encapsulates search settings.
+    """
+
     _config_type = GenerativeSearchConfig
     _result_type = GenerativeSearchResult
 
@@ -36,6 +41,32 @@ def configure(  # type: ignore[override]
         enable_nrfm_docs: UndefinedOr[bool] | None = UNDEFINED,
         search_filters: UndefinedOr[SmartFilterSequence] | None = UNDEFINED
     ) -> Self:
+        """
+        Returns a new object with config fields overridden by the passed values.
+
+        To learn more about parameters and their formats and possible values,
+        refer to
+        `generative search documentation <https://yandex.cloud/docs/search-api/concepts/generative-response#body>`_
+
+        NB: All of the ``site``, ``host``, ``url`` parameters are mutually exclusive
+        and using one of them is mandatory.
+
+        :param site: parameter for limiting search to specific location or list of sites.
+        :param host: parameter for limiting search to specific location or list of hosts.
+        :param url: parameter for limiting search to specific location or list of URLs.
+        :param fix_misspell: tells the backend whether or not to fix misspellings in queries.
+        :param enable_nrfm_docs: tells the backend whether or not to include in the search
+            results pages which are not available via direct clicks from the given
+            sites/hosts/urls.
+        :param search_filters: allows limiting search results with additional filters.
+
+            >>> date_filter = {'date': '<20250101'}
+            >>> format_filter = {'format': 'doc'}
+            >>> lang_filter = {'lang': 'ru'}
+            >>> search = sdk.search_api.generative(search_filters=[date_filter, format_filter, lang_filter])
+
+        """
+
         return super().configure(
             site=site,
             host=host,
@@ -53,6 +84,35 @@ def __repr__(self) -> str:
 
     @override
     async def _run(self, request: MessageInputType, *, timeout: float = 60) -> GenerativeSearchResult:
+        """Run a search query with given ``request`` and search settings of this generative search
+        object.
+
+        To change initial search settings use ``.configure`` method:
+
+        >>> search = sdk.search_api.generative(site="site")
+        >>> search = search.configure(site="other_site")
+
+        :param request: search request, which could be either standalone request (message) or
+            a list of messages, which represents a context of conversation with a model.
+
+            Also message could be one of the data formats:
+
+            * ``"string"`` -- in case of string input message will be passed to a model with a ``role="user"``;
+
+            * ``{"text": "text", "role": "user"}`` -- in case of dict input, it will be passed
+              with corresponding ``"text"`` and ``"role"`` dict keys;
+
+            * ``MessageObject`` -- you could also pass any object which has
+              ``text: str`` and ``role: str`` attributes, allowing you to reuse various
+              result objects, for example the object you get from a completions model run
+              or the result object from generative search itself;
+
+            * ``["string"/dict/object]`` -- list or any other sequence of any above described
+              formats.
+
+        :param timeout: timeout, or the maximum time to wait for the request to complete in seconds.
+
+        """
         self.config._validate_run()
         messages = messages_to_proto(request)
 
@@ -98,14 +158,18 @@ async def _run(self, request: MessageInputType, *, timeout: float = 60) -> Gener
         raise RuntimeError("call returned less then one result")
 
 
+@doc_from(BaseGenerativeSearch)
 class AsyncGenerativeSearch(BaseGenerativeSearch):
+    @doc_from(BaseGenerativeSearch._run)
     async def run(self, request: MessageInputType, *, timeout: float = 60) -> GenerativeSearchResult:
         return await self._run(request=request, timeout=timeout)
 
 
+@doc_from(BaseGenerativeSearch)
 class GenerativeSearch(BaseGenerativeSearch):
     __run = run_sync(BaseGenerativeSearch._run)
 
+    @doc_from(BaseGenerativeSearch._run)
     def run(self, request: MessageInputType, *, timeout: float = 60) -> GenerativeSearchResult:
         return self.__run(request=request, timeout=timeout)
 
diff --git a/src/yandex_cloud_ml_sdk/_search_api/generative/result.py b/src/yandex_cloud_ml_sdk/_search_api/generative/result.py
@@ -13,8 +13,17 @@
 
 @dataclass(frozen=True)
 class SearchSource(ProtoBased[GenSearchResponse.Source]):
+    """
+    Source document found for user query.
+
+    Might be used or not used in generative answer itself.
+    """
+
+    #: Url of the document
     url: str
+    #: Title of the document
     title: str
+    #: Has this source been used in generative answer or not
     used: bool
 
     @override
@@ -29,7 +38,9 @@ def _from_proto(cls, *, proto: GenSearchResponse.Source, sdk: SDKType) -> Self:
 
 @dataclass(frozen=True)
 class SearchQuery(ProtoBased[GenSearchResponse.SearchQuery]):
+    #: Text of the search query
     text: str
+    #: Request id
     req_id: str
 
     @override
@@ -43,12 +54,22 @@ def _from_proto(cls, *, proto: GenSearchResponse.SearchQuery, sdk: SDKType) -> S
 
 @dataclass(frozen=True)
 class GenerativeSearchResult(BaseResult[GenSearchResponse], TextMessage):
+    #: Generative answer itself.
+    #: Note that footnotes like ``[N]`` in the text refer to the ``GenerativeSearchResult.sources[N]``
+    #: source.
     text: str
+    #: Message sender role; in case of the generative search, model always answers with the
+    #: "assistant" role.
     role: str
+    #: Fixed query string, in case the query was fixed
     fixed_misspell_query: str | None
+    #: Answer was rejected for some reason, probably because of ethical constraints
     is_answer_rejected: bool
+    #: Model was unable to give good answer and returned bulleted list with some info.
     is_bullet_answer: bool
+    #: List of documents found by user query; every element number matches with footnotes in the ``.text`` attribute.
     sources: tuple[SearchSource, ...]
+    #: List of search queries sent to model
     search_queries: tuple[SearchQuery, ...]
 
     @override
@@ -70,4 +91,10 @@ def _from_proto(cls, *, proto: GenSearchResponse, sdk: SDKType) -> Self:
 
     @property
     def content(self) -> str:
+        """Alias to ``GenerativeSearchResult.text``.
+
+        Only to add some compatibility with raw Search API answer which has a "content" field
+        in protobufs and REST answers, unlike other parts of this SDK.
+        """
+
         return self.text