Skip to content

Commit 616e2ab

Browse files
authored
Add generative search docs (#103)
1 parent db335a0 commit 616e2ab

File tree

7 files changed

+162
-0
lines changed

7 files changed

+162
-0
lines changed

conftest.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
import pathlib
44
import sys
55

6+
import pytest
7+
8+
import yandex_cloud_ml_sdk
9+
610
pytest_plugins = [
711
'pytest_asyncio',
812
'pytest_recording',
@@ -28,3 +32,8 @@ def pytest_ignore_collect(collection_path, path, config): # pylint: disable=unu
2832
return True
2933

3034
return None
35+
36+
37+
@pytest.fixture(autouse=True)
38+
def add_np(doctest_namespace):
39+
doctest_namespace["sdk"] = yandex_cloud_ml_sdk.YCloudML(folder_id='<doctest>', auth='<none>')

src/yandex_cloud_ml_sdk/_sdk.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class BaseSDK:
3939
files: BaseFiles
4040
assistants: BaseAssistants
4141
runs: BaseRuns
42+
#: API for `Yandex Search API <https://yandex.cloud/docs/search-api>`_
4243
search_api: BaseSearchAPIDomain
4344
search_indexes: BaseSearchIndexes
4445
datasets: BaseDatasets
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,25 @@
11
from __future__ import annotations
22

33
from yandex_cloud_ml_sdk._types.domain import DomainWithFunctions
4+
from yandex_cloud_ml_sdk._utils.doc import doc_from
45

56
from .generative.function import AsyncGenerativeSearchFunction, BaseGenerativeSearchFunction, GenerativeSearchFunction
67

78

89
class BaseSearchAPIDomain(DomainWithFunctions):
10+
"""
11+
Domain for working with `Yandex Search API <https://yandex.cloud/docs/search-api>`_ services.
12+
"""
13+
14+
#: API for `generative response <https://yandex.cloud/docs/search-api/concepts/generative-response>`_ service
915
generative: BaseGenerativeSearchFunction
1016

1117

18+
@doc_from(BaseSearchAPIDomain)
1219
class AsyncSearchAPIDomain(BaseSearchAPIDomain):
1320
generative: AsyncGenerativeSearchFunction
1421

1522

23+
@doc_from(BaseSearchAPIDomain)
1624
class SearchAPIDomain(BaseSearchAPIDomain):
1725
generative: GenerativeSearchFunction

src/yandex_cloud_ml_sdk/_search_api/generative/config.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,35 @@
1414

1515

1616
class DateFilterType(TypedDict):
17+
"""Date filter dict type for generative search.
18+
19+
Example:
20+
21+
>>> filter_ = {'date': '>20240125'}
22+
"""
23+
1724
date: str
1825

1926

2027
class FormatFilterType(TypedDict):
28+
"""Format filter dict type for generative search.
29+
30+
Example:
31+
32+
>>> filter_ = {'format': 'xlsx'}
33+
"""
34+
2135
format: str
2236

2337

2438
class LangFilterType(TypedDict):
39+
"""Language filter dict type for generative search.
40+
41+
Example:
42+
43+
>>> filter_ = {'lang': 'ru'}
44+
"""
45+
2546
lang: str
2647

2748

src/yandex_cloud_ml_sdk/_search_api/generative/function.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,17 @@
55
from yandex_cloud_ml_sdk._types.function import BaseModelFunction
66
from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr
77
from yandex_cloud_ml_sdk._types.string import SmartStringSequence
8+
from yandex_cloud_ml_sdk._utils.doc import doc_from
89

910
from .config import AVAILABLE_FORMATS, SmartFilterSequence
1011
from .generative import AsyncGenerativeSearch, GenerativeSearch, GenerativeSearchTypeT
1112

1213

1314
class BaseGenerativeSearchFunction(BaseModelFunction[GenerativeSearchTypeT]):
15+
"""Generative search function for creating search object which provides
16+
methods for invoking generative search.
17+
"""
18+
1419
@override
1520
def __call__(
1621
self,
@@ -22,6 +27,31 @@ def __call__(
2227
enable_nrfm_docs: UndefinedOr[bool] = UNDEFINED,
2328
search_filters: UndefinedOr[SmartFilterSequence] = UNDEFINED
2429
) -> GenerativeSearchTypeT:
30+
"""
31+
Creates generative search object which provides methods for invoking generative search.
32+
33+
To learn more about parameters and their formats and possible values,
34+
refer to
35+
`generative search documentation <https://yandex.cloud/docs/search-api/concepts/generative-response#body>`_
36+
37+
NB: All of the ``site``, ``host``, ``url`` parameters are mutually exclusive
38+
and using one of them is mandatory.
39+
40+
:param site: parameter for limiting search to specific location or list of sites.
41+
:param host: parameter for limiting search to specific location or list of hosts.
42+
:param url: parameter for limiting search to specific location or list of URLs.
43+
:param fix_misspell: tells the backend whether or not to fix misspellings in queries.
44+
:param enable_nrfm_docs: tells the backend whether or not to include pages
45+
which are not available via direct clicks from the given sites/hosts/urls
46+
in the search result.
47+
:param search_filters: allows limiting search results with additional filters.
48+
49+
>>> date_filter = {'date': '<20250101'}
50+
>>> format_filter = {'format': 'doc'}
51+
>>> lang_filter = {'lang': 'ru'}
52+
>>> search = sdk.search_api.generative(search_filters=[date_filter, format_filter, lang_filter])
53+
54+
"""
2555
search_api = self._model_type(sdk=self._sdk, uri='<search_api>')
2656

2757
return search_api.configure(
@@ -38,9 +68,11 @@ def available_formats(self):
3868
return AVAILABLE_FORMATS
3969

4070

71+
@doc_from(BaseGenerativeSearchFunction)
4172
class GenerativeSearchFunction(BaseGenerativeSearchFunction[GenerativeSearch]):
4273
_model_type = GenerativeSearch
4374

4475

76+
@doc_from(BaseGenerativeSearchFunction)
4577
class AsyncGenerativeSearchFunction(BaseGenerativeSearchFunction[AsyncGenerativeSearch]):
4678
_model_type = AsyncGenerativeSearch

src/yandex_cloud_ml_sdk/_search_api/generative/generative.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr
1212
from yandex_cloud_ml_sdk._types.model import ModelSyncMixin
1313
from yandex_cloud_ml_sdk._types.string import SmartStringSequence
14+
from yandex_cloud_ml_sdk._utils.doc import doc_from
1415
from yandex_cloud_ml_sdk._utils.sync import run_sync
1516

1617
from .config import GenerativeSearchConfig, SmartFilterSequence, format_to_proto
@@ -21,6 +22,10 @@
2122

2223

2324
class BaseGenerativeSearch(ModelSyncMixin[GenerativeSearchConfig, GenerativeSearchResult]):
25+
"""Generative search class which provides concrete methods for working with Search API
26+
and encapsulates search settings.
27+
"""
28+
2429
_config_type = GenerativeSearchConfig
2530
_result_type = GenerativeSearchResult
2631

@@ -36,6 +41,32 @@ def configure( # type: ignore[override]
3641
enable_nrfm_docs: UndefinedOr[bool] | None = UNDEFINED,
3742
search_filters: UndefinedOr[SmartFilterSequence] | None = UNDEFINED
3843
) -> Self:
44+
"""
45+
Returns a new object with config fields overridden by the passed values.
46+
47+
To learn more about parameters and their formats and possible values,
48+
refer to
49+
`generative search documentation <https://yandex.cloud/docs/search-api/concepts/generative-response#body>`_
50+
51+
NB: All of the ``site``, ``host``, ``url`` parameters are mutually exclusive
52+
and using one of them is mandatory.
53+
54+
:param site: parameter for limiting search to specific location or list of sites.
55+
:param host: parameter for limiting search to specific location or list of hosts.
56+
:param url: parameter for limiting search to specific location or list of URLs.
57+
:param fix_misspell: tells the backend whether or not to fix misspellings in queries.
58+
:param enable_nrfm_docs: tells the backend whether or not to include pages
59+
which are not available via direct clicks from the given sites/hosts/urls
60+
in the search result.
61+
:param search_filters: allows limiting search results with additional filters.
62+
63+
>>> date_filter = {'date': '<20250101'}
64+
>>> format_filter = {'format': 'doc'}
65+
>>> lang_filter = {'lang': 'ru'}
66+
>>> search = sdk.search_api.generative(search_filters=[date_filter, format_filter, lang_filter])
67+
68+
"""
69+
3970
return super().configure(
4071
site=site,
4172
host=host,
@@ -53,6 +84,35 @@ def __repr__(self) -> str:
5384

5485
@override
5586
async def _run(self, request: MessageInputType, *, timeout: float = 60) -> GenerativeSearchResult:
87+
"""Run a search query with given ``request`` and search settings of this generative search
88+
object.
89+
90+
To change initial search settings use ``.configure`` method:
91+
92+
>>> search = sdk.search_api.generative(site="site")
93+
>>> search = search.configure(site="other_site")
94+
95+
:param request: search request, which could be either standalone request (message) or
96+
a list of messages, which represents a context of conversation with a model.
97+
98+
Also message could be one of the data formats:
99+
100+
* ``"string"`` -- in case of string input message will be passed to a model with a ``role="user"``;
101+
102+
* ``{"text": "text", "role": "user"}`` -- in case of dict input, it will be passed
103+
with corresponding ``"text"`` and ``"role"`` dict keys;
104+
105+
* ``MessageObject`` -- you could also pass any object which has
106+
``text: str`` and ``role: str`` attributes, allowing you to reuse various
107+
result objects, for example an object you get from a completions model run
108+
or a result object from generative search itself;
109+
110+
* ``["string"/dict/object]`` -- list or any other sequence of any above described
111+
formats.
112+
113+
:param timeout: timeout, or the maximum time to wait for the request to complete in seconds.
114+
115+
"""
56116
self.config._validate_run()
57117
messages = messages_to_proto(request)
58118

@@ -98,14 +158,18 @@ async def _run(self, request: MessageInputType, *, timeout: float = 60) -> Gener
98158
raise RuntimeError("call returned less then one result")
99159

100160

161+
@doc_from(BaseGenerativeSearch)
101162
class AsyncGenerativeSearch(BaseGenerativeSearch):
163+
@doc_from(BaseGenerativeSearch._run)
102164
async def run(self, request: MessageInputType, *, timeout: float = 60) -> GenerativeSearchResult:
103165
return await self._run(request=request, timeout=timeout)
104166

105167

168+
@doc_from(BaseGenerativeSearch)
106169
class GenerativeSearch(BaseGenerativeSearch):
107170
__run = run_sync(BaseGenerativeSearch._run)
108171

172+
@doc_from(BaseGenerativeSearch._run)
109173
def run(self, request: MessageInputType, *, timeout: float = 60) -> GenerativeSearchResult:
110174
return self.__run(request=request, timeout=timeout)
111175

src/yandex_cloud_ml_sdk/_search_api/generative/result.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,17 @@
1313

1414
@dataclass(frozen=True)
1515
class SearchSource(ProtoBased[GenSearchResponse.Source]):
16+
"""
17+
Source document found for user query.
18+
19+
Might be used or not used in generative answer itself.
20+
"""
21+
22+
#: Url of the document
1623
url: str
24+
#: Title of the document
1725
title: str
26+
#: Has this source been used in generative answer or not
1827
used: bool
1928

2029
@override
@@ -29,7 +38,9 @@ def _from_proto(cls, *, proto: GenSearchResponse.Source, sdk: SDKType) -> Self:
2938

3039
@dataclass(frozen=True)
3140
class SearchQuery(ProtoBased[GenSearchResponse.SearchQuery]):
41+
#: Text of the search query
3242
text: str
43+
#: Request id
3344
req_id: str
3445

3546
@override
@@ -43,12 +54,22 @@ def _from_proto(cls, *, proto: GenSearchResponse.SearchQuery, sdk: SDKType) -> S
4354

4455
@dataclass(frozen=True)
4556
class GenerativeSearchResult(BaseResult[GenSearchResponse], TextMessage):
57+
#: Generative answer itself.
58+
#: Note that footnotes like ``[N]`` in the text refer to ``GenerativeSearchResult.sources[N]``
59+
#: source.
4660
text: str
61+
#: Message sender role; in case of the generative search, model always answers with the
62+
#: "assistant" role.
4763
role: str
64+
#: Fixed query string in case the query was fixed
4865
fixed_misspell_query: str | None
66+
#: Answer was rejected for some reason, probably because of ethics restrictions
4967
is_answer_rejected: bool
68+
#: Model was unable to give good answer and returned bulleted list with some info.
5069
is_bullet_answer: bool
70+
#: List of documents found by user query; every element number matches with footnotes in the ``.text`` attribute.
5171
sources: tuple[SearchSource, ...]
72+
#: List of search queries sent to model
5273
search_queries: tuple[SearchQuery, ...]
5374

5475
@override
@@ -70,4 +91,10 @@ def _from_proto(cls, *, proto: GenSearchResponse, sdk: SDKType) -> Self:
7091

7192
@property
7293
def content(self) -> str:
94+
"""Alias to ``GenerativeSearchResult.text``.
95+
96+
Only to add some compatibility with raw Search API answer which has a "content" field
97+
in protobufs and REST answers unlike other parts of this SDK.
98+
"""
99+
73100
return self.text

0 commit comments

Comments
 (0)