Skip to content

Commit 8dbf232

Browse files
authored
Add rephraser options (#95)
1 parent 35d89a6 commit 8dbf232

File tree

14 files changed

+2109
-13
lines changed

14 files changed

+2109
-13
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
import asyncio
6+
import pathlib
7+
8+
from yandex_cloud_ml_sdk import AsyncYCloudML
9+
10+
# Label key/value pair attached to every cloud entity this example creates
# (search index, assistants, threads), so that the example can later find
# and clean up its own resources.
LABEL_KEY = 'yc-ml-sdk-example'
PATH = pathlib.Path(__file__)
# Label value: 'example-<parent directory name>-<script file name>'
NAME = f'example-{PATH.parent.name}-{PATH.name}'
13+
14+
15+
def local_path(path: str) -> pathlib.Path:
    """Return ``path`` joined onto the directory that contains this file."""
    return pathlib.Path(__file__).parent / path
17+
18+
19+
async def get_search_index(sdk):
    """
    Return the demo search index, creating it if no labeled one exists yet.

    This function represents getting or creating the demo search_index object.
    In real life you will get it in whatever other way suits your case.

    :param sdk: SDK client; its ``search_indexes`` and ``files`` domains are used.
    :returns: the first listed search index labeled ``LABEL_KEY: NAME``, or
        a newly created one built from ``turkey_example.txt``.
    """

    async for search_index in sdk.search_indexes.list():
        if search_index.labels and search_index.labels.get(LABEL_KEY) == NAME:
            print(f'using {search_index=}')
            break
    else:
        # the loop finished without `break`: no index carries our label
        print('no search indexes found, creating new one')
        file = await sdk.files.upload(
            local_path('turkey_example.txt')
        )
        try:
            operation = await sdk.search_indexes.create_deferred(file, labels={LABEL_KEY: NAME})
            search_index = await operation
            print(f'new {search_index=}')
        finally:
            # delete the uploaded file even when index creation fails,
            # so a failed run does not leave it behind
            await file.delete()

    return search_index
42+
43+
44+
async def delete_labeled_entities(iterator):
    """
    Delete every entity from the given iterator which has a ``.labels``
    attribute with ``labels[LABEL_KEY] == NAME``.

    :param iterator: async iterable of entities exposing ``.labels``,
        ``.id`` and an awaitable ``.delete()``.
    """

    async for entity in iterator:
        # entities with empty/None labels, or a different label value, are left alone
        if entity.labels and entity.labels.get(LABEL_KEY) == NAME:
            print(f'deleting {entity.__class__.__name__} with id={entity.id!r}')
            await entity.delete()
54+
55+
56+
async def main() -> None:
    """
    Run the rephraser demo.

    Builds search index tools with and without a rephraser, creates one
    assistant per tool, sends the same kind of questions through both and
    prints the answers; finally deletes the assistants and threads labeled
    by this example.
    """
    sdk = AsyncYCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging(log_level='WARNING')

    search_index = await get_search_index(sdk)
    labels = {LABEL_KEY: NAME}

    # search index tool without rephraser
    tool = sdk.tools.search_index(search_index)

    # search index tool with rephraser;
    # there are a few equivalent ways to define the rephraser:
    ## will use the default `gpt://<folder_id>/rephraser/latest` rephraser
    tool_with_rephraser = sdk.tools.search_index(search_index, rephraser=True)
    ## will use `gpt://<folder_id>/<name>/latest`
    tool_with_rephraser = sdk.tools.search_index(search_index, rephraser='rephraser')
    ## will use the custom rephraser object you passed
    rephraser = sdk.tools.rephraser('rephraser', model_version='latest')
    tool_with_rephraser = sdk.tools.search_index(search_index, rephraser=rephraser)

    try:
        assistant_wo_rephraser = await sdk.assistants.create('yandexgpt', labels=labels, tools=[tool])
        assistant_with_rephraser = await sdk.assistants.create('yandexgpt', labels=labels, tools=[tool_with_rephraser])

        # NB: The next code just runs the assistants with and without rephraser
        # and shows the rephraser effect;
        # if something is not clear to you, refer to the other assistants examples.
        thread = await sdk.threads.create(labels=labels)

        async def run(query, rephrase: bool) -> None:
            assistant = assistant_with_rephraser if rephrase else assistant_wo_rephraser

            await thread.write(query)
            # named `operation` so that it does not shadow this `run` helper
            operation = await assistant.run(thread)
            result = await operation

            print(f"Question: {query}")
            preposition = 'with' if rephrase else 'without'
            print(f"Answer {preposition} rephraser:\n    {result.text!r}")
            print()

        # NOTE(review): query #1 is spelled with a Latin "y" while the rerun
        # below uses the Cyrillic letter; presumably a deliberate typo, confirm.
        await run('Куда yбежать?', rephrase=False)  # 1
        await run('Гиде атттапыриццца?', rephrase=False)  # 2
        await run('Где отдохнуть?', rephrase=False)  # 3
        await run('Куда сбежать?', rephrase=False)  # 4

        # Note that #1 and #2 gave the stupid answers, but after
        # we gave "normal" questions in #3 and #4,
        # #1 and #2 with rephraser will give normal answers:
        await run('Куда убежать?', rephrase=True)
        await run('Гиде атттапыриццца?', rephrase=True)
    finally:
        # we delete all assistants and threads created in this example
        # so as not to increase the chaos level (even when a run above fails),
        # but not the search index, because index creation is a slow operation
        # and the index can be re-used by the next run of this example
        await delete_labeled_entities(sdk.assistants.list())
        await delete_labeled_entities(sdk.threads.list())
113+
114+
115+
if __name__ == '__main__':
    # script entry point: run the async demo to completion
    asyncio.run(main())
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
import pathlib
6+
7+
from yandex_cloud_ml_sdk import YCloudML
8+
9+
# Label key/value pair attached to every cloud entity this example creates
# (search index, assistants, threads), so that the example can later find
# and clean up its own resources.
LABEL_KEY = 'yc-ml-sdk-example'
PATH = pathlib.Path(__file__)
# Label value: 'example-<parent directory name>-<script file name>'
NAME = f'example-{PATH.parent.name}-{PATH.name}'
12+
13+
14+
def local_path(path: str) -> pathlib.Path:
    """Return ``path`` joined onto the directory that contains this file."""
    return pathlib.Path(__file__).parent / path
16+
17+
18+
def get_search_index(sdk):
    """
    Return the demo search index, creating it if no labeled one exists yet.

    This function represents getting or creating the demo search_index object.
    In real life you will get it in whatever other way suits your case.

    :param sdk: SDK client; its ``search_indexes`` and ``files`` domains are used.
    :returns: the first listed search index labeled ``LABEL_KEY: NAME``, or
        a newly created one built from ``turkey_example.txt``.
    """

    for search_index in sdk.search_indexes.list():
        if search_index.labels and search_index.labels.get(LABEL_KEY) == NAME:
            print(f'using {search_index=}')
            break
    else:
        # the loop finished without `break`: no index carries our label
        print('no search indexes found, creating new one')
        file = sdk.files.upload(
            local_path('turkey_example.txt')
        )
        try:
            operation = sdk.search_indexes.create_deferred(file, labels={LABEL_KEY: NAME})
            search_index = operation.wait()
            print(f'new {search_index=}')
        finally:
            # delete the uploaded file even when index creation fails,
            # so a failed run does not leave it behind
            file.delete()

    return search_index
41+
42+
43+
def delete_labeled_entities(iterator):
    """
    Delete every entity from the given iterator which has a ``.labels``
    attribute with ``labels[LABEL_KEY] == NAME``.

    :param iterator: iterable of entities exposing ``.labels``, ``.id``
        and a ``.delete()`` method.
    """

    for entity in iterator:
        # entities with empty/None labels, or a different label value, are left alone
        if entity.labels and entity.labels.get(LABEL_KEY) == NAME:
            print(f'deleting {entity.__class__.__name__} with id={entity.id!r}')
            entity.delete()
53+
54+
55+
def main() -> None:
    """
    Run the rephraser demo.

    Builds search index tools with and without a rephraser, creates one
    assistant per tool, sends the same kind of questions through both and
    prints the answers; finally deletes the assistants and threads labeled
    by this example.
    """
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging(log_level='WARNING')

    search_index = get_search_index(sdk)
    labels = {LABEL_KEY: NAME}

    # search index tool without rephraser
    tool = sdk.tools.search_index(search_index)

    # search index tool with rephraser;
    # there are a few equivalent ways to define the rephraser:
    ## will use the default `gpt://<folder_id>/rephraser/latest` rephraser
    tool_with_rephraser = sdk.tools.search_index(search_index, rephraser=True)
    ## will use `gpt://<folder_id>/<name>/latest`
    tool_with_rephraser = sdk.tools.search_index(search_index, rephraser='rephraser')
    ## will use the custom rephraser object you passed
    rephraser = sdk.tools.rephraser('rephraser', model_version='latest')
    tool_with_rephraser = sdk.tools.search_index(search_index, rephraser=rephraser)

    try:
        assistant_wo_rephraser = sdk.assistants.create('yandexgpt', labels=labels, tools=[tool])
        assistant_with_rephraser = sdk.assistants.create('yandexgpt', labels=labels, tools=[tool_with_rephraser])

        # NB: The next code just runs the assistants with and without rephraser
        # and shows the rephraser effect;
        # if something is not clear to you, refer to the other assistants examples.
        thread = sdk.threads.create(labels=labels)

        def run(query, rephrase: bool) -> None:
            assistant = assistant_with_rephraser if rephrase else assistant_wo_rephraser

            thread.write(query)
            # named `operation` so that it does not shadow this `run` helper
            operation = assistant.run(thread)
            result = operation.wait()

            print(f"Question: {query}")
            preposition = 'with' if rephrase else 'without'
            print(f"Answer {preposition} rephraser:\n    {result.text!r}")
            print()

        # NOTE(review): query #1 is spelled with a Latin "y" while the rerun
        # below uses the Cyrillic letter; presumably a deliberate typo, confirm.
        run('Куда yбежать?', rephrase=False)  # 1
        run('Гиде атттапыриццца?', rephrase=False)  # 2
        run('Где отдохнуть?', rephrase=False)  # 3
        run('Куда сбежать?', rephrase=False)  # 4

        # Note that #1 and #2 gave the stupid answers, but after
        # we gave "normal" questions in #3 and #4,
        # #1 and #2 with rephraser will give normal answers:
        run('Куда убежать?', rephrase=True)
        run('Гиде атттапыриццца?', rephrase=True)
    finally:
        # we delete all assistants and threads created in this example
        # so as not to increase the chaos level (even when a run above fails),
        # but not the search index, because index creation is a slow operation
        # and the index can be re-used by the next run of this example
        delete_labeled_entities(sdk.assistants.list())
        delete_labeled_entities(sdk.threads.list())
112+
113+
114+
if __name__ == '__main__':
    # script entry point: run the synchronous demo
    main()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ classifiers = [
3333
requires-python = ">=3.9"
3434
dynamic = ["version"]
3535
dependencies = [
36-
"yandexcloud>=0.335.0",
36+
"yandexcloud>=0.343.0",
3737
"grpcio>=1.70.0",
3838
"get-annotations",
3939
"httpx>=0.27,<1",

src/yandex_cloud_ml_sdk/_tools/domain.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66

77
from yandex_cloud_ml_sdk._search_indexes.search_index import BaseSearchIndex
88
from yandex_cloud_ml_sdk._types.domain import BaseDomain
9-
from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr, get_defined_value
9+
from yandex_cloud_ml_sdk._types.misc import UNDEFINED, UndefinedOr, get_defined_value, is_defined
1010
from yandex_cloud_ml_sdk._utils.coerce import ResourceType, coerce_resource_ids
1111

1212
from .function import AsyncFunctionTools, FunctionTools, FunctionToolsTypeT
13+
from .rephraser.function import RephraserFunction, RephraserInputType
1314
from .tool import SearchIndexTool
1415

1516

@@ -23,17 +24,44 @@ def function(self) -> FunctionToolsTypeT:
2324
sdk=self._sdk
2425
)
2526

27+
@cached_property
def rephraser(self) -> RephraserFunction:
    """Return the function object used to build Rephraser settings.

    The object is created on first access and cached on this instance.
    """
    return RephraserFunction(
        # fixed typo: the name was previously spelled 'tools.rehraser'
        name='tools.rephraser',
        sdk=self._sdk,
        parent_resource=self
    )
34+
2635
def search_index(
    self,
    indexes: ResourceType[BaseSearchIndex],
    *,
    max_num_results: UndefinedOr[int] = UNDEFINED,
    rephraser: UndefinedOr[RephraserInputType] = UNDEFINED,
) -> SearchIndexTool:
    """Creates SearchIndexTool (not to be confused with :py:class:`~.SearchIndex`).

    :param indexes: parameter takes :py:class:`~.BaseSearchIndex`, string with search index id,
        or a list of these values in any combination.
    :param max_num_results: the maximum number of results to return from the search.
        Fewer results may be returned if necessary to fit within the prompt's token limit.
        This ensures that the combined prompt and search results do not exceed the token constraints.
    :param rephraser: setting for rephrasing user queries; refer to :py:class:`~.Rephraser` documentation
        for details.
    :returns: a SearchIndexTool holding the coerced index ids, ``max_num_results``
        (``None`` when left undefined) and the rephraser (``None`` when left undefined).
    """

    index_ids = coerce_resource_ids(indexes, BaseSearchIndex)
    max_num_results_ = get_defined_value(max_num_results, None)

    rephraser_ = None
    if is_defined(rephraser):
        # this is coercing any RephraserInputType to Rephraser
        rephraser_ = self.rephraser(rephraser)  # type: ignore[arg-type]

    return SearchIndexTool(
        search_index_ids=tuple(index_ids),
        max_num_results=max_num_results_,
        rephraser=rephraser_,
    )
3866

3967

src/yandex_cloud_ml_sdk/_tools/rephraser/__init__.py

Whitespace-only changes.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from __future__ import annotations
2+
3+
from typing import Literal, Union
4+
5+
from typing_extensions import TypeAlias, override
6+
7+
from yandex_cloud_ml_sdk._models.completions.function import BaseCompletions
8+
9+
from .model import Rephraser
10+
11+
# Values accepted as a rephraser specification: a model name/URI string,
# the literal ``True`` (meaning the default 'rephraser' model name),
# or an already constructed ``Rephraser`` object.
RephraserInputType: TypeAlias = Union[str, Literal[True], Rephraser]
12+
13+
14+
class RephraserFunction(BaseCompletions[Rephraser]):
    """Function for creating a Rephraser object, which encapsulates
    rephrasing settings.
    """

    _model_type = Rephraser

    @override
    def __call__(
        self,
        model_name: RephraserInputType,
        *,
        model_version: str = 'latest',
    ) -> Rephraser:
        """Creates a Rephraser object, which encapsulates rephrasing settings.

        :param model_name:
            Model ID used for model uri definition in a resulting Rephraser object.
            It is handled differently depending on the type and format of the input value:

            * If ``model_name`` includes ``://`` substring, it would be used unchanged.

            * Otherwise if ``model_name`` is a string, it would be used in
              ``gpt://<folder_id>/<model_name>/<model_version>`` template.

            * If ``model_name`` is a True, it would be transformed into default value
              ``gpt://<folder_id>/rephraser/<model_version>``

            * If ``model_name`` is a Rephraser object, it would be returned unchanged.

        :param model_version: ``<model_version>`` value for model uri template,
            refer to model_name parameter documentation for details.

        :returns: Rephraser object, which encapsulates rephrasing settings

        :raises TypeError: if ``model_name`` is not a string, ``True``
            or a Rephraser object.

        """

        name: str
        if model_name is True:
            # ``True`` selects the default rephraser model name
            name = 'rephraser'
        elif isinstance(model_name, str):
            name = model_name
        elif isinstance(model_name, Rephraser):
            # already a Rephraser: nothing to build
            return model_name
        else:
            raise TypeError(
                'wrong type for model_name: expected str, True or Rephraser, '
                f'got {type(model_name).__name__}'
            )

        return super().__call__(
            model_name=name,
            model_version=model_version,
        )

0 commit comments

Comments
 (0)