Skip to content

Commit 55d2b15

Browse files
committed
Add sync examples
1 parent 07f8f2b commit 55d2b15

File tree

6 files changed

+367
-0
lines changed

6 files changed

+367
-0
lines changed

examples/sync/chat/__init__.py

Whitespace-only changes.

examples/sync/chat/history.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
from yandex_cloud_ml_sdk import YCloudML
6+
7+
8+
def get_model(sdk: YCloudML):
    """Show every chat model available in the folder and return the one the user picks."""
    available = sdk.chat.completions.list()
    print('You have access to the following models:')
    last_index = 0
    for last_index, entry in enumerate(available):
        print(f" [{last_index:2}] {entry.uri}")

    chosen = int(input(f"Please, input model number from 0 to {last_index}: "))
    return available[chosen]
18+
19+
20+
def main() -> None:
    """Interactive chat loop that keeps the whole conversation as context."""
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    model = get_model(sdk)
    model = model.configure(temperature=0.5)

    # Conversation history: grows with every user request and model answer.
    context = []
    while True:
        request = input("Please input request for model: ")
        context.append(request)

        chunk = None
        # run_stream is used here for nicer interactive output; with respect
        # to context handling it is no different from plain "run".
        for chunk in model.run_stream(context):
            print(chunk[0].delta, end="", flush=True)
        print()
        assert chunk  # to please static type checker

        # Put the model answer back into the context so it is sent along with
        # the next request; the last chunk object can be used directly because
        # it has a ".text" attribute containing the full model answer.
        # NOTE(review): this appends the chunk object itself, not chunk.text —
        # presumably the SDK accepts message objects exposing .text; confirm
        # against the SDK documentation.
        context.append(chunk)


if __name__ == '__main__':
    main()

examples/sync/chat/reasoning.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
from yandex_cloud_ml_sdk import YCloudML
6+
7+
8+
def get_model(sdk: YCloudML):
    """Print the chat models the caller has access to and return the selected one."""
    catalogue = sdk.chat.completions.list()
    idx = 0
    print('You have access to the following models:')
    for idx, item in enumerate(catalogue):
        print(f" [{idx:2}] {item.uri}")

    selected = input(f"Please, input model number from 0 to {idx}: ")
    return catalogue[int(selected)]
18+
19+
20+
def main() -> None:
    """Demonstrate reasoning_mode configuration in both plain and streaming runs."""
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    # This is how to create a model object
    model = sdk.chat.completions('qwen3-235b-a22b-fp8')
    # But in this example we will get it via the .list method
    model = get_model(sdk)

    request = "What it would be 11!?"

    model = model.configure(temperature=0, reasoning_mode='medium')

    result = model.run(request)
    print(f"Request: {request}")
    # BUG FIX: "Reasoinig" typo in the user-facing output corrected.
    print(f"Reasoning text: {result.reasoning_text}")
    print(f"Result text: {result.text}")

    print()
    print(f"Streaming request: {request}")

    model = model.configure(reasoning_mode='high')
    reasoning_started = False
    result_started = False

    for chunk in model.run_stream(request):
        # delta is pre-declared for the type checker; the walrus assignments
        # below always set it before the final `if delta:` check.
        delta: str | None = None
        # NB: there is a very important difference between reasoning_delta and
        # reasoning_text, like chunk.text/chunk.delta — see the stream.py
        # example file for details.
        if delta := chunk.choices[0].reasoning_delta:
            if not reasoning_started:
                print('Streaming reasoning text: ')

            reasoning_started = True

        elif delta := chunk.choices[0].delta:
            if not result_started:
                print()
                print('Streaming result text: ')
            result_started = True

        if delta:
            print(delta, end="", flush=True)

    # you could reset to the default reasoning mode
    model = model.configure(reasoning_mode=None)
    result = model.run(request)
    # BUG FIX: "Reasoinig"/"resoning" typos in the user-facing output corrected.
    print(f"Reasoning text with default reasoning mode: {result.reasoning_text}")


if __name__ == '__main__':
    main()

examples/sync/chat/run.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
import pprint
6+
7+
from yandex_cloud_ml_sdk import YCloudML
8+
9+
10+
def get_model(sdk: YCloudML):
    """Enumerate the available chat models and return the user's choice."""
    model_list = sdk.chat.completions.list()
    print('You have access to the following models:')
    pos = 0
    for pos, candidate in enumerate(model_list):
        print(f" [{pos:2}] {candidate.uri}")

    answer = input(f"Please, input model number from 0 to {pos}: ")
    index = int(answer)
    return model_list[index]
20+
21+
22+
def main() -> None:
    """Run a single chat completion and inspect the result object's fields."""
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    # This is how to create a model object
    model = sdk.chat.completions('qwen3-235b-a22b-fp8')
    # But in this example we will get it via the .list method
    model = get_model(sdk)

    request = "How to calculate the Hirsch index in O(N)"

    model = model.configure(temperature=0.5)

    result = model.run(request)

    print('You could inspect the fields which have the result structure:')
    pprint.pprint(result)
    print('\n')

    print('Or just access the "text/content" field')
    print(result.text)

    # NB: text and content are synonyms
    assert result.text == result.content

    # With a small max_tokens the answer gets truncated by length.
    model = model.configure(max_tokens=10)
    result = model.run(request)
    assert result.finish_reason.name == 'LENGTH'
    # status field is a synonym for finish_reason, but with names consistent
    # with other parts of yandex_cloud_ml_sdk
    assert result.status.name == 'TRUNCATED_FINAL'


if __name__ == '__main__':
    main()

examples/sync/chat/stream.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
from yandex_cloud_ml_sdk import YCloudML
6+
7+
8+
def clear():
    """Clear the terminal screen via the ANSI "erase display" escape sequence."""
    # ESC (chr(27)) followed by "[2J"; this will probably work only on
    # ANSI-capable (e.g. Linux) terminals.
    escape = chr(27)
    print(escape + "[2J")
12+
13+
14+
def get_model(sdk: YCloudML):
    """List the chat models available to the user and return the picked one."""
    options = sdk.chat.completions.list()
    print('You have access to the following models:')
    n = 0
    for n, option in enumerate(options):
        print(f" [{n:2}] {option.uri}")

    reply = input(f"Please, input model number from 0 to {n}: ")
    return options[int(reply)]
24+
25+
26+
def main() -> None:
    """Showcase streaming chat completions: text vs delta, usage and finish reason."""
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    # This is how to create a model object
    model = sdk.chat.completions('qwen3-235b-a22b-fp8')
    # But in this example we will get it via the .list method
    model = get_model(sdk)

    model = model.configure(temperature=0.5)

    request = "How to calculate the Hirsch index in O(N)"

    chunk = None

    for chunk in model.run_stream(request):
        clear()
        print(f"{request}:")
        print(chunk.text)

    # chunk[0] is a shortcut for chunk.choices[0]:
    choice = chunk[0]
    assert choice == chunk[0] == chunk.choices[0]

    # chunk.text is a shortcut for chunk[0].text:
    assert chunk.text == choice.text

    # There is a very important difference between choice.text and
    # choice.delta:
    # * choice.text contains a constantly increasing PREFIX of generated text,
    #   like in other parts of yandex-cloud-ml-sdk
    # * choice.delta contains only the newly generated text delta in
    #   openai-streaming style
    clear()
    print(f"{request}:")
    for chunk in model.run_stream(request):
        print(chunk[0].delta, end="", flush=True)
    print()

    assert chunk  # to make type checker happy
    # NB: Some of the models have a usage field in the last chunk;
    # qwen3-235b-a22b-fp8 does have it.
    # BUG FIX: the original string was missing the f-prefix, so it printed
    # the literal text "{chunk.usage=}" instead of the actual usage value.
    print(f"{chunk.usage=}")

    model = model.configure(max_tokens=10)
    print('\n')
    print("Showcase for 'length' finish reason:")
    for chunk in model.run_stream(request):
        print(chunk)

    assert chunk.finish_reason.name == 'LENGTH'
    # status field is a synonym for finish_reason, but with names consistent with
    # other parts of yandex_cloud_ml_sdk
    assert chunk.status.name == 'TRUNCATED_FINAL'


if __name__ == '__main__':
    main()
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#!/usr/bin/env python3
2+
3+
from __future__ import annotations
4+
5+
import json
6+
7+
import pydantic
8+
9+
from yandex_cloud_ml_sdk import YCloudML
10+
11+
12+
class Venue(pydantic.BaseModel):
    """Pydantic model used as the JSON-schema source for structured output."""

    date: str
    place: str
15+
16+
17+
@pydantic.dataclasses.dataclass
class VenueDataclass:
    """Venue schema as a pydantic dataclass, with an extra month field."""

    date: str
    place: str
    month: str
22+
23+
24+
def get_model(sdk: YCloudML):
    """Display the accessible chat models and return the one chosen by the user."""
    found = sdk.chat.completions.list()
    cursor = 0
    print('You have access to the following models:')
    for cursor, m in enumerate(found):
        print(f" [{cursor:2}] {m.uri}")

    picked = int(input(f"Please, input model number from 0 to {cursor}: "))
    return found[picked]
34+
35+
36+
def main() -> None:
    """Demonstrate the different response_format options for structured output."""
    sdk = YCloudML(folder_id='b1ghsjum2v37c2un8h64')
    sdk.setup_default_logging()

    model = get_model(sdk)
    text = (
        'The conference will take place from May 10th to 12th, 2023, '
        'at 30 Avenue Corentin Cariou in Paris, France.'
    )
    context = [
        {'role': 'system', 'text': 'Extract the date and venue information'},
        {'role': 'user', 'text': text},
    ]

    # We could ask the model to return data just in JSON format; the model
    # will figure out the exact shape by itself:
    model = model.configure(response_format='json')
    result = model.run(context)
    print('Any JSON:', result[0].text)

    # Now, if you need not just JSON, but a parsed Python structure, you will need to parse it.
    # Be aware that you may need to handle parsing exceptions in case the model returns incorrect json.
    # This could happen, for example, if you exceed the token limit.
    try:
        data = json.loads(result.text)
        print("Parsed JSON:", data)

        bad_text = result.text[:5]
        json.loads(bad_text)
    except json.JSONDecodeError as e:
        print("JSON parsing error:", e)

    # You could use not only .run, but .run_stream as well as other methods too:
    print('Any JSON in streaming:')
    for partial_result in model.run_stream(context):
        print(f" {partial_result.text or '<EMPTY>'}")

    # NB: Each example uses a slightly different schema to show the difference
    # in the printed results.
    # We could pass a raw json schema:
    model = model.configure(response_format={
        "name": 'foo',
        "json_schema": {
            "properties": {
                "DATE": {
                    "title": "Date",
                    "type": "string"
                },
                "PLACE": {
                    "title": "Place",
                    "type": "string"
                }
            },
            "required": ["DATE", "PLACE"],
            "title": "Venue",
            "type": "object"
        }
    })
    result = model.run(context)
    print('JSONSchema from raw jsonschema:', result[0].text)

    # Also we could use a pydantic.BaseModel descendant to describe the
    # JSONSchema for structured output:
    model = model.configure(response_format=Venue)
    result = model.run(context)
    print('JSONSchema from Pydantic model:', result[0].text)

    # Lastly we could pass a pydantic dataclass.
    # BUG FIX: the original `pydantic.__version__ > "2"` compared version
    # strings lexicographically, which would wrongly reject e.g. "10.0";
    # compare the numeric major version instead.
    major_version = int(pydantic.__version__.split('.', 1)[0])
    assert major_version >= 2
    model = model.configure(response_format=VenueDataclass)
    result = model.run(context)
    print('JSONSchema from Pydantic dataclass:', result[0].text)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)