Update type hints in genai: dict #2112

Merged

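Every file below applies the same mechanical substitution: annotations built from `typing.Dict` are rewritten to the builtin `dict` generic, which PEP 585 made subscriptable in Python 3.9 (the `typing` aliases are deprecated since that release). A minimal before/after sketch of the pattern, with hypothetical function names:

    # Before: deprecated typing aliases
    from typing import Dict, List

    def count_tokens_old(prompts: List[str]) -> Dict[str, int]:
        return {p: len(p.split()) for p in prompts}

    # After: builtin generics; requires Python >= 3.9, or
    # `from __future__ import annotations` on older interpreters
    def count_tokens_new(prompts: list[str]) -> dict[str, int]:
        return {p: len(p.split()) for p in prompts}
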
6 changes: 3 additions & 3 deletions samples/python/text_generation/limit_checker.py
@@ -5,7 +5,7 @@
import csv
from dataclasses import dataclass
from pathlib import Path
-from typing import Dict, List, Optional
+from typing import List, Optional
from tqdm import tqdm

from optimum.intel.openvino import OVModelForCausalLM
@@ -60,13 +60,13 @@ def retry_request(func, retries=5):
else:
raise e

-def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]:
+def load_prompts_dataset(file_name : str) -> dict[str, List[str]]:
TESTS_ROOT = Path('tests/python_tests')
file_path = TESTS_ROOT / 'data' / file_name
with open(file_path, 'r') as f:
return {"prompts": [s for s in f]}

-def load_samsum_dataset(file_name : str) -> Dict[str, List[str]]:
+def load_samsum_dataset(file_name : str) -> dict[str, List[str]]:
import json
retval = {"prompts": []}
with open(file_name, 'r') as json_file:
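Note that the updated signatures still take `List` (and `Optional`) from `typing`; only `Dict` is lowered in this PR. A fully modernized variant of the helper above could lower `List` as well — an assumption going beyond what this PR changes:

    from pathlib import Path

    def load_prompts_dataset(file_name: str) -> dict[str, list[str]]:
        TESTS_ROOT = Path('tests/python_tests')
        file_path = TESTS_ROOT / 'data' / file_name
        with open(file_path, 'r') as f:
            return {"prompts": [s for s in f]}
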
28 changes: 14 additions & 14 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -939,7 +939,7 @@ class ImageGenerationPerfMetrics:
If mean and std were already calculated, getters return cached values.

:param get_text_encoder_infer_duration: Returns the inference duration of every text encoder in milliseconds.
-:type get_text_encoder_infer_duration: Dict[str, float]
+:type get_text_encoder_infer_duration: dict[str, float]

:param get_vae_encoder_infer_duration: Returns the inference duration of vae encoder in milliseconds.
:type get_vae_encoder_infer_duration: float
@@ -1120,13 +1120,13 @@ class LLMPipeline:
:type inputs: str, List[str], ov.genai.TokenizedInputs, or ov.Tensor

:param generation_config: generation_config
-:type generation_config: GenerationConfig or a Dict
+:type generation_config: GenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in encoded, or decoded form depending on inputs type
:rtype: DecodedResults, EncodedResults, str
@@ -1217,13 +1217,13 @@ class LLMPipeline:
:type inputs: str, List[str], ov.genai.TokenizedInputs, or ov.Tensor

:param generation_config: generation_config
-:type generation_config: GenerationConfig or a Dict
+:type generation_config: GenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in encoded, or decoded form depending on inputs type
:rtype: DecodedResults, EncodedResults, str
@@ -2104,7 +2104,7 @@ class VLMPipeline:
def __init__(self, models: dict[str, tuple[str, openvino._pyopenvino.Tensor]], tokenizer: Tokenizer, config_dir_path: os.PathLike, device: str, generation_config: GenerationConfig | None = None, **kwargs) -> None:
"""
VLMPipeline class constructor.
-models (typing.Dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
+models (dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
tokenizer (Tokenizer): Genai Tokenizers.
config_dir_path (os.PathLike): Path to folder with model configs.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
@@ -2136,13 +2136,13 @@ class VLMPipeline:
:type images: List[ov.Tensor] or ov.Tensor

:param generation_config: generation_config
-:type generation_config: GenerationConfig or a Dict
+:type generation_config: GenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in decoded form
:rtype: VLMDecodedResults
@@ -2170,13 +2170,13 @@ class VLMPipeline:
:type images: List[ov.Tensor] or ov.Tensor

:param generation_config: generation_config
-:type generation_config: GenerationConfig or a Dict
+:type generation_config: GenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in decoded form
:rtype: VLMDecodedResults
@@ -2318,7 +2318,7 @@ class WhisperGenerationConfig(GenerationConfig):
:type language: Optional[str]

:param lang_to_id: Language token to token_id map. Initialized from the generation_config.json lang_to_id dictionary.
-:type lang_to_id: Dict[str, int]
+:type lang_to_id: dict[str, int]

:param task: Task to use for generation, either “translate” or “transcribe”
:type task: int
@@ -2457,14 +2457,14 @@ class WhisperPipeline:
:type raw_speech_input: List[float]

:param generation_config: generation_config
-:type generation_config: WhisperGenerationConfig or a Dict
+:type generation_config: WhisperGenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
Streamer supported for short-form audio (< 30 seconds) with `return_timestamps=False` only
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to WhisperGenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in decoded form
:rtype: WhisperDecodedResults
@@ -2505,7 +2505,7 @@ class WhisperPipeline:
:type language: Optional[str]

:param lang_to_id: Language token to token_id map. Initialized from the generation_config.json lang_to_id dictionary.
-:type lang_to_id: Dict[str, int]
+:type lang_to_id: dict[str, int]

:param task: Task to use for generation, either “translate” or “transcribe”
:type task: int
2 changes: 1 addition & 1 deletion src/python/py_image_generation_pipelines.cpp
@@ -90,7 +90,7 @@ auto image_generation_perf_metrics_docstring = R"(
If mean and std were already calculated, getters return cached values.

:param get_text_encoder_infer_duration: Returns the inference duration of every text encoder in milliseconds.
-:type get_text_encoder_infer_duration: Dict[str, float]
+:type get_text_encoder_infer_duration: dict[str, float]

:param get_vae_encoder_infer_duration: Returns the inference duration of vae encoder in milliseconds.
:type get_vae_encoder_infer_duration: float
4 changes: 2 additions & 2 deletions src/python/py_llm_pipeline.cpp
@@ -34,13 +34,13 @@ auto generate_docstring = R"(
:type inputs: str, List[str], ov.genai.TokenizedInputs, or ov.Tensor

:param generation_config: generation_config
-:type generation_config: GenerationConfig or a Dict
+:type generation_config: GenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in encoded, or decoded form depending on inputs type
:rtype: DecodedResults, EncodedResults, str
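As a usage sketch of the docstring above: `generate` is documented to accept a `GenerationConfig` object, a plain `dict`, or bare keyword arguments whose keys match `GenerationConfig` fields. The model directory below is hypothetical:

    import openvino_genai as ov_genai

    pipe = ov_genai.LLMPipeline("./model_dir", "CPU")  # hypothetical path

    # Explicit GenerationConfig object
    config = ov_genai.GenerationConfig(max_new_tokens=32)
    print(pipe.generate("What is OpenVINO?", config))

    # Equivalent kwargs form; keys correspond to GenerationConfig fields
    print(pipe.generate("What is OpenVINO?", max_new_tokens=32))
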
6 changes: 3 additions & 3 deletions src/python/py_vlm_pipeline.cpp
@@ -40,13 +40,13 @@ auto vlm_generate_docstring = R"(
:type images: List[ov.Tensor] or ov.Tensor

:param generation_config: generation_config
-:type generation_config: GenerationConfig or a Dict
+:type generation_config: GenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in decoded form
:rtype: VLMDecodedResults
@@ -192,7 +192,7 @@ void init_vlm_pipeline(py::module_& m) {
py::arg("generation_config") = std::nullopt, "generation config",
R"(
VLMPipeline class constructor.
-models (typing.Dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
+models (dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
tokenizer (Tokenizer): Genai Tokenizers.
config_dir_path (os.PathLike): Path to folder with model configs.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
6 changes: 3 additions & 3 deletions src/python/py_whisper_pipeline.cpp
@@ -42,14 +42,14 @@ auto whisper_generate_docstring = R"(
:type raw_speech_input: List[float]

:param generation_config: generation_config
-:type generation_config: WhisperGenerationConfig or a Dict
+:type generation_config: WhisperGenerationConfig or a dict

:param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
Streamer supported for short-form audio (< 30 seconds) with `return_timestamps=False` only
:type : Callable[[str], bool], ov.genai.StreamerBase

:param kwargs: arbitrary keyword arguments with keys corresponding to WhisperGenerationConfig fields.
-:type : Dict
+:type : dict

:return: return results in decoded form
:rtype: WhisperDecodedResults
@@ -109,7 +109,7 @@ auto whisper_generation_config_docstring = R"(
:type language: Optional[str]

:param lang_to_id: Language token to token_id map. Initialized from the generation_config.json lang_to_id dictionary.
-:type lang_to_id: Dict[str, int]
+:type lang_to_id: dict[str, int]

:param task: Task to use for generation, either “translate” or “transcribe”
:type task: int
5 changes: 2 additions & 3 deletions tests/python_tests/test_continuous_batching.py
@@ -7,7 +7,6 @@

from pathlib import Path
from shutil import rmtree
-from typing import Dict

from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, GenerationConfig, SchedulerConfig, draft_model, GenerationFinishReason

@@ -132,7 +131,7 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
@pytest.mark.parametrize("model_id", get_chat_models_list())
@pytest.mark.parametrize("pipeline_type", [PipelineType.PAGED_ATTENTION, PipelineType.PROMPT_LOOKUP_DECODING, PipelineType.SPECULATIVE_DECODING] )
@pytest.mark.precommit
-def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: Dict, pipeline_type):
+def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pipeline_type):
_, _, models_path = download_and_convert_model(model_id)

ov_pipe = create_ov_pipeline(models_path, pipeline_type=PipelineType.STATEFUL)
@@ -172,7 +171,7 @@ def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: Dict, pip
@pytest.mark.parametrize("model_id", get_chat_models_list())
@pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
@pytest.mark.precommit
-def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: Dict, pipeline_type):
+def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: dict, pipeline_type):
_, _, models_path = download_and_convert_model(model_id)

cb_pipe = create_ov_cb_pipeline(models_path, pipeline_type=pipeline_type)
4 changes: 2 additions & 2 deletions tests/python_tests/test_kv_cache_eviction.py
@@ -6,7 +6,7 @@
import pytest
from dataclasses import dataclass
from pathlib import Path
-from typing import Dict, List, Optional
+from typing import List, Optional
from tqdm import tqdm

from openvino_genai import ContinuousBatchingPipeline, SchedulerConfig, GenerationConfig, CacheEvictionConfig, AggregationMode
@@ -18,7 +18,7 @@
from data.test_dataset import get_test_dataset


-def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]:
+def load_prompts_dataset(file_name : str) -> dict[str, List[str]]:
TESTS_ROOT = Path(__file__).parent
file_path = TESTS_ROOT / 'data' / file_name
with open(file_path, 'r', encoding="utf-8") as f:
4 changes: 2 additions & 2 deletions tests/python_tests/test_tokenizer.py
@@ -11,7 +11,7 @@
import typing
import functools
from transformers import AutoTokenizer
-from typing import Dict, Tuple, List
+from typing import Tuple, List
import functools

from utils.hugging_face import convert_and_save_tokenizer
@@ -172,7 +172,7 @@ def test_decode(model_id, encoded_prompt):
@pytest.mark.nightly
@pytest.mark.parametrize('chat_config', get_chat_templates())
@pytest.mark.parametrize("model_id", get_models_list())
-def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict], model_id):
+def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, dict], model_id):
tokenizer_config = chat_config[1]

# Will load openvino_model for tiny-random-phi as a placeholder
4 changes: 2 additions & 2 deletions tests/python_tests/test_whisper_pipeline.py
@@ -20,7 +20,7 @@
from utils.constants import get_ov_cache_models_dir, extra_generate_kwargs

from utils.network import retry_request
-from typing import Any, List, Dict
+from typing import Any, List

@pytest.fixture(scope="class", autouse=True)
def run_gc_after_test():
@@ -211,7 +211,7 @@ def sample_from_dataset(request):

return samples[sample_id]

-def get_fixture_params_for_n_whisper_dataset_samples(n: int, language: str = "en", long_form : bool = False) -> List[Dict[str, Any]]:
+def get_fixture_params_for_n_whisper_dataset_samples(n: int, language: str = "en", long_form : bool = False) -> List[dict[str, Any]]:
return [{"language": language, "long_form": long_form, "sample_id": i} for i in range(n)]

def run_pipeline_with_ref(
13 changes: 6 additions & 7 deletions tools/cacheviz/cacheviz.py
@@ -17,7 +17,6 @@
import pathlib
from collections import defaultdict
from dataclasses import dataclass, field
-from typing import Dict
from typing import List
from typing import Tuple
import matplotlib.pyplot as plt
@@ -48,15 +47,15 @@ def get_hashed_rgb_color(idx: int) -> str:
class StepDumpData:
dump_file_name: str = None
num_blocks: int = None
-occupied_blocks: Dict[int, List[Tuple[int, int]]] = field(default_factory=lambda: defaultdict(list))
-occupied_blocks_per_sequence: Dict[int, List[int]] = field(default_factory=lambda: defaultdict(list))
-sequence_groups: Dict[int, List[int]] = field(default_factory=dict)
+occupied_blocks: dict[int, List[Tuple[int, int]]] = field(default_factory=lambda: defaultdict(list))
+occupied_blocks_per_sequence: dict[int, List[int]] = field(default_factory=lambda: defaultdict(list))
+sequence_groups: dict[int, List[int]] = field(default_factory=dict)


def load_data(dump_dir: pathlib.Path) -> List[StepDumpData]:
retval = []
num_step_files = 0
-step_file_names_dict: Dict[int, List[pathlib.Path]] = defaultdict(list)
+step_file_names_dict: dict[int, List[pathlib.Path]] = defaultdict(list)

for f in dump_dir.iterdir():
if f.is_file() and f.suffix == '.txt' and 'usage' not in f.name:
@@ -110,7 +109,7 @@ def draw_from_step_data(plot_axes: plt.Axes, step_data: StepDumpData) -> plt.Axe
occupied_blocks_per_sequence = step_data.occupied_blocks_per_sequence
sequence_groups = step_data.sequence_groups

-seq_id_to_sequence_group_id: Dict[int, int] = { seq_id: seq_group_id for seq_group_id, seq_id_list in sequence_groups.items() for seq_id in seq_id_list }
+seq_id_to_sequence_group_id: dict[int, int] = { seq_id: seq_group_id for seq_group_id, seq_id_list in sequence_groups.items() for seq_id in seq_id_list }

nrows = 1
ncols = num_blocks // nrows
@@ -195,7 +194,7 @@ def draw_from_step_data(plot_axes: plt.Axes, step_data: StepDumpData) -> plt.Axe


def load_and_draw_usage(plot_axes: plt.Axes, usage_dump_file: pathlib.Path, current_step: int, allocated_usage_series: List[float], eviction_relation='before') -> Tuple[plt.Axes, float, Tuple[List, List]]:
-usage_values: Dict[int, Tuple[float, float]] = {}
+usage_values: dict[int, Tuple[float, float]] = {}
with open(usage_dump_file, "r") as f:
while True:
before_eviction_line = f.readline()
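The StepDumpData hunk above shows that builtin generics also work in dataclass field annotations; mutable container defaults still require `field(default_factory=...)` so each instance gets its own container. A standalone sketch of the pattern, with a hypothetical class name:

    from collections import defaultdict
    from dataclasses import dataclass, field

    @dataclass
    class BlockMap:  # hypothetical name
        # A bare `= {}` default would be rejected by dataclasses (and would
        # otherwise be shared across instances); default_factory builds a
        # fresh container per instance.
        occupied: dict[int, list[tuple[int, int]]] = field(
            default_factory=lambda: defaultdict(list))
        groups: dict[int, list[int]] = field(default_factory=dict)
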
8 changes: 2 additions & 6 deletions tools/llm_bench/llm_bench_utils/ov_model_classes.py
@@ -5,7 +5,7 @@
import time
import inspect
from pathlib import Path
-from typing import Optional, Union, Dict, List, Tuple, Callable, Iterable, Any
+from typing import Optional, Union, List, Tuple
from tempfile import TemporaryDirectory
import PIL
import numpy as np
@@ -15,13 +15,9 @@
from diffusers.utils import PIL_INTERPOLATION
from diffusers.pipelines.pipeline_utils import DiffusionPipeline, ImagePipelineOutput
from optimum.intel.openvino import OVModelForCausalLM
from optimum.intel.openvino.utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME
from openvino import Model, Core, Tensor, Type
from transformers import PretrainedConfig
from transformers.modeling_outputs import CausalLMOutputWithPast, ModelOutput
from transformers import GenerationConfig, StoppingCriteriaList
from transformers.generation.logits_process import LogitsProcessorList, LogitsProcessor
from transformers.generation.utils import GenerateOutput


class OVMPTModel(OVModelForCausalLM):
@@ -283,7 +279,7 @@ def __init__(
config: PretrainedConfig = None,
device: str = 'CPU',
dynamic_shapes: bool = True,
-ov_config: Optional[Dict[str, str]] = None,
+ov_config: Optional[dict[str, str]] = None,
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
**kwargs,
):
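On Python >= 3.10, signatures like `ov_config: Optional[dict[str, str]]` above could go one step further and use a PEP 604 union — a possible follow-up, not something this PR does:

    def apply_ov_config(ov_config: dict[str, str] | None = None) -> dict[str, str]:
        # Hypothetical helper illustrating the `X | None` spelling
        return dict(ov_config or {})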