diff --git a/samples/python/text_generation/limit_checker.py b/samples/python/text_generation/limit_checker.py
index 66d8928286..47622e3de3 100644
--- a/samples/python/text_generation/limit_checker.py
+++ b/samples/python/text_generation/limit_checker.py
@@ -5,7 +5,7 @@
 import csv
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import List, Optional
 from tqdm import tqdm

 from optimum.intel.openvino import OVModelForCausalLM
@@ -60,13 +60,13 @@ def retry_request(func, retries=5):
         else:
             raise e

-def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]:
+def load_prompts_dataset(file_name : str) -> dict[str, List[str]]:
     TESTS_ROOT = Path('tests/python_tests')
     file_path = TESTS_ROOT / 'data' / file_name
     with open(file_path, 'r') as f:
         return {"prompts": [s for s in f]}

-def load_samsum_dataset(file_name : str) -> Dict[str, List[str]]:
+def load_samsum_dataset(file_name : str) -> dict[str, List[str]]:
     import json
     retval = {"prompts": []}
     with open(file_name, 'r') as json_file:
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index bf8fac51c0..1d4a2b5dbd 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -939,7 +939,7 @@ class ImageGenerationPerfMetrics:
     If mean and std were already calculated, getters return cached values.

     :param get_text_encoder_infer_duration: Returns the inference duration of every text encoder in milliseconds.
-    :type get_text_encoder_infer_duration: Dict[str, float]
+    :type get_text_encoder_infer_duration: dict[str, float]

     :param get_vae_encoder_infer_duration: Returns the inference duration of vae encoder in milliseconds.
     :type get_vae_encoder_infer_duration: float
@@ -1120,13 +1120,13 @@ class LLMPipeline:
        :type inputs: str, List[str], ov.genai.TokenizedInputs, or ov.Tensor

        :param generation_config: generation_config
-        :type generation_config: GenerationConfig or a Dict
+        :type generation_config: GenerationConfig or a dict

        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
        :type : Callable[[str], bool], ov.genai.StreamerBase

        :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-        :type : Dict
+        :type : dict

        :return: return results in encoded, or decoded form depending on inputs type
        :rtype: DecodedResults, EncodedResults, str
@@ -1217,13 +1217,13 @@ class LLMPipeline:
        :type inputs: str, List[str], ov.genai.TokenizedInputs, or ov.Tensor

        :param generation_config: generation_config
-        :type generation_config: GenerationConfig or a Dict
+        :type generation_config: GenerationConfig or a dict

        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
        :type : Callable[[str], bool], ov.genai.StreamerBase

        :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-        :type : Dict
+        :type : dict

        :return: return results in encoded, or decoded form depending on inputs type
        :rtype: DecodedResults, EncodedResults, str
@@ -2104,7 +2104,7 @@ class VLMPipeline:
     def __init__(self, models: dict[str, tuple[str, openvino._pyopenvino.Tensor]], tokenizer: Tokenizer, config_dir_path: os.PathLike, device: str, generation_config: GenerationConfig | None = None, **kwargs) -> None:
         """
         VLMPipeline class constructor.
-        models (typing.Dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
+        models (dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
         tokenizer (Tokenizer): Genai Tokenizers.
         config_dir_path (os.PathLike): Path to folder with model configs.
         device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
@@ -2136,13 +2136,13 @@ class VLMPipeline:
        :type images: List[ov.Tensor] or ov.Tensor

        :param generation_config: generation_config
-        :type generation_config: GenerationConfig or a Dict
+        :type generation_config: GenerationConfig or a dict

        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
        :type : Callable[[str], bool], ov.genai.StreamerBase

        :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-        :type : Dict
+        :type : dict

        :return: return results in decoded form
        :rtype: VLMDecodedResults
@@ -2170,13 +2170,13 @@ class VLMPipeline:
        :type images: List[ov.Tensor] or ov.Tensor

        :param generation_config: generation_config
-        :type generation_config: GenerationConfig or a Dict
+        :type generation_config: GenerationConfig or a dict

        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
        :type : Callable[[str], bool], ov.genai.StreamerBase

        :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-        :type : Dict
+        :type : dict

        :return: return results in decoded form
        :rtype: VLMDecodedResults
@@ -2318,7 +2318,7 @@ class WhisperGenerationConfig(GenerationConfig):
     :type language: Optional[str]

     :param lang_to_id: Language token to token_id map. Initialized from the generation_config.json lang_to_id dictionary.
-    :type lang_to_id: Dict[str, int]
+    :type lang_to_id: dict[str, int]

     :param task: Task to use for generation, either “translate” or “transcribe”
     :type task: int
@@ -2457,14 +2457,14 @@ class WhisperPipeline:
        :type raw_speech_input: List[float]

        :param generation_config: generation_config
-        :type generation_config: WhisperGenerationConfig or a Dict
+        :type generation_config: WhisperGenerationConfig or a dict

        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
                         Streamer supported for short-form audio (< 30 seconds) with `return_timestamps=False` only
        :type : Callable[[str], bool], ov.genai.StreamerBase

        :param kwargs: arbitrary keyword arguments with keys corresponding to WhisperGenerationConfig fields.
-        :type : Dict
+        :type : dict

        :return: return results in decoded form
        :rtype: WhisperDecodedResults
@@ -2505,7 +2505,7 @@ class WhisperPipeline:
     :type language: Optional[str]

     :param lang_to_id: Language token to token_id map. Initialized from the generation_config.json lang_to_id dictionary.
-    :type lang_to_id: Dict[str, int]
+    :type lang_to_id: dict[str, int]

     :param task: Task to use for generation, either “translate” or “transcribe”
     :type task: int
diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp
index badf539252..3254318368 100644
--- a/src/python/py_image_generation_pipelines.cpp
+++ b/src/python/py_image_generation_pipelines.cpp
@@ -90,7 +90,7 @@ auto image_generation_perf_metrics_docstring = R"(
     If mean and std were already calculated, getters return cached values.

     :param get_text_encoder_infer_duration: Returns the inference duration of every text encoder in milliseconds.
-    :type get_text_encoder_infer_duration: Dict[str, float]
+    :type get_text_encoder_infer_duration: dict[str, float]

     :param get_vae_encoder_infer_duration: Returns the inference duration of vae encoder in milliseconds.
     :type get_vae_encoder_infer_duration: float
diff --git a/src/python/py_llm_pipeline.cpp b/src/python/py_llm_pipeline.cpp
index 8741577eef..2fab18ceb5 100644
--- a/src/python/py_llm_pipeline.cpp
+++ b/src/python/py_llm_pipeline.cpp
@@ -34,13 +34,13 @@ auto generate_docstring = R"(
     :type inputs: str, List[str], ov.genai.TokenizedInputs, or ov.Tensor

     :param generation_config: generation_config
-    :type generation_config: GenerationConfig or a Dict
+    :type generation_config: GenerationConfig or a dict

     :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
     :type : Callable[[str], bool], ov.genai.StreamerBase

     :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-    :type : Dict
+    :type : dict

     :return: return results in encoded, or decoded form depending on inputs type
     :rtype: DecodedResults, EncodedResults, str
diff --git a/src/python/py_vlm_pipeline.cpp b/src/python/py_vlm_pipeline.cpp
index e9bac70b4e..8af8c2a6be 100644
--- a/src/python/py_vlm_pipeline.cpp
+++ b/src/python/py_vlm_pipeline.cpp
@@ -40,13 +40,13 @@ auto vlm_generate_docstring = R"(
     :type images: List[ov.Tensor] or ov.Tensor

     :param generation_config: generation_config
-    :type generation_config: GenerationConfig or a Dict
+    :type generation_config: GenerationConfig or a dict

     :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
     :type : Callable[[str], bool], ov.genai.StreamerBase

     :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
-    :type : Dict
+    :type : dict

     :return: return results in decoded form
     :rtype: VLMDecodedResults
@@ -192,7 +192,7 @@ void init_vlm_pipeline(py::module_& m) {
        py::arg("generation_config") = std::nullopt, "generation config",
        R"(
            VLMPipeline class constructor.
-            models (typing.Dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
+            models (dict[str, typing.Tuple[str, openvino.Tensor]]): A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
            tokenizer (Tokenizer): Genai Tokenizers.
            config_dir_path (os.PathLike): Path to folder with model configs.
            device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
diff --git a/src/python/py_whisper_pipeline.cpp b/src/python/py_whisper_pipeline.cpp
index de25d8291a..bea058a0f7 100644
--- a/src/python/py_whisper_pipeline.cpp
+++ b/src/python/py_whisper_pipeline.cpp
@@ -42,14 +42,14 @@ auto whisper_generate_docstring = R"(
     :type raw_speech_input: List[float]

     :param generation_config: generation_config
-    :type generation_config: WhisperGenerationConfig or a Dict
+    :type generation_config: WhisperGenerationConfig or a dict

     :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
                      Streamer supported for short-form audio (< 30 seconds) with `return_timestamps=False` only
     :type : Callable[[str], bool], ov.genai.StreamerBase

     :param kwargs: arbitrary keyword arguments with keys corresponding to WhisperGenerationConfig fields.
-    :type : Dict
+    :type : dict

     :return: return results in decoded form
     :rtype: WhisperDecodedResults
@@ -109,7 +109,7 @@ auto whisper_generation_config_docstring = R"(
     :type language: Optional[str]

     :param lang_to_id: Language token to token_id map. Initialized from the generation_config.json lang_to_id dictionary.
-    :type lang_to_id: Dict[str, int]
+    :type lang_to_id: dict[str, int]

     :param task: Task to use for generation, either “translate” or “transcribe”
     :type task: int
diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py
index 3c3ebc8f6e..61163b2f44 100644
--- a/tests/python_tests/test_continuous_batching.py
+++ b/tests/python_tests/test_continuous_batching.py
@@ -7,7 +7,6 @@
 from pathlib import Path
 from shutil import rmtree
-from typing import Dict

 from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, GenerationConfig, SchedulerConfig, draft_model, GenerationFinishReason

@@ -132,7 +131,7 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):

 @pytest.mark.parametrize("model_id", get_chat_models_list())
 @pytest.mark.parametrize("pipeline_type", [PipelineType.PAGED_ATTENTION, PipelineType.PROMPT_LOOKUP_DECODING, PipelineType.SPECULATIVE_DECODING] )
 @pytest.mark.precommit
-def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: Dict, pipeline_type):
+def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pipeline_type):
     _, _, models_path = download_and_convert_model(model_id)
     ov_pipe = create_ov_pipeline(models_path, pipeline_type=PipelineType.STATEFUL)
@@ -172,7 +171,7 @@ def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: Dict, pip

 @pytest.mark.parametrize("model_id", get_chat_models_list())
 @pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
 @pytest.mark.precommit
-def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: Dict, pipeline_type):
+def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: dict, pipeline_type):
     _, _, models_path = download_and_convert_model(model_id)
     cb_pipe = create_ov_cb_pipeline(models_path, pipeline_type=pipeline_type)
diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py
index a3dd425918..2cddc963f0 100644
--- a/tests/python_tests/test_kv_cache_eviction.py
+++ b/tests/python_tests/test_kv_cache_eviction.py
@@ -6,7 +6,7 @@
 import pytest
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import List, Optional
 from tqdm import tqdm

 from openvino_genai import ContinuousBatchingPipeline, SchedulerConfig, GenerationConfig, CacheEvictionConfig, AggregationMode
@@ -18,7 +18,7 @@
 from data.test_dataset import get_test_dataset


-def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]:
+def load_prompts_dataset(file_name : str) -> dict[str, List[str]]:
     TESTS_ROOT = Path(__file__).parent
     file_path = TESTS_ROOT / 'data' / file_name
     with open(file_path, 'r', encoding="utf-8") as f:
diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py
index 3188926faa..b69eb43373 100644
--- a/tests/python_tests/test_tokenizer.py
+++ b/tests/python_tests/test_tokenizer.py
@@ -11,7 +11,7 @@
 import typing
 import functools
 from transformers import AutoTokenizer
-from typing import Dict, Tuple, List
+from typing import Tuple, List
 import functools

 from utils.hugging_face import convert_and_save_tokenizer
@@ -172,7 +172,7 @@ def test_decode(model_id, encoded_prompt):
 @pytest.mark.nightly
 @pytest.mark.parametrize('chat_config', get_chat_templates())
 @pytest.mark.parametrize("model_id", get_models_list())
-def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict], model_id):
+def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, dict], model_id):
     tokenizer_config = chat_config[1]

     # Will load openvino_model for tiny-random-phi as a placeholder
diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py
index 1adacc81a7..4ed726c7f0 100644
--- a/tests/python_tests/test_whisper_pipeline.py
+++ b/tests/python_tests/test_whisper_pipeline.py
@@ -20,7 +20,7 @@
 from utils.constants import get_ov_cache_models_dir, extra_generate_kwargs
 from utils.network import retry_request
-from typing import Any, List, Dict
+from typing import Any, List


 @pytest.fixture(scope="class", autouse=True)
 def run_gc_after_test():
@@ -211,7 +211,7 @@ def sample_from_dataset(request):
     return samples[sample_id]


-def get_fixture_params_for_n_whisper_dataset_samples(n: int, language: str = "en", long_form : bool = False) -> List[Dict[str, Any]]:
+def get_fixture_params_for_n_whisper_dataset_samples(n: int, language: str = "en", long_form : bool = False) -> List[dict[str, Any]]:
     return [{"language": language, "long_form": long_form, "sample_id": i} for i in range(n)]

 def run_pipeline_with_ref(
diff --git a/tools/cacheviz/cacheviz.py b/tools/cacheviz/cacheviz.py
index 841a5eeb65..f0c781d459 100644
--- a/tools/cacheviz/cacheviz.py
+++ b/tools/cacheviz/cacheviz.py
@@ -17,7 +17,6 @@
 import pathlib
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import Dict
 from typing import List
 from typing import Tuple
 import matplotlib.pyplot as plt
@@ -48,15 +47,15 @@ def get_hashed_rgb_color(idx: int) -> str:
 class StepDumpData:
     dump_file_name: str = None
     num_blocks: int = None
-    occupied_blocks: Dict[int, List[Tuple[int, int]]] = field(default_factory=lambda: defaultdict(list))
-    occupied_blocks_per_sequence: Dict[int, List[int]] = field(default_factory=lambda: defaultdict(list))
-    sequence_groups: Dict[int, List[int]] = field(default_factory=dict)
+    occupied_blocks: dict[int, List[Tuple[int, int]]] = field(default_factory=lambda: defaultdict(list))
+    occupied_blocks_per_sequence: dict[int, List[int]] = field(default_factory=lambda: defaultdict(list))
+    sequence_groups: dict[int, List[int]] = field(default_factory=dict)


 def load_data(dump_dir: pathlib.Path) -> List[StepDumpData]:
     retval = []
     num_step_files = 0
-    step_file_names_dict: Dict[int, List[pathlib.Path]] = defaultdict(list)
+    step_file_names_dict: dict[int, List[pathlib.Path]] = defaultdict(list)

     for f in dump_dir.iterdir():
         if f.is_file() and f.suffix == '.txt' and 'usage' not in f.name:
@@ -110,7 +109,7 @@ def draw_from_step_data(plot_axes: plt.Axes, step_data: StepDumpData) -> plt.Axe
     occupied_blocks_per_sequence = step_data.occupied_blocks_per_sequence
     sequence_groups = step_data.sequence_groups

-    seq_id_to_sequence_group_id: Dict[int, int] = { seq_id: seq_group_id for seq_group_id, seq_id_list in sequence_groups.items() for seq_id in seq_id_list }
+    seq_id_to_sequence_group_id: dict[int, int] = { seq_id: seq_group_id for seq_group_id, seq_id_list in sequence_groups.items() for seq_id in seq_id_list }

     nrows = 1
     ncols = num_blocks // nrows
@@ -195,7 +194,7 @@ def draw_from_step_data(plot_axes: plt.Axes, step_data: StepDumpData) -> plt.Axe


 def load_and_draw_usage(plot_axes: plt.Axes, usage_dump_file: pathlib.Path, current_step: int, allocated_usage_series: List[float], eviction_relation='before') -> Tuple[plt.Axes, float, Tuple[List, List]]:
-    usage_values: Dict[int, Tuple[float, float]] = {}
+    usage_values: dict[int, Tuple[float, float]] = {}
     with open(usage_dump_file, "r") as f:
         while True:
             before_eviction_line = f.readline()
diff --git a/tools/llm_bench/llm_bench_utils/ov_model_classes.py b/tools/llm_bench/llm_bench_utils/ov_model_classes.py
index ccb4e9af1f..30caf17582 100644
--- a/tools/llm_bench/llm_bench_utils/ov_model_classes.py
+++ b/tools/llm_bench/llm_bench_utils/ov_model_classes.py
@@ -5,7 +5,7 @@
 import time
 import inspect
 from pathlib import Path
-from typing import Optional, Union, Dict, List, Tuple, Callable, Iterable, Any
+from typing import Optional, Union, List, Tuple
 from tempfile import TemporaryDirectory
 import PIL
 import numpy as np
@@ -15,13 +15,9 @@
 from diffusers.utils import PIL_INTERPOLATION
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from optimum.intel.openvino import OVModelForCausalLM
-from optimum.intel.openvino.utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME
 from openvino import Model, Core, Tensor, Type
 from transformers import PretrainedConfig
 from transformers.modeling_outputs import CausalLMOutputWithPast, ModelOutput
-from transformers import GenerationConfig, StoppingCriteriaList
-from transformers.generation.logits_process import LogitsProcessorList, LogitsProcessor
-from transformers.generation.utils import GenerateOutput


 class OVMPTModel(OVModelForCausalLM):
@@ -283,7 +279,7 @@ def __init__(
         config: PretrainedConfig = None,
         device: str = 'CPU',
         dynamic_shapes: bool = True,
-        ov_config: Optional[Dict[str, str]] = None,
+        ov_config: Optional[dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         **kwargs,
     ):
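
Note for reviewers: the change throughout this diff is a mechanical move from the typing aliases (Dict, with the typing imports trimmed accordingly) to the built-in generic syntax available since Python 3.9 (PEP 585). A minimal sketch of the before/after annotation style, modeled on the load_prompts_dataset helper touched above; the body below is a simplified stand-in for illustration, not the sample's exact implementation:

from pathlib import Path
from typing import List  # still imported in this diff; on 3.9+ it could likewise become built-in `list`

# Before this change:
#     from typing import Dict, List
#     def load_prompts_dataset(file_name: str) -> Dict[str, List[str]]: ...
#
# After this change: the built-in `dict` is subscripted directly (PEP 585, Python 3.9+),
# so no `Dict` import from typing is needed.
def load_prompts_dataset(file_name: str) -> dict[str, List[str]]:
    # Simplified stand-in body: read one prompt per line from a data file.
    file_path = Path("tests/python_tests") / "data" / file_name
    with open(file_path, "r", encoding="utf-8") as f:
        return {"prompts": [line for line in f]}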