Replace load_model's max_preprocessing_threads parameter with preprocessor_config and resampler_config

istupakov · istupakov · commit 03f13c0884f9 · 2025-12-08T06:48:17.000+07:00
diff --git a/examples/performance benchmark.ipynb b/examples/performance benchmark.ipynb
@@ -20,9 +20,7 @@
     "import onnx_asr\n",
     "from onnx_asr.utils import read_wav_files\n",
     "\n",
-    "model = onnx_asr.load_model(\n",
-    "    \"gigaam-v3-ctc\", providers=[\"CUDAExecutionProvider\", \"CPUExecutionProvider\"], max_preprocessing_threads=None\n",
-    ")"
+    "model = onnx_asr.load_model(\"gigaam-v3-ctc\", providers=[\"CUDAExecutionProvider\", \"CPUExecutionProvider\"])"
    ]
   },
   {
diff --git a/src/onnx_asr/loader.py b/src/onnx_asr/loader.py
@@ -163,7 +163,8 @@ def load_model(  # noqa: C901
     providers: Sequence[str | tuple[str, dict[Any, Any]]] | None = None,
     provider_options: Sequence[dict[Any, Any]] | None = None,
     cpu_preprocessing: bool = True,
-    max_preprocessing_threads: int | None = 1,
+    preprocessor_config: PreprocessorRuntimeConfig | None = None,
+    resampler_config: OnnxSessionOptions | None = None,
 ) -> TextResultsAsrAdapter:
     """Load ASR model.
 
@@ -187,7 +188,8 @@ def load_model(  # noqa: C901
         providers: Optional providers for onnxruntime.
         provider_options: Optional provider_options for onnxruntime.
         cpu_preprocessing: Run preprocessors on CPU.
-        max_preprocessing_threads: Max parallel preprocessing threads (None - auto, 1 - without parallel processing).
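+        preprocessor_config: Preprocessor ONNX session options and concurrency config (None - derived from the options above, single worker).
+        resampler_config: Resampler ONNX session options (None - derived from the options above).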
 
     Returns:
         ASR model class.
@@ -281,24 +283,25 @@ def load_model(  # noqa: C901
         case _:
             raise ModelNotSupportedError(model)
 
-    onnx_options: OnnxSessionOptions = {
+    onnx_options: PreprocessorRuntimeConfig = {
         "sess_options": sess_options,
         "providers": providers or rt.get_available_providers(),
         "provider_options": provider_options,
     }
 
-    preprocessing_onnx_options: OnnxSessionOptions = {"sess_options": sess_options} if cpu_preprocessing else onnx_options
-    if max_preprocessing_threads != 1:
-        preprocessing_sess_options = preprocessing_onnx_options["sess_options"] or rt.SessionOptions()
-        preprocessing_sess_options.intra_op_num_threads = 1
-        preprocessing_onnx_options["sess_options"] = preprocessing_sess_options
+    if resampler_config is None:
+        resampler_config = {"sess_options": sess_options} if cpu_preprocessing else onnx_options
+
+    if preprocessor_config is None:
+        preprocessor_config = {"sess_options": sess_options} if cpu_preprocessing else onnx_options
+        preprocessor_config |= {"max_concurrent_workers": 1}
 
     return TextResultsAsrAdapter(
         model_type(
             _find_files(path, repo_id, model_type._get_model_files(quantization)),
-            AsrRuntimeConfig(onnx_options, PreprocessorRuntimeConfig(preprocessing_onnx_options, max_preprocessing_threads)),
+            AsrRuntimeConfig(onnx_options, preprocessor_config),
         ),
-        Resampler(model_type._get_sample_rate(), preprocessing_onnx_options),
+        Resampler(model_type._get_sample_rate(), resampler_config),
     )
 
 
diff --git a/src/onnx_asr/preprocessors/preprocessor.py b/src/onnx_asr/preprocessors/preprocessor.py
@@ -1,7 +1,6 @@
 """ASR preprocessor implementations."""
 
 from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass, field
 from importlib.resources import files
 from pathlib import Path
 
@@ -12,12 +11,11 @@
 from onnx_asr.utils import OnnxSessionOptions, is_float32_array, is_int64_array
 
 
-@dataclass()
-class PreprocessorRuntimeConfig:
+class PreprocessorRuntimeConfig(OnnxSessionOptions, total=False):
     """Preprocessor runtime config."""
 
-    onnx_options: OnnxSessionOptions = field(default_factory=OnnxSessionOptions)
-    max_concurrent_workers: int | None = 1
+    max_concurrent_workers: int | None
+    """Max parallel preprocessing threads (None - auto, 1 - without parallel processing)."""
 
 
 class Preprocessor:
@@ -31,15 +29,12 @@ def __init__(self, name: str, runtime_config: PreprocessorRuntimeConfig):
             runtime_config: Runtime configuration.
 
         """
+        self._max_concurrent_workers = runtime_config.pop("max_concurrent_workers", 1)
         if name == "identity":
             self._preprocessor = None
-            return
-
-        filename = str(Path(name).with_suffix(".onnx"))
-        self._preprocessor = rt.InferenceSession(
-            files(__package__).joinpath(filename).read_bytes(), **runtime_config.onnx_options
-        )
-        self._max_concurrent_workers = runtime_config.max_concurrent_workers
+        else:
+            filename = str(Path(name).with_suffix(".onnx"))
+            self._preprocessor = rt.InferenceSession(files(__package__).joinpath(filename).read_bytes(), **runtime_config)
 
     def _preprocess(
         self, waveforms: npt.NDArray[np.float32], waveforms_lens: npt.NDArray[np.int64]

Original file line number	Diff line number	Diff line change
`@@ -20,9 +20,7 @@`
`20`	`20`	`"import onnx_asr\n",`
`21`	`21`	`"from onnx_asr.utils import read_wav_files\n",`
`22`	`22`	`"\n",`
`23`		`- "model = onnx_asr.load_model(\n",`
`24`		`- " \"gigaam-v3-ctc\", providers=[\"CUDAExecutionProvider\", \"CPUExecutionProvider\"], max_preprocessing_threads=None\n",`
`25`		`- ")"`
	`23`	`+ "model = onnx_asr.load_model(\"gigaam-v3-ctc\", providers=[\"CUDAExecutionProvider\", \"CPUExecutionProvider\"])"`
`26`	`24`	`]`
`27`	`25`	`},`
`28`	`26`	`{`