Add MyPy type checking to CI and the lint tooling. Fix the type errors it reports.

istupakov · istupakov · commit 6bd713919b2c · 2025-04-25T20:07:52.000Z
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -26,10 +26,12 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
-      run: pdm install
+      run: pdm sync
     - name: Lint code with Ruff
       run: pdm run ruff check --output-format=github
     - name: Check code formatting with Ruff
       run: pdm run ruff format --diff
+    - name: Check types with MyPy
+      run: pdm run mypy .
     - name: Test with pytest
       run: pdm run pytest
diff --git a/pdm.lock b/pdm.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -48,8 +48,8 @@ onnx-asr = "onnx_asr.cli:run"
 build = [
     "onnx>=1.17.0",
     "onnxscript>=0.2.5",
-    "torch~=2.6.0",
-    "torchaudio~=2.6.0",
+    "torch>=2.6.0",
+    "torchaudio>=2.6.0",
 ]
 asrs = [
     "kaldi-native-fbank>=1.21.1",
@@ -62,7 +62,7 @@ test = [
     { include-group = "build" },
     { include-group = "asrs" },
 ]
-lint = ["ruff>=0.11.6"]
+lint = ["ruff>=0.11.6", "mypy>=1.15.0"]
 
 [tool.pdm]
 distribution = true
@@ -76,19 +76,33 @@ source-includes = ["preprocessors", "tests"]
 [tool.pdm.scripts]
 build_preprocessors = { call = "preprocessors.build:build" }
 post_install = { composite = ["build_preprocessors"] }
-pre_build = { composite = ["pdm install --with build"] }
+pre_build = { composite = ["pdm sync --group build"] }
+lint = { composite = ["ruff format --diff", "ruff check", "mypy ."] }
 
 [[tool.pdm.source]]
 name = "torch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 include_packages = ["torch*"]
 
+[tool.mypy]
+python_version = "3.10"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+pretty = true
+exclude = ['^preprocessors.', '^tests.']
+
+[[tool.mypy.overrides]]
+module = ["onnxruntime.*"]
+follow_untyped_imports = true
+
 [tool.ruff]
 line-length = 130
 indent-width = 4
 target-version = "py310"
 
 [tool.ruff.lint]
+exclude = ["*.ipynb"]
 select = [
     "B",    # flake8-bugbear
     "C4",   # flake8-comprehensions
@@ -114,6 +128,7 @@ select = [
     "W",    # pycodestyle
     "YTT",  # flake8-2020
 ]
+ignore = ["D203", "D213"]
 
 [tool.ruff.lint.per-file-ignores]
 "tests/*" = ["D100", "D103", "D104"]
@@ -123,5 +138,6 @@ select = [
 filterwarnings = [
     "ignore::DeprecationWarning:onnxscript.*",
     "ignore::DeprecationWarning:google.protobuf.*",
+    "ignore::DeprecationWarning:torchmetrics.*",
     "ignore::FutureWarning:onnxscript.*",
 ]
diff --git a/src/onnx_asr/asr.py b/src/onnx_asr/asr.py
@@ -2,7 +2,7 @@
 
 import re
 from abc import ABC, abstractmethod
-from collections.abc import Iterator
+from collections.abc import Iterable
 from pathlib import Path
 from typing import Any
 
@@ -46,7 +46,7 @@ def recognize(
 class _AsrWithDecoding(Asr):
     DECODE_SPACE_PATTERN = re.compile(r"\A\u2581|\u2581\B|(\u2581)\b")
 
-    def __init__(self, preprocessor_name: Preprocessor.PreprocessorNames, vocab_path: Path, **kwargs):
+    def __init__(self, preprocessor_name: str, vocab_path: Path, **kwargs: Any):
         self._preprocessor = Preprocessor(preprocessor_name, **kwargs)
         with Path(vocab_path).open("rt") as f:
             tokens = {token: int(id) for token, id in (line.strip("\n").split(" ") for line in f.readlines())}
@@ -59,7 +59,7 @@ def _encode(
     ) -> tuple[npt.NDArray[np.float32], npt.NDArray[np.int64]]: ...
 
     @abstractmethod
-    def _decoding(self, encoder_out: npt.NDArray[np.float32], encoder_out_lens: npt.NDArray[np.int64]) -> Iterator[list[int]]: ...
+    def _decoding(self, encoder_out: npt.NDArray[np.float32], encoder_out_lens: npt.NDArray[np.int64]) -> Iterable[list[int]]: ...
 
     def _decode_tokens(self, tokens: list[int]) -> str:
         text = "".join([self._vocab[i] for i in tokens])
@@ -70,7 +70,7 @@ def _recognize_batch(self, waveforms: list[npt.NDArray[np.float32]], language: s
 
 
 class _AsrWithCtcDecoding(_AsrWithDecoding):
-    def _decoding(self, encoder_out: npt.NDArray[np.float32], encoder_out_lens: npt.NDArray[np.int64]) -> Iterator[list[int]]:
+    def _decoding(self, encoder_out: npt.NDArray[np.float32], encoder_out_lens: npt.NDArray[np.int64]) -> Iterable[list[int]]:
         assert encoder_out.shape[-1] <= len(self._vocab)
 
         for log_probs, log_probs_len in zip(encoder_out, encoder_out_lens, strict=True):
@@ -82,21 +82,21 @@ def _decoding(self, encoder_out: npt.NDArray[np.float32], encoder_out_lens: npt.
 
 class _AsrWithRnntDecoding(_AsrWithDecoding):
     @abstractmethod
-    def _create_state(self) -> Any: ...
+    def _create_state(self) -> tuple: ...
 
     @property
     @abstractmethod
     def _max_tokens_per_step(self) -> int: ...
 
     @abstractmethod
     def _decode(
-        self, prev_tokens: list[int], prev_state: Any, encoder_out: npt.NDArray[np.float32]
-    ) -> tuple[npt.NDArray[np.float32], Any]: ...
+        self, prev_tokens: list[int], prev_state: tuple, encoder_out: npt.NDArray[np.float32]
+    ) -> tuple[npt.NDArray[np.float32], tuple]: ...
 
-    def _decoding(self, encoder_out: npt.NDArray[np.float32], encoder_out_lens: npt.NDArray[np.int64]) -> Iterator[list[int]]:
+    def _decoding(self, encoder_out: npt.NDArray[np.float32], encoder_out_lens: npt.NDArray[np.int64]) -> Iterable[list[int]]:
         for encodings, encodings_len in zip(encoder_out, encoder_out_lens, strict=True):
             prev_state = self._create_state()
-            tokens = []
+            tokens: list[int] = []
 
             for t in range(encodings_len):
                 emitted_tokens = 0
diff --git a/src/onnx_asr/cli.py b/src/onnx_asr/cli.py
@@ -8,7 +8,7 @@
 from onnx_asr.loader import ModelNames, ModelTypes
 
 
-def run():
+def run() -> None:
     """Run CLI for ASR models."""
     parser = argparse.ArgumentParser(prog="onnx_asr", description="Automatic Speech Recognition in Python using ONNX models.")
     parser.add_argument(
diff --git a/src/onnx_asr/loader.py b/src/onnx_asr/loader.py
@@ -2,7 +2,7 @@
 
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Any, Literal, get_args
+from typing import Literal, get_args
 
 import onnxruntime as rt
 
@@ -39,7 +39,17 @@
 ModelVersions = Literal["int8"] | None
 
 
-def _get_model_class(model: str):
+def _get_model_class(
+    model: str,
+) -> (
+    type[GigaamV2Ctc]
+    | type[GigaamV2Rnnt]
+    | type[KaldiTransducer]
+    | type[NemoConformerCtc]
+    | type[NemoConformerRnnt]
+    | type[WhisperOrt]
+    | type[WhisperHf]
+):
     match model.split("/"):
         case ("gigaam-v2-ctc",):
             return GigaamV2Ctc
@@ -61,10 +71,10 @@ def _get_model_class(model: str):
             raise ValueError(f"Model '{model}' not supported!")  # noqa: TRY003
 
 
-def _resolve_paths(path: str | Path, model_files: dict[str, str]):
+def _resolve_paths(path: str | Path, model_files: dict[str, str]) -> dict[str, Path]:
     assert Path(path).is_dir(), f"The path '{path}' is not a directory."
 
-    def find(filename):
+    def find(filename: str) -> Path:
         files = list(Path(path).glob(filename))
         assert len(files) > 0, f"File '{filename}' not found in path '{path}'."
         assert len(files) == 1, f"Found more than 1 file '{filename}' found in path '{path}'."
@@ -73,7 +83,7 @@ def find(filename):
     return {key: find(filename) for key, filename in model_files.items()}
 
 
-def _download_model(model: ModelNames, files: list[str]) -> str:
+def _download_model(model: str, files: list[str]) -> str:
     from huggingface_hub import snapshot_download
 
     match model:
@@ -94,7 +104,7 @@ def load_model(
     model: str | ModelNames | ModelTypes,
     path: str | Path | None = None,
     quantization: str | None = None,
-    providers: Sequence[str | tuple[str, dict[Any, Any]]] | None = None,
+    providers: Sequence[str | tuple[str, dict]] | None = None,
 ) -> Asr:
     """Load ASR model.
 
@@ -122,7 +132,7 @@ def load_model(
         assert model in get_args(ModelNames) or model.startswith("onnx-community/"), (
             "If the path is not specified, you must specify a specific model name."
         )
-        path = _download_model(model, list(files.values()))  # type: ignore
+        path = _download_model(model, list(files.values()))
 
     if providers is None:
         providers = rt.get_available_providers()
diff --git a/src/onnx_asr/models/gigaam.py b/src/onnx_asr/models/gigaam.py
@@ -1,6 +1,7 @@
 """GigaAM v2 model implementations."""
 
 from pathlib import Path
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
@@ -10,7 +11,7 @@
 
 
 class _GigaamV2(_AsrWithDecoding):
-    def __init__(self, model_files: dict[str, Path], **kwargs):
+    def __init__(self, model_files: dict[str, Path], **kwargs: Any):
         super().__init__("gigaam", model_files["vocab"], **kwargs)
 
     @staticmethod
@@ -21,7 +22,7 @@ def _get_model_files(quantization: str | None = None) -> dict[str, str]:
 class GigaamV2Ctc(_AsrWithCtcDecoding, _GigaamV2):
     """GigaAM v2 CTC model implementation."""
 
-    def __init__(self, model_files: dict[str, Path], **kwargs):
+    def __init__(self, model_files: dict[str, Path], **kwargs: Any):
         """Create GigaAM v2 CTC model.
 
         Args:
@@ -50,7 +51,7 @@ class GigaamV2Rnnt(_AsrWithRnntDecoding, _GigaamV2):
     PRED_HIDDEN = 320
     STATE_TYPE = tuple[npt.NDArray[np.float32], npt.NDArray[np.float32]]
 
-    def __init__(self, model_files: dict[str, Path], **kwargs):
+    def __init__(self, model_files: dict[str, Path], **kwargs: Any):
         """Create GigaAM v2 RNN-T model.
 
         Args:
diff --git a/src/onnx_asr/models/kaldi.py b/src/onnx_asr/models/kaldi.py
@@ -1,6 +1,7 @@
 """Kaldi model implementations."""
 
 from pathlib import Path
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
@@ -14,7 +15,7 @@ class KaldiTransducer(_AsrWithRnntDecoding):
 
     CONTEXT_SIZE = 2
 
-    def __init__(self, model_files: dict[str, Path], **kwargs):
+    def __init__(self, model_files: dict[str, Path], **kwargs: Any):
         """Create Kaldi Transducer model.
 
         Args:
@@ -49,12 +50,12 @@ def _encode(
         )
         return encoder_out.transpose(0, 2, 1), encoder_out_lens
 
-    def _create_state(self) -> None:
-        return None
+    def _create_state(self) -> tuple:
+        return ()
 
     def _decode(
-        self, prev_tokens: list[int], prev_state: None, encoder_out: npt.NDArray[np.float32]
-    ) -> tuple[npt.NDArray[np.float32], None]:
+        self, prev_tokens: list[int], prev_state: tuple, encoder_out: npt.NDArray[np.float32]
+    ) -> tuple[npt.NDArray[np.float32], tuple]:
         (decoder_out,) = self._decoder.run(["decoder_out"], {"y": [[-1, self._blank_idx, *prev_tokens][-self.CONTEXT_SIZE :]]})
         (logit,) = self._joiner.run(["logit"], {"encoder_out": encoder_out[None, :], "decoder_out": decoder_out})
-        return np.squeeze(logit), None
+        return np.squeeze(logit), prev_state
diff --git a/src/onnx_asr/models/nemo.py b/src/onnx_asr/models/nemo.py
@@ -1,6 +1,7 @@
 """NeMo model implementations."""
 
 from pathlib import Path
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
@@ -10,7 +11,7 @@
 
 
 class _NemoConformer(_AsrWithDecoding):
-    def __init__(self, model_files: dict[str, Path], **kwargs):
+    def __init__(self, model_files: dict[str, Path], **kwargs: Any):
         super().__init__("nemo", model_files["vocab"], **kwargs)
 
     @staticmethod
@@ -21,7 +22,7 @@ def _get_model_files(quantization: str | None = None) -> dict[str, str]:
 class NemoConformerCtc(_AsrWithCtcDecoding, _NemoConformer):
     """NeMo Conformer CTC model implementations."""
 
-    def __init__(self, model_files: dict[str, Path], **kwargs):
+    def __init__(self, model_files: dict[str, Path], **kwargs: Any):
         """Create NeMo Conformer CTC model.
 
         Args:
@@ -57,7 +58,7 @@ class NemoConformerRnnt(_AsrWithRnntDecoding, _NemoConformer):
     MAX_TOKENS_PER_STEP = 10
     STATE_TYPE = tuple[npt.NDArray[np.float32], npt.NDArray[np.float32]]
 
-    def __init__(self, model_files: dict[str, Path], **kwargs):
+    def __init__(self, model_files: dict[str, Path], **kwargs: Any):
         """Create NeMo Conformer RNN-T model.
 
         Args:
diff --git a/src/onnx_asr/models/whisper.py b/src/onnx_asr/models/whisper.py
diff --git a/src/onnx_asr/preprocessors/preprocessor.py b/src/onnx_asr/preprocessors/preprocessor.py
diff --git a/src/onnx_asr/utils.py b/src/onnx_asr/utils.py